276 lines
10 KiB
Python
276 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
ACH File Parser - Extracts data from fixed-width ACH transaction report files.
|
|
"""
|
|
|
|
import re
|
|
from logging_config import get_logger
|
|
|
|
# Module-level logger obtained from the project's logging_config.get_logger helper.
logger = get_logger(__name__)
|
|
|
|
|
|
class ACHParser:
    """Parser for paginated ACH transaction report text files.

    The file is read as UTF-8 text and split into pages on form-feed
    characters. Each non-blank page is scanned for header metadata,
    transaction rows and summary totals.

    Attributes:
        file_path: Path passed to ``open`` when ``parse`` is called.
        transactions: List of transaction dicts appended to by ``parse``.
        report_metadata: Dict that may receive the keys ``report_id``,
            ``bank_name``, ``run_date``, ``branch``, ``currency``,
            ``maker_id`` and ``checker_id``.
        summary_data: Dict that may receive the key ``tot_processed``.
    """

    def __init__(self, file_path):
        """Store the report path and create empty result containers."""
        self.file_path = file_path
        self.transactions = []
        self.report_metadata = {}
        self.summary_data = {}

    def parse(self):
        """Read the report file and parse every non-blank page.

        Returns:
            Tuple ``(transactions, report_metadata, summary_data)``.

        Raises:
            Exception: Any error raised while reading or parsing is logged
                with its traceback and then re-raised unchanged.
        """
        try:
            with open(self.file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Split by form feed to separate pages
            pages = content.split('\f')
            logger.info(f"Found {len(pages)} pages in the file")

            for page_idx, page in enumerate(pages):
                if page.strip():
                    self._parse_page(page, page_idx)

            logger.info(f"Total transactions parsed: {len(self.transactions)}")
            return self.transactions, self.report_metadata, self.summary_data

        except Exception as e:
            logger.error(f"Error parsing file: {e}", exc_info=True)
            raise

    def _parse_page(self, page, page_idx):
        """Parse one page: header metadata, transaction rows, summary lines.

        Args:
            page: Text of a single page.
            page_idx: Index of the page within the file. It is accepted for
                the caller's convenience but not used by the parsing logic.
        """
        lines = page.split('\n')

        # Header metadata is only searched for in the first 10 lines.
        for line in lines[:10]:
            if 'REPORT ID:' in line:
                self._extract_header_metadata(line)
            elif 'BRANCH:' in line:
                self._extract_branch_info(line)
            elif 'CURRENCY:' in line:
                self._extract_currency_info(line)

        # Locate the column-header row of the transaction table.
        transaction_start = None
        for i, line in enumerate(lines):
            if 'SNO CUST ACCT' in line:
                transaction_start = i + 2  # Skip header and separator
                break

        if transaction_start is not None:
            for line in lines[transaction_start:]:
                # Summary ('DEBITS'), separator ('----') and blank lines are
                # skipped rather than ending the scan, so rows that follow
                # them on the same page are still picked up.
                if 'DEBITS' in line or '----' in line or not line.strip():
                    continue
                if line.startswith('==='):
                    continue

                transaction = self._parse_transaction_line(line)
                if transaction:
                    self.transactions.append(transaction)

        # Summary lines may appear anywhere on the page.
        for line in lines:
            if 'TOT PROCESSED' in line or 'TOT TRANSACTIONS' in line:
                self._extract_summary_line(line)

    def _extract_header_metadata(self, line):
        """Extract report id, bank name and run date from the header line.

        Example line:
            REPORT ID: TF0504-01 MURSHIDABAD D C C B LTD. RUN DATE: 19/01/2026 10:32

        Each value is stored in ``self.report_metadata`` only when its
        pattern matches.
        """
        report_id_match = re.search(r'REPORT ID:\s+(\S+)', line)
        bank_name_match = re.search(r'([A-Z\s.]+)\s+RUN DATE:', line)
        date_match = re.search(r'RUN DATE:\s+(\d{2}/\d{2}/\d{4}\s+\d{2}:\d{2})', line)

        if report_id_match:
            self.report_metadata['report_id'] = report_id_match.group(1)
        if bank_name_match:
            self.report_metadata['bank_name'] = bank_name_match.group(1).strip()
        if date_match:
            self.report_metadata['run_date'] = date_match.group(1)

    def _extract_branch_info(self, line):
        """Extract the branch code (first token after ``BRANCH:``).

        Example line:
            BRANCH: 99944 ACH CR T R I C K L E F E E D T R A N S A C T I O N S
        """
        branch_match = re.search(r'BRANCH:\s+(\S+)', line)
        if branch_match:
            self.report_metadata['branch'] = branch_match.group(1)

    def _extract_currency_info(self, line):
        """Extract currency, maker id and checker id from one line.

        Example line:
            CURRENCY: INR MAKER-ID: 0009991 CHECKER-ID: 0000000
        """
        currency_match = re.search(r'CURRENCY:\s+(\S+)', line)
        maker_match = re.search(r'MAKER-ID:\s+(\S+)', line)
        checker_match = re.search(r'CHECKER-ID:\s+(\S+)', line)

        if currency_match:
            self.report_metadata['currency'] = currency_match.group(1)
        if maker_match:
            self.report_metadata['maker_id'] = maker_match.group(1)
        if checker_match:
            self.report_metadata['checker_id'] = checker_match.group(1)

    def _parse_transaction_line(self, line):
        """Parse one transaction row whose fields are separated by '-'.

        The first six '-'-separated fields are taken as serial number,
        customer account, LPG/suspense field, customer name, journal number
        and date. Everything after them is re-joined and scanned for the
        amount, the system/status code and the trailing remarks.

        Args:
            line: Raw text of the row.

        Returns:
            A dict with the keys ``sno``, ``cust_acct``, ``lpg_susp``,
            ``customer_name``, ``jrnl_no``, ``date``, ``amount``, ``sys``,
            ``message``, ``cr_suspense``, ``suspense_msg`` and ``remarks``
            (all string values; ``sys`` and ``message`` carry the same
            value, ``cr_suspense`` and ``suspense_msg`` are always empty),
            or ``None`` when the line is shorter than 20 characters, has
            fewer than six fields, does not start with a numeric serial
            number, or raises while being parsed.
        """
        line = line.rstrip()
        if len(line) < 20:
            return None

        try:
            # Split by '-' delimiter to extract main fields
            parts = [p.strip() for p in line.split('-')]
            if len(parts) < 6:
                return None

            sno = parts[0]
            if not sno or not sno.isdigit():
                return None

            cust_acct, lpg_susp, customer_name, jrnl_no, date_field = parts[1:6]

            # AMOUNT and the remaining fields live in parts[6] onwards. They
            # are re-joined because codes such as "23-DEP-PROCESSED" contain
            # the delimiter themselves.
            remaining = '-'.join(parts[6:]).strip()

            # Amount is the first run of digits/dots in the remainder.
            amount_match = re.search(r'([\d.]+)', remaining)
            amount = amount_match.group(1) if amount_match else ''

            # System/status code directly after the amount, e.g.
            # "23-DEP-PROCESSED" or "26-APPROVED".
            message = ''
            after_message = ''
            if amount_match:
                after_amount = remaining[amount_match.end():]
                sys_field_match = re.search(
                    r'\s*(\d{1,2}(?:\-[A-Z]+)*)', after_amount
                )
                if sys_field_match:
                    message = sys_field_match.group(1).strip()
                    # Slice from the match position instead of searching for
                    # the code text again: a bare code such as "50" also
                    # occurs inside an amount like "150.00", and a text
                    # search would cut the line in the middle of the amount.
                    after_message = after_amount[sys_field_match.end():]

            # Remarks are the last column, where columns are separated by
            # two or more spaces, e.g. "P0126049D07E0?IOCL LPG SUBSIDY".
            if message:
                tail = re.sub(r'^[\s\-]+', '', after_message.strip())
                columns = re.split(r'\s{2,}', tail)
            else:
                # No system code found: fall back to the whole remainder.
                columns = re.split(r'\s{2,}', remaining)
            remarks = columns[-1].strip()

            return {
                'sno': sno,
                'cust_acct': cust_acct,
                'lpg_susp': lpg_susp,
                'customer_name': customer_name,
                'jrnl_no': jrnl_no,
                'date': date_field,
                'amount': amount,
                'sys': message,
                'message': message,
                'cr_suspense': '',
                'suspense_msg': '',
                'remarks': remarks,
            }
        except Exception as e:
            # Best effort per row: a malformed line is logged and dropped.
            logger.debug(f"Error parsing transaction line: {e}")
            return None

    def _extract_summary_line(self, line):
        """Extract summary totals from a 'TOT PROCESSED' line.

        Example line:
            TOT PROCESSED 0 0.00 178 41132.29 178 41132.29

        The first four numeric tokens are stored, as strings, under
        ``self.summary_data['tot_processed']`` as debit count, debit amount,
        credit count and credit amount. When only three numeric tokens are
        present, ``credit_amount`` is the integer ``0``. Lines that do not
        contain 'TOT PROCESSED' (such as 'TOT TRANSACTIONS' lines) are
        accepted but ignored.
        """
        if 'TOT PROCESSED' in line:
            parts = line.split()
            try:
                # Find numeric values
                numbers = [p for p in parts if self._is_numeric(p)]
                if len(numbers) >= 3:
                    self.summary_data['tot_processed'] = {
                        'debit_count': numbers[0],
                        'debit_amount': numbers[1],
                        'credit_count': numbers[2],
                        'credit_amount': numbers[3] if len(numbers) > 3 else 0,
                    }
            except Exception as e:
                logger.debug(f"Error parsing summary: {e}")

    @staticmethod
    def _is_numeric(value):
        """Return True when ``float(value)`` succeeds, otherwise False."""
        try:
            float(value)
            return True
        except (TypeError, ValueError):
            return False
|
|
|
|
|
|
def print_transactions(transactions):
    """Write a left-aligned table of transactions to stdout.

    Each item must provide the keys ``sno``, ``cust_acct``,
    ``customer_name``, ``date``, ``amount`` and ``remarks``. The table is
    framed by 150-character rules and followed by the transaction count.
    """
    rule = "=" * 150
    heading = f"{'SNO':<6} {'CUST ACCT':<18} {'CUSTOMER NAME':<40} {'DATE':<12} {'AMOUNT':<12} {'REMARKS':<40}"

    print("\n" + rule)
    print(heading)
    print(rule)

    # Rows are printed one at a time so earlier rows still appear if a later
    # record is malformed.
    for txn in transactions:
        row = f"{txn['sno']:<6} {txn['cust_acct']:<18} {txn['customer_name']:<40} {txn['date']:<12} {txn['amount']:<12} {txn['remarks']:<40}"
        print(row)

    print(rule)
    print(f"Total transactions: {len(transactions)}\n")
|
|
|
|
|
|
def print_metadata(metadata):
    """Write the report metadata to stdout as ``KEY : value`` lines.

    Keys are upper-cased and left-aligned in a 20-character column; the
    listing is framed by 80-character rules under a "REPORT METADATA" title.
    """
    divider = "=" * 80

    print("\n" + divider)
    print("REPORT METADATA")
    print(divider)
    for name, content in metadata.items():
        print(f"{name.upper():<20}: {content}")
    print(divider + "\n")
|
|
|
|
|
|
def print_summary(summary):
    """Write the summary data to stdout; print nothing when it is empty.

    Each entry is printed as ``KEY: value`` with the key upper-cased, framed
    by 80-character rules under a "SUMMARY DATA" title.
    """
    if not summary:
        return

    divider = "=" * 80
    print("\n" + divider)
    print("SUMMARY DATA")
    print(divider)
    for name, content in summary.items():
        print(f"{name.upper()}: {content}")
    print(divider + "\n")
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: parse one ACH report and print its contents.
    import sys

    from logging_config import setup_logging

    # Setup logging
    setup_logging()

    # The report path may be given as the first command-line argument; the
    # original hard-coded location is kept as the default so running the
    # script without arguments behaves as before.
    default_path = '/home/asif/projects/ach_ui_dbtl_file_based/ACH_99944_19012026103217_001.txt'
    report_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    # Parse the ACH file
    parser = ACHParser(report_path)
    transactions, metadata, summary = parser.parse()

    # Print results
    print_metadata(metadata)
    print_transactions(transactions)
    print_summary(summary)

    logger.info(f"Parsing complete. Extracted {len(transactions)} transactions")
|