#!/usr/bin/env python3 """ ACH File Parser - Extracts data from fixed-width ACH transaction report files. """ import re from logging_config import get_logger logger = get_logger(__name__) class ACHParser: def __init__(self, file_path): self.file_path = file_path self.transactions = [] self.report_metadata = {} self.summary_data = {} def parse(self): """Main parsing method.""" try: with open(self.file_path, 'r', encoding='utf-8') as f: content = f.read() # Split by form feed to separate pages pages = content.split('\f') logger.info(f"Found {len(pages)} pages in the file") for page_idx, page in enumerate(pages): if page.strip(): self._parse_page(page, page_idx) logger.info(f"Total transactions parsed: {len(self.transactions)}") return self.transactions, self.report_metadata, self.summary_data except Exception as e: logger.error(f"Error parsing file: {e}", exc_info=True) raise def _parse_page(self, page, page_idx): """Parse individual page content.""" lines = page.split('\n') # Extract report metadata from header for i, line in enumerate(lines[:10]): if 'REPORT ID:' in line: self._extract_header_metadata(line) elif 'BRANCH:' in line: self._extract_branch_info(line) elif 'CURRENCY:' in line: self._extract_currency_info(line) # Find transaction data section transaction_start = None for i, line in enumerate(lines): if 'SNO CUST ACCT' in line: transaction_start = i + 2 # Skip header and separator break if transaction_start: # Parse transactions until we hit summary or empty section for i in range(transaction_start, len(lines)): line = lines[i] # Stop at summary section if 'DEBITS' in line or '----' in line or line.strip() == '': continue if line.strip() and not line.startswith('==='): transaction = self._parse_transaction_line(line) if transaction: self.transactions.append(transaction) # Parse summary data for i, line in enumerate(lines): if 'TOT PROCESSED' in line or 'TOT TRANSACTIONS' in line: self._extract_summary_line(line) def _extract_header_metadata(self, line): """Extract metadata from header line.""" # REPORT ID: TF0504-01 MURSHIDABAD D C C B LTD. RUN DATE: 19/01/2026 10:32 report_id_match = re.search(r'REPORT ID:\s+(\S+)', line) bank_name_match = re.search(r'([A-Z\s.]+)\s+RUN DATE:', line) date_match = re.search(r'RUN DATE:\s+(\d{2}/\d{2}/\d{4}\s+\d{2}:\d{2})', line) if report_id_match: self.report_metadata['report_id'] = report_id_match.group(1) if bank_name_match: self.report_metadata['bank_name'] = bank_name_match.group(1).strip() if date_match: self.report_metadata['run_date'] = date_match.group(1) def _extract_branch_info(self, line): """Extract branch and currency info.""" # BRANCH: 99944 ACH CR T R I C K L E F E E D T R A N S A C T I O N S branch_match = re.search(r'BRANCH:\s+(\S+)', line) if branch_match: self.report_metadata['branch'] = branch_match.group(1) def _extract_currency_info(self, line): """Extract currency and maker/checker info.""" # CURRENCY: INR MAKER-ID: 0009991 CHECKER-ID: 0000000 currency_match = re.search(r'CURRENCY:\s+(\S+)', line) maker_match = re.search(r'MAKER-ID:\s+(\S+)', line) checker_match = re.search(r'CHECKER-ID:\s+(\S+)', line) if currency_match: self.report_metadata['currency'] = currency_match.group(1) if maker_match: self.report_metadata['maker_id'] = maker_match.group(1) if checker_match: self.report_metadata['checker_id'] = checker_match.group(1) def _parse_transaction_line(self, line): """Parse individual transaction line - fields separated by '-' delimiter.""" line = line.rstrip() if len(line) < 20: return None try: # Split by '-' delimiter to extract main fields parts = [p.strip() for p in line.split('-')] if len(parts) < 6: return None # Field extraction from split parts sno = parts[0].strip() if not sno or not sno.isdigit(): return None cust_acct = parts[1].strip() lpg_susp = parts[2].strip() customer_name = parts[3].strip() jrnl_no = parts[4].strip() date_field = parts[5].strip() # AMOUNT and remaining fields are in parts[6] onwards # parts[6] typically contains: AMOUNT followed by SYS code remaining = '-'.join(parts[6:]).strip() # Extract amount (first numeric value in remaining) amount_match = re.search(r'([\d.]+)', remaining) amount = amount_match.group(1) if amount_match else '' # Extract system/message field after the amount # Skip past the amount and look for system code like "23-DEP-PROCESSED" sys_field_match = None if amount: # Find text after the amount after_amount = remaining.split(amount, 1) if len(after_amount) > 1: # Look for system status pattern: digits-CODE-STATUS (non-greedy) # Matches: 23-DEP-PROCESSED, 26-APPROVED, etc. sys_field_match = re.search(r'\s*(\d{1,2}(?:\-[A-Z]+)*)', after_amount[1]) message = sys_field_match.group(1).strip() if sys_field_match else '' # Extract remarks as the last column/field # The remarks can have different patterns: P0126049D07E0?IOCL LPG SUBSIDY or C012634266856?MDM BURWAN BLOCK # So we take the last non-empty field # First, extract everything after the system message field if message: # Find position after the message and take everything after it msg_pos = remaining.find(message) if msg_pos != -1: after_msg = remaining[msg_pos + len(message):].strip() # Remove leading dashes and extra spaces, take the last meaningful text after_msg = re.sub(r'^[\s\-]+', '', after_msg) # Remove leading spaces/dashes # Get the last column by splitting on multiple spaces columns = re.split(r'\s{2,}', after_msg) remarks = columns[-1].strip() if columns and columns[-1].strip() else '' else: remarks = '' else: # If no message found, just take the last part of remaining columns = re.split(r'\s{2,}', remaining) remarks = columns[-1].strip() if columns and columns[-1].strip() else '' return { 'sno': sno, 'cust_acct': cust_acct, 'lpg_susp': lpg_susp, 'customer_name': customer_name, 'jrnl_no': jrnl_no, 'date': date_field, 'amount': amount, 'sys': message, 'message': message, 'cr_suspense': '', 'suspense_msg': '', 'remarks': remarks } except Exception as e: logger.debug(f"Error parsing transaction line: {e}") return None def _extract_summary_line(self, line): """Extract summary totals.""" # Format: TOT PROCESSED 0 0.00 178 41132.29 178 41132.29 if 'TOT PROCESSED' in line: parts = line.split() try: # Find numeric values numbers = [p for p in parts if self._is_numeric(p)] if len(numbers) >= 3: self.summary_data['tot_processed'] = { 'debit_count': numbers[0], 'debit_amount': numbers[1], 'credit_count': numbers[2], 'credit_amount': numbers[3] if len(numbers) > 3 else 0, } except Exception as e: logger.debug(f"Error parsing summary: {e}") @staticmethod def _is_numeric(value): """Check if string is numeric.""" try: float(value) return True except ValueError: return False def print_transactions(transactions): """Print transactions to console.""" print("\n" + "="*150) print(f"{'SNO':<6} {'CUST ACCT':<18} {'CUSTOMER NAME':<40} {'DATE':<12} {'AMOUNT':<12} {'REMARKS':<40}") print("="*150) for txn in transactions: print(f"{txn['sno']:<6} {txn['cust_acct']:<18} {txn['customer_name']:<40} {txn['date']:<12} {txn['amount']:<12} {txn['remarks']:<40}") print("="*150) print(f"Total transactions: {len(transactions)}\n") def print_metadata(metadata): """Print report metadata.""" print("\n" + "="*80) print("REPORT METADATA") print("="*80) for key, value in metadata.items(): print(f"{key.upper():<20}: {value}") print("="*80 + "\n") def print_summary(summary): """Print summary data.""" if summary: print("\n" + "="*80) print("SUMMARY DATA") print("="*80) for key, value in summary.items(): print(f"{key.upper()}: {value}") print("="*80 + "\n") if __name__ == '__main__': from logging_config import setup_logging # Setup logging setup_logging() # Parse the ACH file parser = ACHParser('/home/asif/projects/ach_ui_dbtl_file_based/ACH_99944_19012026103217_001.txt') transactions, metadata, summary = parser.parse() # Print results print_metadata(metadata) print_transactions(transactions) print_summary(summary) logger.info(f"Parsing complete. Extracted {len(transactions)} transactions")