#!/usr/bin/env python3
"""
ACH File Parser - Extracts data from ACH transaction report files.

The report is a paginated text file (pages separated by form feeds). Each
page carries header lines (REPORT ID / BRANCH / CURRENCY), a table of
hyphen-delimited transaction rows, and optional summary total lines.
"""

import math
import re
import sys

from logging_config import get_logger

logger = get_logger(__name__)

# Input file used when the script is run without a command-line argument.
_DEFAULT_INPUT_PATH = (
    '/home/ipkssupport/test_parser/UIH_99944_11022026102913_001_a.txt'
)

# Tolerant separator for the tail columns: whitespace/NBSP before the hyphen,
# OR whitespace/NBSP after the hyphen. Hyphens with no whitespace on either
# side (e.g. CR-DEP-PROCESSED) are deliberately NOT separators.
_TAIL_SEP_RE = re.compile(r'(?:[\s\u00A0]-[\s\u00A0]*|-{1}[\s\u00A0]+)')

# First number in the AMOUNT column, with or without thousands separators.
_AMOUNT_RE = re.compile(
    r'\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b|\b\d+(?:\.\d+)?\b'
)

# (header label, transaction dict key, column width) for console output.
_PRINT_COLUMNS = (
    ('SNO', 'sno', 6),
    ('UID', 'uid', 18),
    ('CUST ACCT', 'cust_acct', 18),
    ('UID SUSP', 'uid_susp', 18),
    ('CUSTOMER NAME', 'customer_name', 40),
    ('JRNL NO', 'jrnl_no', 10),
    ('DATE', 'date', 12),
    ('AMOUNT', 'amount', 12),
    ('SYS', 'sys', 45),
    ('REMARKS', 'remarks', 50),
)


def _pop_until_hyphen(text):
    """Split *text* at its first '-' and return (stripped field, remainder).

    The hyphen itself is dropped. When there is no hyphen the whole text is
    the field and the remainder is ''.
    """
    field, _, rest = text.partition('-')
    return field.strip(), rest


def _rsplit_once_tolerant(text):
    """Split *text* at the last tolerant separator match.

    Returns (left, right) with the separator removed, or (text, '') when the
    text contains no hyphen that has whitespace on at least one side.
    """
    last = None
    for last in _TAIL_SEP_RE.finditer(text):
        pass
    if last is None:
        return text, ''
    return text[:last.start()], text[last.end():]


class UIHParser:
    """Parse a paginated transaction report into rows, metadata and totals."""

    def __init__(self, file_path):
        self.file_path = file_path
        self.transactions = []
        self.report_metadata = {}
        self.summary_data = {}

    def parse(self):
        """Main parsing method.

        Reads the file as cp1252, splits it into pages on form feeds and
        parses every non-blank page.

        Returns:
            (transactions, report_metadata, summary_data)

        Raises:
            Any exception from reading or parsing is logged and re-raised.
        """
        # Start from a clean slate so calling parse() again on the same
        # instance does not append a second copy of every transaction.
        self.transactions = []
        self.report_metadata = {}
        self.summary_data = {}

        try:
            with open(self.file_path, 'r', encoding="cp1252") as f:
                content = f.read()

            # Split by form feed to separate pages
            pages = content.split('\f')
            logger.info(f"Found {len(pages)} pages in the file")

            for page_idx, page in enumerate(pages):
                if page.strip():
                    self._parse_page(page, page_idx)

            logger.info(f"Total transactions parsed: {len(self.transactions)}")
            return self.transactions, self.report_metadata, self.summary_data
        except Exception as e:
            logger.error(f"Error parsing file: {e}", exc_info=True)
            raise

    def _parse_page(self, page, page_idx):
        """Parse individual page content.

        Args:
            page: Text of one page.
            page_idx: Zero-based page number (currently not used by the
                parsing logic itself).
        """
        lines = page.split('\n')

        # Report metadata is only looked for in the first 10 lines of a page.
        for line in lines[:10]:
            if 'REPORT ID:' in line:
                self._extract_header_metadata(line)
            elif 'BRANCH:' in line:
                self._extract_branch_info(line)
            elif 'CURRENCY:' in line:
                self._extract_currency_info(line)

        # Find the transaction table via its UID(AADHAAR) header row; adjust
        # the marker if the report's header text changes.
        transaction_start = None
        for idx, line in enumerate(lines):
            if ' UID(AADHAAR) NO' in line:
                transaction_start = idx + 2  # Skip header and separator
                break

        if transaction_start is not None:
            for line in lines[transaction_start:]:
                # Skip separators, the DEBITS summary header and blank lines.
                if 'DEBITS' in line or '----' in line or not line.strip():
                    continue
                if line.startswith('==='):
                    continue
                transaction = self._parse_transaction_line(line)
                if transaction:
                    self.transactions.append(transaction)

        # Summary totals may appear anywhere on the page.
        for line in lines:
            if 'TOT PROCESSED' in line or 'TOT TRANSACTIONS' in line:
                self._extract_summary_line(line)

    def _extract_header_metadata(self, line):
        """Extract report id, bank name and run date from the header line."""
        # REPORT ID: TF0504-01 ... RUN DATE: 19/01/2026 10:32
        report_id_match = re.search(r'REPORT ID:\s+(\S+)', line)
        bank_name_match = re.search(r'([A-Z\s.]+)\s+RUN DATE:', line)
        date_match = re.search(
            r'RUN DATE:\s+(\d{2}/\d{2}/\d{4}\s+\d{2}:\d{2})', line
        )

        if report_id_match:
            self.report_metadata['report_id'] = report_id_match.group(1)
        if bank_name_match:
            self.report_metadata['bank_name'] = bank_name_match.group(1).strip()
        if date_match:
            self.report_metadata['run_date'] = date_match.group(1)

    def _extract_branch_info(self, line):
        """Extract the branch code from the BRANCH line."""
        # BRANCH: 99944 ACH CR T R I C K L E F E E D T R A N S A C T I O N S
        branch_match = re.search(r'BRANCH:\s+(\S+)', line)
        if branch_match:
            self.report_metadata['branch'] = branch_match.group(1)

    def _extract_currency_info(self, line):
        """Extract currency and maker/checker ids from the CURRENCY line."""
        # CURRENCY: INR MAKER-ID: 0009991 CHECKER-ID: 0000000
        currency_match = re.search(r'CURRENCY:\s+(\S+)', line)
        maker_match = re.search(r'MAKER-ID:\s+(\S+)', line)
        checker_match = re.search(r'CHECKER-ID:\s+(\S+)', line)

        if currency_match:
            self.report_metadata['currency'] = currency_match.group(1)
        if maker_match:
            self.report_metadata['maker_id'] = maker_match.group(1)
        if checker_match:
            self.report_metadata['checker_id'] = checker_match.group(1)

    def _parse_transaction_line(self, line):
        """Parse a single transaction line.

        Strategy:
          - The first 8 columns (SNO, UID, CUST ACCT/RT BGL, UID SUSP,
            CUSTOMER NAME, JRNL NO, DATE, AMOUNT) are each cut at the next
            '-' in the line.
          - The tail after AMOUNT is split from the RIGHT on hyphens that
            have whitespace on at least one side:
            REMARKS (last), SUSPENSE MSG, CR SUSPENSE, then SYS/MESSAGE
            (everything that remains).
          - Hyphens inside SYS/MESSAGE with no surrounding whitespace are
            preserved (e.g. CR-DEP-PROCESSED).
          - Fields are trimmed; empty optional fields become ''.

        Returns:
            A dict of string fields, or None when the line is shorter than
            20 characters, its SNO is not all digits, or parsing fails.
        """
        line = line.rstrip("\n")
        if len(line) < 20:
            return None

        # Normalize CP1252 non-breaking spaces and tabs to regular spaces.
        line = line.replace('\xa0', ' ').replace('\t', ' ')

        try:
            rest = line.strip()

            # Columns 1..7, each terminated by the next '-'.
            fields = []
            for _ in range(7):
                field, rest = _pop_until_hyphen(rest)
                fields.append(field)
            (sno, uid, cust_acct, uid_susp,
             customer_name, jrnl_no, date_field) = fields

            # Rows without a numeric serial number are not transactions
            # (summary/footer text reaches this point too).
            if not sno.isdigit():
                return None

            # Column 8: AMOUNT, normalized to a plain number when possible.
            amount_raw, rest = _pop_until_hyphen(rest)
            amount = ''
            if amount_raw:
                number = _AMOUNT_RE.search(amount_raw)
                amount = (
                    number.group(0).replace(',', '') if number else amount_raw
                )

            # Tail columns, peeled off from the right.
            tail = rest.lstrip()

            left, remarks = _rsplit_once_tolerant(tail)
            remarks = remarks.strip()
            if remarks.startswith('-'):  # defensive trim
                remarks = remarks[1:].strip()

            left, suspense_msg = _rsplit_once_tolerant(left)
            suspense_msg = suspense_msg.strip()

            sys_part, cr_suspense = _rsplit_once_tolerant(left)
            cr_suspense = cr_suspense.strip()

            # SYS and MESSAGE share one value; drop one leading separator
            # hyphen if it survived the split.
            sys_message = sys_part.strip()
            if sys_message.startswith('-'):
                sys_message = sys_message[1:].lstrip()

            return {
                'sno': sno,
                'uid': uid,
                'cust_acct': cust_acct,
                'uid_susp': uid_susp,
                'customer_name': customer_name,
                'jrnl_no': jrnl_no,
                'date': date_field,
                'amount': amount,
                'sys': sys_message,
                'message': sys_message,  # duplicate per requirement
                'cr_suspense': cr_suspense,
                'suspense_msg': suspense_msg,
                'remarks': remarks,
            }
        except Exception as e:
            # Best effort: a malformed row is skipped, not fatal.
            logger.debug(f"Error parsing transaction line: {e}")
            return None

    def _extract_summary_line(self, line):
        """Extract summary totals from a 'TOT PROCESSED' line.

        The first four numeric tokens are stored as debit count, debit
        amount, credit count and credit amount. Other lines (including
        'TOT TRANSACTIONS') are ignored.
        """
        # Format: TOT PROCESSED 0 0.00 178 41132.29 178 41132.29
        if 'TOT PROCESSED' not in line:
            return

        # Normalize thousands separators before the numeric check.
        tokens = [token.replace(',', '') for token in line.split()]
        numbers = [token for token in tokens if self._is_numeric(token)]
        if len(numbers) >= 4:
            self.summary_data['tot_processed'] = {
                'debit_count': numbers[0],
                'debit_amount': numbers[1],
                'credit_count': numbers[2],
                'credit_amount': numbers[3],
            }

    @staticmethod
    def _is_numeric(value):
        """Return True if *value* converts to a finite float.

        Tokens such as 'nan' or 'inf' convert with float() but are not
        report figures, so they are rejected; non-string input returns
        False instead of raising.
        """
        try:
            return math.isfinite(float(value))
        except (TypeError, ValueError):
            return False


def print_transactions(transactions):
    """Print transactions to console as a fixed-width table."""
    rule = "=" * 180
    print("\n" + rule)
    print(" ".join(f"{label:<{width}}" for label, _, width in _PRINT_COLUMNS))
    print(rule)

    for txn in transactions:
        print(
            " ".join(f"{txn[key]:<{width}}" for _, key, width in _PRINT_COLUMNS)
        )

    print(rule)
    print(f"Total transactions: {len(transactions)}\n")


def print_metadata(metadata):
    """Print report metadata."""
    print("\n" + "=" * 80)
    print("REPORT METADATA")
    print("=" * 80)
    for key, value in metadata.items():
        print(f"{key.upper():<20}: {value}")
    print("=" * 80 + "\n")


def print_summary(summary):
    """Print summary data (nothing is printed when it is empty)."""
    if summary:
        print("\n" + "=" * 80)
        print("SUMMARY DATA")
        print("=" * 80)
        for key, value in summary.items():
            print(f"{key.upper()}: {value}")
        print("=" * 80 + "\n")


def main(argv=None):
    """Parse a report file and print its contents.

    Args:
        argv: Command-line arguments without the program name; defaults to
            sys.argv[1:]. The first argument, if given, is the file to
            parse; otherwise the built-in default path is used.
    """
    from logging_config import setup_logging

    # Setup logging
    setup_logging()

    args = sys.argv[1:] if argv is None else argv
    file_path = args[0] if args else _DEFAULT_INPUT_PATH

    # Parse the UIH file
    parser = UIHParser(file_path)
    transactions, metadata, summary = parser.parse()

    # Print results
    print_metadata(metadata)
    print_transactions(transactions)
    print_summary(summary)

    logger.info(f"Parsing complete. Extracted {len(transactions)} transactions")


if __name__ == '__main__':
    main()