From 45cff45e34e13f594e2d31cbdb26aea8813913e8 Mon Sep 17 00:00:00 2001 From: Bishwajeet Kumar Rajak Date: Fri, 27 Feb 2026 15:32:33 +0530 Subject: [PATCH 1/3] integrated UIHParser --- config.py | 8 +- processors/file_processor.py | 13 +- scheduler.py | 5 +- uih_parser.py | 342 +++++++++++++++++++++++++++++++++++ 4 files changed, 362 insertions(+), 6 deletions(-) create mode 100644 uih_parser.py diff --git a/config.py b/config.py index 499a582..835d932 100644 --- a/config.py +++ b/config.py @@ -25,20 +25,20 @@ class Config: """Check if .env file exists.""" if not Path('.env').exists(): logger.warning(".env file not found. Using environment variables or defaults.") - + def _load_database_config(self): """Load database configuration.""" self.db_user = os.getenv('DB_USER', 'pacs_db') self.db_password = os.getenv('DB_PASSWORD', 'pacs_db') - self.db_host = os.getenv('DB_HOST', 'ipksprod3.c7q7defafeea.ap-south-1.rds.amazonaws.com') + self.db_host = os.getenv('DB_HOST', 'testipksdb.c7q7defafeea.ap-south-1.rds.amazonaws.com') self.db_port = int(os.getenv('DB_PORT', '1521')) - self.db_service_name = os.getenv('DB_SERVICE_NAME', 'IPKS') + self.db_service_name = os.getenv('DB_SERVICE_NAME', 'IPKSDB') self.db_pool_min = int(os.getenv('DB_POOL_MIN', '2')) self.db_pool_max = int(os.getenv('DB_POOL_MAX', '10')) def _load_sftp_config(self): """Load SFTP configuration.""" - self.sftp_host = os.getenv('SFTP_HOST', '142.79.249.123') + self.sftp_host = os.getenv('SFTP_HOST', '43.225.3.224') self.sftp_port = int(os.getenv('SFTP_PORT', '4650')) self.sftp_username = os.getenv('SFTP_USERNAME', 'ipkssftp') self.sftp_password = os.getenv('SFTP_PASSWORD', 'Wnb10U11BE7N26') diff --git a/processors/file_processor.py b/processors/file_processor.py index aba1f97..60787a5 100644 --- a/processors/file_processor.py +++ b/processors/file_processor.py @@ -72,7 +72,18 @@ class FileProcessor: raise Exception(f"Failed to download file: {remote_path}") # Step 3: Parse file - parser = ACHParser(local_path) + #parser = ACHParser(local_path) + + # Choose parser by filename prefix + parser = None + if filename.startswith('ACH_'): + parser = ACHParser(local_path) + elif filename.startswith('UIH_'): + parser = UIHParser(local_path) + else: + logger.warning(f"Unknown file type for parser: {filename}") + return False + transactions, metadata, summary = parser.parse() if not transactions: diff --git a/scheduler.py b/scheduler.py index 5826027..bf6cb36 100644 --- a/scheduler.py +++ b/scheduler.py @@ -79,7 +79,10 @@ class Scheduler: # Get list of files already processed for this specific bank bank_processed = repository.get_processed_files(bank_code) remote_path = f"{self.config.sftp_base_path}/{bank_code}/NACH" - files = sftp_client.list_files(remote_path, pattern=f'ACH_99944_{today_str}*.txt') + ach_files = sftp_client.list_files(remote_path, pattern=f'ACH_99944_{today_str}*.txt') + uih_files = sftp_client.list_files(remote_path, pattern=f'UIH_99944_{today_str}*.txt') + + files= ach_files + uih_files for filename in files: if filename not in bank_processed: diff --git a/uih_parser.py b/uih_parser.py new file mode 100644 index 0000000..b3a3c3f --- /dev/null +++ b/uih_parser.py @@ -0,0 +1,342 @@ +#!/usr/bin/env python3 +""" +ACH File Parser - Extracts data from fixed-width ACH transaction report files. +""" + +import re +from logging_config import get_logger + +logger = get_logger(__name__) + + +class UIHParser: + def __init__(self, file_path): + self.file_path = file_path + self.transactions = [] + self.report_metadata = {} + self.summary_data = {} + + def parse(self): + """Main parsing method.""" + try: + with open(self.file_path, 'r', encoding="cp1252") as f: + content = f.read() + + # Split by form feed to separate pages + pages = content.split('\f') + logger.info(f"Found {len(pages)} pages in the file") + + for page_idx, page in enumerate(pages): + if page.strip(): + self._parse_page(page, page_idx) + + logger.info(f"Total transactions parsed: {len(self.transactions)}") + return self.transactions, self.report_metadata, self.summary_data + + except Exception as e: + logger.error(f"Error parsing file: {e}", exc_info=True) + raise + + def _parse_page(self, page, page_idx): + """Parse individual page content.""" + lines = page.split('\n') + + # Extract report metadata from header + for i, line in enumerate(lines[:10]): + if 'REPORT ID:' in line: + self._extract_header_metadata(line) + elif 'BRANCH:' in line: + self._extract_branch_info(line) + elif 'CURRENCY:' in line: + self._extract_currency_info(line) + + # Find transaction data section (variant header row) + transaction_start = None + for i, line in enumerate(lines): + # Your variant uses the UID(AADHAAR) header; adjust if your header changes + if ' UID(AADHAAR) NO' in line: + transaction_start = i + 2 # Skip header and separator + break + + if transaction_start is not None: + # Parse transactions, skip separators/summary/blank lines + for i in range(transaction_start, len(lines)): + line = lines[i] + + if 'DEBITS' in line or '----' in line or line.strip() == '': + continue + + if line.strip() and not line.startswith('==='): + transaction = self._parse_transaction_line(line) + if transaction: + self.transactions.append(transaction) + + # Parse summary data + for i, line in enumerate(lines): + if 'TOT PROCESSED' in line or 'TOT TRANSACTIONS' in line: + self._extract_summary_line(line) + + def _extract_header_metadata(self, line): + """Extract metadata from header line.""" + # REPORT ID: TF0504-01 ... RUN DATE: 19/01/2026 10:32 + report_id_match = re.search(r'REPORT ID:\s+(\S+)', line) + bank_name_match = re.search(r'([A-Z\s.]+)\s+RUN DATE:', line) + date_match = re.search(r'RUN DATE:\s+(\d{2}/\d{2}/\d{4}\s+\d{2}:\d{2})', line) + + if report_id_match: + self.report_metadata['report_id'] = report_id_match.group(1) + if bank_name_match: + self.report_metadata['bank_name'] = bank_name_match.group(1).strip() + if date_match: + self.report_metadata['run_date'] = date_match.group(1) + + def _extract_branch_info(self, line): + """Extract branch info.""" + # BRANCH: 99944 ACH CR T R I C K L E F E E D T R A N S A C T I O N S + branch_match = re.search(r'BRANCH:\s+(\S+)', line) + if branch_match: + self.report_metadata['branch'] = branch_match.group(1) + + def _extract_currency_info(self, line): + """Extract currency and maker/checker info.""" + # CURRENCY: INR MAKER-ID: 0009991 CHECKER-ID: 0000000 + currency_match = re.search(r'CURRENCY:\s+(\S+)', line) + maker_match = re.search(r'MAKER-ID:\s+(\S+)', line) + checker_match = re.search(r'CHECKER-ID:\s+(\S+)', line) + + if currency_match: + self.report_metadata['currency'] = currency_match.group(1) + if maker_match: + self.report_metadata['maker_id'] = maker_match.group(1) + if checker_match: + self.report_metadata['checker_id'] = checker_match.group(1) + + def _parse_transaction_line(self, line): + """ + Parse a single transaction line. + + Strategy: + - First 8 columns (SNO..AMOUNT) are split by the first '-' each time. + - The tail (after AMOUNT) is split from the RIGHT using tolerant separators: + SEP = r'(?:[\\s\\u00A0]-[\\s\\u00A0]*|-{1}[\\s\\u00A0]+)' + (whitespace/NBSP before the hyphen OR spaces after the hyphen) + Order from the right: + ... -> REMARKS (last sep) -> SUSPENSE MSG (prev) -> CR SUSPENSE (prev) -> SYS/MESSAGE (rest) + - Internal hyphens inside SYS/MESSAGE are preserved (e.g., CR-DEP-PROCESSED). + - Trim spaces; empty optional fields -> ''. + """ + import re + + line = line.rstrip("\n") + if len(line) < 20: + return None + + # Normalize CP1252 non-breaking spaces and tabs to regular spaces (defensive) + line = line.replace('\xa0', ' ').replace('\t', ' ') + + # Helper: pop text up to the next '-' (treat this '-' as the column separator). + def pop_until_hyphen(s: str): + idx = s.find('-') + if idx == -1: + field = s.strip() + rest_ = '' + else: + field = s[:idx].strip() + rest_ = s[idx + 1:] # drop the separator hyphen itself + return field, rest_ + + # Helper: split once from the RIGHT by a tolerant separator regex; + # fallback only splits at a '-' that has whitespace on at least one side. + def rsplit_once_tolerant(s: str, pattern: re.Pattern): + last = None + for m in pattern.finditer(s): + last = m + if last: + return s[:last.start()], s[last.end():] + # Fallback: split at the last '-' that has whitespace on either side + for i in range(len(s) - 1, -1, -1): + if s[i] == '-': + before_ws = (i > 0 and s[i - 1].isspace()) + after_ws = (i + 1 < len(s) and s[i + 1].isspace()) + if before_ws or after_ws: + return s[:i], s[i + 1:] + return s, '' # no separator found + + try: + s = line.strip() + + # Parse 1..7: SNO, UID, CUST ACCT/RT BGL, UID SUSP, CUSTOMER NAME, JRNL NO, DATE + fields = [] + for _ in range(7): + f, s = pop_until_hyphen(s) + fields.append(f) + + # 8: AMOUNT (normalize numeric if possible) + amount_raw, s = pop_until_hyphen(s) + amount_raw = amount_raw.strip() + amount = '' + if amount_raw: + m = re.search(r'\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b|\b\d+(?:\.\d+)?\b', amount_raw) + amount = m.group(0).replace(',', '') if m else amount_raw + + # Tail after AMOUNT + t = s.lstrip() + + # Tolerant separators (space/NBSP before '-' OR spaces after '-') + SEP = re.compile(r'(?:[\s\u00A0]-[\s\u00A0]*|-{1}[\s\u00A0]+)') + + # From the RIGHT: + # 13) REMARKS + left, remarks = rsplit_once_tolerant(t, SEP) + remarks = remarks.strip() + if remarks.startswith('-'): # defensive trim + remarks = remarks[1:].strip() + + # 12) SUSPENSE MSG + left, suspense_msg = rsplit_once_tolerant(left, SEP) + suspense_msg = suspense_msg.strip() + + # 11) CR SUSPENSE DETAILS + sys_part, cr_suspense = rsplit_once_tolerant(left, SEP) + cr_suspense = cr_suspense.strip() + + # 9/10) SYS & MESSAGE (same value) — strip one leading separator hyphen if present + sys_message = sys_part.strip() + if sys_message.startswith('-'): + sys_message = sys_message[1:].lstrip() + + # Unpack required fields + sno = fields[0].strip() + uid = fields[1].strip() + cust_acct = fields[2].strip() + uid_susp = fields[3].strip() + customer_name = fields[4].strip() + jrnl_no = fields[5].strip() + date_field = fields[6].strip() + + # Validate SNO + if not sno or not sno.isdigit(): + return None + + return { + 'sno': sno, + 'uid': uid, + 'cust_acct': cust_acct, + 'uid_susp': uid_susp, + 'customer_name': customer_name, + 'jrnl_no': jrnl_no, + 'date': date_field, + 'amount': amount, + 'sys': sys_message, + 'message': sys_message, # duplicate per requirement + 'cr_suspense': cr_suspense or '', + 'suspense_msg': suspense_msg or '', + 'remarks': remarks or '', + } + + except Exception as e: + logger.debug(f"Error parsing transaction line: {e}") + return None + + def _extract_summary_line(self, line): + """Extract summary totals.""" + # Format: TOT PROCESSED 0 0.00 178 41132.29 178 41132.29 + if 'TOT PROCESSED' in line: + parts = line.split() + try: + # Normalize commas before numeric check + cleaned = [p.replace(',', '') for p in parts] + # Find numeric values + numbers = [p for p in cleaned if self._is_numeric(p)] + if len(numbers) >= 4: + self.summary_data['tot_processed'] = { + 'debit_count': numbers[0], + 'debit_amount': numbers[1], + 'credit_count': numbers[2], + 'credit_amount': numbers[3], + } + except Exception as e: + logger.debug(f"Error parsing summary: {e}") + + @staticmethod + def _is_numeric(value): + """Check if string is numeric.""" + try: + float(value) + return True + except ValueError: + return False + + +def print_transactions(transactions): + """Print transactions to console.""" + print("\n" + "=" * 180) + print( + f"{'SNO':<6} " + f"{'UID':<18} " + f"{'CUST ACCT':<18} " + f"{'UID SUSP':<18} " + f"{'CUSTOMER NAME':<40} " + f"{'JRNL NO':<10} " + f"{'DATE':<12} " + f"{'AMOUNT':<12} " + f"{'SYS':<45} " + f"{'REMARKS':<50}" + ) + print("=" * 180) + + for txn in transactions: + print( + f"{txn['sno']:<6} " + f"{txn['uid']:<18} " + f"{txn['cust_acct']:<18} " + f"{txn['uid_susp']:<18} " + f"{txn['customer_name']:<40} " + f"{txn['jrnl_no']:<10} " + f"{txn['date']:<12} " + f"{txn['amount']:<12} " + f"{txn['sys']:<45} " + f"{txn['remarks']:<50}" + ) + + print("=" * 180) + print(f"Total transactions: {len(transactions)}\n") + + +def print_metadata(metadata): + """Print report metadata.""" + print("\n" + "=" * 80) + print("REPORT METADATA") + print("=" * 80) + for key, value in metadata.items(): + print(f"{key.upper():<20}: {value}") + print("=" * 80 + "\n") + + +def print_summary(summary): + """Print summary data.""" + if summary: + print("\n" + "=" * 80) + print("SUMMARY DATA") + print("=" * 80) + for key, value in summary.items(): + print(f"{key.upper()}: {value}") + print("=" * 80 + "\n") + + +if __name__ == '__main__': + from logging_config import setup_logging + + # Setup logging + setup_logging() + + # Parse the UIH file + parser = UIHParser('/home/ipkssupport/test_parser/UIH_99944_11022026102913_001_a.txt') + transactions, metadata, summary = parser.parse() + + # Print results + print_metadata(metadata) + print_transactions(transactions) + print_summary(summary) + + logger.info(f"Parsing complete. Extracted {len(transactions)} transactions") \ No newline at end of file From 7fdeacf0f05cb08d0a5143e79c735d4695a990fc Mon Sep 17 00:00:00 2001 From: Bishwajeet Kumar Rajak Date: Fri, 27 Feb 2026 16:30:31 +0530 Subject: [PATCH 2/3] updated processor --- processors/file_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/processors/file_processor.py b/processors/file_processor.py index 60787a5..aad48f8 100644 --- a/processors/file_processor.py +++ b/processors/file_processor.py @@ -9,6 +9,7 @@ import tempfile from pathlib import Path from logging_config import get_logger from ach_parser import ACHParser +from uih_parser import UIHParser from db.repository import Repository from db.models import ProcessedFile from sftp.sftp_client import SFTPClient From 9f05306d6e3a47f885245a88eff12ca30f5d9c26 Mon Sep 17 00:00:00 2001 From: Bishwajeet Date: Sun, 8 Mar 2026 12:11:12 +0530 Subject: [PATCH 3/3] updated config --- config.py | 6 +++--- db/repository.py | 1 + processors/file_processor.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/config.py b/config.py index 835d932..947cfae 100644 --- a/config.py +++ b/config.py @@ -30,15 +30,15 @@ class Config: """Load database configuration.""" self.db_user = os.getenv('DB_USER', 'pacs_db') self.db_password = os.getenv('DB_PASSWORD', 'pacs_db') - self.db_host = os.getenv('DB_HOST', 'testipksdb.c7q7defafeea.ap-south-1.rds.amazonaws.com') + self.db_host = os.getenv('DB_HOST', 'ipksprod3.c7q7defafeea.ap-south-1.rds.amazonaws.com') self.db_port = int(os.getenv('DB_PORT', '1521')) - self.db_service_name = os.getenv('DB_SERVICE_NAME', 'IPKSDB') + self.db_service_name = os.getenv('DB_SERVICE_NAME', 'IPKS') self.db_pool_min = int(os.getenv('DB_POOL_MIN', '2')) self.db_pool_max = int(os.getenv('DB_POOL_MAX', '10')) def _load_sftp_config(self): """Load SFTP configuration.""" - self.sftp_host = os.getenv('SFTP_HOST', '43.225.3.224') + self.sftp_host = os.getenv('SFTP_HOST', '142.79.249.123') self.sftp_port = int(os.getenv('SFTP_PORT', '4650')) self.sftp_username = os.getenv('SFTP_USERNAME', 'ipkssftp') self.sftp_password = os.getenv('SFTP_PASSWORD', 'Wnb10U11BE7N26') diff --git a/db/repository.py b/db/repository.py index ebb4da9..4e0c481 100644 --- a/db/repository.py +++ b/db/repository.py @@ -80,6 +80,7 @@ class Repository: # Prepare batch data batch_data = [txn.to_dict() for txn in valid_transactions] + #logger.info("Batch data: %s", batch_data) # Execute batch insert insert_sql = """ diff --git a/processors/file_processor.py b/processors/file_processor.py index aad48f8..c5f6d74 100644 --- a/processors/file_processor.py +++ b/processors/file_processor.py @@ -77,9 +77,9 @@ class FileProcessor: # Choose parser by filename prefix parser = None - if filename.startswith('ACH_'): + if filename.startswith('ACH_'): parser = ACHParser(local_path) - elif filename.startswith('UIH_'): + elif filename.startswith('UIH_'): parser = UIHParser(local_path) else: logger.warning(f"Unknown file type for parser: {filename}")