integrated UIHParser
This commit is contained in:
@@ -30,15 +30,15 @@ class Config:
|
|||||||
"""Load database configuration."""
|
"""Load database configuration."""
|
||||||
self.db_user = os.getenv('DB_USER', 'pacs_db')
|
self.db_user = os.getenv('DB_USER', 'pacs_db')
|
||||||
self.db_password = os.getenv('DB_PASSWORD', 'pacs_db')
|
self.db_password = os.getenv('DB_PASSWORD', 'pacs_db')
|
||||||
self.db_host = os.getenv('DB_HOST', 'ipksprod3.c7q7defafeea.ap-south-1.rds.amazonaws.com')
|
self.db_host = os.getenv('DB_HOST', 'testipksdb.c7q7defafeea.ap-south-1.rds.amazonaws.com')
|
||||||
self.db_port = int(os.getenv('DB_PORT', '1521'))
|
self.db_port = int(os.getenv('DB_PORT', '1521'))
|
||||||
self.db_service_name = os.getenv('DB_SERVICE_NAME', 'IPKS')
|
self.db_service_name = os.getenv('DB_SERVICE_NAME', 'IPKSDB')
|
||||||
self.db_pool_min = int(os.getenv('DB_POOL_MIN', '2'))
|
self.db_pool_min = int(os.getenv('DB_POOL_MIN', '2'))
|
||||||
self.db_pool_max = int(os.getenv('DB_POOL_MAX', '10'))
|
self.db_pool_max = int(os.getenv('DB_POOL_MAX', '10'))
|
||||||
|
|
||||||
def _load_sftp_config(self):
|
def _load_sftp_config(self):
|
||||||
"""Load SFTP configuration."""
|
"""Load SFTP configuration."""
|
||||||
self.sftp_host = os.getenv('SFTP_HOST', '142.79.249.123')
|
self.sftp_host = os.getenv('SFTP_HOST', '43.225.3.224')
|
||||||
self.sftp_port = int(os.getenv('SFTP_PORT', '4650'))
|
self.sftp_port = int(os.getenv('SFTP_PORT', '4650'))
|
||||||
self.sftp_username = os.getenv('SFTP_USERNAME', 'ipkssftp')
|
self.sftp_username = os.getenv('SFTP_USERNAME', 'ipkssftp')
|
||||||
self.sftp_password = os.getenv('SFTP_PASSWORD', 'Wnb10U11BE7N26')
|
self.sftp_password = os.getenv('SFTP_PASSWORD', 'Wnb10U11BE7N26')
|
||||||
|
|||||||
@@ -72,7 +72,18 @@ class FileProcessor:
|
|||||||
raise Exception(f"Failed to download file: {remote_path}")
|
raise Exception(f"Failed to download file: {remote_path}")
|
||||||
|
|
||||||
# Step 3: Parse file
|
# Step 3: Parse file
|
||||||
parser = ACHParser(local_path)
|
#parser = ACHParser(local_path)
|
||||||
|
|
||||||
|
# Choose parser by filename prefix
|
||||||
|
parser = None
|
||||||
|
if filename.startswith('ACH_'):
|
||||||
|
parser = ACHParser(local_path)
|
||||||
|
elif filename.startswith('UIH_'):
|
||||||
|
parser = UIHParser(local_path)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Unknown file type for parser: {filename}")
|
||||||
|
return False
|
||||||
|
|
||||||
transactions, metadata, summary = parser.parse()
|
transactions, metadata, summary = parser.parse()
|
||||||
|
|
||||||
if not transactions:
|
if not transactions:
|
||||||
|
|||||||
@@ -79,7 +79,10 @@ class Scheduler:
|
|||||||
# Get list of files already processed for this specific bank
|
# Get list of files already processed for this specific bank
|
||||||
bank_processed = repository.get_processed_files(bank_code)
|
bank_processed = repository.get_processed_files(bank_code)
|
||||||
remote_path = f"{self.config.sftp_base_path}/{bank_code}/NACH"
|
remote_path = f"{self.config.sftp_base_path}/{bank_code}/NACH"
|
||||||
files = sftp_client.list_files(remote_path, pattern=f'ACH_99944_{today_str}*.txt')
|
ach_files = sftp_client.list_files(remote_path, pattern=f'ACH_99944_{today_str}*.txt')
|
||||||
|
uih_files = sftp_client.list_files(remote_path, pattern=f'UIH_99944_{today_str}*.txt')
|
||||||
|
|
||||||
|
files= ach_files + uih_files
|
||||||
|
|
||||||
for filename in files:
|
for filename in files:
|
||||||
if filename not in bank_processed:
|
if filename not in bank_processed:
|
||||||
|
|||||||
342
uih_parser.py
Normal file
342
uih_parser.py
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
UIH File Parser - Extracts data from hyphen-delimited UIH transaction report files.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from logging_config import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class UIHParser:
    """Parse a UIH transaction report into transactions, metadata and totals.

    The file is read as cp1252 text and split into pages on form feeds.
    Each page is scanned for header metadata, hyphen-separated transaction
    rows and the ``TOT PROCESSED`` summary line.
    """

    # Text that identifies the column-header row of the transaction table.
    _TABLE_HEADER_MARKER = ' UID(AADHAAR) NO'

    # Column separator inside the free-text tail of a row: a hyphen preceded
    # by whitespace, or a hyphen followed by whitespace. Hyphens with no
    # adjacent whitespace (e.g. CR-DEP-PROCESSED) are therefore kept as data.
    # Compiled once here instead of once per parsed line.
    _TAIL_SEPARATOR = re.compile(r'(?:[\s\u00A0]-[\s\u00A0]*|-{1}[\s\u00A0]+)')

    # A run of digits with optional comma grouping and optional decimals.
    # Any comma grouping is accepted, so lakh-style "1,23,456.00" is read in
    # full rather than being cut off at the first comma.
    _AMOUNT_PATTERN = re.compile(r'\b\d[\d,]*(?:\.\d+)?\b')

    def __init__(self, file_path):
        """Remember the report path and start with empty results."""
        self.file_path = file_path
        self.transactions = []
        self.report_metadata = {}
        self.summary_data = {}

    def parse(self):
        """Read the report file and parse every non-blank page.

        Returns:
            A tuple ``(transactions, report_metadata, summary_data)``; these
            are the same objects stored on the instance.

        Raises:
            Any exception raised while reading or parsing is logged and then
            re-raised unchanged.
        """
        # Start from a clean slate so that calling parse() again does not
        # append a second copy of every transaction.
        self.transactions = []
        self.report_metadata = {}
        self.summary_data = {}

        try:
            with open(self.file_path, 'r', encoding="cp1252") as f:
                content = f.read()

            # Split by form feed to separate pages
            pages = content.split('\f')
            logger.info(f"Found {len(pages)} pages in the file")

            for page_idx, page in enumerate(pages):
                if page.strip():
                    self._parse_page(page, page_idx)

            logger.info(f"Total transactions parsed: {len(self.transactions)}")
            return self.transactions, self.report_metadata, self.summary_data

        except Exception as e:
            logger.error(f"Error parsing file: {e}", exc_info=True)
            raise

    def _parse_page(self, page, page_idx):
        """Parse one page: header metadata, transaction rows, summary line.

        Args:
            page: Text of a single page (no form feed).
            page_idx: Zero-based page number; accepted for the caller's
                convenience and not used by the parsing itself.
        """
        lines = page.split('\n')

        # Header metadata is only looked for in the first ten lines.
        for header_line in lines[:10]:
            if 'REPORT ID:' in header_line:
                self._extract_header_metadata(header_line)
            elif 'BRANCH:' in header_line:
                self._extract_branch_info(header_line)
            elif 'CURRENCY:' in header_line:
                self._extract_currency_info(header_line)

        # Locate the transaction table via its column-header row.
        table_start = None
        for idx, text in enumerate(lines):
            if self._TABLE_HEADER_MARKER in text:
                table_start = idx + 2  # Skip header and separator
                break

        if table_start is not None:
            for text in lines[table_start:]:
                # Skip separators, the DEBITS summary heading and blank lines.
                if 'DEBITS' in text or '----' in text or not text.strip():
                    continue
                if text.startswith('==='):
                    continue
                transaction = self._parse_transaction_line(text)
                if transaction:
                    self.transactions.append(transaction)

        # Summary lines may appear anywhere on the page. Both kinds are
        # routed to the extractor, which currently stores only the
        # TOT PROCESSED figures.
        for text in lines:
            if 'TOT PROCESSED' in text or 'TOT TRANSACTIONS' in text:
                self._extract_summary_line(text)

    def _extract_header_metadata(self, line):
        """Store report id, bank name and run date found in a header line.

        Example input:
            REPORT ID: TF0504-01 ... RUN DATE: 19/01/2026 10:32

        Each value is stored only when its pattern matches.
        """
        report_id_match = re.search(r'REPORT ID:\s+(\S+)', line)
        if report_id_match:
            self.report_metadata['report_id'] = report_id_match.group(1)

        # The bank name is the run of capitals, spaces and dots that sits
        # immediately before "RUN DATE:".
        bank_name_match = re.search(r'([A-Z\s.]+)\s+RUN DATE:', line)
        if bank_name_match:
            self.report_metadata['bank_name'] = bank_name_match.group(1).strip()

        date_match = re.search(r'RUN DATE:\s+(\d{2}/\d{2}/\d{4}\s+\d{2}:\d{2})', line)
        if date_match:
            self.report_metadata['run_date'] = date_match.group(1)

    def _extract_branch_info(self, line):
        """Store the token following ``BRANCH:`` as ``branch``, if present."""
        branch_match = re.search(r'BRANCH:\s+(\S+)', line)
        if branch_match:
            self.report_metadata['branch'] = branch_match.group(1)

    def _extract_currency_info(self, line):
        """Store currency, maker id and checker id found in a header line.

        Example input:
            CURRENCY: INR MAKER-ID: 0009991 CHECKER-ID: 0000000
        """
        currency_match = re.search(r'CURRENCY:\s+(\S+)', line)
        if currency_match:
            self.report_metadata['currency'] = currency_match.group(1)

        maker_match = re.search(r'MAKER-ID:\s+(\S+)', line)
        if maker_match:
            self.report_metadata['maker_id'] = maker_match.group(1)

        checker_match = re.search(r'CHECKER-ID:\s+(\S+)', line)
        if checker_match:
            self.report_metadata['checker_id'] = checker_match.group(1)

    @staticmethod
    def _pop_until_hyphen(text):
        """Split ``text`` at its first '-' into (stripped field, remainder).

        The hyphen itself is dropped. Without a hyphen the whole text is the
        field and the remainder is empty.
        """
        field, _, remainder = text.partition('-')
        return field.strip(), remainder

    @classmethod
    def _rsplit_tail(cls, text):
        """Split ``text`` at its right-most tail separator.

        Returns ``(left, right)`` with the separator removed, or
        ``(text, '')`` when no separator is present.
        """
        last = None
        for last in cls._TAIL_SEPARATOR.finditer(text):
            pass
        if last is None:
            return text, ''
        return text[:last.start()], text[last.end():]

    def _parse_transaction_line(self, line):
        """Parse one transaction row into a dict, or return None.

        Strategy:
        - The first eight columns (SNO, UID, CUST ACCT, UID SUSP, CUSTOMER
          NAME, JRNL NO, DATE, AMOUNT) are each cut at the next '-'.
        - The rest of the row is split from the RIGHT on whitespace-adjacent
          hyphens: REMARKS, then SUSPENSE MSG, then CR SUSPENSE; whatever
          remains is the SYS/MESSAGE text, so hyphens inside it survive.
        - Optional fields that are absent come back as ''.

        Returns:
            A dict with keys sno, uid, cust_acct, uid_susp, customer_name,
            jrnl_no, date, amount, sys, message, cr_suspense, suspense_msg
            and remarks; or None when the line is shorter than 20
            characters, its SNO is not all digits, or parsing fails.
        """
        line = line.rstrip("\n")
        if len(line) < 20:
            return None

        # Normalize CP1252 non-breaking spaces and tabs to regular spaces (defensive)
        line = line.replace('\xa0', ' ').replace('\t', ' ')

        try:
            remainder = line.strip()

            # Columns 1..7: SNO, UID, CUST ACCT/RT BGL, UID SUSP, CUSTOMER NAME, JRNL NO, DATE
            leading = []
            for _ in range(7):
                value, remainder = self._pop_until_hyphen(remainder)
                leading.append(value)
            sno, uid, cust_acct, uid_susp, customer_name, jrnl_no, date_field = leading

            # Only rows that start with a numeric serial number are
            # transactions; rejecting others here avoids parsing the tail of
            # summary and heading lines.
            if not sno.isdigit():
                return None

            # Column 8: AMOUNT, with grouping commas removed when numeric.
            amount_raw, remainder = self._pop_until_hyphen(remainder)
            amount = ''
            if amount_raw:
                found = self._AMOUNT_PATTERN.search(amount_raw)
                amount = found.group(0).replace(',', '') if found else amount_raw

            tail = remainder.lstrip()

            # From the right: REMARKS, SUSPENSE MSG, CR SUSPENSE DETAILS.
            left, remarks = self._rsplit_tail(tail)
            remarks = remarks.strip()
            if remarks.startswith('-'):  # defensive trim
                remarks = remarks[1:].strip()

            left, suspense_msg = self._rsplit_tail(left)
            sys_part, cr_suspense = self._rsplit_tail(left)

            # SYS and MESSAGE carry the same value; strip one leading
            # separator hyphen if present.
            sys_message = sys_part.strip()
            if sys_message.startswith('-'):
                sys_message = sys_message[1:].lstrip()

            return {
                'sno': sno,
                'uid': uid,
                'cust_acct': cust_acct,
                'uid_susp': uid_susp,
                'customer_name': customer_name,
                'jrnl_no': jrnl_no,
                'date': date_field,
                'amount': amount,
                'sys': sys_message,
                'message': sys_message,  # duplicate per requirement
                'cr_suspense': cr_suspense.strip(),
                'suspense_msg': suspense_msg.strip(),
                'remarks': remarks,
            }

        except Exception as e:
            logger.debug(f"Error parsing transaction line: {e}")
            return None

    def _extract_summary_line(self, line):
        """Store the first four numbers of a ``TOT PROCESSED`` line.

        Example input:
            TOT PROCESSED 0 0.00 178 41132.29 178 41132.29

        The numbers are kept as strings (commas removed) under
        ``summary_data['tot_processed']`` as debit_count, debit_amount,
        credit_count and credit_amount. Lines without ``TOT PROCESSED`` or
        with fewer than four numeric tokens are ignored.
        """
        if 'TOT PROCESSED' not in line:
            return

        # Normalize commas before the numeric check.
        cleaned = (token.replace(',', '') for token in line.split())
        numbers = [token for token in cleaned if self._is_numeric(token)]
        if len(numbers) >= 4:
            self.summary_data['tot_processed'] = {
                'debit_count': numbers[0],
                'debit_amount': numbers[1],
                'credit_count': numbers[2],
                'credit_amount': numbers[3],
            }

    @staticmethod
    def _is_numeric(value):
        """Return True when ``float(value)`` succeeds, False on ValueError."""
        try:
            float(value)
            return True
        except ValueError:
            return False
|
||||||
|
|
||||||
|
|
||||||
|
def print_transactions(transactions):
    """Write the transactions to stdout as a fixed-width table.

    Each transaction is a mapping that provides the keys listed in the
    column table below; a footer reports how many rows were printed.
    """
    # (transaction key, column heading, left-justified width)
    columns = (
        ('sno', 'SNO', 6),
        ('uid', 'UID', 18),
        ('cust_acct', 'CUST ACCT', 18),
        ('uid_susp', 'UID SUSP', 18),
        ('customer_name', 'CUSTOMER NAME', 40),
        ('jrnl_no', 'JRNL NO', 10),
        ('date', 'DATE', 12),
        ('amount', 'AMOUNT', 12),
        ('sys', 'SYS', 45),
        ('remarks', 'REMARKS', 50),
    )
    rule = "=" * 180

    print("\n" + rule)
    print(" ".join(f"{heading:<{width}}" for _, heading, width in columns))
    print(rule)

    for txn in transactions:
        print(" ".join(f"{txn[key]:<{width}}" for key, _, width in columns))

    print(rule)
    print(f"Total transactions: {len(transactions)}\n")
|
||||||
|
|
||||||
|
|
||||||
|
def print_metadata(metadata):
    """Write the report metadata to stdout as an aligned key/value list.

    Keys are upper-cased and left-justified to 20 characters, framed by
    80-character rules under a "REPORT METADATA" heading.
    """
    rule = "=" * 80
    rows = [f"{name.upper():<20}: {data}" for name, data in metadata.items()]
    # The empty first and last entries reproduce the blank line printed
    # before and after the framed block.
    print("\n".join(["", rule, "REPORT METADATA", rule, *rows, rule, ""]))
|
||||||
|
|
||||||
|
|
||||||
|
def print_summary(summary):
    """Write the summary data to stdout; print nothing when it is empty.

    Each entry is shown as ``KEY: value`` with the key upper-cased, framed
    by 80-character rules under a "SUMMARY DATA" heading.
    """
    if not summary:
        return

    rule = "=" * 80
    rows = [f"{name.upper()}: {data}" for name, data in summary.items()]
    # The empty first and last entries reproduce the blank line printed
    # before and after the framed block.
    print("\n".join(["", rule, "SUMMARY DATA", rule, *rows, rule, ""]))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    import sys

    from logging_config import setup_logging

    # Setup logging
    setup_logging()

    # Parse the UIH file named on the command line. With no argument, fall
    # back to the sample file this script was originally hard-wired to.
    default_path = '/home/ipkssupport/test_parser/UIH_99944_11022026102913_001_a.txt'
    file_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    parser = UIHParser(file_path)
    transactions, metadata, summary = parser.parse()

    # Print results
    print_metadata(metadata)
    print_transactions(transactions)
    print_summary(summary)

    logger.info(f"Parsing complete. Extracted {len(transactions)} transactions")
|
||||||
Reference in New Issue
Block a user