updated
This commit is contained in:
+270
@@ -0,0 +1,270 @@
|
||||
import os
|
||||
import logging
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
from decimal import Decimal, InvalidOperation
|
||||
|
||||
# -------------------------
|
||||
# Logger
|
||||
# -------------------------
|
||||
# Configure the root logger when this module is imported (INFO and above),
# then create the module-scoped logger used by everything below.
logging.basicConfig(
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------
|
||||
# Helpers
|
||||
# -------------------------
|
||||
def normalize_text(val: str) -> str:
    """Return *val* as a string with surrounding whitespace removed.

    Any falsy value (``None``, ``""``, ``0`` ...) is normalized to ``""``.
    """
    if not val:
        return ""
    text = str(val)
    return text.strip()
|
||||
|
||||
|
||||
def to_decimal(val: str) -> Optional[Decimal]:
    """Convert the text *val* to a finite ``Decimal``.

    Args:
        val: Textual amount; surrounding whitespace is ignored.

    Returns:
        The parsed ``Decimal``, or ``None`` when *val* is not a string, is not
        a valid decimal literal, or denotes NaN / Infinity.
    """
    # Non-string input (e.g. None) has never been convertible here.
    if not isinstance(val, str):
        return None

    try:
        amount = Decimal(val.strip())
    except InvalidOperation:
        return None

    # "NaN", "sNaN" and "Infinity" are valid Decimal literals but not usable
    # amounts: they poison sums, and a signaling NaN raises on arithmetic.
    return amount if amount.is_finite() else None
|
||||
|
||||
|
||||
# -------------------------
|
||||
# Parser Class
|
||||
# -------------------------
|
||||
class UPIParser:
    """
    Parser for UPI Bank Switch Report (NPCI Cycle)

    Reads a pipe-delimited text report (plain or gzip-compressed), converts
    each data row into a transaction dict and builds count/amount summaries
    grouped by status and by type.
    """

    # Names, in file order, of the seven fields kept from each data row.
    EXPECTED_HEADER = [
        "rrn",
        "type",
        "txn_datetime",
        "credit_account",
        "status",
        "amount",
        "note",
    ]

    def __init__(self, file_path: str):
        """Remember the report path and start with empty results."""
        self.file_path = file_path
        self.transactions: List[Dict] = []
        self.file_metadata: Dict = {}
        self.summary_data: Dict = {}

    # -------------------------
    # MAIN PARSE
    # -------------------------
    def parse(self) -> Tuple[List[Dict], Dict, Dict]:
        """Parse the report file.

        Returns:
            A ``(transactions, file_metadata, summary_data)`` tuple; the same
            objects are also stored on the instance.

        Raises:
            Exception: any error raised while reading or converting the file
                is logged and re-raised unchanged.
        """
        try:
            rows = self._read_rows()

            # Start from a clean slate so calling parse() again does not
            # append the same rows a second time.
            self.transactions = []

            self.file_metadata = {
                "source_file": os.path.basename(self.file_path),
                "row_count": len(rows),
                # Copy so callers cannot mutate the class-level constant.
                "columns": list(self.EXPECTED_HEADER),
            }

            for idx, row in enumerate(rows, start=1):
                txn = self._row_to_transaction(row, idx)
                if txn:
                    self.transactions.append(txn)

            self.summary_data = self._build_summary(self.transactions)

            logger.info(f"Parsed {len(self.transactions)} rows from {self.file_path}")
            return self.transactions, self.file_metadata, self.summary_data

        except Exception as e:
            logger.error(f"Error parsing file: {e}", exc_info=True)
            raise

    # -------------------------
    # READ FILE
    # -------------------------
    def _read_rows(self) -> List[List[str]]:
        """Read the report and return its data rows as lists of 7 raw fields.

        Blank lines, the report title, ``===`` / ``---`` rules and the line
        starting with ``RRN`` are skipped. Lines with fewer than seven
        ``|``-separated fields are logged at debug level and skipped; extra
        fields beyond the seventh are dropped.

        The file is read as gzip when it starts with the gzip magic bytes,
        otherwise as plain text. Undecodable bytes are replaced, not fatal.
        """
        import gzip  # local import: only this method needs it

        # Sniff the two gzip magic bytes rather than trusting the extension.
        with open(self.file_path, "rb") as probe:
            is_gzip = probe.read(2) == b"\x1f\x8b"
        opener = gzip.open if is_gzip else open

        rows: List[List[str]] = []
        skip_prefixes = ("===", "---", "RRN")

        with opener(self.file_path, "rt", encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()

                # Skip unwanted lines
                if (
                    not line
                    or "UPI BANK SWITCH REPORT" in line
                    or line.startswith(skip_prefixes)
                ):
                    continue

                parts = line.split("|")

                if len(parts) < 7:
                    logger.debug(f"Skipping malformed row: {line}")
                    continue

                rows.append(parts[:7])

        return rows

    # -------------------------
    # ROW -> TRANSACTION
    # -------------------------
    def _row_to_transaction(self, row: List[str], row_num: int) -> Optional[Dict]:
        """Convert one raw 7-field row into a transaction dict.

        Args:
            row: Raw field values in ``EXPECTED_HEADER`` order.
            row_num: 1-based row position, used only in log messages.

        Returns:
            The transaction dict, or ``None`` when the row has no RRN.
            ``amount`` is a ``Decimal`` or ``""`` when it cannot be parsed.
            ``txn_date`` / ``txn_time`` are always present.
        """
        txn = {
            "rrn": normalize_text(row[0]),
            "type": normalize_text(row[1]),
            "txn_datetime": normalize_text(row[2]),
            "credit_account": normalize_text(row[3]),
            "status": normalize_text(row[4]),
            "amount": "",
            "note": normalize_text(row[6]),
            # Always present so every transaction has the same keys.
            "txn_date": "",
            "txn_time": "",
        }

        if not txn["rrn"]:
            logger.debug(f"Skipping row {row_num}: Missing RRN")
            return None

        # Amount conversion
        amt = to_decimal(row[5])
        txn["amount"] = amt if amt is not None else ""

        # Split datetime: exactly two whitespace-separated tokens are taken as
        # date and time; anything else is kept whole in txn_date.
        pieces = txn["txn_datetime"].split()
        if len(pieces) == 2:
            txn["txn_date"], txn["txn_time"] = pieces
        else:
            txn["txn_date"] = txn["txn_datetime"]

        return txn

    # -------------------------
    # SUMMARY
    # -------------------------
    @staticmethod
    def _amount_of(txn: Dict) -> Decimal:
        """Return the transaction's amount as a finite Decimal (0 if unusable)."""
        raw = txn.get("amount")
        if not raw:
            return Decimal("0")
        try:
            amount = raw if isinstance(raw, Decimal) else Decimal(str(raw).strip())
        except InvalidOperation:
            return Decimal("0")
        # NaN / Infinity would poison (or crash) the running totals.
        return amount if amount.is_finite() else Decimal("0")

    def _build_summary(self, txns: List[Dict]) -> Dict:
        """Aggregate transaction counts and amounts.

        Args:
            txns: Transaction dicts; ``amount``, ``status`` and ``type`` are read.

        Returns:
            A dict with ``total_count``, ``total_amount`` and the ``by_status``
            / ``by_type`` groupings (keys upper-cased). Amounts are rendered
            as strings with two decimal places.
        """
        total_amount = Decimal("0")
        by_status: Dict[str, Dict] = {}
        by_type: Dict[str, Dict] = {}

        for t in txns:
            amt = self._amount_of(t)
            total_amount += amt

            status = str(t.get("status") or "").upper()
            txn_type = str(t.get("type") or "").upper()

            # Same accumulation for both groupings.
            for groups, key in ((by_status, status), (by_type, txn_type)):
                entry = groups.setdefault(key, {"count": 0, "amount": Decimal("0")})
                entry["count"] += 1
                entry["amount"] += amt

        def _finalize(groups: Dict[str, Dict]) -> Dict[str, Dict]:
            # Convert Decimal -> string
            return {
                k: {"count": v["count"], "amount": f"{v['amount']:.2f}"}
                for k, v in groups.items()
            }

        return {
            "total_count": len(txns),
            "total_amount": f"{total_amount:.2f}",
            "by_status": _finalize(by_status),
            "by_type": _finalize(by_type),
        }
|
||||
|
||||
|
||||
# -------------------------
|
||||
# PRINT FUNCTIONS
|
||||
# -------------------------
|
||||
def print_transactions(transactions: List[Dict], limit: Optional[int] = 50):
    """Print transactions as a fixed-width table on stdout.

    Args:
        transactions: Transaction dicts; missing keys are shown as blanks.
        limit: Maximum number of rows to print. ``None``, ``0`` or a negative
            value prints every row.

    Each value is truncated to its column width. When rows are left out, a
    ``... (N more rows)`` line reports how many.
    """
    headers = [
        ("RRN", 15),
        ("TYPE", 8),
        ("DATE", 10),
        ("TIME", 10),
        ("ACCOUNT", 20),
        ("STATUS", 10),
        ("AMOUNT", 12),
        ("NOTE", 25),
    ]

    header_line = " ".join(f"{h:<{w}}" for h, w in headers)
    rule = "=" * len(header_line)

    print("\n" + rule)
    print(header_line)
    print(rule)

    capped = limit is not None and limit > 0
    shown = transactions[:limit] if capped else transactions

    for txn in shown:
        amount = txn.get("amount")
        row = [
            txn.get("rrn", ""),
            txn.get("type", ""),
            txn.get("txn_date", ""),
            txn.get("txn_time", ""),
            txn.get("credit_account", ""),
            txn.get("status", ""),
            # Only a missing amount is blank; a zero amount is real data.
            "" if amount is None else str(amount),
            txn.get("note", ""),
        ]
        print(" ".join(f"{str(val)[:w]:<{w}}" for val, (_, w) in zip(row, headers)))

    # Announce omitted rows only when some rows really were omitted.
    hidden = len(transactions) - len(shown)
    if hidden > 0:
        print(f"... ({hidden} more rows)")

    print(rule)
    print(f"Total: {len(transactions)} records\n")
|
||||
|
||||
|
||||
def print_metadata(metadata: Dict):
    """Print each metadata entry as ``KEY: value`` between banner lines.

    Keys are upper-cased and left-aligned in a 20-character column.
    """
    print("\n===== FILE METADATA =====")
    for field_name in metadata:
        label = field_name.upper().ljust(20)
        print(f"{label}: {metadata[field_name]}")
    print("=========================\n")
|
||||
|
||||
|
||||
def print_summary(summary: Dict):
    """Print each summary entry as ``KEY: value`` between banner lines.

    Keys are upper-cased; values are printed with their default formatting.
    """
    print("\n===== SUMMARY =====")
    for field_name in summary:
        label = field_name.upper()
        print(f"{label}: {summary[field_name]}")
    print("===================\n")
|
||||
|
||||
|
||||
# -------------------------
|
||||
# MAIN RUNNER
|
||||
# -------------------------
|
||||
if __name__ == "__main__":
    import sys  # local import: only the script entry point reads argv

    # Built-in sample path; a path given as the first command-line argument
    # takes precedence over it.
    default_path = r"C:\Users\2780475\Desktop\Test\TUMLUK_31052026_8C_UPI.txt.gz"
    file_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    # The parser class defined in this module is UPIParser; the previous
    # name, UPISwitchParser, does not exist and raised NameError.
    parser = UPIParser(file_path)

    transactions, metadata, summary = parser.parse()

    print_metadata(metadata)
    print_transactions(transactions, limit=50)
    print_summary(summary)

    logger.info(f"Parsing complete. Extracted {len(transactions)} transactions")
|
||||
Reference in New Issue
Block a user