updated

2026-06-13 14:19:02 +05:30
parent 5e16a9d22a
commit 518d51cf6a
2 changed files with 339 additions and 0 deletions
@@ -0,0 +1,270 @@
+import os
+import logging
+from typing import List, Dict, Tuple, Optional
+from decimal import Decimal, InvalidOperation
+
+# -------------------------
+# Logger
+# -------------------------
+# Configure the root logger as soon as this module is loaded so that
+# INFO-level (and higher) messages are emitted.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger used by the parser class and the script entry point.
+logger = logging.getLogger(__name__)
+
+
+# -------------------------
+# Helpers
+# -------------------------
+def normalize_text(val: str) -> str:
+    """Return ``val`` as text with leading and trailing whitespace removed.
+
+    Any falsy input (``None``, ``""``, ``0``) yields the empty string.
+    """
+    if not val:
+        return ""
+    return str(val).strip()
+
+
+def to_decimal(val: str) -> Optional[Decimal]:
+    """Convert ``val`` to a finite ``Decimal``.
+
+    Args:
+        val: Text holding a number; surrounding whitespace is ignored.
+            Non-string values are converted with ``str()`` first.
+
+    Returns:
+        The parsed ``Decimal``, or ``None`` when ``val`` is ``None``, is not a
+        valid number, or is a non-finite value such as ``NaN`` / ``Infinity``.
+    """
+    if val is None:
+        return None
+
+    try:
+        number = Decimal(str(val).strip())
+    except (InvalidOperation, ValueError):
+        return None
+
+    # "NaN" and "Infinity" are valid Decimal literals but are not usable
+    # amounts: adding one to a running total makes the whole total non-finite.
+    return number if number.is_finite() else None
+
+
+# -------------------------
+# Parser Class
+# -------------------------
+class UPIParser:
+    """
+    Parser for UPI Bank Switch Report (NPCI Cycle)
+
+    Reads a pipe-delimited text report, keeps the leading fields of every data
+    row (named by ``EXPECTED_HEADER``) and builds per-status / per-type totals.
+    Gzip-compressed reports are detected by their magic bytes and decompressed
+    while reading.
+    """
+
+    EXPECTED_HEADER = [
+        "rrn",
+        "type",
+        "txn_datetime",
+        "credit_account",
+        "status",
+        "amount",
+        "note",
+    ]
+
+    # Number of leading pipe-delimited fields consumed from each data row.
+    FIELD_COUNT = len(EXPECTED_HEADER)
+
+    # Line prefixes that mark separator lines and the column-header row.
+    SKIP_PREFIXES = ("===", "---", "RRN")
+
+    # Every gzip stream starts with these two bytes.
+    GZIP_MAGIC = b"\x1f\x8b"
+
+    def __init__(self, file_path: str):
+        """Store the report path; nothing is read until ``parse()`` is called."""
+        self.file_path = file_path
+        self.transactions: List[Dict] = []
+        self.file_metadata: Dict = {}
+        self.summary_data: Dict = {}
+
+    # -------------------------
+    # MAIN PARSE
+    # -------------------------
+    def parse(self) -> Tuple[List[Dict], Dict, Dict]:
+        """Read the report and return ``(transactions, file_metadata, summary)``.
+
+        The three results are also stored on the instance. Each call starts
+        from an empty transaction list, so calling ``parse()`` again re-reads
+        the file instead of appending duplicates.
+
+        Raises:
+            Exception: any error raised while reading or converting the file
+                is logged and re-raised unchanged.
+        """
+        try:
+            rows = self._read_rows()
+
+            # Reset state from any previous run before collecting rows.
+            self.transactions = []
+
+            self.file_metadata = {
+                "source_file": os.path.basename(self.file_path),
+                "row_count": len(rows),
+                # Copy so callers cannot mutate the class-level constant.
+                "columns": list(self.EXPECTED_HEADER),
+            }
+
+            for idx, row in enumerate(rows, start=1):
+                txn = self._row_to_transaction(row, idx)
+                if txn:
+                    self.transactions.append(txn)
+
+            self.summary_data = self._build_summary(self.transactions)
+
+            logger.info("Parsed %d rows from %s", len(self.transactions), self.file_path)
+            return self.transactions, self.file_metadata, self.summary_data
+
+        except Exception as e:
+            logger.error(f"Error parsing file: {e}", exc_info=True)
+            raise
+
+    # -------------------------
+    # READ FILE
+    # -------------------------
+    def _open_text(self):
+        """Open the report for text reading, decompressing gzip input."""
+        import gzip  # local import: only this helper needs it
+
+        with open(self.file_path, "rb") as probe:
+            is_gzip = probe.read(2) == self.GZIP_MAGIC
+
+        opener = gzip.open if is_gzip else open
+        # utf-8-sig drops a leading BOM so the first line still matches the
+        # skip rules; undecodable bytes are replaced rather than raising.
+        return opener(self.file_path, "rt", encoding="utf-8-sig", errors="replace")
+
+    def _read_rows(self) -> List[List[str]]:
+        """Return the data rows of the report as lists of raw field strings.
+
+        Blank lines, the report banner, separator lines and the column header
+        are skipped. Rows with fewer than ``FIELD_COUNT`` fields are dropped;
+        extra trailing fields are discarded.
+        """
+        rows: List[List[str]] = []
+
+        with self._open_text() as f:
+            for line in f:
+                line = line.strip()
+
+                # Skip unwanted lines
+                if (
+                    not line
+                    or "UPI BANK SWITCH REPORT" in line
+                    or line.startswith(self.SKIP_PREFIXES)
+                ):
+                    continue
+
+                parts = line.split('|')
+
+                if len(parts) < self.FIELD_COUNT:
+                    logger.debug(f"Skipping malformed row: {line}")
+                    continue
+
+                rows.append(parts[:self.FIELD_COUNT])
+
+        return rows
+
+    # -------------------------
+    # ROW -> TRANSACTION
+    # -------------------------
+    def _row_to_transaction(self, row: List[str], row_num: int) -> Optional[Dict]:
+        """Convert one raw row into a transaction dict.
+
+        Args:
+            row: Field strings in ``EXPECTED_HEADER`` order.
+            row_num: 1-based row position, used only in log messages.
+
+        Returns:
+            A dict with one key per ``EXPECTED_HEADER`` entry plus ``txn_date``
+            and ``txn_time`` (always present, possibly empty), or ``None`` when
+            the row is too short or has no RRN. ``amount`` is a ``Decimal``, or
+            ``""`` when the field is not a finite number.
+        """
+        if len(row) < self.FIELD_COUNT:
+            logger.debug(f"Skipping row {row_num}: expected {self.FIELD_COUNT} fields")
+            return None
+
+        txn: Dict = {
+            name: normalize_text(value)
+            for name, value in zip(self.EXPECTED_HEADER, row)
+        }
+
+        if not txn["rrn"]:
+            logger.debug(f"Skipping row {row_num}: Missing RRN")
+            return None
+
+        # Amount conversion; non-finite values (NaN / Infinity) are treated as
+        # unparseable so they cannot poison the summary totals.
+        amt = to_decimal(txn["amount"])
+        txn["amount"] = amt if amt is not None and amt.is_finite() else ""
+
+        # Split datetime on the first run of whitespace: the date is the first
+        # token and everything after it (e.g. "10:15:00 AM") is the time.
+        pieces = txn["txn_datetime"].split(None, 1)
+        txn["txn_date"] = pieces[0] if pieces else ""
+        txn["txn_time"] = pieces[1] if len(pieces) > 1 else ""
+
+        return txn
+
+    # -------------------------
+    # SUMMARY
+    # -------------------------
+    @staticmethod
+    def _coerce_amount(value) -> Decimal:
+        """Return ``value`` as a finite ``Decimal``, or zero when it is not one."""
+        if isinstance(value, Decimal):
+            amount = value
+        elif value is None or value == "":
+            return Decimal("0")
+        else:
+            try:
+                amount = Decimal(str(value).strip())
+            except (InvalidOperation, ValueError):
+                return Decimal("0")
+        return amount if amount.is_finite() else Decimal("0")
+
+    @staticmethod
+    def _stringify_groups(groups: Dict[str, Dict]) -> Dict[str, Dict]:
+        """Render each group's Decimal amount as a two-decimal string."""
+        return {
+            key: {"count": data["count"], "amount": f"{data['amount']:.2f}"}
+            for key, data in groups.items()
+        }
+
+    def _build_summary(self, txns: List[Dict]) -> Dict:
+        """Aggregate counts and amounts overall, by status and by type.
+
+        Status and type keys are upper-cased; a missing or ``None`` value is
+        grouped under ``""``. Amounts that are missing or not finite numbers
+        count as zero. All amounts in the result are two-decimal strings.
+        """
+        total_amount = Decimal("0")
+
+        by_status: Dict[str, Dict] = {}
+        by_type: Dict[str, Dict] = {}
+
+        for t in txns:
+            amt = self._coerce_amount(t.get("amount"))
+            total_amount += amt
+
+            status = str(t.get("status") or "").upper()
+            txn_type = str(t.get("type") or "").upper()
+
+            # Same bookkeeping for both groupings.
+            for groups, key in ((by_status, status), (by_type, txn_type)):
+                entry = groups.setdefault(key, {"count": 0, "amount": Decimal("0")})
+                entry["count"] += 1
+                entry["amount"] += amt
+
+        return {
+            "total_count": len(txns),
+            "total_amount": f"{total_amount:.2f}",
+            "by_status": self._stringify_groups(by_status),
+            "by_type": self._stringify_groups(by_type),
+        }
+
+
+# -------------------------
+# PRINT FUNCTIONS
+# -------------------------
+def print_transactions(transactions: List[Dict], limit: Optional[int] = 50):
+    """Print transactions as a fixed-width table on standard output.
+
+    Args:
+        transactions: Transaction dicts; missing keys print as blanks and
+            values longer than their column are truncated.
+        limit: Maximum number of rows to print. ``None``, ``0`` or a negative
+            value prints every row.
+
+    A "... (N more rows)" line is printed only when rows were actually left
+    out. A zero amount is printed as ``0`` rather than as a blank cell.
+    """
+
+    headers = [
+        ("RRN", 15),
+        ("TYPE", 8),
+        ("DATE", 10),
+        ("TIME", 10),
+        ("ACCOUNT", 20),
+        ("STATUS", 10),
+        ("AMOUNT", 12),
+        ("NOTE", 25),
+    ]
+
+    header_line = " ".join(f"{h:<{w}}" for h, w in headers)
+    rule = "=" * len(header_line)
+    print("\n" + rule)
+    print(header_line)
+    print(rule)
+
+    total = len(transactions)
+    shown = total if not limit or limit < 0 else min(limit, total)
+
+    for txn in transactions[:shown]:
+        amount = txn.get("amount")
+        row = [
+            txn.get("rrn", ""),
+            txn.get("type", ""),
+            txn.get("txn_date", ""),
+            txn.get("txn_time", ""),
+            txn.get("credit_account", ""),
+            txn.get("status", ""),
+            # Compare explicitly: Decimal("0") is falsy but is a real amount.
+            "" if amount is None or amount == "" else str(amount),
+            txn.get("note", ""),
+        ]
+
+        print(" ".join(f"{str(val)[:w]:<{w}}" for val, (_, w) in zip(row, headers)))
+
+    if shown < total:
+        print(f"... ({total - shown} more rows)")
+
+    print(rule)
+    print(f"Total: {total} records\n")
+
+
+def print_metadata(metadata: Dict):
+    """Print each metadata entry as an upper-cased, 20-wide label and its value."""
+    lines = ["\n===== FILE METADATA ====="]
+    for key, value in metadata.items():
+        lines.append(key.upper().ljust(20) + f": {value}")
+    lines.append("=========================\n")
+    print("\n".join(lines))
+
+
+def print_summary(summary: Dict):
+    """Print each summary entry as ``KEY: value`` between banner lines."""
+    entries = [f"{name.upper()}: {data}" for name, data in summary.items()]
+    print("\n===== SUMMARY =====", *entries, "===================\n", sep="\n")
+
+
+# -------------------------
+# MAIN RUNNER
+# -------------------------
+if __name__ == "__main__":
+    import sys
+
+    # Default report location; pass another path as the first command-line
+    # argument to parse a different file.
+    default_path = r"C:\Users\2780475\Desktop\Test\TUMLUK_31052026_8C_UPI.txt.gz"
+    file_path = sys.argv[1] if len(sys.argv) > 1 else default_path
+
+    # UPIParser is the parser class defined in this module.
+    parser = UPIParser(file_path)
+
+    transactions, metadata, summary = parser.parse()
+
+    print_metadata(metadata)
+    print_transactions(transactions, limit=50)
+    print_summary(summary)
+
+    logger.info("Parsing complete. Extracted %d transactions", len(transactions))