#!/usr/bin/env python3
"""
ACH File Parser - Extracts data from fixed-width ACH transaction report files.
"""

import re
from logging_config import get_logger

# Module-level logger, created through the project's logging_config helper
# and keyed by this module's name; shared by the parser and the script entry.
logger = get_logger(__name__)


class UIHParser:
    """Parse a UIH/ACH transaction report into rows, metadata and totals.

    The report is read as CP1252 text, split into pages on form feeds, and
    every non-blank page is scanned for header metadata, hyphen-separated
    transaction rows and ``TOT PROCESSED`` summary totals.

    After :meth:`parse` the results are available both as the return value
    and on the instance:

    * ``transactions`` - list of dicts, one per parsed row
    * ``report_metadata`` - dict of header values (report id, branch, ...)
    * ``summary_data`` - dict of summary totals
    """

    # Text that identifies the column-header row preceding transaction rows.
    # Adjust if the report variant uses a different header.
    _TXN_HEADER_MARKER = '     UID(AADHAAR) NO'

    # Keys stored for a TOT PROCESSED line, in the order the numbers appear.
    _SUMMARY_KEYS = ('debit_count', 'debit_amount', 'credit_count', 'credit_amount')

    # Header patterns, compiled once instead of on every call.
    _REPORT_ID_RE = re.compile(r'REPORT ID:\s+(\S+)')
    _BANK_NAME_RE = re.compile(r'([A-Z\s.]+)\s+RUN DATE:')
    _RUN_DATE_RE = re.compile(r'RUN DATE:\s+(\d{2}/\d{2}/\d{4}\s+\d{2}:\d{2})')
    _BRANCH_RE = re.compile(r'BRANCH:\s+(\S+)')
    _CURRENCY_RE = re.compile(r'CURRENCY:\s+(\S+)')
    _MAKER_RE = re.compile(r'MAKER-ID:\s+(\S+)')
    _CHECKER_RE = re.compile(r'CHECKER-ID:\s+(\S+)')

    # First number in the AMOUNT column, with or without thousands commas.
    _AMOUNT_RE = re.compile(r'\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b|\b\d+(?:\.\d+)?\b')

    # Column separator used in the tail of a row: a hyphen with whitespace
    # before it, or a hyphen followed by whitespace. Hyphens with no
    # surrounding whitespace (e.g. CR-DEP-PROCESSED) are NOT separators.
    _TAIL_SEP_RE = re.compile(r'(?:[\s\u00A0]-[\s\u00A0]*|-{1}[\s\u00A0]+)')

    def __init__(self, file_path):
        """Remember the report path and start with empty result containers."""
        self.file_path = file_path
        self.transactions = []
        self.report_metadata = {}
        self.summary_data = {}

    def parse(self):
        """Read the report file and parse every page.

        Returns:
            A ``(transactions, report_metadata, summary_data)`` tuple; the
            same objects are also stored on the instance.

        Raises:
            Exception: any error raised while reading or parsing is logged
                with its traceback and re-raised unchanged.
        """
        # Start from a clean slate so calling parse() again on the same
        # instance does not append a second copy of every transaction.
        self.transactions = []
        self.report_metadata = {}
        self.summary_data = {}

        try:
            with open(self.file_path, 'r', encoding="cp1252") as f:
                content = f.read()

            # Pages are separated by form-feed characters.
            pages = content.split('\f')
            logger.info(f"Found {len(pages)} pages in the file")

            for page_idx, page in enumerate(pages):
                if page.strip():
                    self._parse_page(page, page_idx)

            logger.info(f"Total transactions parsed: {len(self.transactions)}")
            return self.transactions, self.report_metadata, self.summary_data

        except Exception as e:
            logger.error(f"Error parsing file: {e}", exc_info=True)
            raise

    def _parse_page(self, page, page_idx):
        """Parse one page: header metadata, transaction rows, summary totals.

        Args:
            page: Text of a single page (no form feeds).
            page_idx: Zero-based page number; currently unused, kept for
                interface compatibility.
        """
        lines = page.split('\n')

        # Header metadata is only looked for in the first ten lines.
        for line in lines[:10]:
            if 'REPORT ID:' in line:
                self._extract_header_metadata(line)
            elif 'BRANCH:' in line:
                self._extract_branch_info(line)
            elif 'CURRENCY:' in line:
                self._extract_currency_info(line)

        # Rows start two lines after the column header (header + separator).
        transaction_start = next(
            (idx + 2 for idx, line in enumerate(lines)
             if self._TXN_HEADER_MARKER in line),
            None,
        )

        if transaction_start is not None:
            for line in lines[transaction_start:]:
                # Skip DEBITS heading lines, dashed rules and blank lines.
                if 'DEBITS' in line or '----' in line or not line.strip():
                    continue
                if line.startswith('==='):
                    continue
                transaction = self._parse_transaction_line(line)
                if transaction:
                    self.transactions.append(transaction)

        for line in lines:
            if 'TOT PROCESSED' in line or 'TOT TRANSACTIONS' in line:
                self._extract_summary_line(line)

    def _extract_header_metadata(self, line):
        """Store report id, bank name and run date found on *line*.

        Example line::

            REPORT ID: TF0504-01      <BANK NAME>      RUN DATE: 19/01/2026  10:32

        Each value is stored in ``report_metadata`` only when its pattern
        matches; the bank name is the run of capitals, dots and spaces
        immediately before ``RUN DATE:``.
        """
        report_id_match = self._REPORT_ID_RE.search(line)
        bank_name_match = self._BANK_NAME_RE.search(line)
        date_match = self._RUN_DATE_RE.search(line)

        if report_id_match:
            self.report_metadata['report_id'] = report_id_match.group(1)
        if bank_name_match:
            self.report_metadata['bank_name'] = bank_name_match.group(1).strip()
        if date_match:
            self.report_metadata['run_date'] = date_match.group(1)

    def _extract_branch_info(self, line):
        """Store the first token after ``BRANCH:`` as ``report_metadata['branch']``.

        Example line::

            BRANCH:    99944 ACH CR      T R I C K L E  F E E D ...
        """
        branch_match = self._BRANCH_RE.search(line)
        if branch_match:
            self.report_metadata['branch'] = branch_match.group(1)

    def _extract_currency_info(self, line):
        """Store currency, maker id and checker id found on *line*.

        Example line::

            CURRENCY:  INR     MAKER-ID: 0009991   CHECKER-ID: 0000000
        """
        for key, pattern in (
            ('currency', self._CURRENCY_RE),
            ('maker_id', self._MAKER_RE),
            ('checker_id', self._CHECKER_RE),
        ):
            match = pattern.search(line)
            if match:
                self.report_metadata[key] = match.group(1)

    @staticmethod
    def _pop_field(text):
        """Split *text* at its first '-' and return ``(field, rest)``.

        ``field`` is the stripped text before the hyphen and ``rest`` is
        everything after it (the hyphen itself is dropped). When there is
        no hyphen the whole stripped text is the field and ``rest`` is ''.
        """
        field, _, rest = text.partition('-')
        return field.strip(), rest

    @classmethod
    def _rsplit_tail(cls, text):
        """Split *text* once at its right-most tolerant separator.

        Returns ``(left, right)`` with the separator removed, or
        ``(text, '')`` when the text contains no separator.
        """
        last = None
        for last in cls._TAIL_SEP_RE.finditer(text):
            pass
        if last is None:
            return text, ''
        return text[:last.start()], text[last.end():]

    @classmethod
    def _normalize_amount(cls, amount_raw):
        """Return the first number in *amount_raw* without thousands commas.

        An empty input yields ''; text that contains no number is returned
        unchanged.
        """
        if not amount_raw:
            return ''
        match = cls._AMOUNT_RE.search(amount_raw)
        return match.group(0).replace(',', '') if match else amount_raw

    def _parse_transaction_line(self, line):
        """Parse a single transaction row into a dict, or return None.

        Strategy:
        - The first eight columns (SNO, UID, CUST ACCT, UID SUSP, CUSTOMER
          NAME, JRNL NO, DATE, AMOUNT) are each cut at the next '-'.
        - The remainder is split from the RIGHT on tolerant separators (a
          hyphen with whitespace before it, or followed by whitespace):
          REMARKS, then SUSPENSE MSG, then CR SUSPENSE; what is left is the
          SYS/MESSAGE text, so its internal hyphens are preserved.
        - All fields are stripped; empty optional fields become ''.

        Returns:
            A dict with keys ``sno, uid, cust_acct, uid_susp, customer_name,
            jrnl_no, date, amount, sys, message, cr_suspense, suspense_msg,
            remarks`` (all strings), or ``None`` when the line is shorter
            than 20 characters or its SNO column is not all digits.
        """
        line = line.rstrip("\n")
        if len(line) < 20:
            return None

        # Normalize non-breaking spaces and tabs to regular spaces (defensive).
        line = line.replace('\xa0', ' ').replace('\t', ' ')

        try:
            rest = line.strip()

            # Columns 1..7, each terminated by the next hyphen.
            head = []
            for _ in range(7):
                field, rest = self._pop_field(rest)
                head.append(field)
            sno, uid, cust_acct, uid_susp, customer_name, jrnl_no, date_field = head

            # Non-data lines (headings, totals) fail this check.
            if not sno.isdigit():
                return None

            # Column 8: AMOUNT.
            amount_raw, rest = self._pop_field(rest)
            amount = self._normalize_amount(amount_raw)

            # Tail columns, peeled off from the right.
            tail = rest.lstrip()

            tail, remarks = self._rsplit_tail(tail)
            remarks = remarks.strip()
            if remarks.startswith('-'):  # lone trailing separator
                remarks = remarks[1:].strip()

            tail, suspense_msg = self._rsplit_tail(tail)
            sys_part, cr_suspense = self._rsplit_tail(tail)

            # Drop one leading separator hyphen from SYS/MESSAGE if present.
            sys_message = sys_part.strip()
            if sys_message.startswith('-'):
                sys_message = sys_message[1:].lstrip()

            return {
                'sno': sno,
                'uid': uid,
                'cust_acct': cust_acct,
                'uid_susp': uid_susp,
                'customer_name': customer_name,
                'jrnl_no': jrnl_no,
                'date': date_field,
                'amount': amount,
                'sys': sys_message,
                'message': sys_message,            # duplicate per requirement
                'cr_suspense': cr_suspense.strip(),
                'suspense_msg': suspense_msg.strip(),
                'remarks': remarks,
            }

        except Exception as e:
            # Best effort: a malformed row is skipped rather than aborting
            # the whole file.
            logger.debug(f"Error parsing transaction line: {e}")
            return None

    def _extract_summary_line(self, line):
        """Store the totals from a ``TOT PROCESSED`` line.

        Example line::

            TOT PROCESSED  0  0.00  178  41132.29  178  41132.29

        The first four numeric tokens (thousands commas removed) are stored
        as strings under ``summary_data['tot_processed']``. Lines with
        fewer than four numeric tokens are ignored.
        """
        if 'TOT PROCESSED' not in line:
            # _parse_page also routes 'TOT TRANSACTIONS' lines here, but no
            # layout is defined for them, so they are ignored.
            return

        tokens = (part.replace(',', '') for part in line.split())
        numbers = [token for token in tokens if self._is_numeric(token)]
        if len(numbers) >= 4:
            self.summary_data['tot_processed'] = dict(
                zip(self._SUMMARY_KEYS, numbers)
            )

    @staticmethod
    def _is_numeric(value):
        """Return True when *value* parses as a finite number.

        ``nan``/``inf`` spellings and non-string, non-numeric inputs such
        as ``None`` are rejected instead of being treated as amounts.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return False
        # number != number only for NaN; the other test excludes +/-inf.
        return number == number and number not in (float('inf'), float('-inf'))


def print_transactions(transactions):
    """Write the transactions to stdout as a fixed-width table.

    Each transaction is a dict as produced by the parser; only the columns
    listed below are shown. A rule line frames the header and the rows, and
    a final line reports how many transactions were printed.
    """
    # (dict key, column heading, column width) in display order.
    layout = (
        ('sno', 'SNO', 6),
        ('uid', 'UID', 18),
        ('cust_acct', 'CUST ACCT', 18),
        ('uid_susp', 'UID SUSP', 18),
        ('customer_name', 'CUSTOMER NAME', 40),
        ('jrnl_no', 'JRNL NO', 10),
        ('date', 'DATE', 12),
        ('amount', 'AMOUNT', 12),
        ('sys', 'SYS', 45),
        ('remarks', 'REMARKS', 50),
    )
    rule = "=" * 180

    print("\n" + rule)
    print(" ".join(format(heading, f"<{width}") for _, heading, width in layout))
    print(rule)

    for record in transactions:
        cells = [format(record[key], f"<{width}") for key, _, width in layout]
        print(" ".join(cells))

    print(rule)
    print(f"Total transactions: {len(transactions)}\n")


def print_metadata(metadata):
    """Write the report metadata to stdout, one ``KEY: value`` per line.

    Keys are upper-cased and left-aligned in a 20-character column; the
    listing is framed by 80-character rule lines under a title.
    """
    rule = "=" * 80

    print(f"\n{rule}")
    print("REPORT METADATA")
    print(rule)
    for key, value in metadata.items():
        label = key.upper()
        print(f"{label:<20}: {value}")
    print(f"{rule}\n")


def print_summary(summary):
    """Write the summary totals to stdout; print nothing when *summary* is empty.

    Each entry is shown as ``KEY: value`` with the key upper-cased, framed
    by 80-character rule lines under a title.
    """
    if not summary:
        return

    rule = "=" * 80
    print(f"\n{rule}")
    print("SUMMARY DATA")
    print(rule)
    for name, totals in summary.items():
        print(f"{name.upper()}: {totals}")
    print(f"{rule}\n")


if __name__ == '__main__':
    # Script entry point: parse one report file and print everything found.
    # Usage: python <this file> [path-to-report]
    import sys

    from logging_config import setup_logging

    # Input used when no path is given on the command line (kept so the
    # script still runs exactly as before without arguments).
    default_input = '/home/ipkssupport/test_parser/UIH_99944_11022026102913_001_a.txt'

    # Setup logging
    setup_logging()

    # Parse the UIH file: first command-line argument, else the default path.
    input_path = sys.argv[1] if len(sys.argv) > 1 else default_input
    parser = UIHParser(input_path)
    transactions, metadata, summary = parser.parse()

    # Print results
    print_metadata(metadata)
    print_transactions(transactions)
    print_summary(summary)

    logger.info(f"Parsing complete. Extracted {len(transactions)} transactions")