180 lines
5.9 KiB
Python
180 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Main file processor for end-to-end ACH file processing.
|
|
Orchestrates download, parsing, mapping, and database insertion.
|
|
"""
|
|
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
from logging_config import get_logger
|
|
from ach_parser import ACHParser
|
|
from db.repository import Repository
|
|
from db.models import ProcessedFile
|
|
from sftp.sftp_client import SFTPClient
|
|
from .data_mapper import DataMapper
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
class FileProcessor:
    """Processes ACH files end-to-end.

    Per-file workflow: download from SFTP, parse with ACHParser, map to
    database records via DataMapper, bulk-insert, record the outcome in the
    processed-files table, and remove the local temp copy.
    """

    def __init__(self, repository: Repository = None, sftp_client: SFTPClient = None):
        """
        Initialize file processor.

        Args:
            repository: Repository instance; a default one is created if omitted.
            sftp_client: SFTPClient instance; a default one is created if omitted.
        """
        self.repository = repository or Repository()
        self.sftp_client = sftp_client or SFTPClient()
        # Downloads are staged in the OS temp directory and removed after
        # processing (see the finally block in process_file).
        self.temp_dir = tempfile.gettempdir()

    def _record_outcome(
        self,
        filename: str,
        bankcode: str,
        remote_path: str,
        transaction_count: int,
        status: str,
        error_message: str = None,
    ) -> None:
        """Persist a ProcessedFile row describing the outcome for one file.

        error_message is only passed through to ProcessedFile when set, so
        the success paths construct the record exactly as before.
        """
        kwargs = dict(
            filename=filename,
            bankcode=bankcode,
            file_path=remote_path,
            transaction_count=transaction_count,
            status=status,
        )
        if error_message is not None:
            kwargs['error_message'] = error_message
        self.repository.mark_file_processed(ProcessedFile(**kwargs))

    def process_file(
        self,
        filename: str,
        bankcode: str,
        remote_path: str
    ) -> bool:
        """
        Process a single ACH file end-to-end.

        Workflow:
            1. Skip if already processed (idempotency guard)
            2. Download file from SFTP
            3. Parse using ACHParser
            4. Map to database format
            5. Insert to database
            6. Mark as processed (SUCCESS or FAILED)
            7. Cleanup local file (always, via finally)

        Args:
            filename: Name of file to process
            bankcode: Bank code for this file
            remote_path: Full remote path on SFTP

        Returns:
            True if successful, False otherwise
        """
        local_path = os.path.join(self.temp_dir, filename)

        try:
            logger.info(f"Starting processing: {filename} (bank: {bankcode})")

            # Step 1: Idempotency guard — a re-run must not double-insert.
            if self.repository.is_file_processed(filename):
                logger.info(f"File already processed: {filename}")
                return True

            # Step 2: Download file
            if not self.sftp_client.download_file(remote_path, local_path):
                raise RuntimeError(f"Failed to download file: {remote_path}")

            # Step 3: Parse file (metadata/summary are not needed here)
            parser = ACHParser(local_path)
            transactions, _metadata, _summary = parser.parse()
            logger.info(f"Parsed {len(transactions)} transactions from {filename}")

            if not transactions:
                # An empty file is not an error: record it as processed with
                # zero transactions so it is skipped on the next run.
                logger.warning(f"No transactions found in {filename}")
                self._record_outcome(filename, bankcode, remote_path, 0, 'SUCCESS')
                return True

            # Step 4: Map transactions to database records
            mapped_records = DataMapper.map_transactions(transactions, bankcode)
            logger.info(f"Mapped {len(mapped_records)} transactions")

            # Step 5: Insert to database; the repository validates accounts
            # and reports how many rows it skipped as invalid.
            inserted_count, skipped_count = self.repository.bulk_insert_transactions(mapped_records)

            # Step 6: Mark file as processed
            self._record_outcome(filename, bankcode, remote_path, inserted_count, 'SUCCESS')

            logger.info(
                f"Successfully processed {filename}: {inserted_count} inserted, "
                f"{skipped_count} skipped (invalid accounts)"
            )
            return True

        except Exception as e:
            logger.error(f"Error processing {filename}: {e}", exc_info=True)

            # Best-effort: record the failure so operators can see it, but
            # never let bookkeeping errors mask the original failure.
            try:
                self._record_outcome(
                    filename, bankcode, remote_path, 0, 'FAILED',
                    error_message=str(e)[:2000],
                )
            except Exception as mark_error:
                logger.error(f"Failed to mark file as failed: {mark_error}")

            return False

        finally:
            # Cleanup local file regardless of outcome.
            try:
                if os.path.exists(local_path):
                    os.remove(local_path)
                    logger.debug(f"Cleaned up local file: {local_path}")
            except Exception as e:
                logger.warning(f"Error cleaning up local file {local_path}: {e}")

    def process_files(self, files_to_process: list) -> dict:
        """
        Process multiple files sequentially.

        Args:
            files_to_process: List of (filename, bankcode, remote_path) tuples

        Returns:
            Dictionary with keys 'total', 'successful', 'failed', and
            'files' (a list of per-file result dicts).
        """
        stats = {
            'total': len(files_to_process),
            'successful': 0,
            'failed': 0,
            'files': [],
        }

        for filename, bankcode, remote_path in files_to_process:
            success = self.process_file(filename, bankcode, remote_path)
            if success:
                stats['successful'] += 1
            else:
                stats['failed'] += 1
            stats['files'].append({
                'filename': filename,
                'bankcode': bankcode,
                'success': success,
            })

        logger.info(f"Processing complete: {stats['successful']}/{stats['total']} successful")
        return stats

    def __enter__(self):
        """Context manager entry: connect the SFTP client if not connected."""
        if self.sftp_client and not self.sftp_client.sftp:
            self.sftp_client.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: always disconnect the SFTP client."""
        if self.sftp_client:
            self.sftp_client.disconnect()
|