Files
ach_ui_dbtl_file_based/processors/file_processor.py
2026-02-02 13:06:07 +05:30

180 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""
Main file processor for end-to-end ACH file processing.
Orchestrates download, parsing, mapping, and database insertion.
"""
import os
import tempfile
from pathlib import Path
from logging_config import get_logger
from ach_parser import ACHParser
from db.repository import Repository
from db.models import ProcessedFile
from sftp.sftp_client import SFTPClient
from .data_mapper import DataMapper
logger = get_logger(__name__)
class FileProcessor:
    """Processes ACH files end-to-end.

    Orchestrates the full pipeline for each file: SFTP download, parsing via
    ACHParser, mapping to database records, bulk insertion, bookkeeping of
    processed files, and cleanup of the local temp copy.
    """

    def __init__(self, repository: Repository = None, sftp_client: SFTPClient = None):
        """
        Initialize file processor.

        Args:
            repository: Repository instance (optional; a default is created)
            sftp_client: SFTPClient instance (optional; a default is created)
        """
        self.repository = repository or Repository()
        self.sftp_client = sftp_client or SFTPClient()
        # Downloaded files are staged in the system temp directory.
        self.temp_dir = tempfile.gettempdir()

    def _record_outcome(
        self,
        filename: str,
        bankcode: str,
        remote_path: str,
        transaction_count: int,
        status: str,
        error_message: str = None,
    ) -> None:
        """Persist a ProcessedFile row describing the outcome for one file.

        Centralizes the three call sites (empty file, success, failure) that
        previously built ProcessedFile inline. ``error_message`` is only
        passed through when set, so success rows keep the model's default.
        """
        kwargs = dict(
            filename=filename,
            bankcode=bankcode,
            file_path=remote_path,
            transaction_count=transaction_count,
            status=status,
        )
        if error_message is not None:
            kwargs['error_message'] = error_message
        self.repository.mark_file_processed(ProcessedFile(**kwargs))

    def process_file(
        self,
        filename: str,
        bankcode: str,
        remote_path: str
    ) -> bool:
        """
        Process a single ACH file end-to-end.

        Workflow:
            1. Skip if already processed (idempotency)
            2. Download file from SFTP
            3. Parse using ACHParser
            4. Map to database format
            5. Insert to database
            6. Mark as processed
            7. Cleanup local file (always, via finally)

        Args:
            filename: Name of file to process
            bankcode: Bank code for this file
            remote_path: Full remote path on SFTP

        Returns:
            True if successful, False otherwise
        """
        local_path = os.path.join(self.temp_dir, filename)
        try:
            logger.info("Starting processing: %s (bank: %s)", filename, bankcode)

            # Step 1: Idempotency guard — never double-insert a file's
            # transactions if it was already recorded as processed.
            if self.repository.is_file_processed(filename):
                logger.info("File already processed: %s", filename)
                return True

            # Step 2: Download file
            if not self.sftp_client.download_file(remote_path, local_path):
                raise Exception(f"Failed to download file: {remote_path}")

            # Step 3: Parse file
            parser = ACHParser(local_path)
            transactions, metadata, summary = parser.parse()
            logger.info("Parsed %d transactions from %s", len(transactions), filename)

            if not transactions:
                logger.warning("No transactions found in %s", filename)
                # Still mark as processed, but with 0 transactions, so the
                # file is not re-fetched on the next run.
                self._record_outcome(filename, bankcode, remote_path, 0, 'SUCCESS')
                return True

            # Step 4: Map transactions
            mapped_records = DataMapper.map_transactions(transactions, bankcode)
            logger.info("Mapped %d transactions", len(mapped_records))

            # Step 5: Insert to database
            inserted_count = self.repository.bulk_insert_transactions(mapped_records)

            # Step 6: Mark file as processed
            self._record_outcome(
                filename, bankcode, remote_path, inserted_count, 'SUCCESS'
            )
            logger.info(
                "Successfully processed %s: %d transactions inserted",
                filename, inserted_count
            )
            return True
        except Exception as e:
            logger.error("Error processing %s: %s", filename, e, exc_info=True)
            # Best-effort: record the failure so the file can be inspected
            # and retried; never let bookkeeping mask the original error.
            try:
                self._record_outcome(
                    filename, bankcode, remote_path, 0, 'FAILED',
                    error_message=str(e)[:2000]
                )
            except Exception as mark_error:
                logger.error("Failed to mark file as failed: %s", mark_error)
            return False
        finally:
            # Always remove the downloaded temp copy, success or failure.
            try:
                if os.path.exists(local_path):
                    os.remove(local_path)
                    logger.debug("Cleaned up local file: %s", local_path)
            except Exception as e:
                logger.warning("Error cleaning up local file %s: %s", local_path, e)

    def process_files(self, files_to_process: list) -> dict:
        """
        Process multiple files.

        Args:
            files_to_process: List of (filename, bankcode, remote_path) tuples

        Returns:
            Dictionary with processing statistics:
            'total', 'successful', 'failed' counts and a per-file 'files' list.
        """
        stats = {
            'total': len(files_to_process),
            'successful': 0,
            'failed': 0,
            'files': []
        }
        for filename, bankcode, remote_path in files_to_process:
            success = self.process_file(filename, bankcode, remote_path)
            # Single keyed increment instead of two conditional ones.
            stats['successful' if success else 'failed'] += 1
            stats['files'].append({
                'filename': filename,
                'bankcode': bankcode,
                'success': success
            })
        logger.info(
            "Processing complete: %d/%d successful",
            stats['successful'], stats['total']
        )
        return stats

    def __enter__(self):
        """Context manager entry: open the SFTP connection if not connected."""
        if self.sftp_client and not self.sftp_client.sftp:
            self.sftp_client.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: always disconnect the SFTP client."""
        if self.sftp_client:
            self.sftp_client.disconnect()