#!/usr/bin/env python3
"""
File monitoring and discovery for ACH files.
Scans SFTP directories for new files across multiple banks.
"""

import re
from typing import List, Optional, Tuple

from logging_config import get_logger
from config import get_config
from .sftp_client import SFTPClient

logger = get_logger(__name__)

# ACH_{branch}_{DDMMYYYYHHMMSS}_{seq}.txt -- compiled once so repeated
# parse_filename() calls do not go back through the `re` pattern cache.
_ACH_FILENAME_RE = re.compile(
    r'ACH_(\d+)_(\d{2})(\d{2})(\d{4})(\d{2})(\d{2})(\d{2})_(\d+)\.txt'
)


class FileMonitor:
    """Monitors SFTP for new ACH files."""

    def __init__(self, sftp_client: Optional[SFTPClient] = None):
        """
        Initialize file monitor.

        Args:
            sftp_client: SFTPClient instance (optional). When omitted (or
                falsy) a new SFTPClient() is created.
        """
        self.config = get_config()
        self.sftp_client = sftp_client or SFTPClient()

    def scan_for_new_files(
        self, processed_filenames: List[str]
    ) -> List[Tuple[str, str, str]]:
        """
        Scan all bank directories for new ACH files.

        For every code in ``config.bank_codes`` the directory
        ``{config.sftp_base_path}/{bank_code}/NACH`` is listed through the
        SFTP client with the pattern ``ACH_*.txt``; names that are not in
        ``processed_filenames`` are reported as new.

        Args:
            processed_filenames: Already processed filenames to skip.
                ``None`` is treated as "nothing processed yet".

        Returns:
            List of (filename, bankcode, full_remote_path) tuples
        """
        # Build the lookup set once: membership tests against a list are
        # O(n) each, and this check runs for every file of every bank.
        processed = set(processed_filenames or ())
        new_files: List[Tuple[str, str, str]] = []

        for bank_code in self.config.bank_codes:
            remote_path = f"{self.config.sftp_base_path}/{bank_code}/NACH"
            files = self.sftp_client.list_files(remote_path, pattern='ACH_*.txt')

            for filename in files:
                if filename in processed:
                    logger.debug(f"Skipping already processed file: {filename}")
                    continue

                full_path = f"{remote_path}/{filename}"
                new_files.append((filename, bank_code, full_path))
                logger.info(f"Found new file: {filename} (bank: {bank_code})")

        logger.info(f"Scan complete: Found {len(new_files)} new files")
        return new_files

    @staticmethod
    def parse_filename(filename: str) -> dict:
        """
        Parse ACH filename to extract metadata.

        Expected format: ACH_{branch}_{DDMMYYYYHHMMSS}_{seq}.txt
        Example: ACH_99944_05122025102947_001.txt

        The whole name must match the format: names with trailing text
        after ``.txt`` (e.g. ``....txt.bak``) are rejected. The date and
        time fields are returned as the digit strings found in the name;
        they are not checked for calendar validity.

        Args:
            filename: Filename to parse

        Returns:
            Dictionary with extracted metadata or empty dict if parse fails
        """
        # fullmatch, not match: match() only anchors the start and would
        # accept anything that merely begins with a valid ACH name.
        match = _ACH_FILENAME_RE.fullmatch(filename)
        if not match:
            logger.warning(f"Could not parse filename: {filename}")
            return {}

        branch, day, month, year, hour, minute, second, seq = match.groups()
        return {
            'filename': filename,
            'branch': branch,
            'day': day,
            'month': month,
            'year': year,
            'hour': hour,
            'minute': minute,
            'second': second,
            'sequence': seq,
            'timestamp': f"{day}/{month}/{year} {hour}:{minute}:{second}",
        }

    def __enter__(self) -> "FileMonitor":
        """Context manager entry: connect the client unless `sftp` is already set."""
        if not self.sftp_client.sftp:
            self.sftp_client.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit: disconnect the client; exceptions propagate."""
        self.sftp_client.disconnect()