Files
ach_ui_dbtl_file_based/sftp/file_monitor.py
2026-02-02 13:06:07 +05:30

101 lines
3.0 KiB
Python

#!/usr/bin/env python3
"""
File monitoring and discovery for ACH files.
Scans SFTP directories for new files across multiple banks.
"""
import re
from typing import List, Optional, Tuple

from logging_config import get_logger
from config import get_config

from .sftp_client import SFTPClient
logger = get_logger(__name__)
class FileMonitor:
    """Monitors SFTP for new ACH files.

    Can be used as a context manager: entering connects the underlying SFTP
    client when its ``sftp`` attribute is falsy (presumably meaning no open
    session -- confirm against SFTPClient), and exiting always disconnects it.
    """

    # Compiled once at class creation. Capture groups, in order:
    # branch, day, month, year, hour, minute, second, sequence.
    _FILENAME_RE = re.compile(
        r'ACH_(\d+)_(\d{2})(\d{2})(\d{4})(\d{2})(\d{2})(\d{2})_(\d+)\.txt'
    )

    def __init__(self, sftp_client: Optional["SFTPClient"] = None):
        """
        Initialize file monitor.

        Args:
            sftp_client: SFTPClient instance (optional). When omitted, a new
                SFTPClient() is created with its default arguments.
        """
        self.config = get_config()
        # Compare against None explicitly so that an injected client is never
        # discarded just because it happens to evaluate as falsy.
        self.sftp_client = sftp_client if sftp_client is not None else SFTPClient()

    def scan_for_new_files(self, processed_filenames: List[str]) -> List[Tuple[str, str, str]]:
        """
        Scan all bank directories for new ACH files.

        For every bank code in ``config.bank_codes`` the directory
        ``{config.sftp_base_path}/{bank_code}/NACH`` is listed and every
        returned name that is not in ``processed_filenames`` is reported.
        Names are compared by bare filename only, independent of bank.

        Args:
            processed_filenames: Already processed filenames to skip.
                ``None`` or an empty collection means nothing is skipped.

        Returns:
            List of (filename, bankcode, full_remote_path) tuples, in the
            order the banks are configured and the files are listed.
        """
        # Build the lookup set once so each membership test is O(1) instead
        # of a linear scan of the processed list for every remote file.
        already_processed = set(processed_filenames or ())

        new_files = []
        for bank_code in self.config.bank_codes:
            remote_path = f"{self.config.sftp_base_path}/{bank_code}/NACH"
            # NOTE(review): 'pattern' is presumably a glob applied by
            # SFTPClient.list_files -- confirm against that implementation.
            files = self.sftp_client.list_files(remote_path, pattern='ACH_*.txt')
            for filename in files:
                if filename in already_processed:
                    logger.debug(f"Skipping already processed file: {filename}")
                    continue
                full_path = f"{remote_path}/{filename}"
                new_files.append((filename, bank_code, full_path))
                logger.info(f"Found new file: {filename} (bank: {bank_code})")

        logger.info(f"Scan complete: Found {len(new_files)} new files")
        return new_files

    @staticmethod
    def parse_filename(filename: str) -> dict:
        """
        Parse ACH filename to extract metadata.

        Expected format: ACH_{branch}_{DDMMYYYYHHMMSS}_{seq}.txt
        Example: ACH_99944_05122025102947_001.txt

        The whole name must match the format; names with extra leading or
        trailing characters (e.g. ``...001.txt.bak``) are rejected. Only the
        digit counts are checked -- date/time components are not validated
        for range and are returned as strings.

        Args:
            filename: Filename to parse

        Returns:
            Dictionary with keys 'filename', 'branch', 'day', 'month',
            'year', 'hour', 'minute', 'second', 'sequence' and 'timestamp'
            ("DD/MM/YYYY HH:MM:SS"), or an empty dict if parsing fails.
        """
        # fullmatch (not match) so that trailing junk after ".txt" cannot
        # slip through as a valid ACH filename.
        match = FileMonitor._FILENAME_RE.fullmatch(filename)
        if not match:
            logger.warning(f"Could not parse filename: {filename}")
            return {}

        branch, day, month, year, hour, minute, second, seq = match.groups()
        return {
            'filename': filename,
            'branch': branch,
            'day': day,
            'month': month,
            'year': year,
            'hour': hour,
            'minute': minute,
            'second': second,
            'sequence': seq,
            'timestamp': f"{day}/{month}/{year} {hour}:{minute}:{second}",
        }

    def __enter__(self) -> "FileMonitor":
        """Context manager entry: connect the client if needed, return self."""
        if not self.sftp_client.sftp:
            self.sftp_client.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit: disconnect the client; exceptions propagate."""
        self.sftp_client.disconnect()