101 lines
3.0 KiB
Python
101 lines
3.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
File monitoring and discovery for ACH files.
|
|
Scans SFTP directories for new files across multiple banks.
|
|
"""
|
|
|
|
import re
from typing import List, Optional, Tuple

from logging_config import get_logger
from config import get_config

from .sftp_client import SFTPClient
|
|
|
|
# Module-level logger, obtained from the project's logging_config helper and
# named after this module.
logger = get_logger(__name__)
|
|
|
|
|
|
class FileMonitor:
    """Monitors SFTP for new ACH files.

    For every bank code in the configuration the monitor lists
    ``{sftp_base_path}/{bank_code}/NACH`` for ``ACH_*.txt`` files and reports
    the ones whose names the caller has not processed yet.

    Usable as a context manager: entering connects the SFTP client when its
    ``sftp`` attribute is falsy, exiting always disconnects it.
    """

    # Layout of a complete ACH filename:
    #   ACH_{branch}_{DDMMYYYYHHMMSS}_{seq}.txt
    _FILENAME_PATTERN = (
        r'ACH_(\d+)_(\d{2})(\d{2})(\d{4})(\d{2})(\d{2})(\d{2})_(\d+)\.txt'
    )

    def __init__(self, sftp_client: Optional["SFTPClient"] = None):
        """
        Initialize file monitor.

        Args:
            sftp_client: SFTPClient instance to use. When omitted (None), a
                new SFTPClient is created with its default arguments.
        """
        self.config = get_config()
        # Compare against None explicitly so that a caller-supplied client is
        # never replaced just because it happens to evaluate as falsy.
        if sftp_client is not None:
            self.sftp_client = sftp_client
        else:
            self.sftp_client = SFTPClient()

    def scan_for_new_files(self, processed_filenames: List[str]) -> List[Tuple[str, str, str]]:
        """
        Scan all bank directories for new ACH files.

        Args:
            processed_filenames: Already processed filenames to skip. None or
                an empty collection means nothing has been processed yet.

        Returns:
            List of (filename, bankcode, full_remote_path) tuples, in the
            order the banks are configured and the files are listed.
        """
        # Build the lookup once: the membership test below runs for every
        # remote file, and a set keeps each test O(1) instead of O(n).
        already_processed = set(processed_filenames or ())
        new_files = []

        for bank_code in self.config.bank_codes:
            remote_path = f"{self.config.sftp_base_path}/{bank_code}/NACH"
            files = self.sftp_client.list_files(remote_path, pattern='ACH_*.txt')

            for filename in files:
                if filename in already_processed:
                    logger.debug(f"Skipping already processed file: {filename}")
                    continue

                full_path = f"{remote_path}/{filename}"
                new_files.append((filename, bank_code, full_path))
                logger.info(f"Found new file: {filename} (bank: {bank_code})")

        logger.info(f"Scan complete: Found {len(new_files)} new files")
        return new_files

    @staticmethod
    def parse_filename(filename: str) -> dict:
        """
        Parse ACH filename to extract metadata.

        Expected format: ACH_{branch}_{DDMMYYYYHHMMSS}_{seq}.txt
        Example: ACH_99944_05122025102947_001.txt

        The whole filename must match the format; names with extra leading or
        trailing characters (for example ``....txt.tmp``) are rejected.

        Args:
            filename: Filename to parse

        Returns:
            Dictionary with extracted metadata (all values are strings) or
            empty dict if parse fails
        """
        # fullmatch, not match: match only anchors at the start and would
        # accept any trailing garbage after ".txt".
        match = re.fullmatch(FileMonitor._FILENAME_PATTERN, filename)

        if not match:
            logger.warning(f"Could not parse filename: {filename}")
            return {}

        branch, day, month, year, hour, minute, second, seq = match.groups()

        return {
            'filename': filename,
            'branch': branch,
            'day': day,
            'month': month,
            'year': year,
            'hour': hour,
            'minute': minute,
            'second': second,
            'sequence': seq,
            'timestamp': f"{day}/{month}/{year} {hour}:{minute}:{second}",
        }

    def __enter__(self):
        """Context manager entry: connect the client if needed, return self."""
        if not self.sftp_client.sftp:
            self.sftp_client.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: disconnect the client.

        Returns None, so an exception raised inside the ``with`` body is not
        suppressed.
        """
        self.sftp_client.disconnect()
|