|
1 | 1 | """ |
2 | | -Simple HMM Futures Analysis CLI |
| 2 | +HMM Futures Analysis CLI - Comprehensive Orchestration |
3 | 3 |
|
4 | | -A simplified command-line interface for HMM analysis focusing on core functionality. |
| 4 | +A comprehensive command-line interface for HMM futures market analysis |
| 5 | +with full orchestration, error handling, progress monitoring, and memory management. |
5 | 6 | """ |
6 | 7 |
|
7 | 8 | import sys |
8 | 9 | import os |
9 | 10 | import logging |
10 | 11 | import traceback |
| 12 | +import gc |
| 13 | +import psutil |
11 | 14 | from pathlib import Path |
12 | | -from typing import Optional |
| 15 | +from typing import Optional, Dict, Any |
13 | 16 | import time |
| 17 | +import json |
14 | 18 |
|
15 | 19 | import click |
16 | 20 | import pandas as pd |
|
20 | 24 | # Add src to path for imports |
21 | 25 | sys.path.insert(0, str(Path(__file__).parent / 'src')) |
22 | 26 |
|
| 27 | +# Import utilities |
23 | 28 | from utils import get_logger, setup_logging |
24 | 29 | from data_processing.csv_parser import process_csv |
25 | 30 | from data_processing.data_validation import validate_data |
| 31 | +from data_processing.feature_engineering import add_features |
| 32 | +from processing_engines.index import ProcessingEngineFactory |
| 33 | +from model_training.hmm_trainer import train_model, validate_features_for_hmm |
| 34 | +from model_training.inference_engine import StateInference |
| 35 | +from model_training.model_persistence import save_model, load_model |
26 | 36 |
|
27 | | -# Global logger |
| 37 | +# Global logger and configuration |
28 | 38 | logger = None |
| 39 | +current_memory_usage = 0.0 |
| 40 | +MEMORY_WARNING_THRESHOLD = 0.8 # 80% of available RAM |
| 41 | + |
| 42 | + |
| 43 | +def get_memory_usage() -> float: |
| 44 | + """Get current memory usage as percentage of available RAM.""" |
| 45 | + try: |
| 46 | + process = psutil.Process() |
| 47 | + memory_info = process.memory_info() |
| 48 | + memory_percent = process.memory_percent() |
| 49 | + return memory_percent / 100.0 |
| 50 | + except Exception: |
| 51 | + return 0.0 |
| 52 | + |
| 53 | + |
| 54 | +def check_memory_usage(operation: str = "operation"): |
| 55 | + """Check memory usage and log warnings if threshold exceeded.""" |
| 56 | + global current_memory_usage |
| 57 | + current_memory_usage = get_memory_usage() |
| 58 | + |
| 59 | + if current_memory_usage > MEMORY_WARNING_THRESHOLD: |
| 60 | + logger.warning( |
| 61 | + f"High memory usage during {operation}: {current_memory_usage:.1%} of available RAM" |
| 62 | + ) |
| 63 | + # Trigger garbage collection |
| 64 | + gc.collect() |
| 65 | + # Re-check after collection |
| 66 | + new_usage = get_memory_usage() |
| 67 | + if new_usage < current_memory_usage: |
| 68 | + logger.info(f"Memory reduced after garbage collection: {new_usage:.1%}") |
| 69 | + |
| 70 | + |
| 71 | +def log_performance_metrics(start_time: float, operation: str, additional_info: Dict[str, Any] = None): |
| 72 | + """Log performance metrics for completed operations.""" |
| 73 | + elapsed_time = time.time() - start_time |
| 74 | + memory_usage = get_memory_usage() |
| 75 | + |
| 76 | + metrics = { |
| 77 | + 'operation': operation, |
| 78 | + 'elapsed_time_seconds': elapsed_time, |
| 79 | + 'memory_usage_percent': memory_usage, |
| 80 | + 'timestamp': time.time() |
| 81 | + } |
| 82 | + |
| 83 | + if additional_info: |
| 84 | + metrics.update(additional_info) |
| 85 | + |
| 86 | + logger.info(f"Performance - {operation}: {elapsed_time:.2f}s, Memory: {memory_usage:.1%}") |
| 87 | + |
| 88 | + return metrics |
| 89 | + |
| 90 | + |
| 91 | +class HMMConfig: |
| 92 | + """Configuration class for HMM analysis parameters.""" |
| 93 | + |
| 94 | + def __init__(self, n_states: int = 3, covariance_type: str = 'full', |
| 95 | + n_iter: int = 100, random_state: int = 42, tol: float = 1e-3, |
| 96 | + num_restarts: int = 3): |
| 97 | + self.n_states = n_states |
| 98 | + self.covariance_type = covariance_type |
| 99 | + self.n_iter = n_iter |
| 100 | + self.random_state = random_state |
| 101 | + self.tol = tol |
| 102 | + self.num_restarts = num_restarts |
| 103 | + |
| 104 | + def to_dict(self) -> Dict[str, Any]: |
| 105 | + """Convert config to dictionary.""" |
| 106 | + return { |
| 107 | + 'n_states': self.n_states, |
| 108 | + 'covariance_type': self.covariance_type, |
| 109 | + 'n_iter': self.n_iter, |
| 110 | + 'random_state': self.random_state, |
| 111 | + 'tol': self.tol, |
| 112 | + 'num_restarts': self.num_restarts |
| 113 | + } |
| 114 | + |
| 115 | + |
| 116 | +class ProcessingConfig: |
| 117 | + """Configuration class for data processing parameters.""" |
| 118 | + |
| 119 | + def __init__(self, engine_type: str = 'streaming', chunk_size: int = 100000, |
| 120 | + indicators: Optional[Dict[str, Any]] = None): |
| 121 | + self.engine_type = engine_type |
| 122 | + self.chunk_size = chunk_size |
| 123 | + self.indicators = indicators or { |
| 124 | + 'sma_5': {'window': 5}, |
| 125 | + 'sma_10': {'window': 10}, |
| 126 | + 'sma_20': {'window': 20}, |
| 127 | + 'volatility_14': {'window': 14}, |
| 128 | + 'returns': {} |
| 129 | + } |
29 | 130 |
|
30 | 131 |
|
31 | 132 | @click.group() |
32 | | -@click.version_option(version="1.0.0", prog_name="hmm-analysis") |
| 133 | +@click.version_option(version="1.0.0", prog_name="hmm-futures-analysis") |
| 134 | +@click.option('--config-file', type=click.Path(exists=True), |
| 135 | + help='Configuration file (JSON/YAML)') |
33 | 136 | @click.option('--log-level', |
34 | 137 | type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR'], case_sensitive=False), |
35 | 138 | default='INFO', |
36 | 139 | help='Set logging level (default: INFO)') |
| 140 | +@click.option('--memory-monitor/--no-memory-monitor', default=True, |
| 141 | + help='Enable memory monitoring (default: enabled)') |
| 142 | +@click.pass_context |
| 143 | +def cli(ctx, config_file, log_level, memory_monitor): |
| 144 | + """ |
| 145 | + HMM Futures Analysis CLI - Comprehensive Orchestration |
| 146 | +
|
| 147 | + A production-ready command-line tool for comprehensive HMM futures market analysis |
| 148 | + with multi-engine processing, advanced error handling, and performance monitoring. |
| 149 | + """ |
| 150 | + # Set up logging |
| 151 | + setup_logging(level=log_level.upper()) |
| 152 | + global logger |
| 153 | + logger = get_logger(__name__) |
| 154 | + |
| 155 | + logger.info("🚀 HMM Futures Analysis CLI started") |
| 156 | + logger.info(f"📊 Log level: {log_level}") |
| 157 | + logger.info(f"🧠 Memory monitoring: {'enabled' if memory_monitor else 'disabled'}") |
| 158 | + |
| 159 | + # Load configuration if provided |
| 160 | + config = {} |
| 161 | + if config_file: |
| 162 | + try: |
| 163 | + with open(config_file, 'r') as f: |
| 164 | + if config_file.endswith('.json'): |
| 165 | + config = json.load(f) |
| 166 | + else: |
| 167 | + # Simple YAML parsing (basic) |
| 168 | + import yaml |
| 169 | + config = yaml.safe_load(f) |
| 170 | + logger.info(f"✅ Configuration loaded from {config_file}") |
| 171 | + except Exception as e: |
| 172 | + logger.error(f"❌ Failed to load configuration: {e}") |
| 173 | + raise click.ClickException(f"Configuration loading failed: {e}") |
| 174 | + |
| 175 | + # Store global config in context |
| 176 | + ctx.ensure_object(dict) |
| 177 | + ctx.obj['log_level'] = log_level |
| 178 | + ctx.obj['logger'] = logger |
| 179 | + ctx.obj['config'] = config |
| 180 | + ctx.obj['memory_monitor'] = memory_monitor |
| 181 | + |
| 182 | + # Initial memory check |
| 183 | + if memory_monitor: |
| 184 | + check_memory_usage("CLI initialization") |
37 | 185 | @click.pass_context |
38 | 186 | def cli(ctx, log_level): |
39 | 187 | """ |
|
0 commit comments