From d765b137c36ce7916b286bc1eb9e07c3ad3ce7e3 Mon Sep 17 00:00:00 2001 From: kabanosk <56364007+Kabanosk@users.noreply.github.com> Date: Thu, 29 Jan 2026 23:01:35 +0100 Subject: [PATCH] Add penalty for minimal number of stocks considered in final portfolio --- main.py | 40 +++++++++++++++++++++++++++------------- src/evolution.py | 22 ++++++++++++++++++++++ 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/main.py b/main.py index d2e0206..ec7199a 100644 --- a/main.py +++ b/main.py @@ -13,8 +13,6 @@ from src.evolution import setup_deap, run_nsga2 from src.plots import ( plot_pareto_vs_markowitz, - plot_portfolio_vs_baseline, - plot_final_portfolio, plot_performance_summary, ) from src.utils import optimize_markowitz, maximum_drawdown, sharpe_ratio @@ -165,6 +163,7 @@ def main(): exp_id = f"experiment-{now.strftime('%Y%m%d')}-{now.strftime('%H%M%S')}" output_dir = os.path.join("plots", exp_id) os.makedirs(output_dir, exist_ok=True) + os.makedirs(os.path.join(output_dir, "portfolios"), exist_ok=True) print(f"Saving plots to: {output_dir}") tickers = TICKER_SETS[args.ticker_set] @@ -237,6 +236,7 @@ def main(): stock_covariances.values, historical_returns=historical_returns, risk_metric=args.risk_metric, + min_positions=min(3, len(stock_names)//2), ) benchmark_prices = load_benchmark(benchmark_ticker, start_date) @@ -282,8 +282,10 @@ def main(): ) pareto_front = tools.sortNondominated(pop, len(pop), first_front_only=True)[0] - best = max(pareto_front, key=lambda ind: ind.fitness.values[0]) - print(f"Final best: return={best.fitness.values[0]:.4f}, risk={best.fitness.values[1]:.4f}") + sorted_front = sorted(pareto_front, key=lambda ind: ind.fitness.values[0], reverse=True) + n_top = max(5, int(args.pop_size * 0.1)) + top_candidates = sorted_front[:n_top] + print(f"Plotting top {len(top_candidates)} portfolios from the Pareto front...") plot_pareto_vs_markowitz( pareto_front, @@ -298,15 +300,27 @@ def main(): covariances=stock_covariances.values, ) - plot_performance_summary( - prices, - np.array(best), - np.array(stock_names), - index_prices=benchmark_prices, - title="Final Portfolio Performance", - output_dir=output_dir, - show=not args.no_plots, - ) + for i, ind in enumerate(top_candidates): + ret = ind.fitness.values[0] + risk = ind.fitness.values[1] + + weights = np.array(ind) + holdings = [(name, w) for name, w in zip(stock_names, weights) if w > 0.1] + holdings.sort(key=lambda x: x[1], reverse=True) + + print(f"\nCandidate #{i + 1}: Return={ret:.4f}, Risk={risk:.4f}") + print(f"Top holdings: {', '.join([f'{h[0]}: {h[1] * 100:.1f}%' for h in holdings[:5]])}") + + plot_performance_summary( + prices, + weights, + np.array(stock_names), + index_prices=benchmark_prices, + title=f"Rank #{i + 1} Portfolio (Ret: {ret * 100:.1f}%)", + output_dir=output_dir, + show=not args.no_plots, + filename=f"portfolios/final_portfolio_rank_{i + 1:02d}.png" + ) if __name__ == "__main__": diff --git a/src/evolution.py b/src/evolution.py index 623aa46..b6ea464 100644 --- a/src/evolution.py +++ b/src/evolution.py @@ -24,6 +24,9 @@ def evaluate_portfolio( covariances: np.ndarray, hist_returns: np.ndarray, metric: str, + min_positions: int = 0, + penalty_factor: float = 0.1, + min_weight: float = 0.01, ): if np.any(np.isnan(portfolio)) or np.any(np.isinf(portfolio)): return float("nan"), float("nan") @@ -49,6 +52,16 @@ def evaluate_portfolio( else: raise ValueError(f"Unknown risk metric: {metric}") + # Add penalty if portfolio is too small + active_positions = np.sum(portfolio > min_weight) + if active_positions < min_positions: + missing = min_positions - active_positions + tax_rate = missing * penalty_factor + penalty_amount = np.abs(portfolio_return) * tax_rate + + portfolio_return -= penalty_amount + risk_value += (risk_value * tax_rate) + return portfolio_return, risk_value @@ -81,6 +94,9 @@ def setup_deap( stock_covariances, historical_returns=None, risk_metric="std", + min_positions=0, + min_weight=0.01, + penalty_factor=0.1, mutation_kwargs=None, crossover_kwargs=None, ): @@ -92,6 +108,9 @@ def setup_deap( stock_covariances: Covariance matrix of stock returns historical_returns: Historical returns array (n_days, n_assets) - required for mdd/sharpe risk_metric: Risk metric to use - 'std' (volatility), 'mdd' (max drawdown), or 'sharpe' + min_positions: Minimum number of assets to use for portfolio before adding penalty. + min_weight: Minimum weight threshold for an asset to be considered an "active position" (e.g., 0.01 for 1% of portfolio). + penalty_factor: The penalty multiplier applied to return and risk for each missing position (e.g., 0.1 means 10% penalty per missing asset). mutation_kwargs: Dictionary of kwargs for the mutation function crossover_kwargs: Dictionary of kwargs for the crossover function """ @@ -118,6 +137,9 @@ def create_individual(n_assets: int): covariances=stock_covariances, hist_returns=historical_returns, metric=risk_metric, + min_positions=min_positions, + penalty_factor=penalty_factor, + min_weight=min_weight, ) mut_kwargs = mutation_kwargs or {}