-
Notifications
You must be signed in to change notification settings - Fork 5
[Alpha] Add new alpha and alpha evaluation methods #69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
2253d84
e9cc56b
b7273a3
d8f609f
416b2f3
4ec2a30
bedb761
e01c079
7514cb3
cdb75e4
9d675ae
5f94d40
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| import torch | ||
| from torch import nn | ||
| from torch.utils.data import Dataset, DataLoader | ||
| import numpy as np | ||
| from pathlib import Path | ||
|
|
||
class AR(nn.Module):
    """Linear autoregressive model: one affine map from `lag` inputs to one output.

    Attributes:
        lag: Number of input features expected on the last axis.
        linear: The single `nn.Linear(lag, 1)` layer (with bias) holding all
            trainable parameters.
    """

    def __init__(self, lag):
        super().__init__()
        self.lag = lag
        self.linear = nn.Linear(in_features=lag, out_features=1, bias=True)

    def forward(self, x):
        """Apply the linear layer to `x` and return its output."""
        prediction = self.linear(x)
        return prediction
|
|
||
def train_one_epoch(loader: DataLoader, model: nn.Module, loss_fn, optimizer):
    """Run one optimisation pass over `loader` and return the mean sample loss.

    Args:
        loader: Yields `(inputs, targets)` batches; `loader.dataset` must
            support `len()`.
        model: Module being trained; it is switched to training mode.
        loss_fn: Callable `(pred, target) -> scalar tensor`.
        optimizer: Optimizer that owns the parameters of `model`.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the
        dataset size.
    """
    model.train()
    n_samples = len(loader.dataset)
    weighted_loss_sum = 0.0
    for inputs, targets in loader:
        inputs = inputs.float()
        targets = targets.float()

        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a short final batch does not skew the mean.
        weighted_loss_sum += batch_loss.item() * inputs.size(0)

    epoch_loss = weighted_loss_sum / n_samples
    print(f"Average traning loss: {epoch_loss: .10e}")
    return epoch_loss
|
|
||
def train(
    train_dataset: Dataset,
    epoches: int,
    ar_model: nn.Module,
    lr: float = 1e-3,
    batch_size: int = 64,
):
    """Fit `ar_model` on `train_dataset` with Adam and an L1 (MAE) loss.

    Args:
        train_dataset: Dataset yielding `(inputs, target)` pairs.
        epoches: Number of full passes over the dataset.
        ar_model: Model to optimise in place.
        lr: Adam learning rate (defaults to the previously hard-coded 1e-3).
        batch_size: Samples per batch (defaults to the previously hard-coded 64).

    Returns:
        List with the average training loss of each epoch, in order. The list
        is empty when `epoches` is 0.
    """
    loss_fn = nn.L1Loss()
    optimizer = torch.optim.Adam(ar_model.parameters(), lr=lr)
    # shuffle=False: batches are drawn in dataset order on every epoch.
    loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)
    return [
        train_one_epoch(loader, ar_model, loss_fn, optimizer)
        for _ in range(epoches)
    ]
|
|
||
def build_train(data, T):
    """Build sliding-window samples for one-step-ahead prediction.

    Sample `i` pairs the window `data[i : i + T]` with the target `data[i + T]`.

    Args:
        data: Sequence or array indexed along its first axis.
        T: Window length (number of lagged values per sample).

    Returns:
        Tuple `(X, y)` of NumPy arrays with `X.shape[:2] == (n - T, T)` and
        `len(y) == n - T`, where `n = len(data)`. When `n <= T` both arrays are
        empty and `X` keeps its second axis, i.e. shape `(0, T)`.
    """
    series = np.asarray(data)
    n_samples = max(len(series) - T, 0)
    # Row i of the index matrix is [i, i + 1, ..., i + T - 1]; indexing with it
    # gathers every window in one vectorised step and returns a fresh copy.
    window_idx = np.arange(n_samples)[:, None] + np.arange(T)[None, :]
    X = series[window_idx]
    y = series[T:].copy()
    return X, y
|
|
||
class ArDataset(Dataset):
    """Torch dataset wrapping feature windows and their targets as float32 tensors.

    Attributes:
        X: Float32 tensor built from the `X` argument.
        y: Float32 tensor built from the `y` argument with an extra axis
            inserted at position 1.
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.float32)
        self.X = features
        # Insert axis 1 so a 1-D target vector becomes a column.
        self.y = targets.unsqueeze(1)

    def __len__(self):
        """Return the number of samples (size of the first axis of `X`)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the `(features, target)` pair stored at `idx`."""
        return self.X[idx], self.y[idx]
|
|
||
def infer_mu(model, X):
    """Run `model` on `X` without gradient tracking and return a NumPy array.

    Args:
        model: Torch module; it is switched to evaluation mode.
        X: Array-like convertible to a float32 tensor.

    Returns:
        The model output with axis 1 squeezed out, as a NumPy array on the CPU.
    """
    model.eval()
    inputs = torch.tensor(X, dtype=torch.float32)
    with torch.no_grad():
        outputs = model(inputs)
    return outputs.squeeze(1).cpu().numpy()
|
|
||
def load_ar_model(model_path="./ar_model.pth", lag=90):
    """Build an `AR` model and load its weights from a saved state dict.

    Args:
        model_path: Path (string or path-like) to a state dict saved with
            `torch.save(model.state_dict(), ...)`.
        lag: Input width of the model; must match the checkpoint's layer shape,
            otherwise `load_state_dict` raises.

    Returns:
        The `AR` model in evaluation mode with the loaded weights.
    """
    model = AR(lag=lag)
    # map_location="cpu" lets a checkpoint saved from a CUDA device load on a
    # CPU-only host; the freshly built model lives on the CPU anyway.
    # weights_only=True restricts unpickling to tensors and plain containers.
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    return model
|
|
||
# Locate the checkpoint next to this module so the lookup does not depend on
# the process's current working directory.
base_dir = Path(__file__).resolve().parent
model_path = base_dir / "ar_model.pth"
# NOTE: this runs at import time, so importing the module reads the checkpoint
# from disk and fails if the file is missing. The lag is left at
# load_ar_model's default.
trained_ar_model = load_ar_model(model_path)
|
|
||
|
CYX22222003 marked this conversation as resolved.
Outdated
|
||
| # if __name__ == "__main__": | ||
| # print("1. Define model") | ||
| # ar_model = AR(lag=90) | ||
|
|
||
| # print("2. Load data") | ||
| # data = load_data() | ||
| # log_ret = data["log_return"].to_numpy() | ||
| # log_ret = log_ret[np.isfinite(log_ret)] | ||
| # X, y = build_train(log_ret, 90) | ||
| # print(X[0], y[0]) | ||
|
|
||
| # print("3. Prepare training") | ||
| # train_idx = int(len(X) * 0.85) | ||
| # X_train = X[:train_idx] | ||
| # y_train = y[:train_idx] | ||
| # dataset = ArDataset(X_train, y_train) | ||
|
|
||
| # print("4. Train") | ||
| # train(dataset, 10, ar_model) | ||
|
|
||
| # print("5. Test inference") | ||
| # ar_model.eval() | ||
| # X_test = X[train_idx:] | ||
| # y_test = y[train_idx:] | ||
|
|
||
| # mu_test = infer_mu(ar_model, X_test) | ||
| # residuals = y_test - mu_test | ||
|
|
||
| # mae = np.mean(np.abs(residuals)) | ||
| # print("Test MAE:", mae) | ||
|
|
||
| # print("6. Save model") | ||
| # torch.save(ar_model.state_dict(), "./ar_model.pth") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| import numpy as np | ||
| import pandas as pd | ||
|
|
||
def load_data(data_path="./binance/BTC_USDT-5m.feather", start_date="2021-01-01"):
    """Load candles from a Feather file and add a `log_return` column.

    Args:
        data_path: Location of the Feather file. The default is relative to the
            current working directory; pass the correct path for your setup.
        start_date: Only rows whose `date` is strictly greater than this value
            are kept.

    Returns:
        DataFrame with the filtered rows plus `log_return`, the natural log of
        each `close` divided by the previous row's `close` (NaN on the first
        row).
    """
    df = pd.read_feather(data_path)
    # Copy the filtered slice so the column assignment below writes to an
    # independent frame instead of triggering SettingWithCopyWarning.
    df = df[df["date"] > start_date].copy()
    df["log_return"] = np.log(df["close"] / df["close"].shift(1))
    print(df.head())
    return df
|
|
||
def get_log_return_series():
    """Return the finite `log_return` values from `load_data()` as a NumPy array.

    NaN and infinite entries are dropped, so the result can be shorter than
    the loaded frame.
    """
    returns = load_data()["log_return"].to_numpy()
    finite_only = np.isfinite(returns)
    return returns[finite_only]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| import numpy as np | ||
| import torch | ||
| from pandas import DataFrame | ||
| from alpha.interface import IAlpha | ||
| from alpha.AutoRegression.autoregression import trained_ar_model, build_train | ||
|
CYX22222003 marked this conversation as resolved.
|
||
|
|
||
|
CYX22222003 marked this conversation as resolved.
|
||
# Checkpoint location, relative to the current working directory.
# NOTE(review): not referenced by the code visible in this module, which uses
# the already-loaded `trained_ar_model` instead; confirm before relying on it.
AR_MODEL_PATH = "./ar_model.pth"
# Window length handed to build_train when assembling model inputs.
# NOTE(review): presumably has to equal the lag the checkpoint was trained
# with; confirm against the training script.
AR_LAG = 90
|
|
||
class AutoregressionAlpha(IAlpha):
    """Alpha that scores each bar with the AR model's one-step-ahead log-return forecast."""

    def process(self) -> DataFrame:
        """Return a copy of `self.dataframe` with `log_return` and `ar_pred` columns.

        `ar_pred` at a row is the model's forecast of that row's log return,
        computed from the `AR_LAG` preceding finite log returns. Rows without
        enough history, and rows whose own log return is not finite, hold NaN.

        Assumes `self.dataframe` (provided by the `IAlpha` base class) has a
        `close` column.
        """
        df = self.dataframe.copy()

        df["log_return"] = np.log(df["close"] / df["close"].shift(1))
        raw_ret = df["log_return"].to_numpy()

        # Remember WHERE the finite returns sit. The first row is always NaN,
        # but a zero or missing close elsewhere adds more NaN/inf entries, so a
        # fixed offset cannot map predictions back to rows.
        finite_mask = np.isfinite(raw_ret)
        finite_rows = np.flatnonzero(finite_mask)
        log_ret = raw_ret[finite_mask]

        ar_pred = np.full(df.shape[0], np.nan)
        if len(log_ret) > AR_LAG:
            X, _ = build_train(log_ret, AR_LAG)
            # Reuse the model loaded once at import time.
            ar_model = trained_ar_model
            ar_model.eval()
            with torch.no_grad():
                X_tensor = torch.tensor(X, dtype=torch.float32)
                preds = ar_model(X_tensor).squeeze(1).cpu().numpy()
            # preds[i] forecasts log_ret[i + AR_LAG], which lives at dataframe
            # row finite_rows[i + AR_LAG].
            ar_pred[finite_rows[AR_LAG:]] = preds

        df["ar_pred"] = ar_pred
        return df
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,4 +12,20 @@ def process(self) -> DataFrame: | |
| This is to decouple the pipulation of indicator from IStrategy | ||
| """ | ||
| pass | ||
|
|
||
|
|
||
# Forward-return horizons, in rows (bars), at which each alpha column is scored.
fwd_ret_timeframe = [1, 5, 10, 20, 90]


class AlphaEvaluator:
    """Scores alpha columns against forward returns of the `close` column.

    Args:
        dataframe: Input frame handed to the alpha's constructor.
        alpha: Alpha class (not an instance); it is called as
            `alpha(dataframe)` and the resulting object must provide
            `process()` returning a DataFrame.
    """

    def __init__(self, dataframe: DataFrame, alpha: "type[IAlpha]"):
        self.df = dataframe
        self.alpha = alpha(dataframe)

    def evaluate_information_coefficient(self, alpha_names):
        """Compute the Spearman rank IC of each alpha column at each horizon.

        Args:
            alpha_names: Names of columns in the processed frame to evaluate.

        Returns:
            Dict mapping `(alpha_name, horizon)` to the Spearman correlation
            between that column and the `horizon`-row forward return of
            `close`. The value is NaN when no row has both values present.

        After the call, `self.df` holds the frame returned by
        `self.alpha.process()`, unmodified.
        """
        self.df = self.alpha.process()
        close = self.df["close"]
        # Return earned over the next t rows: close[i + t] / close[i] - 1.
        # Computed once per horizon from the full frame so results do not
        # depend on the order in which alphas and horizons are evaluated.
        forward_returns = {
            t: close.shift(-t) / close - 1 for t in fwd_ret_timeframe
        }

        out = {}
        for a in alpha_names:
            signal = self.df[a]
            for t, fwd_ret in forward_returns.items():
                # Series.corr ignores rows where either side is NaN, so no
                # destructive dropna on self.df is needed.
                out[(a, t)] = signal.corr(fwd_ret, method="spearman")
        return out
Uh oh!
There was an error while loading. Please reload this page.