Skip to content

Commit dd21383

Browse files
authored
Add files via upload
1 parent 0bcbad4 commit dd21383

File tree

5 files changed

+223
-101
lines changed

5 files changed

+223
-101
lines changed

scutquant/account.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class Account:
2525

2626
def __init__(self, init_cash: float, position: dict, available: dict, init_price: dict):
2727
self.cash = init_cash # 可用资金
28-
self.cash_available = deepcopy(init_cash)
28+
# self.cash_available = deepcopy(init_cash)
2929
self.position = position # keys应包括所有资产,如无头寸则值为0,以便按照keys更新持仓
3030
self.available = available # 需要持有投资组合的底仓,否则按照T+1制度无法做空
3131
self.price = init_price # 资产价格
@@ -39,7 +39,7 @@ def __init__(self, init_cash: float, position: dict, available: dict, init_price
3939
self.turnover = []
4040
self.trade_value = 0.0
4141

42-
def generate_total_order(self, order: dict, freq: int) -> dict:
42+
def adjust_order(self, order: dict, freq: int) -> dict:
4343
order_offset = self.auto_offset(freq)
4444
if order_offset is not None:
4545
for key in order_offset["buy"].keys():
@@ -55,8 +55,8 @@ def generate_total_order(self, order: dict, freq: int) -> dict:
5555
order["sell"][key] += order_offset["sell"][key]
5656
return order
5757

58-
def check_order(self, order: dict, price: dict, cost_rate: float = 0.0015, min_cost: float = 5) -> \
59-
tuple[dict, bool]: # 检查是否有足够的资金完成order, 如果不够则不买
58+
def check_order(self, order: dict, price: dict, cost_rate: float = 0.0015, min_cost: float = 5) -> dict:
59+
# 检查是否有足够的资金完成order, 如果不够则调整订单(sell不变, buy按比例减少)
6060
cash_inflow = 0.0
6161
cash_outflow = 0.0
6262
order_copy = deepcopy(order)
@@ -74,11 +74,13 @@ def check_order(self, order: dict, price: dict, cost_rate: float = 0.0015, min_c
7474
order["buy"].pop(code)
7575
cost = max(min_cost, (cash_inflow + cash_outflow) * cost_rate)
7676
cash_needed = cash_outflow - cash_inflow + cost
77-
# print("cash_needed: ", cash_needed, "cash: ", self.cash)
7877
if cash_needed > self.cash:
79-
return order, False
80-
else:
81-
return order, True
78+
# c_n = buy + r(buy + sell) - sell > c, 令n * buy + r(n * buy + sell) - sell = c
79+
# 则n * buy * (1 + r) - (1 - r) * sell = c, 即 n * buy * (1 + r) = c + (1 - r)sell
80+
# n = (c + (1 - r)sell) / buy(1 + r)
81+
ratio = (self.cash + (1 - cost_rate) * cash_inflow) / (cash_outflow * (1 + cost_rate))
82+
order["buy"] = {k: int(v * ratio + 0.5) for k, v in order["buy"].items()} # 这样虽然不是整手下单, 但只要在1手以上都没问题
83+
return order
8284

8385
def update_price(self, price: dict): # 更新市场价格
8486
for code in price.keys():

scutquant/alpha.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@ def market_neutralize(x: pd.Series, long_only: bool = False) -> pd.Series:
1919
"""
2020
_mean = x.groupby(level=0).mean()
2121
x -= _mean
22-
if long_only: # 考虑到A股有做空限制, 因此将权重为负的股票(即做空的股票)的权重调整为0(即纯多头), 并相应调整多头的权重
23-
x[x.values < 0] = 0
24-
abs_sum = x[x.values > 0].groupby(level=0).sum()
25-
else:
26-
abs_sum = abs(x).groupby(level=0).sum()
22+
abs_sum = abs(x).groupby(level=0).sum()
2723
x /= abs_sum
24+
if long_only:
25+
x[x < 0] = 0
26+
x *= 2
2827
return x
2928

3029

@@ -119,6 +118,7 @@ def call(self):
119118
pass
120119

121120
def normalize(self):
121+
self.result = mad_winsor(inf_mask(self.result))
122122
if self.norm_method == "zscore":
123123
self.result = cs_zscore(self.result)
124124
elif self.norm_method == "robust_zscore":

scutquant/executor.py

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,43 @@
11
from . import account, strategy
2-
from .signal_generator import *
2+
import pandas as pd
3+
import numpy as np
4+
import warnings
5+
6+
warnings.filterwarnings("ignore")
7+
8+
9+
def get_daily_inter(data: pd.Series | pd.DataFrame, shuffle=False):
10+
daily_count = data.groupby(level=0).size().values
11+
daily_index = np.roll(np.cumsum(daily_count), 1)
12+
daily_index[0] = 0
13+
if shuffle:
14+
daily_shuffle = list(zip(daily_index, daily_count))
15+
np.random.shuffle(daily_shuffle)
16+
daily_index, daily_count = zip(*daily_shuffle)
17+
return daily_index, daily_count
18+
19+
20+
def prepare(predict: pd.DataFrame, data: pd.DataFrame, price: str, volume: str, real_ret: pd.Series) -> pd.DataFrame:
    """
    Build the frame consumed by the executor: prediction, execution price,
    volume and realized return, indexed by ("time", "code").

    The caller's ``predict`` frame is left untouched; all work is done on a copy.

    :param predict: pd.DataFrame, predictions; its single column is renamed to "predict"
    :param data: pd.DataFrame, supplies the time index and the price / volume columns
    :param price: str, name of the price column in data
    :param volume: str, name of the volume column in data
    :param real_ret: pd.Series, realized returns
    :return: pd.DataFrame with columns "predict", "price", "volume", "R"; rows with any NaN are dropped
    """
    # Copy so that renaming columns / index levels does not leak into the caller's frame.
    result = predict.copy()
    result.columns = ["predict"]

    # Keep only the rows of data that have a prediction, then re-key them by the
    # prediction's own index level names so the column assignments below align.
    matched = data[data.index.isin(result.index)]
    matched = matched.reset_index().set_index(list(result.index.names)).sort_index()
    result["price"] = matched[price]
    result["volume"] = matched[volume]  # same-day volume, assuming volume does not jump sharply

    # set_names returns a new index, so the original predict index keeps its names.
    result.index = result.index.set_names(["time", "code"])
    # Orders are generated at T but executed at T+1, hence the per-code shift(-1).
    result["price"] = result["price"].groupby(level="code").shift(-1)
    # real_ret is already the T+2 over T+1 return, so it is not shifted.
    result["R"] = real_ret[real_ret.index.isin(result.index)]
    return result.dropna()
341

442

543
class Executor:
@@ -61,18 +99,17 @@ def create_account(self):
6199
self.benchmark = account.Account(self.ben_cash, {}, {}, self.price.copy())
62100

63101
def get_cash_available(self):
64-
self.value_hold = 0.0
65-
for code in self.user_account.price.keys(): # 更新持仓市值, 如果持有的资产在价格里面, 更新资产价值
66-
if code in self.user_account.position.keys():
67-
self.value_hold += self.user_account.position[code] * self.user_account.price[code]
68-
# value是账户总价值, 乘risk_deg后得到所有可交易资金, 减去value_hold就是剩余可交易资金
69-
return self.user_account.value * self.s.risk_degree - self.value_hold
102+
"""
103+
fixme: 调整计算方式使其适应先卖后买的情况
104+
之所以不用cash * risk_degree是因为先卖后买的情况下, 当前的cash跟实际可支配的cash不一样(因为卖了就有钱了)
105+
"""
106+
return self.user_account.value * self.s.risk_degree
70107

71108
def execute(self, data: pd.DataFrame, verbose: int = 0):
72109
"""
73110
:param data: pd.DataFrame, 包括三列:'predict', 'volume', 'price', 'label' 以及多重索引[('time', 'code')]
74111
:param verbose: int, 是否输出交易记录
75-
:return: self
112+
:return:
76113
"""
77114

78115
def check_names(index=data.index, predict="predict", price="price"):
@@ -88,26 +125,31 @@ def check_names(index=data.index, predict="predict", price="price"):
88125
self.init_account(data)
89126
self.create_account()
90127
if self.mode == "generate":
91-
time = data.index.get_level_values(0).unique().values
92-
for t in time:
93-
idx = data["R"].groupby(data.index.names[0]).mean() # 大盘收益率
94-
self.time.append(t)
95-
data_select = data[data.index.get_level_values(0) == t]
96-
signal = generate(data=data_select, strategy=self.s, cash_available=self.get_cash_available())
97-
order, current_price = signal["order"], signal["current_price"]
128+
benchmark = data["R"].groupby(level=0).transform(lambda x: x.mean()) # 大盘收益率
129+
daily_idx, daily_count = get_daily_inter(data)
130+
for idx, count in zip(daily_idx, daily_count):
131+
batch = slice(idx, idx + count)
132+
data_batch = data.iloc[batch]
133+
benchmark_batch = benchmark.iloc[batch]
134+
current_day = data_batch.index.get_level_values(0)[0]
135+
self.time.append(current_day)
136+
order, current_price = self.s.to_signal(data_batch, position=self.user_account.position,
137+
cash_available=self.get_cash_available())
98138

99139
if self.s.auto_offset:
100-
order = self.user_account.generate_total_order(order=order, freq=self.s.offset_freq)
101-
order, trade = self.user_account.check_order(order, current_price)
102-
103-
if verbose == 1 and trade:
104-
print(t, '\n', "buy:", '\n', order["buy"], '\n', "sell:", order["sell"], '\n')
140+
order = self.user_account.adjust_order(order=order, freq=self.s.offset_freq)
141+
order = self.user_account.check_order(order, current_price)
142+
# print(trade)
143+
# trade = True
144+
if verbose == 1:
145+
print(current_day, '\n', "buy:", '\n', order["buy"], '\n', "sell:", order["sell"], '\n')
105146

106147
self.user_account.update_all(order=order, price=current_price, cost_buy=self.cost_buy,
107-
cost_sell=self.cost_sell, min_cost=self.min_cost, trade=trade)
148+
cost_sell=self.cost_sell, min_cost=self.min_cost)
108149
self.user_account.risk_control(risk_degree=self.s.risk_degree, cost_rate=self.cost_sell,
109150
min_cost=self.min_cost)
110-
self.benchmark.value *= (1 + idx[idx.index == t][0]) # 乘上1+大盘收益率, 相当于等权指数
151+
152+
self.benchmark.value *= 1 + benchmark_batch.values[0] # 乘上1+大盘收益率, 相当于等权指数
111153
self.benchmark.val_hist.append(self.benchmark.value)
112154
else:
113155
raise ValueError("simulate mode is not available by far")

scutquant/report.py

Lines changed: 32 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,15 @@ def plot(data, label, title: str = None, xlabel: str = None, ylabel: str = None,
8282
plt.figure(figsize=figsize)
8383
# plt.clf()
8484
if mode == "plot":
85-
for d in range(len(data)):
86-
plt.plot(data[d], label=label[d])
87-
plt.xticks(rotation=45)
85+
if len(data) > 10:
86+
cmap = plt.get_cmap('tab20')
87+
for d in range(len(data)):
88+
plt.plot(data[d], label=label[d], color=cmap(d))
89+
plt.xticks(rotation=45)
90+
else:
91+
for d in range(len(data)):
92+
plt.plot(data[d], label=label[d])
93+
plt.xticks(rotation=45)
8894
elif mode == "bar":
8995
bar = plt.bar(label, data, label="value")
9096
plt.bar_label(bar, label_type='edge')
@@ -122,16 +128,13 @@ def accuracy(pred: pd.Series, y: pd.Series, sign: str = ">=") -> float:
122128
return len(data_true) / len(data)
123129

124130

125-
def report_all(user_account, benchmark, show_raw_value: bool = False, excess_return: bool = True, risk: bool = True,
126-
turnover: bool = True, rf: float = 0.03, freq: float = 1, time=None, figsize: tuple = (10, 6)) -> None:
131+
def report_all(user_account, benchmark, show_raw_value: bool = False, rf: float = 0.03, freq: float = 1, time=None,
132+
figsize: tuple = (10, 6)) -> None:
127133
"""
128134
129135
:param user_account: account类
130136
:param benchmark: account类
131137
:param show_raw_value: 显示原始市值(具体金额)
132-
:param excess_return: 显示超额收益曲线
133-
:param risk: 显示风险度
134-
:param turnover: 显示换手率
135138
:param rf: 显示无风险利率
136139
:param freq: 频率, 日频为1,月频为30,其它类推
137140
:param time: 显示时间轴
@@ -159,20 +162,22 @@ def report_all(user_account, benchmark, show_raw_value: bool = False, excess_ret
159162
days += 1
160163
days /= len(acc_ret)
161164

162-
acc_dd = calc_drawdown(pd.Series(acc_ret))
163-
ben_dd = calc_drawdown(pd.Series(ben_ret))
165+
acc_ret = pd.Series(acc_ret, name="acc_ret", index=time) # 累计收益率
166+
ben_ret = pd.Series(ben_ret, name="ben_ret", index=time) # benchmark的累计收益率
167+
excess_ret = pd.Series(excess_ret, name="excess_ret", index=time)
164168

165-
ret = pd.Series(acc_ret) # 累计收益率
166-
ben = pd.Series(ben_ret) # benchmark的累计收益率
169+
acc_dd = calc_drawdown(acc_ret)
170+
ben_dd = calc_drawdown(ben_ret)
171+
excess_dd = calc_drawdown(excess_ret)
167172

168-
ann_return = annualized_return(ret, freq=freq)
169-
ann_std = annualized_volatility(ret, freq=freq)
170-
ben_ann_return = annualized_return(ben, freq=freq)
171-
ben_ann_std = annualized_volatility(ben, freq=freq)
173+
ann_return = annualized_return(acc_ret, freq=freq)
174+
ann_std = annualized_volatility(acc_ret, freq=freq)
175+
ben_ann_return = annualized_return(ben_ret, freq=freq)
176+
ben_ann_std = annualized_volatility(ben_ret, freq=freq)
172177

173-
beta = ret.cov(ben) / ben.var()
174-
alpha = ret.mean() - beta * ben.mean()
175-
epsilon = pd.Series(ret - beta * ben - alpha).std()
178+
beta = acc_ret.cov(ben_ret) / ben_ret.var()
179+
alpha = acc_ret.mean() - beta * ben_ret.mean()
180+
epsilon = (acc_ret - beta * ben_ret - alpha).std()
176181

177182
sharpe = sharpe_ratio(acc_ret, rf=rf, freq=freq * 365)
178183
sortino = sortino_ratio(acc_ret, ben_ret)
@@ -207,10 +212,6 @@ def report_all(user_account, benchmark, show_raw_value: bool = False, excess_ret
207212
plt.legend()
208213
plt.show()
209214
else:
210-
acc_ret = pd.Series(acc_ret, name="acc_ret", index=time)
211-
ben_ret = pd.Series(ben_ret, name="ben_ret", index=time)
212-
excess_ret = pd.Series(excess_ret, name="excess_ret", index=time)
213-
214215
plt.figure(figsize=(10, 6))
215216
plt.plot(acc_ret, label="return", color="red")
216217
plt.plot(ben_ret, label="benchmark", color="blue")
@@ -222,21 +223,19 @@ def report_all(user_account, benchmark, show_raw_value: bool = False, excess_ret
222223
plt.clf()
223224
plt.figure(figsize=(10, 6))
224225
plt.plot(acc_dd, label="drawdown")
225-
plt.plot(ben_dd, label="excess_return_drawdown")
226+
plt.plot(excess_dd, label="excess_return_drawdown")
226227
plt.legend()
227228
plt.title("Drawdown")
228229
plt.show()
229230

230-
if risk:
231-
risk = pd.DataFrame({'risk': user_account.risk_curve}, index=time)
232-
plot([risk], label=['risk_degree'], title='Risk Degree', ylabel='value', figsize=figsize)
231+
risk = pd.DataFrame({'risk': user_account.risk_curve}, index=time)
232+
plot([risk], label=['risk_degree'], title='Risk Degree', ylabel='value', figsize=figsize)
233233

234-
if turnover:
235-
risk = pd.DataFrame({'turnover': user_account.turnover}, index=time)
236-
plot([risk], label=['turnover'], title='Turnover', figsize=figsize)
234+
risk = pd.DataFrame({'turnover': user_account.turnover}, index=time)
235+
plot([risk], label=['turnover'], title='Turnover', figsize=figsize)
237236

238237

239-
def group_return_ana(pred: pd.DataFrame | pd.Series, y_true: pd.Series, n: int = 5, groupby: str = "time",
238+
def group_return_ana(pred: pd.DataFrame | pd.Series, y_true: pd.Series, n: int = 10, groupby: str = "time",
240239
figsize: tuple = (10, 6)) -> None:
241240
"""
242241
因子对股票是否有良好的区分度, 若有, 则应出现明显的分层效应(即单调性)
@@ -283,12 +282,10 @@ def group_return_ana(pred: pd.DataFrame | pd.Series, y_true: pd.Series, n: int =
283282
win_rate = []
284283
mean_ret = []
285284
for c in cols:
286-
# dt = t_df[c] + 1
287-
# data.append(dt.cumprod() - 1)
288285
data.append(t_df[c].cumsum())
289286
label.append(c)
290-
win_rate.append(len(t_df[t_df[c] >= 0]) / len(t_df))
291-
mean_ret.append(t_df[c].cumsum().values[-1] / len(t_df) * 100)
287+
win_rate.append(round(len(t_df[t_df[c] >= 0]) / len(t_df), 4))
288+
mean_ret.append(round(t_df[c].cumsum().values[-1] / len(t_df) * 100, 4))
292289
plot(data, label, title='Grouped Return', xlabel='time_id', ylabel='value', figsize=figsize)
293290
plot(win_rate, label=cols, title="Win Rate of Each Group", mode="bar", figsize=figsize)
294291
plot(mean_ret, label=cols, title="Mean Return of Each Group(%)", mode="bar", figsize=figsize)

0 commit comments

Comments
 (0)