# strat1_garch_vol.py
import pandas as pd
import numpy as np
import scipy as sp
import copy
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
import arch

# This is the standard version: PnL is expressed relative to the CSI 500 notional principal.
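# Overview: grid-search backtest of a CSI 500 vs CSI 1000 pair trade. For each parameter
# combination, the spread between beta-hedged rolling returns is modelled with a GARCH(1,1)
# volatility; a position is opened when the short-window spread exceeds a volatility-scaled
# threshold (subject to a long-window filter), and closed on stop-gain, stop-loss, or when the
# spread reverts inside the mean-reversion level. Per-trade records and summary statistics are
# written to the result_garch/ directory.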
class PairTrading:
    def __init__(self, df1, df2, df_beta, df_multiplier, window_list1, window_list2, open_list1, open_list2,
                 gain_list, loss_list, base_amount, start, mean_reverts):
        self.df1 = copy.deepcopy(df1)  # CSI 500 adjusted close
        self.df2 = copy.deepcopy(df2)  # CSI 1000 adjusted close
        self.df_beta = copy.deepcopy(
            df_beta)  # beta of CSI 500 returns regressed on CSI 1000 returns (pre-computed; not derived in this script)
        self.df_multiplier = copy.deepcopy(df_multiplier)  # price multiplier between CSI 500 and CSI 1000
        self.window_list1 = window_list1  # windows for the short-horizon return calculation
        self.window_list2 = window_list2  # windows for the long-horizon return calculation (filters 2015-16, when the strategy fails)
        self.open_list1 = open_list1  # thresholds for opening positions: 1%, 2%, 3%
        self.open_list2 = open_list2  # validity filter: the long-horizon negative return must not be too large, e.g. -8%, -10%
        self.gain_list = gain_list
        self.loss_list = loss_list
        self.mrvs = mean_reverts
        self.base_amount = base_amount  # position size
        self.start = start
        self.combine_backtesting()
    def combine_backtesting(self):
        # Runs pair_trading_backtesting for every combination of parameter values
        # and collects one summary row per combination.
        total_record_col = ["回归线", "短线window", "短线开仓信号(500-1000收益率)", "长线window",
                            "长线filter信号(500-1000收益率)",
                            "止盈线", "止损线", "总开仓次数",
                            "赚钱次数", "赚钱比例(%)",
                            "平均持仓时间", "赚钱时平均持仓时间", "亏钱时平均持仓时间", "总盈亏百分比",
                            "总盈亏(绝对数额)", "基础手数", "夏普"]
        df_record_final = pd.DataFrame(columns=total_record_col)
        for window1 in self.window_list1:
            for window2 in self.window_list2:
                for open1 in self.open_list1:
                    for open2 in self.open_list2:
                        for stop_gain in self.gain_list:
                            for stop_loss in self.loss_list:
                                for mrv in self.mrvs:
                                    df_combine, df_record, info = \
                                        self.pair_trading_backtesting(window1, window2, open1, open2, stop_gain,
                                                                      stop_loss, mrv)
                                    df_line = pd.DataFrame(info, index=[0])
                                    df_record_final = pd.concat([df_record_final, df_line])
        df_record_final.to_csv("result_garch/Combined Final Result with amount of CSI500={} {}.csv".
                               format(self.base_amount, self.start), encoding="gbk", index=False)
    # Once the parameters are fixed, the pair-trading backtest itself runs here.
    def pair_trading_backtesting(self, window1, window2, open1, open2, stop_gain, stop_loss, mrv):
        cal_rolling_mean(self.df1, window1, window2)  # CSI 500: cross-period returns over window_short and window_long
        cal_rolling_mean(self.df2, window1, window2)  # CSI 1000: cross-period returns over window_short and window_long
        get_beta(self.df1, self.df_beta, self.df_multiplier, self.start)  # attach the Beta and Multiplier for the relevant month
        # The multiplier is the monthly average price ratio CSI 500 / CSI 1000.
        # df_beta is the beta of CSI 500 returns regressed on CSI 1000 returns,
        # so the hedge has to be expressed in terms of absolute price changes:
        # R500 = beta * R1000
        # S500 = multiplier * S1000
        # Delta500 = R500 * S500 = beta * multiplier * Delta1000
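        # A worked example with hypothetical values: if Beta = 0.9 and Multiplier = 0.65 for the month,
        # each 1-point move in CSI 1000 is expected to correspond to a 0.9 * 0.65 = 0.585-point move in
        # CSI 500, so shorting base_amount units of CSI 500 price change is paired with a long of
        # base_amount * 0.585 units of CSI 1000 price change.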
        df_combine = pd.concat([self.df1, self.df2], axis=1).dropna()  # concatenate horizontally
        print(df_combine.head())  # flag
        print(df_combine.columns)
        df_combine.columns = ["收盘价1", "rolling_mean1_short", "rolling_mean1_long", "Beta", "Multiplier", "收盘价2",
                              "rolling_mean2_short", "rolling_mean2_long"]  # 8 columns in total
        print(df_combine.head())  # flag
        print(df_combine.columns)
        # Below, CSI 500 is treated as the asset and CSI 1000 as the proxy for the market.
        # The same beta is used for both windows.
        df_combine["rolling_difference_short"] = df_combine["rolling_mean1_short"] - df_combine["Beta"] * df_combine[
            "rolling_mean2_short"]  # window1
        df_combine["rolling_difference_long"] = df_combine["rolling_mean1_long"] - df_combine["Beta"] * df_combine[
            "rolling_mean2_long"]  # window2
        # sample standard deviation as a simple estimator of sigma
        sig1 = df_combine["rolling_difference_short"].std()
        sig2 = df_combine["rolling_difference_long"].std()
        # apply GARCH to the spread time series created by the hedge
        # df_combine.to_csv('df_combine_t2.csv')
        date = df_combine.index
        # for short windows there are window1 NaNs at the head
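        # The two fits below model the spread with a constant mean and GARCH(1,1) variance,
        #     h_t = omega + alpha * (x_{t-1} - mu)^2 + beta * h_{t-1},
        # and the manual loops reproduce this recursion, seeding h_0 with the sample standard
        # deviation of the spread. Note (an assumption about the arch package defaults): the
        # positional access rlt.params[0..3] relies on the constant-mean GARCH(1,1) parameter
        # order ['mu', 'omega', 'alpha[1]', 'beta[1]']; rlt.conditional_volatility would expose
        # the fitted sigma_t series directly if preferred.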
        model1 = arch.arch_model(df_combine["rolling_difference_short"], vol='GARCH', p=1, q=1)
        rlt1 = model1.fit()
        mug1 = rlt1.params[0]
        omega1 = rlt1.params[1]
        alpha1 = rlt1.params[2]
        beta1 = rlt1.params[3]
        df_combine['ht1'] = 0.0  # conditional variance of the short-window spread
        df_combine.loc[date[0], 'ht1'] = df_combine["rolling_difference_short"].std()
        for index in range(1, len(date)):
            df_combine.loc[date[index], "ht1"] \
                = omega1 + alpha1 * ((df_combine.loc[date[index - 1], "rolling_difference_short"] - mug1) ** 2) \
                + beta1 * (df_combine.loc[date[index - 1], 'ht1'])
        # df_combine.to_csv('test2.csv')
        model2 = arch.arch_model(df_combine["rolling_difference_long"], vol='GARCH', p=1, q=1)
        rlt2 = model2.fit()
        mug2 = rlt2.params[0]
        omega2 = rlt2.params[1]
        alpha2 = rlt2.params[2]
        beta2 = rlt2.params[3]
        df_combine['ht2'] = 0.0  # conditional variance of the long-window spread
        df_combine.loc[date[0], 'ht2'] = df_combine["rolling_difference_long"].std()
        for index in range(1, len(date)):
            df_combine.loc[date[index], "ht2"] \
                = omega2 + alpha2 * ((df_combine.loc[date[index - 1], "rolling_difference_long"] - mug2) ** 2) \
                + beta2 * (df_combine.loc[date[index - 1], 'ht2'])
        df_combine['cd1'], df_combine['cd2'], df_combine['cd3'] = False, False, False  # opening conditions
        for index in range(0, len(date)):
            df_combine.loc[date[index], 'cd1'] = df_combine.loc[date[index], 'rolling_difference_short'] > \
                open1 * np.sqrt(df_combine.loc[date[index], 'ht1'])
            df_combine.loc[date[index], 'cd2'] = df_combine.loc[date[index], 'rolling_difference_long'] > \
                open2 * np.sqrt(df_combine.loc[date[index], 'ht2'])
            df_combine.loc[date[index], 'cd3'] = df_combine.loc[date[index], 'rolling_difference_long'] < \
                - open2 * np.sqrt(df_combine.loc[date[index], 'ht2'])
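        # Taken together (given that open2 < 0, as in the parameter grid below): cd1 requires the
        # short-window spread to exceed open1 GARCH standard deviations, while cd2 and cd3 require
        # the long-window spread to stay inside the band (open2 * sqrt(ht2), -open2 * sqrt(ht2)),
        # i.e. the long-horizon dislocation must not be too large in either direction.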
        # df_combine.to_csv('with sig1 and sig2_1.csv')
        df_combine["Status"] = df_combine['cd1'] & df_combine["cd2"] & df_combine["cd3"]
        # "Status" flags the days on which the spread sits beyond the threshold;
        # it is what opening and closing positions is based on.
        df_combine.to_csv('test1.csv')  # wait and see
        # What is the spread created by the hedge? It is delta500 - delta1000 * multiplier * beta,
        # and we short the spread when it is sufficiently large,
        # which means shorting 1 unit of delta500 and going long multiplier * beta units of delta1000.
        df_combine["PnL_day"] = self.base_amount * (
                (df_combine["收盘价2"] - df_combine["收盘价2"].shift(1)) * df_combine["Beta"] * df_combine[
                    "Multiplier"] -
                (df_combine["收盘价1"] - df_combine["收盘价1"].shift(1)))
        df_combine = df_combine.dropna()
        record_col = ["start_date", "end_date", "duration", "total_pnl", "total_pnl_pct",
                      "total_pnl_annual"]
        # run the backtest: find the first place to open
        df_record = pd.DataFrame(columns=record_col)
        status_list = df_combine["Status"].to_list()
        print('status list\n', status_list)
        if True in status_list:
            first_open = np.where(status_list)[0][0]  # index at which 'True' first appears
            print('first_open', np.where(status_list))
            index_start = first_open + 1  # open on the following day
            index = index_start
        else:
            index_start = len(df_combine)  # no signal at all, so the loop below is skipped
            index = index_start
        # 'index' starts equal to index_start but is only used for iteration;
        # 'index_start' marks the opening day, i.e. the day after a signal day.
        while index < len(df_combine) - 1:
            print("It is Index {} now".format(index))
            index1, info = self.cal_strategy(df_combine, index_start, stop_gain, stop_loss, mrv)
            print(df_record)
            df_line = pd.DataFrame(info, index=[0])
            print('df_line =', df_line.head())
            df_record = pd.concat([df_record, df_line])
            print('df_record =', df_record)
            if True in status_list[index1:]:
                index_start = np.where(status_list[index1:])[0][0] + 1 + index1
            else:
                # no further signal: end early
                index_start = len(df_combine) - 1
            index = index_start
        total_record_col = ["回归线", "短线window", "短线开仓信号(500-1000收益率)", "长线window",
                            "长线filter信号(500-1000收益率)",
                            "止盈线", "止损线", "总开仓次数", "赚钱次数",
                            "赚钱比例(%)", "平均持仓时间", "赚钱时平均持仓时间", "亏钱时平均持仓时间", "总盈亏百分比",
                            "总盈亏(绝对数额)", "基础手数", "夏普"]
        number_record = len(df_record)
        number_gain = len(df_record[df_record["total_pnl_pct"] > 0])
        if len(df_record) > 0:
            pct_gain = number_gain / number_record * 100
        else:
            pct_gain = 0
        duration_mean = df_record["duration"].mean()
        duration_gain = df_record.loc[df_record["total_pnl_pct"] > 0, "duration"].mean()
        duration_loss = df_record.loc[df_record["total_pnl_pct"] < 0, "duration"].mean()
        total_gain_pct = df_record["total_pnl_pct"].sum()
        total_gain = df_record["total_pnl"].sum()
        if len(df_record) > 0:
            sharpe = df_record["total_pnl_pct"].mean() / df_record["total_pnl_pct"].std()
        else:
            sharpe = 0
        info_final = dict(zip(total_record_col,
                              [mrv, window1, open1, window2, open2, stop_gain, stop_loss, number_record, number_gain,
                               pct_gain, duration_mean, duration_gain, duration_loss, total_gain_pct, total_gain,
                               self.base_amount, sharpe]))
        df_combine.to_csv(
            "result_garch/Original window1={} open1={} window2={} open2={} gain={}loss={} base={} date={} mrv={}.csv".
            format(window1, open1, window2, open2, stop_gain, stop_loss, self.base_amount, self.start, mrv),
            encoding="gbk")
        df_record.to_csv(
            "result_garch/Record window1={} open1={} window2={} open2={} gain={}loss={} base={} date={} mrv={}.csv".
            format(window1, open1, window2, open2, stop_gain, stop_loss, self.base_amount, self.start, mrv),
            encoding="gbk", index=False)
        return df_combine, df_record, info_final
    def cal_strategy(self, df_combine, index_start, stop_gain, stop_loss, mrv):
        # Simulates a single trade: open at index_start, then hold until a stop or reversion condition is hit.
        index = index_start
        index_list = df_combine.index
        start_date = index_list[index]
        df_combine.loc[index_list[index], "Open"] = True
        df_combine.loc[index_list[index], "Event"] = "开仓"
        df_combine.loc[index_list[index], "PnL_Combine"] = \
            df_combine.loc[index_list[index_start]:index_list[index], "PnL_day"].sum()
        df_combine.loc[index_list[index], "PnL_Combine_pct"] = df_combine.loc[index_list[index], "PnL_Combine"] / \
            (df_combine.loc[index_list[index_start], "收盘价1"] * self.base_amount)
        index += 1
        date = df_combine.index
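        # Hold the position while all of the following are true: yesterday's cumulative PnL percentage
        # is strictly between the stop-loss and stop-gain levels, and both the long- and short-window
        # spreads are still further than mrv from zero (i.e. the spread has not yet mean-reverted).
        # The loop also stops at the second-to-last row so a closing row always exists.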
        while index < len(df_combine) - 1 and stop_loss < df_combine.loc[index_list[index - 1], "PnL_Combine_pct"] \
                < stop_gain \
                and abs(df_combine.loc[date[index], 'rolling_difference_long']) > mrv \
                and abs(df_combine.loc[date[index], 'rolling_difference_short']) > mrv:
            df_combine.loc[index_list[index], "PnL_Combine"] = \
                df_combine.loc[index_list[index_start]:index_list[index], "PnL_day"].sum()
            df_combine.loc[index_list[index], "PnL_Combine_pct"] = df_combine.loc[index_list[index], "PnL_Combine"] / \
                (df_combine.loc[index_list[index_start], "收盘价1"] * self.base_amount)
            index += 1
        df_combine.loc[index_list[index], "Stop"] = True
        df_combine.loc[index_list[index], "Event"] = "平仓"
        end_date = index_list[index]
        duration = (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days
        total_pnl = df_combine.loc[index_list[index - 1], "PnL_Combine"]
        total_pnl_pct = df_combine.loc[index_list[index - 1], "PnL_Combine_pct"]
        total_pnl_annual = total_pnl_pct / duration * 365
        info = dict(zip(["start_date", "end_date", "duration", "total_pnl", "total_pnl_pct", "total_pnl_annual"],
                        [start_date, end_date, duration, total_pnl, total_pnl_pct, total_pnl_annual]))
        return index, info
def cal_rolling_mean(df, window_short, window_long):  # fills in rolling_mean_short and rolling_mean_long
    date = df.index
    for index in range(window_short, len(df)):
        # rolling_mean_short = return at a given date relative to window_short days earlier
        df.loc[date[index], "rolling_mean_short"] = (df.loc[date[index], "收盘价"] - df.loc[
            date[index - window_short], "收盘价"]) / \
            df.loc[date[index - window_short], "收盘价"]
    for index in range(window_long, len(df)):
        df.loc[date[index], "rolling_mean_long"] = (df.loc[date[index], "收盘价"] - df.loc[
            date[index - window_long], "收盘价"]) / \
            df.loc[date[index - window_long], "收盘价"]
    return df
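# Equivalent vectorised form (a sketch, assuming the same "收盘价" column and row-based lookback):
#     df["rolling_mean_short"] = df["收盘价"].pct_change(window_short)
#     df["rolling_mean_long"] = df["收盘价"].pct_change(window_long)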
def get_beta(df, df_beta, df_multiplier, start="201811"):
    date = df.index
    for index in range(len(df)):
        d = pd.to_datetime(date[index])  # e.g. 2021-09-13 00:00:00
        d = d.strftime("%Y-%m-%d")  # e.g. "2021-09-13"
        d = d[:4] + d[5:7]  # e.g. "202109"
        if d > start:
            df.loc[date[index], "Beta"] = df_beta.loc[d[:6], "Beta"]
            # print('the month =', d)
            # print('beta =', df_beta.loc[d[:6], "Beta"])
            df.loc[date[index], "Multiplier"] = df_multiplier.loc[int(d[:6]), "Multiplier"]
    return df  # one Beta and one Multiplier per month
if __name__ == "__main__":
    df_500 = pd.read_csv("data/CSI500.csv", index_col="日期", encoding="gbk")
    df_500 = df_500.iloc[:-1, :1]  # drop the last row, keep only the first (close-price) column
    df_1000 = pd.read_csv("data/CSI1000.csv", index_col="日期", encoding="gbk")
    df_1000 = df_1000.iloc[:-1, :1]
    df_beta = pd.read_csv("data/Beta.csv", index_col="月份", encoding="gbk")
    df_multiplier = pd.read_csv("data/multiplier.csv", index_col="月份")
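    # Expected inputs (inferred from how the frames are used elsewhere in this script):
    # CSI500.csv and CSI1000.csv are GBK-encoded daily files indexed by "日期" whose first column
    # is the close price "收盘价"; Beta.csv is indexed by the month string "月份" (e.g. "201811")
    # with a "Beta" column; multiplier.csv is indexed by the month as an integer with a
    # "Multiplier" column.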
    # print(cal_rolling_mean(df_500, 240))
    # print(pair_trading_backtesting(df_500, df_1000, df_beta, 240, 0.02, 0.02, -0.02, 1))
    # print(get_beta(df_500, df_beta))
    window_list1 = [21, 60]  # [10, 20, 60]
    open_list1 = [0.01, 0.015, 0.02, 0.025]
    # window_list2 = [120, 180, 240, 300]
    window_list2 = [120, 240]  # [180, 240, 300]
    open_list2 = [-0.015, -0.03]  # [-1.2, -1.3]
    gain_list = [0.02]  # the final PnL is not sensitive to stop_gain
    loss_list = [-0.02]  # the final PnL is not sensitive to stop_loss
    mean_reverts = [0.0005, 0.0008, 0.001]
    # window_list = [240]
    # open_list = [0.02]
    # gain_list = [0.01, 0.02]
    # loss_list = [-0.01]
    PairTrading(df_500, df_1000, df_beta, df_multiplier, window_list1, window_list2, open_list1, open_list2,
                gain_list, loss_list, 10000, "201511", mean_reverts)
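    # Note: the output directory result_garch/ must already exist; pandas.DataFrame.to_csv does not
    # create missing directories, so create it beforehand (e.g. os.makedirs("result_garch", exist_ok=True)).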
    # PairTrading(df_500, df_1000, df_beta, df_multiplier, window_list1, window_list2, open_list1, open_list2,
    #             gain_list, loss_list, 10000, "201711")
    # PairTrading(df_500, df_1000, df_beta, df_multiplier, window_list1, window_list2, open_list1, open_list2,
    #             gain_list, loss_list, 10000, "201811")
    # PairTrading(df_500, df_1000, df_beta, df_multiplier, window_list1, window_list2, open_list1, open_list2,
    #             gain_list, loss_list, 10000, "201911")
    # PairTrading(df_500, df_1000, df_beta, df_multiplier, window_list1, window_list2, open_list1, open_list2,
    #             gain_list, loss_list, 10000, "202011")
    # The base amount stays unchanged across runs, as it is just a scale factor;
    # the runs above repeat the test for different starting dates.
    # print(combine_backtesting(df_500, df_1000, df_beta, window_list, open_list, gain_list, loss_list, base_amount=10000, date="201811"))
    # print(combine_backtesting(df_500, df_1000, df_beta, window_list, open_list, gain_list, loss_list, base_amount=10000,
    #                           date="201511"))
    # print(combine_backtesting(df_500, df_1000, df_beta, window_list, open_list, gain_list, loss_list, base_amount=10000,
    #                           date="201711"))
    # print(combine_backtesting(df_500, df_1000, df_beta, window_list, open_list, gain_list, loss_list, base_amount=10000,
    #                           date="201911"))
    # print(combine_backtesting(df_500, df_1000, df_beta, window_list, open_list, gain_list, loss_list, base_amount=10000,
    #                           start="202111"))
    # print(combine_backtesting(df_500, df_1000, df_beta, window_list, open_list, gain_list, loss_list, base_amount=1, date="201811"))
    # print(combine_backtesting(df_500, df_1000, df_beta, window_list, open_list, gain_list, loss_list, base_amount=100, date="201811"))