原创内容第825篇,专注量化投资、个人成长与财富自由。1、duckdb loader按列加载多文件夹里的csv数据,性能更好。from bt_algos_extend import Task, Engine
def ranking_ETFs():
    """Build the Task describing the ETF rotation strategy ranked by a score.

    The task is configured with a daily run period ('RunDaily'), equal
    weighting ('WeighEqually'), a ranking expression stored in
    ``order_by_signal``, a fixed ETF universe and a benchmark symbol.
    How each attribute is interpreted is up to ``Engine`` (defined in
    ``bt_algos_extend``, not visible here).

    Returns:
        Task: the configured task, ready to be passed to ``Engine().run``.
    """
    t = Task()
    t.name = '基于ETF历史评分的轮动策略'

    # Ranking / scheduling configuration.
    t.period = 'RunDaily'
    t.weight = 'WeighEqually'
    # Ranking expression: trend score (weight 0.4), the sum of the 5- and
    # 10-period ROC (weight 0.2), plus the ratio of the 5-period to the
    # 20-period volume moving average.
    t.order_by_signal = 'trend_score(close,25)*0.4+(roc(close,5)+roc(close,10))*0.2+ma(volume,5)/ma(volume,20)'

    # Optional backtest window, left disabled by the author.
    # t.start_date = '20180101'
    # t.end_date = '20240501'

    # Candidate universe; the trailing comments are the author's labels.
    t.symbols = [
        '159915.SZ',  # ChiNext ETF
        '510180.SH',  # SSE 180 ETF
        '518880.SH',  # Gold ETF
        '513100.SH',  # Nasdaq ETF
        '159509.SZ',  # Nasdaq Technology ETF
        '512100.SH',  # CSI 1000 ETF
        '513500.SH',  # S&P 500 ETF
        '512480.SH',  # STAR 100 ETF (author's label — TODO confirm)
    ]
    t.benchmark = '510300.SH'
    return t
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Run the rotation task and print the statistics exposed by the result.
res = Engine().run(ranking_ETFs())
print(res.stats)

# Use the SimHei font for plots (presumably so CJK labels render correctly).
# The original assignment was split across two lines, which is a syntax error.
rcParams['font.family'] = 'SimHei'

# res.plot_weights()
res.prices.plot()
print(res.get_transactions())

# Cumulative growth of each price series: compound (1 + period return).
df = (res.prices.pct_change() + 1).cumprod()
print(df.iloc[-1])  # final cumulative value per column
plt.show()
from datetime import datetime
import duckdb
import pandas as pd
from tqdm import tqdm
class DuckdbLoader:
    """Load selected columns from a tree of CSV files via DuckDB.

    The data is kept in ``self.df`` as a long-format DataFrame indexed by
    ``date`` (converted with ``pd.to_datetime``) that always carries a
    ``symbol`` column next to the requested value columns.
    """

    def __init__(self, path, symbols, cols=('close',), start_date='20100101',
                 end_date=None, folder='/*'):
        """Store the query parameters and load the data immediately.

        Args:
            path: Root directory containing the CSV folders.
            symbols: Symbols to keep; a falsy value disables the symbol filter.
            cols: Value columns to select. ``symbol`` and ``date`` are always
                selected in addition. The argument is copied, never mutated.
            start_date: Inclusive lower bound compared against the ``date``
                column in the SQL query.
            end_date: Inclusive upper bound; ``None`` means today
                (``%Y%m%d``), resolved at call time.
            folder: Glob fragment inserted between ``path`` and ``/*.csv``.
        """
        self.path = path
        self.folder = folder
        self.start_date = start_date
        # Resolve "today" per call instead of once at function definition.
        if end_date is None:
            end_date = datetime.now().strftime('%Y%m%d')
        self.end_date = end_date
        # Private copy so neither the default nor the caller's list is shared.
        self.cols = list(cols)
        self.symbols = symbols
        self.df = None
        self._load_data(self.symbols, self.cols)

    def get_col_df(self, col='close'):
        """Return one column pivoted to wide format (rows: date, columns: symbol).

        Prints a message and returns ``None`` when ``col`` is not a column
        of ``self.df``.
        """
        if col not in self.df.columns:
            print('列数据没有加载!')
            return None
        df_col = self.df[[col, 'symbol']].pivot_table(
            values=col, index=self.df.index, columns='symbol')
        return df_col

    def _load_data(self, symbols, columns):
        """Query the CSV files and store the result in ``self.df``.

        Args:
            symbols: Symbols for the ``symbol IN (...)`` filter; skipped when
                falsy.
            columns: Value columns to select; left unmodified.
        """
        # Build a fresh list: the original extended ``columns`` in place,
        # which leaked 'symbol'/'date' into the caller's (or default) list.
        select_cols = list(columns)
        select_cols += [c for c in ('symbol', 'date') if c not in select_cols]
        cols_str = ','.join(select_cols)

        # NOTE(review): values are interpolated into the SQL text directly, so
        # path, dates and symbols must come from trusted configuration.
        query_str = """
            select {} from '{}{}/*.csv'
            where date >= '{}' and date <= '{}'
            """.format(cols_str, self.path, self.folder, self.start_date, self.end_date)
        if symbols:
            symbols_str = ",".join("'{}'".format(s) for s in symbols)
            query_str += ' and symbol IN ({})'.format(symbols_str)

        df = duckdb.query(query_str).df()
        # Stringify first so that values such as 20240102 parse as dates.
        df['date'] = df['date'].apply(str)
        df.set_index('date', inplace=True)
        df.index = pd.to_datetime(df.index)
        df.sort_index(inplace=True, ascending=True)
        self.df = df

    def calc_all_expressions(self, fields, names):
        """Evaluate expressions with ``calc_expr`` and add them as columns.

        Args:
            fields: Expression strings; empty ones are skipped.
            names: Column names, paired with ``fields`` by position.

        A failing expression is reported (message plus traceback) and skipped;
        the remaining ones are still computed. ``self.df`` is replaced only
        after every expression has been processed.
        """
        import traceback

        # Imported here so the method also works when this module is imported
        # rather than run as a script.
        from datafeed.expr import calc_expr

        # Work on a (date, symbol) MultiIndex copy; 'symbol' is restored as a
        # column at the end.
        df = self.df.set_index([self.df.index, 'symbol'])
        cols = []
        count = 0
        for field, name in tqdm(zip(fields, names)):
            try:
                if len(field) <= 0:
                    continue
                se = calc_expr(df, field)
                count += 1
                # NOTE(review): the comparison operator was missing in the
                # source ("if count 10:"); '<' is a reconstruction — confirm.
                # The first results are assigned directly, later ones are
                # collected and concatenated in one step below.
                if count < 10:
                    df[name] = se
                else:
                    se.name = name
                    cols.append(se)
            except Exception:
                print('{}错误'.format(field))
                traceback.print_exc()
                continue

        if cols:
            df_cols = pd.concat(cols, axis=1)
            df = pd.concat([df, df_cols], axis=1)

        # Back to the long layout: 'symbol' as a column, 'date' as the index.
        df['symbol'] = df.index.droplevel(0)
        df.index = df.index.droplevel(1)
        self.df = df
if __name__ == '__main__':
    # Demo: load two ETFs, print one raw column, then one computed factor.
    from config import DATA_ETF_QUOTES
    # Bound at module scope so `calc_expr` is available as a global while
    # this file runs as a script.
    from datafeed.expr import calc_expr

    loader = DuckdbLoader(path=DATA_ETF_QUOTES.resolve(), symbols=['510300.SH', '159915.SZ'],
                          cols=['close', 'adj_factor'])
    df_col = loader.get_col_df('adj_factor')
    print(df_col)

    loader.calc_all_expressions(fields=['roc(close,20)'], names=['roc_20'])
    df = loader.get_col_df('roc_20')
    print(df)
回顾这二十年来,做对的事情有一些,其中有一点值得一提:一直持续地做技术,当然也不止于做技术。而且不局限于工作中所需要的,而是去做自己觉得有意义、有外延和扩展性的事情。当然花了很多心思,只不过乐在其中,并不觉得苦;也没有人要求去学习或做什么,而是把它当成一个兴趣爱好。从采集,到搜索,再到推荐,再到后来的人工智能和量化投资。做喜欢且擅长的事情,你并不会觉得是在工作或者加班。我们讲“ABCZ”计划:A(工作是你喜欢且擅长的),Z(投资计划,积累本金让自己财务小自由),然后是造一个小商业系统(能够带来工资外、投资外的被动收入)。当下以deepseek为代表的大模型及智能体,又进一步赋能“一个企业”技术栈。在等待风来的日子里,做喜欢和擅长的事情,做到最好,开心点,就是这样!
代码和数据下载:AI量化实验室——2025量化投资的星辰大海
AI量化实验室星球,已经运行三年多,1500+会员。
aitrader代码包含因子表达式引擎、遗传算法(Deap)因子挖掘引擎等,支持vnpy、qlib、backtrader和bt引擎,内置多个年化30%+的策略,每周五迭代一次,代码和数据在星球全部开源。