含年化100%+的复合轮动因子，aitraderv4.7源码发布，支持duckdb列存访问数据（python源码+数据）

原创内容第825篇，专注量化投资、个人成长与财富自由。

今天是周五，春天来了，桃花都开了。

周五是星球更新代码的日子，今天主要更新点如下：

1、DuckDB loader 按列加载多个文件夹中的 CSV 数据，性能更好。

2、解决了当天信号不显示的问题。

3、年化100%+的策略因子。

请前往星球下载：AI量化实验室——2025量化投资的星辰大海

代码随系统源码打包在策略集中：

核心代码片段如下：

from bt_algos_extend import Task, Engine


def ranking_ETFs():
    """Build the Task that describes the ETF rotation strategy.

    The task runs daily, weighs the selected symbols equally, ranks the
    universe with a composite expression string and uses '510300.SH' as
    the benchmark.

    Returns:
        Task: the configured task (it is handed to ``Engine().run`` by the
        script that follows this function).
    """
    # Ranking expression, split over several literals for readability only;
    # the concatenated value is identical to the single-line original.
    # It combines a 25-period trend_score term, 5- and 10-period roc terms
    # and a ma(volume,5)/ma(volume,20) ratio.
    ranking_expr = (
        'trend_score(close,25)*0.4'
        '+(roc(close,5)+roc(close,10))*0.2'
        '+ma(volume,5)/ma(volume,20)'
    )

    # ETF universe; labels are translated from the original comments.
    universe = [
        '159915.SZ',  # ChiNext ETF
        '510180.SH',  # SSE 180 ETF
        '518880.SH',  # Gold ETF
        '513100.SH',  # Nasdaq ETF
        '159509.SZ',  # Nasdaq Technology ETF
        '512100.SH',  # CSI 1000 ETF
        '513500.SH',  # S&P 500 ETF
        '512480.SH',  # STAR 100 ETF (label as in the original - TODO confirm ticker)
    ]

    task = Task()
    task.name = '基于ETF历史评分的轮动策略'
    # Ranking / scheduling configuration
    task.period = 'RunDaily'
    task.weight = 'WeighEqually'
    task.order_by_signal = ranking_expr
    # Optional backtest window, left disabled as in the original:
    # task.start_date = '20180101'
    # task.end_date = '20240501'

    task.symbols = universe
    task.benchmark = '510300.SH'
    return task


import matplotlib.pyplot as plt
from matplotlib import rcParams

# Run the backtest for the rotation task and print its summary statistics.
res = Engine().run(ranking_ETFs())
print(res.stats)

# The assignment was split across several lines in the published listing,
# which is a syntax error; it is reassembled here.
# NOTE(review): SimHei is presumably chosen so Chinese labels render in the
# charts - the font must be installed on the machine running the script.
rcParams['font.family'] = 'SimHei'
# res.plot_weights()
res.prices.plot()
print(res.get_transactions())

# Compound the period-over-period changes of res.prices into cumulative
# growth factors and print the last row.
df = (res.prices.pct_change() + 1).cumprod()
print(df.iloc[-1])
plt.show()

DuckDB 按列加载多个 CSV 文件的数据：

from datetime import datetime

import duckdb
import pandas as pd
from tqdm import tqdm


class DuckdbLoader:
    """Load selected columns of CSV quote files through DuckDB.

    The constructor immediately queries every file matching
    ``{path}{folder}/*.csv``, keeps rows whose ``date`` lies between
    ``start_date`` and ``end_date`` (inclusive) and, when ``symbols`` is
    non-empty, whose ``symbol`` is in that list. The result is stored in
    ``self.df`` as a long-format DataFrame indexed by date (DatetimeIndex,
    ascending) with one row per (date, symbol).

    Args:
        path: Root directory of the data; formatted into the query as text.
        symbols: Symbols to keep; ``None`` or empty loads every symbol.
        cols: Column names to select. ``symbol`` and ``date`` are always
            added. Defaults to ``['close']``.
        start_date: Lower date bound, as passed to the SQL comparison.
        end_date: Upper date bound. Defaults to today (``%Y%m%d``),
            resolved when the loader is constructed.
        folder: Sub-folder glob appended to ``path`` (default ``'/*'``).
    """

    def __init__(self, path, symbols, cols=None, start_date='20100101',
                 end_date=None, folder='/*'):
        self.path = path
        self.folder = folder
        self.start_date = start_date
        # A default written in the signature would be frozen at import time;
        # resolve "today" per instance instead.
        self.end_date = end_date if end_date is not None else datetime.now().strftime('%Y%m%d')
        # Copy so neither the caller's list nor a shared default is mutated.
        self.cols = list(cols) if cols is not None else ['close']
        self.symbols = symbols
        self.df = None
        self._load_data(self.symbols, self.cols)

    def get_col_df(self, col='close'):
        """Pivot one loaded column into a wide date x symbol table.

        Args:
            col: Name of a column present in ``self.df``.

        Returns:
            A DataFrame indexed by date with one column per symbol, or
            ``None`` (after printing a message) when ``col`` is not loaded.
            Duplicate (date, symbol) rows are aggregated with pivot_table's
            default aggregation.
        """
        if col not in self.df.columns:
            print('列数据没有加载！')
            return None

        long_df = self.df[[col, 'symbol']]
        return long_df.pivot_table(values=col, index=self.df.index, columns='symbol')

    def _build_query(self, symbols, columns):
        """Return the SQL text used by ``_load_data``.

        ``columns`` is not modified; ``symbol`` and ``date`` are appended to
        a copy when missing because the loader needs both downstream.
        """
        select_cols = list(columns)
        for required in ('symbol', 'date'):
            if required not in select_cols:
                select_cols.append(required)

        query = (
            "select {} from '{}{}/*.csv' "
            "where date >= '{}' and date <= '{}'"
        ).format(','.join(select_cols), self.path, self.folder,
                 self.start_date, self.end_date)

        if symbols:
            # Double embedded single quotes so a symbol cannot terminate the
            # SQL string literal it is placed in.
            quoted = ["'{}'".format(str(s).replace("'", "''")) for s in symbols]
            query += ' and symbol IN ({})'.format(','.join(quoted))
        return query

    def _load_data(self, symbols, columns):
        """Run the query and store the date-indexed result in ``self.df``."""
        df = duckdb.query(self._build_query(symbols, columns)).df()
        # Normalise to text first so both integer-like (20200101) and
        # date-typed values go through the same to_datetime parsing.
        df['date'] = df['date'].astype(str)
        df.set_index('date', inplace=True)
        df.index = pd.to_datetime(df.index)
        df.sort_index(inplace=True, ascending=True)
        self.df = df

    def calc_all_expressions(self, fields, names):
        """Evaluate factor expressions and add them as columns of ``self.df``.

        Each ``fields[i]`` is passed to ``calc_expr`` together with the data
        re-indexed by (date, symbol); the result is stored under
        ``names[i]``. Empty expressions are skipped. An expression that
        raises is reported (message plus traceback) and skipped, so one bad
        expression does not abort the rest.

        Args:
            fields: Expression strings understood by ``calc_expr``.
            names: Output column names, paired positionally with ``fields``.
        """
        import traceback

        # Imported here so the method also works when this module is
        # imported rather than run as a script.
        from datafeed.expr import calc_expr

        # Non-inplace: self.df keeps its date index if anything below fails.
        df = self.df.set_index([self.df.index, 'symbol'])
        pending = []
        added = 0
        for field, name in tqdm(zip(fields, names)):
            if not field:
                continue
            try:
                se = calc_expr(df, field)

                added += 1
                # The first few columns are assigned directly; later ones
                # are collected and joined in a single concat (presumably to
                # avoid many single-column inserts into a wide frame).
                if added < 10:
                    df[name] = se
                else:
                    se.name = name
                    pending.append(se)
            except Exception:
                print('{}错误'.format(field))
                # print_exc() writes the traceback itself and returns None,
                # so it must not be wrapped in print().
                traceback.print_exc()
                continue

        if pending:
            df = pd.concat([df, *pending], axis=1)

        # Restore the long format: symbol back as a column, date as the index.
        df['symbol'] = df.index.droplevel(0)
        df.index = df.index.droplevel(1)
        self.df = df


if __name__ == '__main__':
    # Demo: load two ETFs, print one raw column in wide form, then compute a
    # 20-period roc expression and print it the same way.
    from config import DATA_ETF_QUOTES
    # Kept inside the script guard as in the original, but re-indented: at
    # column 0 it terminated the `if` body and broke the lines after it.
    # It binds calc_expr at module level, where calc_all_expressions can
    # resolve it.
    from datafeed.expr import calc_expr  # noqa: F401

    loader = DuckdbLoader(path=DATA_ETF_QUOTES.resolve(), symbols=['510300.SH', '159915.SZ'],
                          cols=['close', 'adj_factor'])
    df_col = loader.get_col_df('adj_factor')
    print(df_col)

    loader.calc_all_expressions(fields=['roc(close,20)'], names=['roc_20'])
    df = loader.get_col_df('roc_20')
    print(df)