原创内容第982篇,专注AGI+,AI量化投资、个人成长与财富自由。策略地址:
http://www.ailabx.com/strategy/68a3633d8c45f7483b1aa0e4
之前对比了pandas、duckdb和polars加载csv、parquet等格式的表现。过程中遇到一些问题:当csv文件多了之后,可能有些文件表结构不对,或者存在数据格式问题,导致多个dataframe合并不到一块,而且出错后还不容易排查。
import akshare as ak
import pandas as pd
import os
import time
from datetime import datetime
# Storage layout, relative to the current working directory:
#   data_dir    - root folder for data files
#   history_dir - folder reserved for historical per-symbol data
#   inc_dir     - folder reserved for incremental per-symbol data
data_dir = '../data'
history_dir = "../data/csi300/history"
inc_dir = "../data/csi300/inc"

# Create the output folders up front; exist_ok keeps re-runs from failing.
for _required_dir in (history_dir, inc_dir):
    os.makedirs(_required_dir, exist_ok=True)
def get_csi300_constituents():
    """Return the CSI 300 constituent stock codes as a list.

    Fetches the constituent table for index "000300" through
    ``ak.index_stock_cons_sina`` and removes every "sh." / "sz." occurrence
    from the values of its "code" column before returning them.
    """
    constituents = ak.index_stock_cons_sina(symbol="000300")
    cleaned_codes = constituents["code"].str.replace(r"sh\.|sz\.", "", regex=True)
    constituents["code"] = cleaned_codes
    return cleaned_codes.tolist()
def download_adjusted_data(symbol):
    """Download back-adjusted ("hfq") daily bars for one stock and normalise columns.

    Args:
        symbol: Stock code, passed to ``ak.stock_zh_a_hist`` as-is (stringified).

    Returns:
        A DataFrame with the columns ``symbol, date, open, high, low, close,
        volume`` (in that order), or ``None`` when the result is empty or any
        error occurs. Failures are reported on stdout instead of being raised,
        so a batch run can skip the symbol and continue.
    """
    try:
        # The code is sent without an exchange prefix. The previous version
        # looped over "sh"/"sz" but never used the prefix, so an empty result
        # only caused the very same request to be issued a second time.
        df = ak.stock_zh_a_hist(
            symbol=str(symbol),
            period="daily",
            adjust="hfq",
        )
        if df.empty:
            print(f"⚠️ 未找到数据: {symbol}")
            return None

        # Map the Chinese column headers onto the English field names.
        df = df.rename(columns={
            "日期": "date",
            "开盘": "open",
            "最高": "high",
            "最低": "low",
            "收盘": "close",
            "成交量": "volume",
        })
        df.insert(0, "symbol", symbol)
        return df[["symbol", "date", "open", "high", "low", "close", "volume"]]
    except Exception as e:
        # Deliberate best-effort boundary: one bad symbol must not abort the run.
        print(f"❌ 下载{symbol}失败: {e}")
        return None
def split_and_save_data(df, symbol, inc_start="2025-01-01"):
    """Split one symbol's bars by date and store both parts in HDF5 files.

    Rows dated before ``inc_start`` go to ``history.h5`` and rows dated on or
    after it go to ``inc.h5``; both files live in ``data_dir``. Each part is
    stored under ``key=symbol``. The history part used to be written under the
    fixed key ``'history'`` without appending, so every symbol replaced the one
    saved before it; with a per-symbol key each stock keeps its own table and
    a re-run simply refreshes that table.

    Args:
        df: DataFrame with a "date" column, or ``None`` (then nothing happens).
            The "date" column is converted to datetime in place.
        symbol: Stock code, used as the HDF5 key.
        inc_start: First date that belongs to the incremental part. The
            default reproduces the original 2024-12-31 / 2025-01-01 split.

    Returns:
        None.
    """
    if df is None:
        return

    df["date"] = pd.to_datetime(df["date"])
    boundary = pd.Timestamp(inc_start)

    # Two explicit comparisons (rather than one mask and its negation) keep
    # rows with a missing date out of both parts.
    partitions = (
        ("history.h5", df[df["date"] < boundary]),
        ("inc.h5", df[df["date"] >= boundary]),
    )
    for store_name, part in partitions:
        if part.empty:
            continue
        part.to_hdf(
            os.path.join(data_dir, store_name),
            key=symbol,
            complib="blosc",
            complevel=9,
            mode="a",
            format="table",
        )
if __name__ == "__main__":
    # Script entry point: fetch the constituent list, then download and store
    # each symbol in turn, reporting progress on stdout.
    print("开始获取沪深300成分股...")
    symbols = get_csi300_constituents()
    total = len(symbols)
    print(f"获取到{total}只成分股")

    for i, symbol in enumerate(symbols, 1):
        print(f"\n下载进度 ({i}/{total}): {symbol}")
        start_time = time.time()
        df = download_adjusted_data(symbol)
        if df is not None:
            split_and_save_data(df, symbol)
            time_cost = time.time() - start_time
            print(f"✅ 完成 {symbol} | 耗时: {time_cost:.2f}秒 | 记录数: {len(df)}")
        else:
            print(f"⏭ 跳过 {symbol}")
        # Pause between symbols to space out the requests.
        time.sleep(1)

    print("\n全部任务完成!")
    # Report the files that are actually written: split_and_save_data stores
    # everything in HDF5 files under data_dir, not in history_dir / inc_dir.
    history_store = os.path.abspath(os.path.join(data_dir, "history.h5"))
    inc_store = os.path.abspath(os.path.join(data_dir, "inc.h5"))
    print(f"历史数据保存至: {history_store}")
    print(f"增量数据保存至: {inc_store}")
之后不久,就开始了日更。这是一个正确的决定,为目标构建了一个成长系统。16年引入的增量是“量化”,19年引入的增量是“FOF投资理财”,22年的增量是“日更自媒体”。下一个增量会是什么?量化平台化?“职业投资的能力”?还是“财经视角看世界”?

代码和数据下载:AI量化实验室——2025量化投资的星辰大海

AI量化实验室星球,已经运行三年多,1800+会员。
aitrader代码,因子表达式引擎、遗传算法(Deap)因子挖掘引擎等,支持vnpy,qlib,backtrader和bt引擎,内置多个年化30%+的策略,每周五迭代一次,代码和数据在星球全部开源。
点击 “查看原文”,直接访问策略集合。