原创内容第982篇,专注AGI+,AI量化投资、个人成长与财富自由。策略地址:
http://www.ailabx.com/strategy/68a3633d8c45f7483b1aa0e4
之前对比了 pandas、duckdb 和 polars 加载 csv、parquet 等文件的表现。过程中遇到一些问题:csv 文件多了之后,可能有些表结构不对,或者存在数据格式问题,多个 dataframe 就合不到一块,而且出错后还不容易排查。

import akshare as ak
import pandas as pd
import os
import time
from datetime import datetime
# Output locations, expressed relative to the current working directory.
data_dir = "../data"
history_dir = "../data/csi300/history"
inc_dir = "../data/csi300/inc"

# Create both CSI 300 directories up front; already-existing ones are left alone.
for _output_dir in (history_dir, inc_dir):
    os.makedirs(_output_dir, exist_ok=True)
def get_csi300_constituents():
    """Return the CSI 300 constituent stock codes as a list.

    The constituent table is fetched with ``ak.index_stock_cons_sina`` for
    index ``000300``; every ``sh.`` / ``sz.`` occurrence is removed from the
    ``code`` column before it is converted to a plain list.
    """
    constituents = ak.index_stock_cons_sina(symbol="000300")
    cleaned_codes = constituents["code"].str.replace(
        r"sh\.|sz\.", "", regex=True
    )
    constituents["code"] = cleaned_codes
    return constituents["code"].tolist()
def download_adjusted_data(symbol):
    """Download back-adjusted ("hfq") daily bars for one stock.

    Args:
        symbol: Stock code; it is stringified and passed as-is to
            ``ak.stock_zh_a_hist``.

    Returns:
        A DataFrame with the columns ``symbol, date, open, high, low, close,
        volume`` (the source's Chinese column names are renamed), or ``None``
        when the request returned no rows or anything raised. Failures are
        printed rather than propagated so a batch run can continue with the
        next symbol.
    """
    column_map = {
        "日期": "date",
        "开盘": "open",
        "最高": "high",
        "最低": "low",
        "收盘": "close",
        "成交量": "volume",
    }
    wanted = ["symbol", "date", "open", "high", "low", "close", "volume"]

    try:
        # One request is enough: the previous "sh"/"sz" loop never used the
        # prefix, so an empty answer only triggered an identical second call.
        df = ak.stock_zh_a_hist(
            symbol=f"{symbol}",
            period="daily",
            adjust="hfq",
        )
        if df.empty:
            print(f"⚠️ 未找到数据: {symbol}")
            return None

        df = df.rename(columns=column_map)
        df.insert(0, "symbol", symbol)
        return df[wanted]
    except Exception as e:
        # Deliberately broad: any download/format problem skips this symbol.
        print(f"❌ 下载{symbol}失败: {e}")
        return None
def split_and_save_data(df, symbol):
    """Split one symbol's rows at 2025-01-01 and store both parts as HDF5.

    Rows dated before 2025-01-01 are written to ``history.h5`` and rows dated
    on or after it to ``inc.h5``; both files live under ``data_dir`` and both
    parts are stored under the key ``symbol``. An empty part is not written.

    The history part used to be written under the fixed key ``'history'``
    without appending, so each symbol replaced the previous one and only the
    last symbol survived. It now uses the per-symbol key, like the
    incremental part.

    Args:
        df: DataFrame with a ``date`` column, or ``None`` (then nothing
            happens). The caller's frame is not modified.
        symbol: Stock code, used as the HDF5 key in both files.
    """
    if df is None:
        return

    # Derive a new frame instead of overwriting the caller's ``date`` column.
    frame = df.assign(date=pd.to_datetime(df["date"]))

    # A single cutoff keeps the two partitions exactly complementary.
    cutoff = pd.Timestamp("2025-01-01")
    parts = (
        ("history.h5", frame[frame["date"] < cutoff]),
        ("inc.h5", frame[frame["date"] >= cutoff]),
    )

    for file_name, part in parts:
        if part.empty:
            continue
        part.to_hdf(
            os.path.join(data_dir, file_name),
            key=symbol,
            complib="blosc",
            complevel=9,
            mode="a",
            format="table",
        )
def main():
    """Download every CSI 300 constituent and store it as HDF5.

    For each code returned by ``get_csi300_constituents`` the daily bars are
    fetched with ``download_adjusted_data`` and written with
    ``split_and_save_data``; symbols whose download returned ``None`` are
    skipped. Progress is printed per symbol.
    """
    print("开始获取沪深300成分股...")
    symbols = get_csi300_constituents()
    total = len(symbols)
    print(f"获取到{total}只成分股")

    for i, symbol in enumerate(symbols, 1):
        print(f"\n下载进度 ({i}/{total}): {symbol}")
        start_time = time.time()

        df = download_adjusted_data(symbol)
        if df is None:
            print(f"⏭ 跳过 {symbol}")
        else:
            split_and_save_data(df, symbol)
            time_cost = time.time() - start_time
            print(f"✅ 完成 {symbol} | 耗时: {time_cost:.2f}秒 | 记录数: {len(df)}")

        # Fixed one-second pause after every symbol (presumably to go easy on
        # the data source — confirm before shortening).
        time.sleep(1)

    print("\n全部任务完成!")
    # Report the files that are actually written: split_and_save_data stores
    # its output in these two HDF5 files under data_dir, not in
    # history_dir / inc_dir.
    print(f"历史数据保存至: {os.path.abspath(os.path.join(data_dir, 'history.h5'))}")
    print(f"增量数据保存至: {os.path.abspath(os.path.join(data_dir, 'inc.h5'))}")


if __name__ == "__main__":
    main()
之后不久,就开始了日更。这是一个正确的决定,为目标构建了一个成长系统。16年引入的增量是“量化”,19年引入的增量是“FOF投资理财”,22年的增量是“日更自媒体”。那么下一个增量会是什么:量化平台化?“职业投资的能力”?还是“财经视角看世界”?

代码和数据下载:AI量化实验室——2025量化投资的星辰大海

AI量化实验室星球,已经运行三年多,1800+会员。
aitrader代码,因子表达式引擎、遗传算法(Deap)因子挖掘引擎等,支持vnpy,qlib,backtrader和bt引擎,内置多个年化30%+的策略,每周五迭代一次,代码和数据在星球全部开源。
点击 “查看原文”,直接访问策略集合。