41 lines
1.0 KiB
Python
41 lines
1.0 KiB
Python
"""
|
|
因子中性化批量处理工具
|
|
行业中性 + 市值中性
|
|
"""
|
|
import pandas as pd
|
|
import numpy as np
|
|
from typing import Dict, List
|
|
from factors.base_factor import BaseStructuredFactor
|
|
|
|
|
|
def batch_process_factors(
    factors: Dict[str, BaseStructuredFactor],
    data: pd.DataFrame
) -> pd.DataFrame:
    """
    Run every factor over ``data`` and gather the results column-wise.

    Args:
        factors: Mapping from factor name to factor object. Each object's
            ``process(data)`` method is called once, in mapping order.
        data: Raw input data, passed unchanged to every factor.

    Returns:
        A DataFrame built from the per-factor results, one column per
        factor name. (Presumably each ``process`` call returns a Series
        aligned to ``data``'s index -- confirm against BaseStructuredFactor.)
    """
    # Dict insertion order is preserved, so the column order of the result
    # follows the order of ``factors``.
    return pd.DataFrame(
        {factor_name: factor_obj.process(data)
         for factor_name, factor_obj in factors.items()}
    )
|
|
|
|
|
|
def prepare_market_cap_neutral(data: pd.DataFrame) -> pd.DataFrame:
    """
    Add a ``log_market_cap`` column for use in market-cap neutralization.

    The column is only computed when ``data`` has a ``market_cap`` column
    and does not already have ``log_market_cap``; otherwise ``data`` is
    returned as-is (the same object, not a copy).

    Args:
        data: Input frame, optionally containing ``market_cap``.

    Returns:
        A copy of ``data`` with ``log_market_cap`` added, or ``data`` itself
        when nothing needs to be computed. The caller's frame is never
        mutated.
    """
    columns = data.columns
    if 'market_cap' not in columns or 'log_market_cap' in columns:
        return data

    # Work on a copy so the caller's frame is left untouched.
    result = data.copy()
    market_cap = result['market_cap']
    # log(0) is -inf and log(<0) is NaN (both with a RuntimeWarning); a
    # non-finite regressor breaks downstream neutralization, so non-positive
    # caps are masked to NaN before taking the log.
    result['log_market_cap'] = np.log(market_cap.where(market_cap > 0))
    return result
|