rl 的第一次

This commit is contained in:
zhangkun9038@dingtalk.com 2026-02-18 01:43:03 +08:00
parent 9b29cc6597
commit 6ea3425986
3 changed files with 49 additions and 66 deletions

View File

@ -73,11 +73,25 @@
"data_kitchen": {
"fillna": "ffill"
},
"freqaimodel": "LightGBMMultiTargetRegressor",
"freqaimodel": "MyCoolRLModel",
"purge_old_models": 2,
"train_period_days": 12,
"backtest_period_days": 2,
"live_retrain_hours": 2,
"continual_learning": false,
"rl_config": {
"train_cycles": 50,
"add_state_info": true,
"max_trade_duration_candles": 300,
"max_training_drawdown_pct": 0.02,
"cpu_count": 16,
"model_type": "PPO",
"policy_type": "MlpPolicy",
"model_reward_parameters": {
"rr": 1,
"profit_aim": 0.025
}
},
"outlier_detection": {
"method": "IsolationForest",
"contamination": 0.1
@ -114,17 +128,8 @@
"shuffle": false
},
"model_training_parameters": {
"n_estimators": 600,
"learning_rate": 0.02,
"num_leaves": 60,
"max_depth": 12,
"min_data_in_leaf": 15,
"feature_fraction": 0.8,
"bagging_fraction": 0.8,
"bagging_freq": 5,
"verbose": -1
},
"ml_prediction_api_url": "http://pairlist.xl.home/api/mlprediction"
"verbose": 1
}
},
"api_server": {
"enabled": true,

View File

@ -9,6 +9,7 @@ import pandas_ta as ta
from freqtrade.persistence import Trade
import numpy as np
from datetime import datetime, timezone, timedelta
from functools import reduce
logger = logging.getLogger(__name__)
@ -29,6 +30,10 @@ class FreqaiPrimer(IStrategy):
# FreqAI 要求
process_only_new_candles = True
# 时间框架和交易配置
timeframe = "3m"
can_short = False # 只支持做多
stoploss = -0.15 # 固定止损 -15%
trailing_stop = True
trailing_stop_positive_offset = 0.005 # 追踪止损偏移量 0.5%
@ -67,6 +72,12 @@ class FreqaiPrimer(IStrategy):
def feature_engineering_standard(self, dataframe: DataFrame, metadata: dict, **kwargs) -> DataFrame:
"""标准时间类特征"""
# 以下特征对于RL模型是必需的
dataframe[f"%-raw_close"] = dataframe["close"]
dataframe[f"%-raw_open"] = dataframe["open"]
dataframe[f"%-raw_high"] = dataframe["high"]
dataframe[f"%-raw_low"] = dataframe["low"]
# 标准时间类特征
if "date" in dataframe.columns:
dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek
dataframe["%-hour_of_day"] = dataframe["date"].dt.hour
@ -74,10 +85,8 @@ class FreqaiPrimer(IStrategy):
def set_freqai_targets(self, dataframe: DataFrame, metadata: dict, **kwargs) -> DataFrame:
"""设置FreqAI目标变量强化学习不需要传统标签但需要保持兼容"""
# 为了保持与FreqAI框架的兼容性添加默认目标列
dataframe["&s-entry_signal"] = 0
dataframe["&s-exit_signal"] = 0
dataframe["&s-future_volatility"] = 0
# 对于RL没有直接的目标要设置。这是填充中性直到智能体发送动作。
dataframe["&-action"] = 0
return dataframe
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
@ -116,26 +125,27 @@ class FreqaiPrimer(IStrategy):
return dataframe
def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
"""生成入场信号"""
# 基础入场条件:价格接近布林带下轨且成交量放大
entry_condition = (
(dataframe['close'] <= dataframe['bb_lower_1h'] * 1.01) &
(dataframe['volume_spike'] == True)
)
def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
"""生成入场信号 - 基于RL智能体的&-action"""
dataframe.loc[entry_condition, 'enter_long'] = 1
return dataframe
enter_long_conditions = [df["do_predict"] == 1, df["&-action"] == 1]
if enter_long_conditions:
df.loc[
reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
] = (1, "long")
# 目前只支持做多,所以不添加做空逻辑
return df
def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
"""生成出场信号"""
# 基础出场条件:价格接近布林带上轨
exit_condition = (
(dataframe['close'] >= dataframe['bb_upper_1h'] * 0.99)
)
def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
"""生成出场信号 - 基于RL智能体的&-action"""
exit_long_conditions = [df["do_predict"] == 1, df["&-action"] == 2]
if exit_long_conditions:
df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1
dataframe.loc[exit_condition, 'exit_long'] = 1
return dataframe
# 目前只支持做多,所以不添加做空逻辑
return df
def confirm_trade_entry(self, pair, order_type, amount, rate, time_in_force, current_time, entry_tag, side, **kwargs):
"""确认入场交易"""
@ -165,35 +175,3 @@ class FreqaiPrimer(IStrategy):
# 基础出场逻辑
return exit_reason == 'exit_signal'
# 强化学习模型定义
from freqtrade.freqai.prediction_models.ReinforcementLearner import BaseReinforcementLearner
class MyReinforcementLearner(BaseReinforcementLearner):
    """Reinforcement learner whose reward is shaped to ride out shake-outs.

    NOTE(review): FreqAI's documented hook for a custom reward is
    ``calculate_reward(self, action)`` on a nested ``MyRLEnv`` environment
    class. Confirm that the framework actually calls this learner-level
    method before relying on it.
    """

    def calculate_reward(self, trade: 'Trade', dataframe: DataFrame, pair: str, trade_dir: int) -> float:
        """Score a trade, focusing on recognising and surviving shake-outs.

        The reward is built from four additive terms:

        * base term: profit ratio scaled by 100;
        * holding-time term: -5 for a losing trade held under 15 minutes,
          -2 for a trade held over 120 minutes with less than 2% profit;
        * volume term: +8 when more than 3 ``volume_spike`` candles occur in
          the 20 candles from entry and profit exceeds 3%;
        * drawdown term: ``-max_drawdown * 200`` once drawdown exceeds 5%.

        Args:
            trade: Trade being scored.
            dataframe: Candle data; ``date`` and (optionally)
                ``volume_spike`` columns are read.
            pair: Trading pair (unused here, kept for the interface).
            trade_dir: Trade direction (unused here, kept for the interface).

        Returns:
            The shaped reward as a float.
        """
        # Base profit reward.
        profit = trade.calc_profit_ratio()
        reward = profit * 100

        # Holding-time penalty. A trade that is still open has no close
        # date, so the term is skipped instead of raising TypeError.
        close_date = trade.close_date_utc
        if close_date is not None:
            hold_duration = (close_date - trade.open_date_utc).total_seconds() / 60
            if hold_duration < 15 and profit < 0:
                reward -= 5  # shaken out early at a loss
            elif hold_duration > 120 and profit < 0.02:
                reward -= 2  # long hold, low return

        # Volume bonus: reward catching the rebound after a shake-out.
        if 'volume_spike' in dataframe.columns:
            # Resolve the entry candle by position: the slice below uses
            # ``iloc``, and the open time may not match any candle exactly,
            # in which case the bonus is skipped rather than raising.
            hits = (dataframe['date'] == trade.open_date_utc).to_numpy().nonzero()[0]
            if hits.size:
                entry_pos = int(hits[0])
                if entry_pos + 20 < len(dataframe):
                    post_entry_volume = dataframe['volume_spike'].iloc[entry_pos:entry_pos + 20].sum()
                    if post_entry_volume > 3 and profit > 0.03:
                        reward += 8

        # Drawdown penalty for risk control.
        # NOTE(review): confirm that Trade exposes ``max_drawdown``; the
        # term is skipped when the attribute is missing or None.
        max_drawdown = getattr(trade, 'max_drawdown', None)
        if max_drawdown is not None and max_drawdown > 0.05:
            reward -= max_drawdown * 200

        return reward

View File

@ -65,7 +65,7 @@ START_DATE_RAW=""
END_DATE_RAW=""
PAIRS_ARG=""
PAIR_REMOTE_LIST_URL=""
FREQAI_MODEL="LightGBMRegressorMultiTarget"
FREQAI_MODEL="MyCoolRLModel"
# Parse parameters based on whether we have named parameters
if [ "$HAS_NAMED_PARAMS" = true ]; then