主时间框架: 15m (用于入场/出场决策)

This commit is contained in:
zhangkun9038@dingtalk.com 2026-02-18 13:14:32 +08:00
parent 34ea6a04eb
commit 10adb63c7f
3 changed files with 64 additions and 156 deletions

View File

@ -13,8 +13,8 @@
"fiat_display_currency": "USD", "fiat_display_currency": "USD",
"dry_run": true, "dry_run": true,
"enable_strategy_log": true, "enable_strategy_log": true,
"timeframe": "3m", "timeframe": "15m",
"additional_timeframes": ["4h"], "additional_timeframes": ["1h"],
"dry_run_wallet": 2000, "dry_run_wallet": 2000,
"cancel_open_orders_on_exit": true, "cancel_open_orders_on_exit": true,
"stoploss": -0.14, "stoploss": -0.14,
@ -102,7 +102,7 @@
}, },
"feature_parameters": { "feature_parameters": {
"include_timeframes": [ "include_timeframes": [
"3m", "5m",
"15m" "15m"
], ],
"include_corr_pairlist": [ "include_corr_pairlist": [

View File

@ -31,7 +31,7 @@ class FreqaiPrimer(IStrategy):
process_only_new_candles = True process_only_new_candles = True
# 时间框架和交易配置 # 时间框架和交易配置
timeframe = "3m" timeframe = "15m"
can_short = False # 只支持做多 can_short = False # 只支持做多
stoploss = -0.15 # 固定止损 -15% stoploss = -0.15 # 固定止损 -15%
@ -134,24 +134,24 @@ class FreqaiPrimer(IStrategy):
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
"""计算用于强化学习的特征""" """计算用于强化学习的特征"""
# 基础技术指标 # 基础技术指标 (适用于15m timeframe)
dataframe['rsi_1h'] = ta.rsi(dataframe['close'], length=14) dataframe['rsi_15m'] = ta.rsi(dataframe['close'], length=14)
# MACD指标 # MACD指标 (适用于15m timeframe)
macd = ta.macd(dataframe['close'], fast=12, slow=26, signal=9) macd = ta.macd(dataframe['close'], fast=12, slow=26, signal=9)
dataframe['macd_1h'] = macd['MACD_12_26_9'] dataframe['macd_15m'] = macd['MACD_12_26_9']
dataframe['macd_signal_1h'] = macd['MACDs_12_26_9'] dataframe['macd_signal_15m'] = macd['MACDs_12_26_9']
# 布林带 # 布林带 (适用于15m timeframe)
bbands = ta.bbands(dataframe['close'], length=20, std=2) bbands = ta.bbands(dataframe['close'], length=20, std=2)
dataframe['bb_upper_1h'] = bbands['BBU_20_2.0'] dataframe['bb_upper_15m'] = bbands['BBU_20_2.0']
dataframe['bb_lower_1h'] = bbands['BBL_20_2.0'] dataframe['bb_lower_15m'] = bbands['BBL_20_2.0']
# 移动平均线 # 移动平均线 (适用于15m timeframe)
dataframe['ema_5_1h'] = ta.ema(dataframe['close'], length=5) dataframe['ema_5_15m'] = ta.ema(dataframe['close'], length=5)
dataframe['ema_20_1h'] = ta.ema(dataframe['close'], length=20) dataframe['ema_20_15m'] = ta.ema(dataframe['close'], length=20)
# 成交量指标 # 成交量指标 (适用于15m timeframe)
dataframe['volume_ma'] = dataframe['volume'].rolling(window=20).mean() dataframe['volume_ma'] = dataframe['volume'].rolling(window=20).mean()
dataframe['volume_ratio'] = dataframe['volume'] / dataframe['volume_ma'] dataframe['volume_ratio'] = dataframe['volume'] / dataframe['volume_ma']
dataframe['volume_spike'] = dataframe['volume_ratio'] > 2.0 dataframe['volume_spike'] = dataframe['volume_ratio'] > 2.0
@ -189,32 +189,3 @@ class FreqaiPrimer(IStrategy):
# 目前只支持做多,所以不添加做空逻辑 # 目前只支持做多,所以不添加做空逻辑
return df return df
def confirm_trade_entry(self, pair, order_type, amount, rate, time_in_force, current_time, entry_tag, side, **kwargs):
"""确认入场交易"""
# 检查FreqAI是否启用
freqai_enabled = self.config.get('freqai', {}).get('enabled', False)
if freqai_enabled:
# 强化学习模式下入场决策主要由RL智能体通过环境交互决定
# 这里可以添加一些基础的过滤条件
df, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe)
if len(df) > 0:
last_row = df.iloc[-1]
# 基础过滤避免在RSI超买时入场
if 'rsi_1h' in last_row and last_row['rsi_1h'] > 70:
return False
return True
def confirm_trade_exit(self, pair, order_type, amount, rate, time_in_force, current_time, exit_reason, side, **kwargs):
"""确认出场交易"""
# 检查FreqAI是否启用
freqai_enabled = self.config.get('freqai', {}).get('enabled', False)
if freqai_enabled:
# 强化学习模式下出场决策主要由RL智能体通过环境交互决定
pass
# 基础出场逻辑
return exit_reason == 'exit_signal'

View File

@ -11,161 +11,98 @@ class MyCoolRLModel(ReinforcementLearner):
""" """
针对高波动资产 (如 PEPE) 优化的强化学习模型 针对高波动资产 (如 PEPE) 优化的强化学习模型
核心改进 核心改进
1. 移除入场奖励引入开仓成本惩罚 1. 极度简化奖励函数让模型容易学习
2. 引入非线性盈亏奖励鼓励捕捉大趋势 2. 大幅鼓励入场和交易
3. 严厉惩罚浮亏持仓抗单行为 3. 减少对亏损的惩罚让模型敢于尝试
4. 增加对大户操纵的识别和防御机制
""" """
class MyRLEnv(Base5ActionRLEnv): class MyRLEnv(Base5ActionRLEnv):
""" """
自定义环境重写 calculate_reward 以适应动量交易和大户操纵防御 自定义环境重写 calculate_reward 以适应动量交易
""" """
def calculate_reward(self, action: int) -> float: def calculate_reward(self, action: int) -> float:
""" """
奖励函数的示例这是用户可能希望 优化的奖励函数特别处理亏损订单持仓太久的问题
注入自己创意的一个函数
警告!
此函数是功能展示,旨在展示尽可能多的
环境控制功能。它还设计用于在小型计算机上快速运行。
这是一个基准,*不* 用于生产环境。
:param action: int = 智能体为当前K线做出的动作 :param action: int = 智能体为当前K线做出的动作
:return: :return:
float = 给智能体当前步骤的奖励用于优化 float = 给智能体当前步骤的奖励
神经网络中的权重
""" """
# 首先,如果动作无效,则惩罚 # 首先,如果动作无效,则惩罚
if not self._is_valid(action): if not self._is_valid(action):
self.tensorboard_log("invalid", category="actions") self.tensorboard_log("invalid", category="actions")
return -2.0 return -1.0
# 获取核心状态数据 # 获取核心状态数据
pnl = self.get_unrealized_profit() # 当前浮动盈亏 (百分比,如 0.01 代表 1%) pnl = self.get_unrealized_profit()
# 获取持仓时间 (K线数量) # 获取持仓时间
trade_duration = 0 trade_duration = 0
if self._last_trade_tick is not None: if self._last_trade_tick is not None:
trade_duration = self._current_tick - self._last_trade_tick trade_duration = self._current_tick - self._last_trade_tick
# 读取配置中的最大持仓时间,默认 100 根 K 线
max_trade_duration = self.rl_config.get("max_trade_duration_candles", 100)
# 奖励累加器
reward = 0.0
# ========================================================= # =========================================================
# 场景 A: 决定入场 (Long Enter / Short Enter) # 场景 1: 入场 (Long Enter)
# ========================================================= # =========================================================
if action in (Actions.Long_enter.value, Actions.Short_enter.value): if action == Actions.Long_enter.value and self._position == Positions.Neutral:
if self._position == Positions.Neutral: # 奖励入场,鼓励模型尝试
# 入场给予奖励,鼓励模型尝试 return 0.5
# 从 -0.01 改为 +0.02,让模型更愿意入场
return 0.02
# ========================================================= # =========================================================
# 场景 B: 观望 (Neutral) # 场景 2: 空仓观望 (Neutral) - 重罚!
# ========================================================= # =========================================================
if action == Actions.Neutral.value and self._position == Positions.Neutral: if action == Actions.Neutral.value and self._position == Positions.Neutral:
# 空仓观望给予较重惩罚,强烈鼓励模型寻找机会 # 空仓观望给予重罚,强烈鼓励模型寻找机会
# 从 -0.001 改为 -0.02,避免模型一直观望 return -0.5
return -0.02
# ========================================================= # =========================================================
# 场景 C: 持仓中 (Holding) - 每一根 K 线都会触发 # 场景 3: 持仓中 (Holding)
# ========================================================= # =========================================================
if self._position in (Positions.Short, Positions.Long): if self._position in (Positions.Short, Positions.Long):
# 如果当前动作是继续持有 (Neutral)
if action == Actions.Neutral.value: if action == Actions.Neutral.value:
# 持仓时,综合考虑盈亏和时间
reward = 0.0
# 1. 时间管理:希望持仓,但不要超时 # 1. 盈亏奖励/惩罚
# - 前50%时间:轻微奖励,鼓励持仓捕捉趋势
# - 50%-80%时间:轻微惩罚
# - 超过80%时间:惩罚加重
time_ratio = trade_duration / max_trade_duration
if time_ratio < 0.5:
# 前半段时间:轻微奖励,鼓励持仓
time_reward = 0.005 * (1 - time_ratio * 2)
reward += time_reward
elif time_ratio < 0.8:
# 50%-80%时间:轻微惩罚
time_penalty = -0.01 * ((time_ratio - 0.5) / 0.3)
reward += time_penalty
else:
# 超过80%时间:惩罚加重,提醒模型该离场了
time_penalty = -0.02 * ((time_ratio - 0.8) / 0.2) - 0.01
reward += time_penalty
# 2. 浮动盈亏反馈 (关键!)
if pnl > 0: if pnl > 0:
# 浮盈:给予更强的正反馈,鼓励拿住趋势 # 浮盈:直接奖励
# 从 np.log(1 + pnl) * 0.5 改为 pnl * 2.0 reward += pnl * 10.0
reward += pnl * 2.0
else: else:
# 浮亏:给予惩罚,但不要太严厉 # 浮亏:惩罚,但不要太严厉
# 从 abs(pnl) * 1.0 改为 abs(pnl) * 0.5 reward += pnl * 2.0
reward -= (abs(pnl) * 0.5)
# 3. 止损惩罚加速
# 如果浮亏超过 3%,给予额外的惩罚,但不要太严厉
if pnl < -0.03:
reward -= 0.2 # 从 -0.5 改为 -0.2
# 4. 【新增】检测是否在大户操纵中被套 # 2. 时间惩罚 - 特别针对亏损订单持仓太久
# 如果刚入场不久就出现大幅反向波动,可能是被大户收割 # 如果浮亏且持仓时间较长,增加时间惩罚
if trade_duration < 5 and pnl < -0.01: if pnl < 0 and trade_duration > 10: # 如果亏损且持仓超过10根K线
# 入场后5根K线内就亏损超过1%,很可能是被大户骗了 # 时间惩罚随持仓时间递增
reward -= 0.3 # 从 -1.0 改为 -0.3,不要太严厉 time_penalty = -0.01 * (trade_duration - 10) # 每多持有一根K线多惩罚0.01
reward += time_penalty
# 3. 如果浮亏且持仓时间很长,惩罚加重
if pnl < 0 and trade_duration > 30: # 如果亏损且持仓超过30根K线
heavy_time_penalty = -0.1 * (trade_duration - 30) # 每多持有一根K线多惩罚0.1
reward += heavy_time_penalty
return reward
# ========================================================= # =========================================================
# 场景 D: 离场结算 (Exit) # 场景 4: 离场结算 (Exit)
# ========================================================= # =========================================================
if (action == Actions.Long_exit.value and self._position == Positions.Long) or \ if (action == Actions.Long_exit.value and self._position == Positions.Long) or \
(action == Actions.Short_exit.value and self._position == Positions.Short): (action == Actions.Short_exit.value and self._position == Positions.Short):
# 基础因子 - 增加盈利奖励
factor = 20.0
# 【核心修改 3】非线性结算奖励 - 大幅增加盈利奖励
if pnl > 0: if pnl > 0:
# 盈利:奖励 = PnL * 因子 # 盈利离场:大幅奖励
# 如果这笔交易是大赚 (比如 > 2%),因子翻倍 return pnl * 50.0
if pnl > 0.02:
factor *= 3.0 # 从 2.0 改为 3.0
# 最终奖励
total_reward = pnl * factor
# 额外奖励:如果这笔交易很快就赚了钱(高效率)
if trade_duration < (max_trade_duration * 0.2):
total_reward *= 2.0 # 从 1.5 改为 2.0
# 【新增】奖励快速脱离陷阱
# 如果是因被大户收割而被迫止损,但及时离场,给予一定奖励
# 这鼓励模型快速识别并逃离陷阱
if trade_duration < 5 and pnl < 0 and pnl > -0.02:
# 在5根K线内止损亏损小于2%,说明及时逃离了陷阱
total_reward += 1.0 # 从 0.5 改为 1.0,给予更多奖励
return float(total_reward)
else: else:
# 亏损:惩罚 = PnL * 因子 - 大幅减少亏损惩罚 # 亏损离场:惩罚,但要考虑持仓时间
# 亏损时的惩罚系数通常要比盈利系数大,模拟"损失厌恶" # 如果是长期亏损持仓被迫离场,惩罚稍重
# 这会让模型非常忌惮亏损离场 # 如果是短期止损离场,惩罚较轻
loss_factor = 5.0 # 从 15.0 改为 5.0,大幅减少惩罚 base_penalty = pnl * 5.0
if pnl < 0 and trade_duration > 20: # 长期亏损持仓
# 【新增】区分不同类型的亏损 return base_penalty * 1.5 # 惩罚加重50%
# 如果是被大户收割导致的快速亏损,惩罚可以适当减轻
# 因为这是市场环境问题,而非模型判断错误
if trade_duration < 5 and pnl < -0.02:
# 快速大幅亏损,可能是被大户收割
loss_factor = 3.0 # 从 10.0 改为 3.0,进一步减轻惩罚
else: else:
loss_factor = 5.0 # 从 15.0 改为 5.0 return base_penalty
return float(pnl * loss_factor)
return float(reward) return 0.0