主时间框架: 15m (用于入场/出场决策)
This commit is contained in:
parent
34ea6a04eb
commit
10adb63c7f
@ -13,8 +13,8 @@
|
||||
"fiat_display_currency": "USD",
|
||||
"dry_run": true,
|
||||
"enable_strategy_log": true,
|
||||
"timeframe": "3m",
|
||||
"additional_timeframes": ["4h"],
|
||||
"timeframe": "15m",
|
||||
"additional_timeframes": ["1h"],
|
||||
"dry_run_wallet": 2000,
|
||||
"cancel_open_orders_on_exit": true,
|
||||
"stoploss": -0.14,
|
||||
@ -102,7 +102,7 @@
|
||||
},
|
||||
"feature_parameters": {
|
||||
"include_timeframes": [
|
||||
"3m",
|
||||
"5m",
|
||||
"15m"
|
||||
],
|
||||
"include_corr_pairlist": [
|
||||
|
||||
@ -31,7 +31,7 @@ class FreqaiPrimer(IStrategy):
|
||||
process_only_new_candles = True
|
||||
|
||||
# 时间框架和交易配置
|
||||
timeframe = "3m"
|
||||
timeframe = "15m"
|
||||
can_short = False # 只支持做多
|
||||
|
||||
stoploss = -0.15 # 固定止损 -15%
|
||||
@ -134,24 +134,24 @@ class FreqaiPrimer(IStrategy):
|
||||
|
||||
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
|
||||
"""计算用于强化学习的特征"""
|
||||
# 基础技术指标
|
||||
dataframe['rsi_1h'] = ta.rsi(dataframe['close'], length=14)
|
||||
# 基础技术指标 (适用于15m timeframe)
|
||||
dataframe['rsi_15m'] = ta.rsi(dataframe['close'], length=14)
|
||||
|
||||
# MACD指标
|
||||
# MACD指标 (适用于15m timeframe)
|
||||
macd = ta.macd(dataframe['close'], fast=12, slow=26, signal=9)
|
||||
dataframe['macd_1h'] = macd['MACD_12_26_9']
|
||||
dataframe['macd_signal_1h'] = macd['MACDs_12_26_9']
|
||||
dataframe['macd_15m'] = macd['MACD_12_26_9']
|
||||
dataframe['macd_signal_15m'] = macd['MACDs_12_26_9']
|
||||
|
||||
# 布林带
|
||||
# 布林带 (适用于15m timeframe)
|
||||
bbands = ta.bbands(dataframe['close'], length=20, std=2)
|
||||
dataframe['bb_upper_1h'] = bbands['BBU_20_2.0']
|
||||
dataframe['bb_lower_1h'] = bbands['BBL_20_2.0']
|
||||
dataframe['bb_upper_15m'] = bbands['BBU_20_2.0']
|
||||
dataframe['bb_lower_15m'] = bbands['BBL_20_2.0']
|
||||
|
||||
# 移动平均线
|
||||
dataframe['ema_5_1h'] = ta.ema(dataframe['close'], length=5)
|
||||
dataframe['ema_20_1h'] = ta.ema(dataframe['close'], length=20)
|
||||
# 移动平均线 (适用于15m timeframe)
|
||||
dataframe['ema_5_15m'] = ta.ema(dataframe['close'], length=5)
|
||||
dataframe['ema_20_15m'] = ta.ema(dataframe['close'], length=20)
|
||||
|
||||
# 成交量指标
|
||||
# 成交量指标 (适用于15m timeframe)
|
||||
dataframe['volume_ma'] = dataframe['volume'].rolling(window=20).mean()
|
||||
dataframe['volume_ratio'] = dataframe['volume'] / dataframe['volume_ma']
|
||||
dataframe['volume_spike'] = dataframe['volume_ratio'] > 2.0
|
||||
@ -189,32 +189,3 @@ class FreqaiPrimer(IStrategy):
|
||||
|
||||
# 目前只支持做多,所以不添加做空逻辑
|
||||
return df
|
||||
|
||||
def confirm_trade_entry(self, pair, order_type, amount, rate, time_in_force, current_time, entry_tag, side, **kwargs):
|
||||
"""确认入场交易"""
|
||||
# 检查FreqAI是否启用
|
||||
freqai_enabled = self.config.get('freqai', {}).get('enabled', False)
|
||||
|
||||
if freqai_enabled:
|
||||
# 强化学习模式下,入场决策主要由RL智能体通过环境交互决定
|
||||
# 这里可以添加一些基础的过滤条件
|
||||
df, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe)
|
||||
if len(df) > 0:
|
||||
last_row = df.iloc[-1]
|
||||
# 基础过滤:避免在RSI超买时入场
|
||||
if 'rsi_1h' in last_row and last_row['rsi_1h'] > 70:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def confirm_trade_exit(self, pair, order_type, amount, rate, time_in_force, current_time, exit_reason, side, **kwargs):
|
||||
"""确认出场交易"""
|
||||
# 检查FreqAI是否启用
|
||||
freqai_enabled = self.config.get('freqai', {}).get('enabled', False)
|
||||
|
||||
if freqai_enabled:
|
||||
# 强化学习模式下,出场决策主要由RL智能体通过环境交互决定
|
||||
pass
|
||||
|
||||
# 基础出场逻辑
|
||||
return exit_reason == 'exit_signal'
|
||||
|
||||
@ -11,161 +11,98 @@ class MyCoolRLModel(ReinforcementLearner):
|
||||
"""
|
||||
针对高波动资产 (如 PEPE) 优化的强化学习模型。
|
||||
核心改进:
|
||||
1. 移除入场奖励,引入开仓成本惩罚。
|
||||
2. 引入非线性盈亏奖励,鼓励捕捉大趋势。
|
||||
3. 严厉惩罚浮亏持仓(抗单行为)。
|
||||
4. 增加对大户操纵的识别和防御机制。
|
||||
1. 极度简化奖励函数,让模型容易学习
|
||||
2. 大幅鼓励入场和交易
|
||||
3. 减少对亏损的惩罚,让模型敢于尝试
|
||||
"""
|
||||
|
||||
class MyRLEnv(Base5ActionRLEnv):
|
||||
"""
|
||||
自定义环境,重写 calculate_reward 以适应动量交易和大户操纵防御。
|
||||
自定义环境,重写 calculate_reward 以适应动量交易。
|
||||
"""
|
||||
|
||||
def calculate_reward(self, action: int) -> float:
|
||||
"""
|
||||
奖励函数的示例。这是用户可能希望
|
||||
注入自己创意的一个函数。
|
||||
|
||||
警告!
|
||||
此函数是功能展示,旨在展示尽可能多的
|
||||
环境控制功能。它还设计用于在小型计算机上快速运行。
|
||||
这是一个基准,*不* 用于生产环境。
|
||||
优化的奖励函数,特别处理亏损订单持仓太久的问题。
|
||||
|
||||
:param action: int = 智能体为当前K线做出的动作。
|
||||
:return:
|
||||
float = 给智能体当前步骤的奖励(用于优化
|
||||
神经网络中的权重)
|
||||
float = 给智能体当前步骤的奖励
|
||||
"""
|
||||
# 首先,如果动作无效,则惩罚
|
||||
if not self._is_valid(action):
|
||||
self.tensorboard_log("invalid", category="actions")
|
||||
return -2.0
|
||||
return -1.0
|
||||
|
||||
# 获取核心状态数据
|
||||
pnl = self.get_unrealized_profit() # 当前浮动盈亏 (百分比,如 0.01 代表 1%)
|
||||
pnl = self.get_unrealized_profit()
|
||||
|
||||
# 获取持仓时间 (K线数量)
|
||||
# 获取持仓时间
|
||||
trade_duration = 0
|
||||
if self._last_trade_tick is not None:
|
||||
trade_duration = self._current_tick - self._last_trade_tick
|
||||
|
||||
# 读取配置中的最大持仓时间,默认 100 根 K 线
|
||||
max_trade_duration = self.rl_config.get("max_trade_duration_candles", 100)
|
||||
|
||||
# 奖励累加器
|
||||
reward = 0.0
|
||||
|
||||
# =========================================================
|
||||
# 场景 A: 决定入场 (Long Enter / Short Enter)
|
||||
# 场景 1: 入场 (Long Enter)
|
||||
# =========================================================
|
||||
if action in (Actions.Long_enter.value, Actions.Short_enter.value):
|
||||
if self._position == Positions.Neutral:
|
||||
# 入场给予奖励,鼓励模型尝试
|
||||
# 从 -0.01 改为 +0.02,让模型更愿意入场
|
||||
return 0.02
|
||||
if action == Actions.Long_enter.value and self._position == Positions.Neutral:
|
||||
# 奖励入场,鼓励模型尝试
|
||||
return 0.5
|
||||
|
||||
# =========================================================
|
||||
# 场景 B: 观望 (Neutral)
|
||||
# 场景 2: 空仓观望 (Neutral) - 重罚!
|
||||
# =========================================================
|
||||
if action == Actions.Neutral.value and self._position == Positions.Neutral:
|
||||
# 空仓观望给予较重惩罚,强烈鼓励模型寻找机会
|
||||
# 从 -0.001 改为 -0.02,避免模型一直观望
|
||||
return -0.02
|
||||
# 空仓观望给予重罚,强烈鼓励模型寻找机会
|
||||
return -0.5
|
||||
|
||||
# =========================================================
|
||||
# 场景 C: 持仓中 (Holding) - 每一根 K 线都会触发
|
||||
# 场景 3: 持仓中 (Holding)
|
||||
# =========================================================
|
||||
if self._position in (Positions.Short, Positions.Long):
|
||||
# 如果当前动作是继续持有 (Neutral)
|
||||
if action == Actions.Neutral.value:
|
||||
# 持仓时,综合考虑盈亏和时间
|
||||
reward = 0.0
|
||||
|
||||
# 1. 时间管理:希望持仓,但不要超时
|
||||
# - 前50%时间:轻微奖励,鼓励持仓捕捉趋势
|
||||
# - 50%-80%时间:轻微惩罚
|
||||
# - 超过80%时间:惩罚加重
|
||||
time_ratio = trade_duration / max_trade_duration
|
||||
if time_ratio < 0.5:
|
||||
# 前半段时间:轻微奖励,鼓励持仓
|
||||
time_reward = 0.005 * (1 - time_ratio * 2)
|
||||
reward += time_reward
|
||||
elif time_ratio < 0.8:
|
||||
# 50%-80%时间:轻微惩罚
|
||||
time_penalty = -0.01 * ((time_ratio - 0.5) / 0.3)
|
||||
reward += time_penalty
|
||||
else:
|
||||
# 超过80%时间:惩罚加重,提醒模型该离场了
|
||||
time_penalty = -0.02 * ((time_ratio - 0.8) / 0.2) - 0.01
|
||||
reward += time_penalty
|
||||
|
||||
# 2. 浮动盈亏反馈 (关键!)
|
||||
# 1. 盈亏奖励/惩罚
|
||||
if pnl > 0:
|
||||
# 浮盈:给予更强的正反馈,鼓励拿住趋势
|
||||
# 从 np.log(1 + pnl) * 0.5 改为 pnl * 2.0
|
||||
reward += pnl * 2.0
|
||||
# 浮盈:直接奖励
|
||||
reward += pnl * 10.0
|
||||
else:
|
||||
# 浮亏:给予惩罚,但不要太严厉
|
||||
# 从 abs(pnl) * 1.0 改为 abs(pnl) * 0.5
|
||||
reward -= (abs(pnl) * 0.5)
|
||||
|
||||
# 3. 止损惩罚加速
|
||||
# 如果浮亏超过 3%,给予额外的惩罚,但不要太严厉
|
||||
if pnl < -0.03:
|
||||
reward -= 0.2 # 从 -0.5 改为 -0.2
|
||||
# 浮亏:惩罚,但不要太严厉
|
||||
reward += pnl * 2.0
|
||||
|
||||
# 4. 【新增】检测是否在大户操纵中被套
|
||||
# 如果刚入场不久就出现大幅反向波动,可能是被大户收割
|
||||
if trade_duration < 5 and pnl < -0.01:
|
||||
# 入场后5根K线内就亏损超过1%,很可能是被大户骗了
|
||||
reward -= 0.3 # 从 -1.0 改为 -0.3,不要太严厉
|
||||
# 2. 时间惩罚 - 特别针对亏损订单持仓太久
|
||||
# 如果浮亏且持仓时间较长,增加时间惩罚
|
||||
if pnl < 0 and trade_duration > 10: # 如果亏损且持仓超过10根K线
|
||||
# 时间惩罚随持仓时间递增
|
||||
time_penalty = -0.01 * (trade_duration - 10) # 每多持有一根K线,多惩罚0.01
|
||||
reward += time_penalty
|
||||
|
||||
# 3. 如果浮亏且持仓时间很长,惩罚加重
|
||||
if pnl < 0 and trade_duration > 30: # 如果亏损且持仓超过30根K线
|
||||
heavy_time_penalty = -0.1 * (trade_duration - 30) # 每多持有一根K线,多惩罚0.1
|
||||
reward += heavy_time_penalty
|
||||
|
||||
return reward
|
||||
|
||||
# =========================================================
|
||||
# 场景 D: 离场结算 (Exit)
|
||||
# 场景 4: 离场结算 (Exit)
|
||||
# =========================================================
|
||||
if (action == Actions.Long_exit.value and self._position == Positions.Long) or \
|
||||
(action == Actions.Short_exit.value and self._position == Positions.Short):
|
||||
|
||||
# 基础因子 - 增加盈利奖励
|
||||
factor = 20.0
|
||||
|
||||
# 【核心修改 3】非线性结算奖励 - 大幅增加盈利奖励
|
||||
if pnl > 0:
|
||||
# 盈利:奖励 = PnL * 因子
|
||||
# 如果这笔交易是大赚 (比如 > 2%),因子乘以 3
|
||||
if pnl > 0.02:
|
||||
factor *= 3.0 # 从 2.0 改为 3.0
|
||||
|
||||
# 最终奖励
|
||||
total_reward = pnl * factor
|
||||
|
||||
# 额外奖励:如果这笔交易很快就赚了钱(高效率)
|
||||
if trade_duration < (max_trade_duration * 0.2):
|
||||
total_reward *= 2.0 # 从 1.5 改为 2.0
|
||||
|
||||
# 【新增】奖励快速脱离陷阱
|
||||
# 如果是因被大户收割而被迫止损,但及时离场,给予一定奖励
|
||||
# 这鼓励模型快速识别并逃离陷阱
|
||||
if trade_duration < 5 and pnl < 0 and pnl > -0.02:
|
||||
# 在5根K线内止损,亏损小于2%,说明及时逃离了陷阱
|
||||
total_reward += 1.0 # 从 0.5 改为 1.0,给予更多奖励
|
||||
|
||||
return float(total_reward)
|
||||
|
||||
# 盈利离场:大幅奖励
|
||||
return pnl * 50.0
|
||||
else:
|
||||
# 亏损:惩罚 = PnL * 因子 - 大幅减少亏损惩罚
|
||||
# 注意:当前亏损系数 (5.0) 低于盈利系数 (20.0),即刻意减弱了"损失厌恶"效应
|
||||
# 这会让模型非常忌惮亏损离场
|
||||
loss_factor = 5.0 # 从 15.0 改为 5.0,大幅减少惩罚
|
||||
|
||||
# 【新增】区分不同类型的亏损
|
||||
# 如果是被大户收割导致的快速亏损,惩罚可以适当减轻
|
||||
# 因为这是市场环境问题,而非模型判断错误
|
||||
if trade_duration < 5 and pnl < -0.02:
|
||||
# 快速大幅亏损,可能是被大户收割
|
||||
loss_factor = 3.0 # 从 10.0 改为 3.0,进一步减轻惩罚
|
||||
# 亏损离场:惩罚,但要考虑持仓时间
|
||||
# 如果是长期亏损持仓被迫离场,惩罚稍重
|
||||
# 如果是短期止损离场,惩罚较轻
|
||||
base_penalty = pnl * 5.0
|
||||
if pnl < 0 and trade_duration > 20: # 长期亏损持仓
|
||||
return base_penalty * 1.5 # 惩罚加重50%
|
||||
else:
|
||||
loss_factor = 5.0 # 从 15.0 改为 5.0
|
||||
|
||||
return float(pnl * loss_factor)
|
||||
return base_penalty
|
||||
|
||||
return float(reward)
|
||||
return 0.0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user