散户生存之道;鼓励入场,惩罚长时间持有+1
This commit is contained in:
parent
40c03d39f8
commit
2839ea9ad1
@ -77,7 +77,7 @@
|
||||
"purge_old_models": 2,
|
||||
"train_period_days": 30,
|
||||
"backtest_period_days": 7,
|
||||
"identifier": "rl-memo",
|
||||
"identifier": "",
|
||||
"live_retrain_hours": 6,
|
||||
"continual_learning": false,
|
||||
"rl_config": {
|
||||
|
||||
@ -61,17 +61,17 @@ class MyCoolRLModel(ReinforcementLearner):
|
||||
# =========================================================
|
||||
if action in (Actions.Long_enter.value, Actions.Short_enter.value):
|
||||
if self._position == Positions.Neutral:
|
||||
# 入场给予轻微惩罚,避免过度交易,但不要太重
|
||||
# 从 -0.05 改为 -0.01,让模型更愿意尝试入场
|
||||
return -0.01
|
||||
# 入场给予奖励,鼓励模型尝试
|
||||
# 从 -0.01 改为 +0.02,让模型更愿意入场
|
||||
return 0.02
|
||||
|
||||
# =========================================================
|
||||
# 场景 B: 观望 (Neutral)
|
||||
# =========================================================
|
||||
if action == Actions.Neutral.value and self._position == Positions.Neutral:
|
||||
# 空仓观望给予轻微惩罚,鼓励模型寻找机会
|
||||
# 从 0 改为 -0.001,避免模型一直观望
|
||||
return -0.001
|
||||
# 空仓观望给予较重惩罚,强烈鼓励模型寻找机会
|
||||
# 从 -0.001 改为 -0.02,避免模型一直观望
|
||||
return -0.02
|
||||
|
||||
# =========================================================
|
||||
# 场景 C: 持仓中 (Holding) - 每一根 K 线都会触发
|
||||
@ -100,24 +100,24 @@ class MyCoolRLModel(ReinforcementLearner):
|
||||
|
||||
# 2. 浮动盈亏反馈 (关键!)
|
||||
if pnl > 0:
|
||||
# 浮盈:给予微弱的正反馈,鼓励拿住趋势
|
||||
# 使用 log 函数让奖励增长平缓,避免模型过于贪婪而不止盈
|
||||
reward += np.log(1 + pnl) * 0.5
|
||||
# 浮盈:给予更强的正反馈,鼓励拿住趋势
|
||||
# 从 np.log(1 + pnl) * 0.5 改为 pnl * 2.0
|
||||
reward += pnl * 2.0
|
||||
else:
|
||||
# 浮亏:给予惩罚,但不要太严厉
|
||||
# 从 abs(pnl) * 2.0 改为 abs(pnl) * 1.0
|
||||
reward -= (abs(pnl) * 1.0)
|
||||
# 从 abs(pnl) * 1.0 改为 abs(pnl) * 0.5
|
||||
reward -= (abs(pnl) * 0.5)
|
||||
|
||||
# 3. 止损惩罚加速
|
||||
# 如果浮亏超过 3%,给予额外的惩罚,但不要太严厉
|
||||
if pnl < -0.03:
|
||||
reward -= 0.5 # 从 -1.0 改为 -0.5
|
||||
reward -= 0.2 # 从 -0.5 改为 -0.2
|
||||
|
||||
# 4. 【新增】检测是否在大户操纵中被套
|
||||
# 如果刚入场不久就出现大幅反向波动,可能是被大户收割
|
||||
if trade_duration < 5 and pnl < -0.01:
|
||||
# 入场后5根K线内就亏损超过1%,很可能是被大户骗了
|
||||
reward -= 1.0 # 从 -3.0 改为 -1.0,不要太严厉
|
||||
reward -= 0.3 # 从 -1.0 改为 -0.3,不要太严厉
|
||||
|
||||
# =========================================================
|
||||
# 场景 D: 离场结算 (Exit)
|
||||
@ -125,46 +125,46 @@ class MyCoolRLModel(ReinforcementLearner):
|
||||
if (action == Actions.Long_exit.value and self._position == Positions.Long) or \
|
||||
(action == Actions.Short_exit.value and self._position == Positions.Short):
|
||||
|
||||
# 基础因子
|
||||
factor = 10.0
|
||||
# 基础因子 - 增加盈利奖励
|
||||
factor = 20.0
|
||||
|
||||
# 【核心修改 3】非线性结算奖励
|
||||
# 【核心修改 3】非线性结算奖励 - 大幅增加盈利奖励
|
||||
if pnl > 0:
|
||||
# 盈利:奖励 = PnL * 因子
|
||||
# 如果这笔交易是大赚 (比如 > 2%),因子翻倍
|
||||
if pnl > 0.02:
|
||||
factor *= 2.0
|
||||
factor *= 3.0 # 从 2.0 改为 3.0
|
||||
|
||||
# 最终奖励
|
||||
total_reward = pnl * factor
|
||||
|
||||
# 额外奖励:如果这笔交易很快就赚了钱(高效率)
|
||||
if trade_duration < (max_trade_duration * 0.2):
|
||||
total_reward *= 1.5
|
||||
total_reward *= 2.0 # 从 1.5 改为 2.0
|
||||
|
||||
# 【新增】奖励快速脱离陷阱
|
||||
# 如果是因被大户收割而被迫止损,但及时离场,给予一定奖励
|
||||
# 这鼓励模型快速识别并逃离陷阱
|
||||
if trade_duration < 5 and pnl < 0 and pnl > -0.02:
|
||||
# 在5根K线内止损,亏损小于2%,说明及时逃离了陷阱
|
||||
total_reward += 0.5 # 给予小奖励,奖励快速止损
|
||||
total_reward += 1.0 # 从 0.5 改为 1.0,给予更多奖励
|
||||
|
||||
return float(total_reward)
|
||||
|
||||
else:
|
||||
# 亏损:惩罚 = PnL * 因子
|
||||
# 亏损:惩罚 = PnL * 因子 - 大幅减少亏损惩罚
|
||||
# 亏损时的惩罚系数通常要比盈利系数大,模拟"损失厌恶"
|
||||
# 这会让模型非常忌惮亏损离场
|
||||
loss_factor = 15.0
|
||||
loss_factor = 5.0 # 从 15.0 改为 5.0,大幅减少惩罚
|
||||
|
||||
# 【新增】区分不同类型的亏损
|
||||
# 如果是被大户收割导致的快速亏损,惩罚可以适当减轻
|
||||
# 因为这是市场环境问题,而非模型判断错误
|
||||
if trade_duration < 5 and pnl < -0.02:
|
||||
# 快速大幅亏损,可能是被大户收割
|
||||
loss_factor = 10.0 # 减轻惩罚,因为这是市场陷阱
|
||||
loss_factor = 3.0 # 从 10.0 改为 3.0,进一步减轻惩罚
|
||||
else:
|
||||
loss_factor = 15.0 # 正常惩罚
|
||||
loss_factor = 5.0 # 从 15.0 改为 5.0
|
||||
|
||||
return float(pnl * loss_factor)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user