rl 的第一次

2026-02-18 01:43:03 +08:00 · 2026-02-18 01:43:03 +08:00 · 6ea3425986
commit 6ea3425986
parent 9b29cc6597
3 changed files with 49 additions and 66 deletions
--- a/config_examples/freqaiprimer.json
+++ b/config_examples/freqaiprimer.json
@@ -73,11 +73,25 @@
        "data_kitchen": {
            "fillna": "ffill"
        },
-        "freqaimodel": "LightGBMMultiTargetRegressor",
+        "freqaimodel": "MyCoolRLModel",
        "purge_old_models": 2,
        "train_period_days": 12,
        "backtest_period_days": 2,
        "live_retrain_hours": 2,
+        "continual_learning": false,
+        "rl_config": {
+            "train_cycles": 50,
+            "add_state_info": true,
+            "max_trade_duration_candles": 300,
+            "max_training_drawdown_pct": 0.02,
+            "cpu_count": 16,
+            "model_type": "PPO",
+            "policy_type": "MlpPolicy",
+            "model_reward_parameters": {
+                "rr": 1,
+                "profit_aim": 0.025
+            }
+        },
        "outlier_detection": {
            "method": "IsolationForest",
            "contamination": 0.1
@@ -114,17 +128,8 @@
              "shuffle": false
        },
        "model_training_parameters": {
-            "n_estimators": 600,
-            "learning_rate": 0.02,
-            "num_leaves": 60,
-            "max_depth": 12,
-            "min_data_in_leaf": 15,
-            "feature_fraction": 0.8,
-            "bagging_fraction": 0.8,
-            "bagging_freq": 5,
-            "verbose": -1
-        },
-        "ml_prediction_api_url": "http://pairlist.xl.home/api/mlprediction"
+            "verbose": 1
+        }
    },
    "api_server": {
        "enabled": true,
--- a/freqtrade/templates/freqaiprimer.py
+++ b/freqtrade/templates/freqaiprimer.py
@@ -9,6 +9,7 @@ import pandas_ta as ta
 from freqtrade.persistence import Trade
 import numpy as np
 from datetime import datetime, timezone, timedelta
+from functools import reduce

 logger = logging.getLogger(__name__)

@@ -29,6 +30,10 @@ class FreqaiPrimer(IStrategy):
    # FreqAI 要求
    process_only_new_candles = True
    
+    # 时间框架和交易配置
+    timeframe = "3m"
+    can_short = False  # 只支持做多
+    
    stoploss = -0.15  # 固定止损 -15%
    trailing_stop = True
    trailing_stop_positive_offset = 0.005  # 追踪止损偏移量 0.5%
@@ -67,6 +72,12 @@ class FreqaiPrimer(IStrategy):
    
    def feature_engineering_standard(self, dataframe: DataFrame, metadata: dict, **kwargs) -> DataFrame:
        """标准时间类特征"""
+        # 以下特征对于RL模型是必需的
+        dataframe[f"%-raw_close"] = dataframe["close"]
+        dataframe[f"%-raw_open"] = dataframe["open"]
+        dataframe[f"%-raw_high"] = dataframe["high"]
+        dataframe[f"%-raw_low"] = dataframe["low"]
+        # 标准时间类特征
        if "date" in dataframe.columns:
            dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek
            dataframe["%-hour_of_day"] = dataframe["date"].dt.hour
@@ -74,10 +85,8 @@ class FreqaiPrimer(IStrategy):
    
    def set_freqai_targets(self, dataframe: DataFrame, metadata: dict, **kwargs) -> DataFrame:
        """设置FreqAI目标变量（强化学习不需要传统标签，但需要保持兼容）"""
-        # 为了保持与FreqAI框架的兼容性，添加默认目标列
-        dataframe["&s-entry_signal"] = 0
-        dataframe["&s-exit_signal"] = 0
-        dataframe["&s-future_volatility"] = 0
+        # 对于RL，没有直接的目标要设置。这是填充（中性）值，直到智能体发送动作。
+        dataframe["&-action"] = 0
        return dataframe
    
    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
@@ -116,26 +125,27 @@ class FreqaiPrimer(IStrategy):
        
        return dataframe
    
-    def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
-        """生成入场信号"""
-        # 基础入场条件：价格接近布林带下轨且成交量放大
-        entry_condition = (
-            (dataframe['close'] <= dataframe['bb_lower_1h'] * 1.01) &
-            (dataframe['volume_spike'] == True)
-        )
+    def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
+        """生成入场信号 - 基于RL智能体的&-action"""
        
-        dataframe.loc[entry_condition, 'enter_long'] = 1
-        return dataframe
+        enter_long_conditions = [df["do_predict"] == 1, df["&-action"] == 1]
+        
+        if enter_long_conditions:
+            df.loc[
+                reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
+            ] = (1, "long")
+        
+        # 目前只支持做多，所以不添加做空逻辑
+        return df
    
-    def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
-        """生成出场信号"""
-        # 基础出场条件：价格接近布林带上轨
-        exit_condition = (
-            (dataframe['close'] >= dataframe['bb_upper_1h'] * 0.99)
-        )
+    def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
+        """生成出场信号 - 基于RL智能体的&-action"""
+        exit_long_conditions = [df["do_predict"] == 1, df["&-action"] == 2]
+        if exit_long_conditions:
+            df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1
        
-        dataframe.loc[exit_condition, 'exit_long'] = 1
-        return dataframe
+        # 目前只支持做多，所以不添加做空逻辑
+        return df
    
    def confirm_trade_entry(self, pair, order_type, amount, rate, time_in_force, current_time, entry_tag, side, **kwargs):
        """确认入场交易"""
@@ -165,35 +175,3 @@ class FreqaiPrimer(IStrategy):
        
        # 基础出场逻辑
        return exit_reason == 'exit_signal'
-
-# 强化学习模型定义
-from freqtrade.freqai.prediction_models.ReinforcementLearner import BaseReinforcementLearner
-
-class MyReinforcementLearner(BaseReinforcementLearner):
-    def calculate_reward(self, trade: 'Trade', dataframe: DataFrame, pair: str, trade_dir: int) -> float:
-        """自定义奖励函数，重点识别和应对洗盘行为"""
-        # 基础收益奖励
-        profit = trade.calc_profit_ratio()
-        reward = profit * 100
-        
-        # 持仓时间奖励/惩罚：避免过早被洗盘出局
-        hold_duration = (trade.close_date_utc - trade.open_date_utc).total_seconds() / 60
-        if hold_duration < 15 and profit < 0:
-            reward -= 5  # 惩罚被洗盘的交易
-        elif hold_duration > 120 and profit < 0.02:
-            reward -= 2  # 惩罚长时间低收益交易
-        
-        # 成交量异常奖励：识别洗盘后的反弹
-        if 'volume_spike' in dataframe.columns:
-            entry_idx = dataframe[dataframe['date'] == trade.open_date_utc].index[0]
-            if entry_idx + 20 < len(dataframe):
-                post_entry_volume = dataframe.iloc[entry_idx:entry_idx+20]['volume_spike'].sum()
-                if post_entry_volume > 3 and profit > 0.03:
-                    reward += 8  # 奖励识别并利用洗盘后反弹的交易
-        
-        # 最大回撤惩罚：控制风险
-        max_drawdown = trade.max_drawdown
-        if max_drawdown > 0.05:
-            reward -= max_drawdown * 200
-        
-        return reward
--- a/tools/backtest.sh
+++ b/tools/backtest.sh
@@ -65,7 +65,7 @@ START_DATE_RAW=""
 END_DATE_RAW=""
 PAIRS_ARG=""
 PAIR_REMOTE_LIST_URL=""
-FREQAI_MODEL="LightGBMRegressorMultiTarget"
+FREQAI_MODEL="MyCoolRLModel"

 # Parse parameters based on whether we have named parameters
 if [ "$HAS_NAMED_PARAMS" = true ]; then