# myTestFreqAI/tools/analyze_entry_diagnostics.py

#!/usr/bin/env python3
"""
入场诊断统计分析脚本
分析回测日志中的入场诊断数据，生成统计报告
"""
import re
import sys
from collections import defaultdict

# Field extraction table: (entry key, compiled pattern, value is numeric).
# Patterns are compiled once at import time instead of on every log line, and
# the order here is the order in which keys are inserted into each entry.
_ENTRY_FIELD_PATTERNS = (
    ('pair', re.compile(r'\[入场诊断\]\s+(\S+)\s+\|'), False),
    ('price', re.compile(r'价格:\s+([0-9.]+)'), True),
    ('vs_5k_high', re.compile(r'vs 5K高点:\s+([+-]?[0-9.]+)%'), True),
    ('vs_ema5', re.compile(r'vs EMA5:\s+([+-]?[0-9.]+)%'), True),
    ('bb_position', re.compile(r'布林位置:\s+([0-9.-]+)'), True),
    ('rsi', re.compile(r'RSI:\s+([0-9.]+)'), True),
    ('macd', re.compile(r'MACD:\s+(\w+)'), False),
    ('market_state', re.compile(r'市场:\s+(\w+)'), False),
    ('ml_prob', re.compile(r'ML概率:\s+([0-9.-]+)'), True),
)


def _parse_float(text):
    """Return ``float(text)``, or ``None`` when *text* is not a valid number."""
    try:
        return float(text)
    except ValueError:
        return None


def parse_log_file(log_file):
    """Parse a log file and extract the entry-diagnostic records.

    Only lines containing the ``[入场诊断]`` marker are inspected. Each such
    line yields a dict holding whichever of these keys could be extracted:
    ``pair``, ``price``, ``vs_5k_high``, ``vs_ema5``, ``bb_position``, ``rsi``,
    ``macd``, ``market_state`` and ``ml_prob``. ``pair``, ``macd`` and
    ``market_state`` are strings; the others are floats.

    Args:
        log_file: Path of the UTF-8 encoded log file to read.

    Returns:
        A list of dicts, one per marker line from which at least one field
        was extracted, in file order.

    Raises:
        FileNotFoundError: If ``log_file`` does not exist (raised by ``open``).
    """
    entries = []

    with open(log_file, 'r', encoding='utf-8') as f:
        for line in f:
            if '[入场诊断]' not in line:
                continue

            entry = {}
            for key, pattern, is_numeric in _ENTRY_FIELD_PATTERNS:
                match = pattern.search(line)
                if match is None:
                    continue

                value = match.group(1)
                if is_numeric:
                    # The character classes can capture tokens such as
                    # "1.2.3" or a lone "-" that float() rejects. Skip just
                    # that field rather than aborting the whole parse.
                    value = _parse_float(value)
                    if value is None:
                        continue

                entry[key] = value

            if entry:
                entries.append(entry)

    return entries

def analyze_entries(entries):
    """Print a statistical report for parsed entry-diagnostic records.

    Args:
        entries: List of dicts. The keys read here are ``pair``,
            ``bb_position``, ``rsi``, ``vs_5k_high``, ``ml_prob``, ``macd``
            and ``market_state``; any of them may be absent from an entry.

    Returns:
        None. The report is written to stdout. When ``entries`` is empty a
        single "no data" message is printed instead.

    Notes:
        Every bucket only counts entries that actually carry the field in
        question, so an entry without, say, ``bb_position`` is not reported
        as a "≤ 0.8" entry. The buckets of one section can therefore sum to
        less than the total. Percentages are floored and always relative to
        the total number of entries.
    """
    if not entries:
        print("❌ 没有找到入场诊断数据")
        return

    total = len(entries)

    def count_where(field, predicate):
        # Entries lacking the field are never counted, whatever the predicate.
        return sum(1 for e in entries if field in e and predicate(e[field]))

    def pct(count):
        # Floor percentage of all candidate entries.
        return count * 100 // total

    def ratio(count):
        # Shared "count/total (pct%)" suffix of every report row.
        return f"{count:3d}/{total} ({pct(count):2d}%)"

    # Bollinger-band position buckets.
    bb_over_1 = count_where('bb_position', lambda v: v > 1.0)
    bb_over_0_8 = count_where('bb_position', lambda v: v > 0.8)
    bb_at_most_0_8 = count_where('bb_position', lambda v: v <= 0.8)

    # RSI buckets.
    rsi_over_70 = count_where('rsi', lambda v: v > 70)
    rsi_over_60 = count_where('rsi', lambda v: v > 60)
    rsi_at_most_60 = count_where('rsi', lambda v: v <= 60)

    # Price relative to the 5-candle high.
    high_positive = count_where('vs_5k_high', lambda v: v > 0)
    high_near_zero = count_where('vs_5k_high', lambda v: -0.1 <= v <= 0)
    high_below = count_where('vs_5k_high', lambda v: v < -0.1)

    # ML probability buckets.
    ml_zero = count_where('ml_prob', lambda v: v == 0.0)
    ml_very_low = count_where('ml_prob', lambda v: 0 < v < 0.2)
    ml_low = count_where('ml_prob', lambda v: 0.2 <= v < 0.5)
    ml_medium = count_where('ml_prob', lambda v: 0.5 <= v < 0.7)
    ml_high = count_where('ml_prob', lambda v: v >= 0.7)
    ml_below_half = ml_zero + ml_very_low + ml_low

    macd_up = count_where('macd', lambda v: v == 'up')
    macd_down = count_where('macd', lambda v: v == 'down')

    # Frequency tables for market state and trading pair.
    market_states = defaultdict(int)
    pair_counts = defaultdict(int)
    for e in entries:
        if 'market_state' in e:
            market_states[e['market_state']] += 1
        if 'pair' in e:
            pair_counts[e['pair']] += 1

    # Each threshold drives both a warning in its section and a suggestion
    # at the end of the report, so evaluate it once.
    warn_bb = bb_over_1 > total * 0.1
    warn_rsi = rsi_over_70 > total * 0.2
    warn_high = high_positive > total * 0.05
    warn_ml = ml_below_half > total * 0.7

    # Report output.
    print("=" * 80)
    print(f"📊 入场诊断统计报告（共 {total} 条候选入场）")
    print("=" * 80)

    print("\n【1. 布林位置分布】")
    print(f"  布林位置 > 1.0 (布林上轨之上):  {ratio(bb_over_1)}")
    print(f"  布林位置 > 0.8 (接近上轨):      {ratio(bb_over_0_8)}")
    print(f"  布林位置 ≤ 0.8 (相对安全):      {ratio(bb_at_most_0_8)}")
    if warn_bb:
        print(f"  ⚠️ 警告：{pct(bb_over_1)}% 的入场在布林上轨之上（追高风险）")

    print("\n【2. RSI 超买分布】")
    print(f"  RSI > 70 (严重超买):            {ratio(rsi_over_70)}")
    print(f"  RSI > 60 (轻度超买):            {ratio(rsi_over_60)}")
    print(f"  RSI ≤ 60 (正常/超卖):           {ratio(rsi_at_most_60)}")
    if warn_rsi:
        print(f"  ⚠️ 警告：{pct(rsi_over_70)}% 的入场 RSI 超过 70（超买风险）")

    print("\n【3. 短期高点位置】")
    print(f"  vs 5K高点 > 0 (买在高位):      {ratio(high_positive)}")
    print(f"  vs 5K高点 [-0.1%, 0] (接近):   {ratio(high_near_zero)}")
    print(f"  vs 5K高点 < -0.1% (回调后):    {ratio(high_below)}")
    if warn_high:
        print(f"  ⚠️ 警告：{pct(high_positive)}% 的入场买在短期高点之上")

    print("\n【4. ML 置信度分布】")
    print(f"  ML 概率 = 0.00 (完全不看好):   {ratio(ml_zero)}")
    print(f"  ML 概率 (0, 0.2) (极低):       {ratio(ml_very_low)}")
    print(f"  ML 概率 [0.2, 0.5) (低):       {ratio(ml_low)}")
    print(f"  ML 概率 [0.5, 0.7) (中):       {ratio(ml_medium)}")
    print(f"  ML 概率 >= 0.7 (高):            {ratio(ml_high)}")
    print(f"  ML 概率 < 0.5 小计:             {ratio(ml_below_half)}")
    if warn_ml:
        print(f"  ⚠️ 警告：{pct(ml_below_half)}% 的候选入场 ML 置信度 < 0.5")

    print("\n【5. MACD 趋势】")
    print(f"  MACD 上升 (up):                 {ratio(macd_up)}")
    print(f"  MACD 下降 (down):               {ratio(macd_down)}")

    print("\n【6. 市场状态分布】")
    for state, count in sorted(market_states.items(), key=lambda x: -x[1]):
        print(f"  {state:15s}:                {ratio(count)}")

    print("\n【7. 币对分布（Top 10）】")
    for pair, count in sorted(pair_counts.items(), key=lambda x: -x[1])[:10]:
        print(f"  {pair:15s}:                {ratio(count)}")

    print("\n" + "=" * 80)
    print("💡 建议：")

    if warn_bb:
        print("  • 建议添加布林位置过滤：bb_position <= 0.8")

    if warn_rsi:
        print("  • 建议添加 RSI 过滤：rsi <= 65 或 70")

    if warn_high:
        print("  • 建议添加短期高点过滤：vs_5k_high <= 0")

    if warn_ml:
        print("  • ML 审核官可能阈值过低，建议提高 ml_entry_signal_threshold")

    print("=" * 80)

if __name__ == '__main__':
    # Script entry point: the first command-line argument is the log file
    # to analyze. Any failure is reported on stdout and exits with status 1.
    if not sys.argv[1:]:
        print("用法: python analyze_entry_diagnostics.py <日志文件路径>")
        print("示例: python analyze_entry_diagnostics.py outputs/filted.log")
        sys.exit(1)

    log_path = sys.argv[1]

    try:
        records = parse_log_file(log_path)
        analyze_entries(records)
    except FileNotFoundError:
        # A missing input file gets a short message without a traceback.
        print(f"❌ 错误：找不到文件 {log_path}")
        sys.exit(1)
    except Exception as exc:
        # Top-level boundary: report the error, dump the traceback for
        # debugging, and signal failure to the shell.
        import traceback

        print(f"❌ 错误：{exc}")
        traceback.print_exc()
        sys.exit(1)