myTestFreqAI/tools/analyze_threshold_convergence.py

#!/usr/bin/env python3
"""
阈值收敛分析工具
从 Elasticsearch 读取阈值调整历史，分析收敛趋势
"""

import requests
import json
from datetime import datetime, timedelta
from typing import Dict, List, Any
from collections import defaultdict

# ES 配置
ES_URL = "http://elastic.k8s.xunlang.home/"
ES_USERNAME = "elastic"
ES_PASSWORD = "your_secure_password"
ES_INDEX_PREFIX = "freqai.livelog"


def query_threshold_history(
    pair: str = None,
    label_name: str = "&s-entry_signal",
    hours: int = 24
) -> List[Dict[str, Any]]:
    """
    查询阈值调整历史

    Args:
        pair: 币对名称（None 表示所有币对）
        label_name: 标签名称
        hours: 查询最近 N 小时的数据

    Returns:
        阈值调整记录列表（按时间升序）
    """
    # 获取当前年月
    now = datetime.utcnow()
    year_month = now.strftime('%Y.%m')
    index_name = f"{ES_INDEX_PREFIX}.threshold_adjustment-{year_month}"

    # 构建查询
    must_clauses = [
        {"term": {"label_name": label_name}},
        {
            "range": {
                "@timestamp": {
                    "gte": f"now-{hours}h",
                    "lte": "now"
                }
            }
        }
    ]

    if pair:
        must_clauses.append({"term": {"pair.keyword": pair}})

    query = {
        "query": {
            "bool": {
                "must": must_clauses
            }
        },
        "sort": [{"@timestamp": {"order": "asc"}}],
        "size": 10000  # 最多返回 10000 条记录
    }

    try:
        response = requests.post(
            f"{ES_URL}{index_name}/_search",
            auth=(ES_USERNAME, ES_PASSWORD),
            headers={"Content-Type": "application/json"},
            data=json.dumps(query),
            timeout=10
        )

        if response.status_code == 200:
            result = response.json()
            records = []
            for hit in result['hits']['hits']:
                source = hit['_source']
                records.append(source)
            return records
        else:
            print(f"❌ ES 查询失败: {response.status_code}")
            return []

    except Exception as e:
        print(f"❌ ES 查询异常: {e}")
        return []


def analyze_convergence(records: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    分析阈值收敛情况

    Returns:
        分析结果：
        - total_adjustments: 总调整次数
        - threshold_trend: 阈值变化趋势
        - positive_ratio_trend: 正样本比例变化趋势
        - convergence_rate: 收敛速度
    """
    if not records:
        return {}

    # 按币对分组
    by_pair = defaultdict(list)
    for record in records:
        by_pair[record['pair']].append(record)

    analysis = {}

    for pair, pair_records in by_pair.items():
        # 提取时间序列数据
        timestamps = [datetime.fromisoformat(r['@timestamp']) for r in pair_records]
        thresholds = [r['threshold']['new'] for r in pair_records]
        positive_ratios = [r['positive_ratio']['current'] for r in pair_records]

        # 计算阈值变化速度（每小时）
        if len(timestamps) >= 2:
            time_span_hours = (timestamps[-1] - timestamps[0]).total_seconds() / 3600
            threshold_change_rate = (thresholds[-1] - thresholds[0]) / time_span_hours if time_span_hours > 0 else 0

            # 计算正样本比例变化速度（每小时）
            ratio_change_rate = (positive_ratios[-1] - positive_ratios[0]) / time_span_hours if time_span_hours > 0 else 0
        else:
            threshold_change_rate = 0
            ratio_change_rate = 0

        # 计算收敛程度（最后 5 次调整的标准差）
        recent_thresholds = thresholds[-5:]
        threshold_volatility = sum((t - sum(recent_thresholds)/len(recent_thresholds))**2 for t in recent_thresholds) / len(recent_thresholds) if len(recent_thresholds) > 1 else 0

        # 判断是否收敛（最后一次是否在理想范围）
        converged = pair_records[-1]['convergence']['in_ideal_range']

        analysis[pair] = {
            'total_adjustments': len(pair_records),
            'first_threshold': thresholds[0],
            'last_threshold': thresholds[-1],
            'threshold_change': thresholds[-1] - thresholds[0],
            'threshold_change_rate_per_hour': round(threshold_change_rate, 4),
            'first_positive_ratio': positive_ratios[0],
            'last_positive_ratio': positive_ratios[-1],
            'positive_ratio_change': positive_ratios[-1] - positive_ratios[0],
            'ratio_change_rate_per_hour': round(ratio_change_rate, 4),
            'threshold_volatility': round(threshold_volatility, 6),
            'converged': converged,
            'distance_to_ideal': pair_records[-1]['convergence']['distance_to_ideal']
        }

    return analysis


def print_analysis_report(analysis: Dict[str, Any]) -> None:
    """打印分析报告"""
    print("=" * 80)
    print(f"阈值收敛分析报告")
    print(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 80)
    print()

    if not analysis:
        print("⚠️ 未找到数据")
        return

    # 统计概览
    total_pairs = len(analysis)
    converged_pairs = sum(1 for v in analysis.values() if v['converged'])

    print(f"📊 概览")
    print(f"  总币对数: {total_pairs}")
    print(f"  已收敛: {converged_pairs} ({converged_pairs/total_pairs*100:.1f}%)")
    print()

    # 详细分析
    print(f"📈 详细分析")
    print("-" * 80)

    for pair, data in sorted(analysis.items()):
        print(f"\n🪙 {pair}")
        print(f"  调整次数: {data['total_adjustments']}")
        print(f"  阈值变化: {data['first_threshold']:.4f} → {data['last_threshold']:.4f} ({data['threshold_change']:+.4f})")
        print(f"  阈值变化速度: {data['threshold_change_rate_per_hour']:+.4f} / 小时")
        print(f"  正样本比例: {data['first_positive_ratio']:.4f} → {data['last_positive_ratio']:.4f} ({data['positive_ratio_change']:+.4f})")
        print(f"  比例变化速度: {data['ratio_change_rate_per_hour']:+.4f} / 小时")
        print(f"  阈值波动性: {data['threshold_volatility']:.6f}")
        print(f"  收敛状态: {'✅ 已收敛' if data['converged'] else f'⚠️ 未收敛（距离理想: {data["distance_to_ideal"]:.4f}）'}")

    print()
    print("=" * 80)

    # 生成建议
    print("📝 建议:")
    print()

    # 检查是否有币对长期不收敛
    slow_converge = [pair for pair, data in analysis.items()
                     if not data['converged'] and data['total_adjustments'] > 10]

    if slow_converge:
        print(f"⚠️ 以下币对调整次数过多但仍未收敛，建议检查：")
        for pair in slow_converge:
            print(f"    - {pair}")

    # 检查是否有阈值震荡
    oscillating = [pair for pair, data in analysis.items()
                   if data['threshold_volatility'] > 0.001]

    if oscillating:
        print(f"\n⚠️ 以下币对阈值震荡较大，建议增加防震荡限制：")
        for pair in oscillating:
            print(f"    - {pair} (波动性: {analysis[pair]['threshold_volatility']:.6f})")

    # 检查速度关系
    print(f"\n📊 正样本比例变化速度 vs 阈值变化速度：")
    for pair, data in sorted(analysis.items(), key=lambda x: abs(x[1]['ratio_change_rate_per_hour']), reverse=True)[:5]:
        ratio = abs(data['ratio_change_rate_per_hour']) / abs(data['threshold_change_rate_per_hour']) if abs(data['threshold_change_rate_per_hour']) > 1e-6 else 0
        print(f"  {pair}: 比例速度 {data['ratio_change_rate_per_hour']:+.4f} / 阈值速度 {data['threshold_change_rate_per_hour']:+.4f} = {ratio:.2f}x")

    print()
    print("=" * 80)


def main():
    """主函数"""
    import argparse

    parser = argparse.ArgumentParser(description='阈值收敛分析工具')
    parser.add_argument('--pair', type=str, help='币对名称（留空表示所有币对）')
    parser.add_argument('--label', type=str, default='&s-entry_signal', help='标签名称')
    parser.add_argument('--hours', type=int, default=24, help='查询最近 N 小时的数据')

    args = parser.parse_args()

    print("🚀 开始查询阈值调整历史...")
    records = query_threshold_history(args.pair, args.label, args.hours)

    if not records:
        print("⚠️ 未找到数据")
        return

    print(f"✅ 找到 {len(records)} 条记录")
    print()

    # 分析收敛情况
    analysis = analyze_convergence(records)

    # 打印报告
    print_analysis_report(analysis)


if __name__ == "__main__":
    main()