myTestFreqAI/tools/analyze_threshold_convergence.py
2026-01-18 22:42:52 +08:00

255 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
阈值收敛分析工具
从 Elasticsearch 读取阈值调整历史,分析收敛趋势
"""
import requests
import json
from datetime import datetime, timedelta
from typing import Dict, List, Any
from collections import defaultdict
# ES 配置
ES_URL = "http://elastic.k8s.xunlang.home/"
ES_USERNAME = "elastic"
ES_PASSWORD = "your_secure_password"
ES_INDEX_PREFIX = "freqai.livelog"
def query_threshold_history(
pair: str = None,
label_name: str = "&s-entry_signal",
hours: int = 24
) -> List[Dict[str, Any]]:
"""
查询阈值调整历史
Args:
pair: 币对名称None 表示所有币对)
label_name: 标签名称
hours: 查询最近 N 小时的数据
Returns:
阈值调整记录列表(按时间升序)
"""
# 获取当前年月
now = datetime.utcnow()
year_month = now.strftime('%Y.%m')
index_name = f"{ES_INDEX_PREFIX}.threshold_adjustment-{year_month}"
# 构建查询
must_clauses = [
{"term": {"label_name": label_name}},
{
"range": {
"@timestamp": {
"gte": f"now-{hours}h",
"lte": "now"
}
}
}
]
if pair:
must_clauses.append({"term": {"pair.keyword": pair}})
query = {
"query": {
"bool": {
"must": must_clauses
}
},
"sort": [{"@timestamp": {"order": "asc"}}],
"size": 10000 # 最多返回 10000 条记录
}
try:
response = requests.post(
f"{ES_URL}{index_name}/_search",
auth=(ES_USERNAME, ES_PASSWORD),
headers={"Content-Type": "application/json"},
data=json.dumps(query),
timeout=10
)
if response.status_code == 200:
result = response.json()
records = []
for hit in result['hits']['hits']:
source = hit['_source']
records.append(source)
return records
else:
print(f"❌ ES 查询失败: {response.status_code}")
return []
except Exception as e:
print(f"❌ ES 查询异常: {e}")
return []
def analyze_convergence(records: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
分析阈值收敛情况
Returns:
分析结果:
- total_adjustments: 总调整次数
- threshold_trend: 阈值变化趋势
- positive_ratio_trend: 正样本比例变化趋势
- convergence_rate: 收敛速度
"""
if not records:
return {}
# 按币对分组
by_pair = defaultdict(list)
for record in records:
by_pair[record['pair']].append(record)
analysis = {}
for pair, pair_records in by_pair.items():
# 提取时间序列数据
timestamps = [datetime.fromisoformat(r['@timestamp']) for r in pair_records]
thresholds = [r['threshold']['new'] for r in pair_records]
positive_ratios = [r['positive_ratio']['current'] for r in pair_records]
# 计算阈值变化速度(每小时)
if len(timestamps) >= 2:
time_span_hours = (timestamps[-1] - timestamps[0]).total_seconds() / 3600
threshold_change_rate = (thresholds[-1] - thresholds[0]) / time_span_hours if time_span_hours > 0 else 0
# 计算正样本比例变化速度(每小时)
ratio_change_rate = (positive_ratios[-1] - positive_ratios[0]) / time_span_hours if time_span_hours > 0 else 0
else:
threshold_change_rate = 0
ratio_change_rate = 0
# 计算收敛程度(最后 5 次调整的标准差)
recent_thresholds = thresholds[-5:]
threshold_volatility = sum((t - sum(recent_thresholds)/len(recent_thresholds))**2 for t in recent_thresholds) / len(recent_thresholds) if len(recent_thresholds) > 1 else 0
# 判断是否收敛(最后一次是否在理想范围)
converged = pair_records[-1]['convergence']['in_ideal_range']
analysis[pair] = {
'total_adjustments': len(pair_records),
'first_threshold': thresholds[0],
'last_threshold': thresholds[-1],
'threshold_change': thresholds[-1] - thresholds[0],
'threshold_change_rate_per_hour': round(threshold_change_rate, 4),
'first_positive_ratio': positive_ratios[0],
'last_positive_ratio': positive_ratios[-1],
'positive_ratio_change': positive_ratios[-1] - positive_ratios[0],
'ratio_change_rate_per_hour': round(ratio_change_rate, 4),
'threshold_volatility': round(threshold_volatility, 6),
'converged': converged,
'distance_to_ideal': pair_records[-1]['convergence']['distance_to_ideal']
}
return analysis
def print_analysis_report(analysis: Dict[str, Any]) -> None:
"""打印分析报告"""
print("=" * 80)
print(f"阈值收敛分析报告")
print(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 80)
print()
if not analysis:
print("⚠️ 未找到数据")
return
# 统计概览
total_pairs = len(analysis)
converged_pairs = sum(1 for v in analysis.values() if v['converged'])
print(f"📊 概览")
print(f" 总币对数: {total_pairs}")
print(f" 已收敛: {converged_pairs} ({converged_pairs/total_pairs*100:.1f}%)")
print()
# 详细分析
print(f"📈 详细分析")
print("-" * 80)
for pair, data in sorted(analysis.items()):
print(f"\n🪙 {pair}")
print(f" 调整次数: {data['total_adjustments']}")
print(f" 阈值变化: {data['first_threshold']:.4f}{data['last_threshold']:.4f} ({data['threshold_change']:+.4f})")
print(f" 阈值变化速度: {data['threshold_change_rate_per_hour']:+.4f} / 小时")
print(f" 正样本比例: {data['first_positive_ratio']:.4f}{data['last_positive_ratio']:.4f} ({data['positive_ratio_change']:+.4f})")
print(f" 比例变化速度: {data['ratio_change_rate_per_hour']:+.4f} / 小时")
print(f" 阈值波动性: {data['threshold_volatility']:.6f}")
print(f" 收敛状态: {'✅ 已收敛' if data['converged'] else f'⚠️ 未收敛(距离理想: {data["distance_to_ideal"]:.4f}'}")
print()
print("=" * 80)
# 生成建议
print("📝 建议:")
print()
# 检查是否有币对长期不收敛
slow_converge = [pair for pair, data in analysis.items()
if not data['converged'] and data['total_adjustments'] > 10]
if slow_converge:
print(f"⚠️ 以下币对调整次数过多但仍未收敛,建议检查:")
for pair in slow_converge:
print(f" - {pair}")
# 检查是否有阈值震荡
oscillating = [pair for pair, data in analysis.items()
if data['threshold_volatility'] > 0.001]
if oscillating:
print(f"\n⚠️ 以下币对阈值震荡较大,建议增加防震荡限制:")
for pair in oscillating:
print(f" - {pair} (波动性: {analysis[pair]['threshold_volatility']:.6f})")
# 检查速度关系
print(f"\n📊 正样本比例变化速度 vs 阈值变化速度:")
for pair, data in sorted(analysis.items(), key=lambda x: abs(x[1]['ratio_change_rate_per_hour']), reverse=True)[:5]:
ratio = abs(data['ratio_change_rate_per_hour']) / abs(data['threshold_change_rate_per_hour']) if abs(data['threshold_change_rate_per_hour']) > 1e-6 else 0
print(f" {pair}: 比例速度 {data['ratio_change_rate_per_hour']:+.4f} / 阈值速度 {data['threshold_change_rate_per_hour']:+.4f} = {ratio:.2f}x")
print()
print("=" * 80)
def main():
"""主函数"""
import argparse
parser = argparse.ArgumentParser(description='阈值收敛分析工具')
parser.add_argument('--pair', type=str, help='币对名称(留空表示所有币对)')
parser.add_argument('--label', type=str, default='&s-entry_signal', help='标签名称')
parser.add_argument('--hours', type=int, default=24, help='查询最近 N 小时的数据')
args = parser.parse_args()
print("🚀 开始查询阈值调整历史...")
records = query_threshold_history(args.pair, args.label, args.hours)
if not records:
print("⚠️ 未找到数据")
return
print(f"✅ 找到 {len(records)} 条记录")
print()
# 分析收敛情况
analysis = analyze_convergence(records)
# 打印报告
print_analysis_report(analysis)
if __name__ == "__main__":
main()