255 lines
8.7 KiB
Python
255 lines
8.7 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
阈值收敛分析工具
|
||
从 Elasticsearch 读取阈值调整历史,分析收敛趋势
|
||
"""
|
||
|
||
import requests
|
||
import json
|
||
from datetime import datetime, timedelta
|
||
from typing import Dict, List, Any
|
||
from collections import defaultdict
|
||
|
||
# ES 配置
|
||
ES_URL = "http://elastic.k8s.xunlang.home/"
|
||
ES_USERNAME = "elastic"
|
||
ES_PASSWORD = "your_secure_password"
|
||
ES_INDEX_PREFIX = "freqai.livelog"
|
||
|
||
|
||
def query_threshold_history(
|
||
pair: str = None,
|
||
label_name: str = "&s-entry_signal",
|
||
hours: int = 24
|
||
) -> List[Dict[str, Any]]:
|
||
"""
|
||
查询阈值调整历史
|
||
|
||
Args:
|
||
pair: 币对名称(None 表示所有币对)
|
||
label_name: 标签名称
|
||
hours: 查询最近 N 小时的数据
|
||
|
||
Returns:
|
||
阈值调整记录列表(按时间升序)
|
||
"""
|
||
# 获取当前年月
|
||
now = datetime.utcnow()
|
||
year_month = now.strftime('%Y.%m')
|
||
index_name = f"{ES_INDEX_PREFIX}.threshold_adjustment-{year_month}"
|
||
|
||
# 构建查询
|
||
must_clauses = [
|
||
{"term": {"label_name": label_name}},
|
||
{
|
||
"range": {
|
||
"@timestamp": {
|
||
"gte": f"now-{hours}h",
|
||
"lte": "now"
|
||
}
|
||
}
|
||
}
|
||
]
|
||
|
||
if pair:
|
||
must_clauses.append({"term": {"pair.keyword": pair}})
|
||
|
||
query = {
|
||
"query": {
|
||
"bool": {
|
||
"must": must_clauses
|
||
}
|
||
},
|
||
"sort": [{"@timestamp": {"order": "asc"}}],
|
||
"size": 10000 # 最多返回 10000 条记录
|
||
}
|
||
|
||
try:
|
||
response = requests.post(
|
||
f"{ES_URL}{index_name}/_search",
|
||
auth=(ES_USERNAME, ES_PASSWORD),
|
||
headers={"Content-Type": "application/json"},
|
||
data=json.dumps(query),
|
||
timeout=10
|
||
)
|
||
|
||
if response.status_code == 200:
|
||
result = response.json()
|
||
records = []
|
||
for hit in result['hits']['hits']:
|
||
source = hit['_source']
|
||
records.append(source)
|
||
return records
|
||
else:
|
||
print(f"❌ ES 查询失败: {response.status_code}")
|
||
return []
|
||
|
||
except Exception as e:
|
||
print(f"❌ ES 查询异常: {e}")
|
||
return []
|
||
|
||
|
||
def analyze_convergence(records: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||
"""
|
||
分析阈值收敛情况
|
||
|
||
Returns:
|
||
分析结果:
|
||
- total_adjustments: 总调整次数
|
||
- threshold_trend: 阈值变化趋势
|
||
- positive_ratio_trend: 正样本比例变化趋势
|
||
- convergence_rate: 收敛速度
|
||
"""
|
||
if not records:
|
||
return {}
|
||
|
||
# 按币对分组
|
||
by_pair = defaultdict(list)
|
||
for record in records:
|
||
by_pair[record['pair']].append(record)
|
||
|
||
analysis = {}
|
||
|
||
for pair, pair_records in by_pair.items():
|
||
# 提取时间序列数据
|
||
timestamps = [datetime.fromisoformat(r['@timestamp']) for r in pair_records]
|
||
thresholds = [r['threshold']['new'] for r in pair_records]
|
||
positive_ratios = [r['positive_ratio']['current'] for r in pair_records]
|
||
|
||
# 计算阈值变化速度(每小时)
|
||
if len(timestamps) >= 2:
|
||
time_span_hours = (timestamps[-1] - timestamps[0]).total_seconds() / 3600
|
||
threshold_change_rate = (thresholds[-1] - thresholds[0]) / time_span_hours if time_span_hours > 0 else 0
|
||
|
||
# 计算正样本比例变化速度(每小时)
|
||
ratio_change_rate = (positive_ratios[-1] - positive_ratios[0]) / time_span_hours if time_span_hours > 0 else 0
|
||
else:
|
||
threshold_change_rate = 0
|
||
ratio_change_rate = 0
|
||
|
||
# 计算收敛程度(最后 5 次调整的标准差)
|
||
recent_thresholds = thresholds[-5:]
|
||
threshold_volatility = sum((t - sum(recent_thresholds)/len(recent_thresholds))**2 for t in recent_thresholds) / len(recent_thresholds) if len(recent_thresholds) > 1 else 0
|
||
|
||
# 判断是否收敛(最后一次是否在理想范围)
|
||
converged = pair_records[-1]['convergence']['in_ideal_range']
|
||
|
||
analysis[pair] = {
|
||
'total_adjustments': len(pair_records),
|
||
'first_threshold': thresholds[0],
|
||
'last_threshold': thresholds[-1],
|
||
'threshold_change': thresholds[-1] - thresholds[0],
|
||
'threshold_change_rate_per_hour': round(threshold_change_rate, 4),
|
||
'first_positive_ratio': positive_ratios[0],
|
||
'last_positive_ratio': positive_ratios[-1],
|
||
'positive_ratio_change': positive_ratios[-1] - positive_ratios[0],
|
||
'ratio_change_rate_per_hour': round(ratio_change_rate, 4),
|
||
'threshold_volatility': round(threshold_volatility, 6),
|
||
'converged': converged,
|
||
'distance_to_ideal': pair_records[-1]['convergence']['distance_to_ideal']
|
||
}
|
||
|
||
return analysis
|
||
|
||
|
||
def print_analysis_report(analysis: Dict[str, Any]) -> None:
|
||
"""打印分析报告"""
|
||
print("=" * 80)
|
||
print(f"阈值收敛分析报告")
|
||
print(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
||
print("=" * 80)
|
||
print()
|
||
|
||
if not analysis:
|
||
print("⚠️ 未找到数据")
|
||
return
|
||
|
||
# 统计概览
|
||
total_pairs = len(analysis)
|
||
converged_pairs = sum(1 for v in analysis.values() if v['converged'])
|
||
|
||
print(f"📊 概览")
|
||
print(f" 总币对数: {total_pairs}")
|
||
print(f" 已收敛: {converged_pairs} ({converged_pairs/total_pairs*100:.1f}%)")
|
||
print()
|
||
|
||
# 详细分析
|
||
print(f"📈 详细分析")
|
||
print("-" * 80)
|
||
|
||
for pair, data in sorted(analysis.items()):
|
||
print(f"\n🪙 {pair}")
|
||
print(f" 调整次数: {data['total_adjustments']}")
|
||
print(f" 阈值变化: {data['first_threshold']:.4f} → {data['last_threshold']:.4f} ({data['threshold_change']:+.4f})")
|
||
print(f" 阈值变化速度: {data['threshold_change_rate_per_hour']:+.4f} / 小时")
|
||
print(f" 正样本比例: {data['first_positive_ratio']:.4f} → {data['last_positive_ratio']:.4f} ({data['positive_ratio_change']:+.4f})")
|
||
print(f" 比例变化速度: {data['ratio_change_rate_per_hour']:+.4f} / 小时")
|
||
print(f" 阈值波动性: {data['threshold_volatility']:.6f}")
|
||
print(f" 收敛状态: {'✅ 已收敛' if data['converged'] else f'⚠️ 未收敛(距离理想: {data["distance_to_ideal"]:.4f})'}")
|
||
|
||
print()
|
||
print("=" * 80)
|
||
|
||
# 生成建议
|
||
print("📝 建议:")
|
||
print()
|
||
|
||
# 检查是否有币对长期不收敛
|
||
slow_converge = [pair for pair, data in analysis.items()
|
||
if not data['converged'] and data['total_adjustments'] > 10]
|
||
|
||
if slow_converge:
|
||
print(f"⚠️ 以下币对调整次数过多但仍未收敛,建议检查:")
|
||
for pair in slow_converge:
|
||
print(f" - {pair}")
|
||
|
||
# 检查是否有阈值震荡
|
||
oscillating = [pair for pair, data in analysis.items()
|
||
if data['threshold_volatility'] > 0.001]
|
||
|
||
if oscillating:
|
||
print(f"\n⚠️ 以下币对阈值震荡较大,建议增加防震荡限制:")
|
||
for pair in oscillating:
|
||
print(f" - {pair} (波动性: {analysis[pair]['threshold_volatility']:.6f})")
|
||
|
||
# 检查速度关系
|
||
print(f"\n📊 正样本比例变化速度 vs 阈值变化速度:")
|
||
for pair, data in sorted(analysis.items(), key=lambda x: abs(x[1]['ratio_change_rate_per_hour']), reverse=True)[:5]:
|
||
ratio = abs(data['ratio_change_rate_per_hour']) / abs(data['threshold_change_rate_per_hour']) if abs(data['threshold_change_rate_per_hour']) > 1e-6 else 0
|
||
print(f" {pair}: 比例速度 {data['ratio_change_rate_per_hour']:+.4f} / 阈值速度 {data['threshold_change_rate_per_hour']:+.4f} = {ratio:.2f}x")
|
||
|
||
print()
|
||
print("=" * 80)
|
||
|
||
|
||
def main():
|
||
"""主函数"""
|
||
import argparse
|
||
|
||
parser = argparse.ArgumentParser(description='阈值收敛分析工具')
|
||
parser.add_argument('--pair', type=str, help='币对名称(留空表示所有币对)')
|
||
parser.add_argument('--label', type=str, default='&s-entry_signal', help='标签名称')
|
||
parser.add_argument('--hours', type=int, default=24, help='查询最近 N 小时的数据')
|
||
|
||
args = parser.parse_args()
|
||
|
||
print("🚀 开始查询阈值调整历史...")
|
||
records = query_threshold_history(args.pair, args.label, args.hours)
|
||
|
||
if not records:
|
||
print("⚠️ 未找到数据")
|
||
return
|
||
|
||
print(f"✅ 找到 {len(records)} 条记录")
|
||
print()
|
||
|
||
# 分析收敛情况
|
||
analysis = analyze_convergence(records)
|
||
|
||
# 打印报告
|
||
print_analysis_report(analysis)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|