myTestFreqAI/tools/view_feather.py
2025-07-09 17:30:49 +08:00

70 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import argparse
import pandas as pd
def _to_tsv(frame):
    """Render *frame* as tab-separated text, writing missing values as ``nan``."""
    return frame.to_csv(sep='\t', na_rep='nan')


def analyze_candlestick_data(file_path):
    """Print an overview of a candlestick Feather file and check it for time gaps.

    The report is written to stdout and consists of:

    * the rows themselves (every row when the table has fewer than 1000 rows,
      otherwise only the first and last five),
    * the output of ``DataFrame.info()``,
    * descriptive statistics (over the whole table when it has fewer than 10
      columns and fewer than 500 rows, otherwise over the first five rows only),
    * the time span covered by the ``date`` column,
    * a gap check: the most common spacing between consecutive dates is taken
      as the expected candle interval, and every larger spacing is reported as
      a possible hole (at most five examples are listed).

    Args:
        file_path: Path of the Feather file, passed to ``pandas.read_feather``.

    Raises:
        KeyError: If the file has no ``date`` column.

    Note:
        The gap check uses the ``.dt`` accessor, so the ``date`` column must
        hold datetime values.
    """
    df = pd.read_feather(file_path)
    rows, columns = df.shape

    # Raw contents: dump everything for short tables, both ends for long ones.
    if rows < 1000:
        print('数据全部内容信息:')
        print(_to_tsv(df))
    else:
        print('数据前几行内容信息:')
        print(_to_tsv(df.head()))
        print('数据最后几行内容信息:')
        print(_to_tsv(df.tail()))

    # Column dtypes, non-null counts and memory usage.
    print('数据基本信息:')
    df.info()

    # Descriptive statistics: full table only when it is both short and narrow.
    percentiles = [.25, .5, .75]
    if columns < 10 and rows < 500:
        print('数据全部内容描述性统计信息:')
        print(_to_tsv(df.describe(include='all', percentiles=percentiles)))
    else:
        print('数据前几行描述性统计信息:')
        print(_to_tsv(df.head().describe(include='all', percentiles=percentiles)))

    # Time span covered by the file.
    dates = df['date'].sort_values()  # chronological order is required for diff()
    min_date = dates.min()
    max_date = dates.max()
    time_span = max_date - min_date

    print(f"\n数据时间跨度:{time_span}")
    print(f"开始时间:{min_date}")
    print(f"结束时间:{max_date}")

    # Spacing, in seconds, between each candle and the one before it. The first
    # entry is always NaN because it has no predecessor.
    gap_seconds = dates.diff().dt.total_seconds()
    observed = gap_seconds.dropna()
    if observed.empty:
        # Fewer than two usable timestamps: there is no interval to compare, and
        # mode() would return an empty Series, so report that instead of crashing.
        print("数据完整性:无法检查,数据不足两行")
        return

    # The most frequent spacing is assumed to be the nominal candle interval.
    expected_freq = observed.mode().iloc[0]
    # Positional boolean mask, so the selection does not depend on index labels.
    is_gap = (gap_seconds > expected_freq).to_numpy()
    gap_count = int(is_gap.sum())

    if gap_count == 0:
        print("数据完整性:完整,未发现缺失的蜡烛图数据")
        return

    print(f"数据完整性:不完整,发现 {gap_count} 处可能的缺失")
    print("缺失位置示例:")
    # Show only the first five holes to keep the report short.
    examples = zip(dates[is_gap].head(5), gap_seconds[is_gap].head(5))
    for candle_time, seconds in examples:
        gap_duration = pd.Timedelta(seconds=seconds)
        print(f" - 在 {candle_time} 之前缺失了 {gap_duration}")
if __name__ == "__main__":
    # Command-line entry point: the only option is the mandatory --path,
    # which names the Feather file to analyse.
    cli = argparse.ArgumentParser(description='分析Freqtrade蜡烛图Feather文件')
    cli.add_argument('--path', required=True, help='Feather文件路径')
    analyze_candlestick_data(cli.parse_args().path)