diff --git a/tools/download.sh b/tools/download.sh
index dfb24354..566199c7 100755
--- a/tools/download.sh
+++ b/tools/download.sh
@@ -60,11 +60,14 @@ if [[ "$@" == *"--timerange"* ]] && [[ "$@" == *"--days"* ]]; then
 fi
 
 # Get timerange or days from parameters
+timerange=""
+days=""
 if [[ "$@" == *"--timerange"* ]]; then
     timerange=$(get_param_value "--timerange" "$@")
 elif [[ "$@" == *"--days"* ]]; then
     days=$(get_param_value "--days" "$@")
 fi
+
 # Get pairs and timeframe from parameters or use defaults
 pairs=$(get_csv_param_value "--pairs" "$@")
 timeframe=$(get_csv_param_value "--timeframe" "$@")
@@ -75,18 +78,22 @@ if [[ -z "$pairs" ]]; then
 fi
 
 if [[ -z "$timeframe" ]]; then
-    timeframe="3m,5m,15m,30m,1h,4h,6h,12h,1d"
+    timeframe="5m,15m,30m,1h,4h,6h,12h,1d"
 fi
 
+# Convert timeframe string to array
+IFS=',' read -r -a timeframe_array <<<"$timeframe"
+timeframe_array_str=$(printf " '%s'" "${timeframe_array[@]}")
+
 # Initialize the base command
-cmd="docker-compose run --rm freqtrade download-data --config /freqtrade/config_examples/basic.json --pairs $pairs --timeframe $timeframe"
+cmd="docker-compose run --rm freqtrade download-data --config /freqtrade/config_examples/basic.json --pairs $pairs --timeframe$timeframe_array_str"
 
 # Add timerange or days if provided
 if [[ -n "$timerange" ]]; then
-    cmd+=" --timerange $timerange"
+    cmd+=" --timerange='$timerange'"
 elif [[ -n "$days" ]]; then
-    cmd+=" --days $days"
+    cmd+=" --days=$days"
 fi
 
 # Execute the command
-eval $cmd
+eval "$cmd"
diff --git a/tools/view_feather.py b/tools/view_feather.py
new file mode 100644
index 00000000..f9e648ec
--- /dev/null
+++ b/tools/view_feather.py
@@ -0,0 +1,85 @@
+"""Inspect a Freqtrade candle Feather file from the command line."""
+
+import argparse
+
+import pandas as pd
+
+
+def analyze_candlestick_data(file_path):
+    """Print a content, schema, statistics and time-gap report for a Feather file.
+
+    The ``date`` column must be datetime-like. The most common spacing between
+    consecutive dates is taken as the candle interval, and every larger
+    spacing is reported as a possible gap.
+
+    Args:
+        file_path: Path of the Feather file to read.
+
+    Raises:
+        KeyError: If the file has no ``date`` column.
+    """
+    df = pd.read_feather(file_path)
+    rows, columns = df.shape
+
+    if rows < 500:
+        # Short table (fewer than 500 rows): dump every row.
+        print('数据全部内容信息:')
+        print(df.to_csv(sep='\t', na_rep='nan'))
+    else:
+        # Long table: show only the first few rows.
+        print('数据前几行内容信息:')
+        print(df.head().to_csv(sep='\t', na_rep='nan'))
+
+    # Column dtypes and non-null counts; DataFrame.info() prints by itself.
+    print('数据基本信息:')
+    df.info()
+
+    if columns < 10 and rows < 500:
+        # Short and narrow table: statistics over the whole frame.
+        print('数据全部内容描述性统计信息:')
+        print(df.describe(include='all', percentiles=[.25, .5, .75]).to_csv(sep='\t', na_rep='nan'))
+    else:
+        # Otherwise only the first few rows are summarised.
+        print('数据前几行描述性统计信息:')
+        print(df.head().describe(include='all', percentiles=[.25, .5, .75]).to_csv(sep='\t', na_rep='nan'))
+
+    # Overall time span covered by the file.
+    min_date = df['date'].min()
+    max_date = df['date'].max()
+    time_span = max_date - min_date
+
+    # Gap detection needs chronological order.
+    df = df.sort_values('date')
+    # Seconds between each row and the previous one (NaN for the first row).
+    df['time_diff'] = df['date'].diff().dt.total_seconds()
+    interval_modes = df['time_diff'].mode()
+    if interval_modes.empty:
+        # Fewer than two rows: there is no interval to compare against, and
+        # indexing the empty mode() result would raise, so report no gaps.
+        missing_intervals = df.iloc[0:0]
+    else:
+        # The most common spacing is taken as the expected candle interval.
+        expected_freq = interval_modes.iloc[0]
+        missing_intervals = df[df['time_diff'] > expected_freq]
+
+    print(f"\n数据时间跨度:{time_span}")
+    print(f"开始时间:{min_date}")
+    print(f"结束时间:{max_date}")
+
+    if missing_intervals.empty:
+        print("数据完整性:完整,未发现缺失的蜡烛图数据")
+    else:
+        print(f"数据完整性:不完整,发现 {len(missing_intervals)} 处可能的缺失")
+        print("缺失位置示例:")
+        # Show at most the first five gaps.
+        for _, row in missing_intervals.head(5).iterrows():
+            gap_duration = pd.Timedelta(seconds=row['time_diff'])
+            print(f" - 在 {row['date']} 之前缺失了 {gap_duration}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='分析Freqtrade蜡烛图Feather文件')
+    parser.add_argument('--path', required=True, help='Feather文件路径')
+    args = parser.parse_args()
+
+    analyze_candlestick_data(args.path)