python view_feather.py --path ../user_data/data/okx/TRUMP_USDT-5m.feather
This commit is contained in:
parent
cf6a7c83c7
commit
bdf079264c
@ -60,11 +60,14 @@ if [[ "$@" == *"--timerange"* ]] && [[ "$@" == *"--days"* ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Get timerange or days from parameters
|
# Get timerange or days from parameters
|
||||||
|
timerange=""
|
||||||
|
days=""
|
||||||
if [[ "$@" == *"--timerange"* ]]; then
|
if [[ "$@" == *"--timerange"* ]]; then
|
||||||
timerange=$(get_param_value "--timerange" "$@")
|
timerange=$(get_param_value "--timerange" "$@")
|
||||||
elif [[ "$@" == *"--days"* ]]; then
|
elif [[ "$@" == *"--days"* ]]; then
|
||||||
days=$(get_param_value "--days" "$@")
|
days=$(get_param_value "--days" "$@")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Get pairs and timeframe from parameters or use defaults
|
# Get pairs and timeframe from parameters or use defaults
|
||||||
pairs=$(get_csv_param_value "--pairs" "$@")
|
pairs=$(get_csv_param_value "--pairs" "$@")
|
||||||
timeframe=$(get_csv_param_value "--timeframe" "$@")
|
timeframe=$(get_csv_param_value "--timeframe" "$@")
|
||||||
@ -75,18 +78,22 @@ if [[ -z "$pairs" ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ -z "$timeframe" ]]; then
|
if [[ -z "$timeframe" ]]; then
|
||||||
timeframe="3m,5m,15m,30m,1h,4h,6h,12h,1d"
|
timeframe="5m,15m,30m,1h,4h,6h,12h,1d"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Convert timeframe string to array
|
||||||
|
IFS=',' read -r -a timeframe_array <<<"$timeframe"
|
||||||
|
timeframe_array_str=$(printf " '%s'" "${timeframe_array[@]}")
|
||||||
|
|
||||||
# Initialize the base command
|
# Initialize the base command
|
||||||
cmd="docker-compose run --rm freqtrade download-data --config /freqtrade/config_examples/basic.json --pairs $pairs --timeframe $timeframe"
|
cmd="docker-compose run --rm freqtrade download-data --config /freqtrade/config_examples/basic.json --pairs $pairs --timeframe$timeframe_array_str"
|
||||||
|
|
||||||
# Add timerange or days if provided
|
# Add timerange or days if provided
|
||||||
if [[ -n "$timerange" ]]; then
|
if [[ -n "$timerange" ]]; then
|
||||||
cmd+=" --timerange $timerange"
|
cmd+=" --timerange='$timerange'"
|
||||||
elif [[ -n "$days" ]]; then
|
elif [[ -n "$days" ]]; then
|
||||||
cmd+=" --days $days"
|
cmd+=" --days=$days"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Execute the command
|
# Execute the command
|
||||||
eval $cmd
|
eval "$cmd"
|
||||||
|
|||||||
65
tools/view_feather.py
Normal file
65
tools/view_feather.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import argparse
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def analyze_candlestick_data(file_path):
    """Print a summary and a time-continuity check for a candle Feather file.

    Reads the file with ``pd.read_feather`` and prints, in order: a preview
    of the data (the whole table when it has fewer than 500 rows, otherwise
    the first rows), the ``DataFrame.info()`` summary, descriptive
    statistics, the time span covered by the ``date`` column, and the
    positions where consecutive candles are further apart than the most
    common interval.

    Args:
        file_path: Path of the Feather file to inspect. The table must
            contain a ``date`` column holding datetime values.

    Returns:
        None. Everything is written to stdout.
    """
    df = pd.read_feather(file_path)

    rows, columns = df.shape
    is_short = rows < 500

    if is_short:
        # Short table (fewer than 500 rows): dump every row.
        print('数据全部内容信息:')
        print(df.to_csv(sep='\t', na_rep='nan'))
    else:
        # Long table: only show the first few rows.
        print('数据前几行内容信息:')
        print(df.head().to_csv(sep='\t', na_rep='nan'))

    print('数据基本信息:')
    df.info()

    if columns < 10 and is_short:
        # Short and narrow table: statistics over the whole data set.
        print('数据全部内容描述性统计信息:')
        print(df.describe(include='all', percentiles=[.25, .5, .75]).to_csv(sep='\t', na_rep='nan'))
    else:
        # Otherwise statistics over the first few rows only.
        print('数据前几行描述性统计信息:')
        print(df.head().describe(include='all', percentiles=[.25, .5, .75]).to_csv(sep='\t', na_rep='nan'))

    _report_time_coverage(df)


def _report_time_coverage(df):
    """Print the span of ``df['date']`` and list gaps between consecutive rows."""
    ordered = df.sort_values('date')  # intervals are only meaningful in time order
    first_date = ordered['date'].min()
    last_date = ordered['date'].max()

    print(f"\n数据时间跨度:{last_date - first_date}")
    print(f"开始时间:{first_date}")
    print(f"结束时间:{last_date}")

    # Seconds between each row and the previous one; the first entry is NaN.
    step_seconds = ordered['date'].diff().dt.total_seconds()

    # The most frequent interval is taken as the expected candle spacing.
    # With fewer than two valid timestamps there is no interval at all and
    # mode() is empty, so indexing it would raise.
    common_steps = step_seconds.mode()
    if common_steps.empty:
        print("数据完整性:无法检查,有效时间点不足两个")
        return

    expected_step = common_steps.iloc[0]
    is_gap = step_seconds > expected_step

    if not is_gap.any():
        print("数据完整性:完整,未发现缺失的蜡烛图数据")
        return

    print(f"数据完整性:不完整,发现 {int(is_gap.sum())} 处可能的缺失")
    print("缺失位置示例:")
    # Show at most the first five gaps.
    gap_dates = ordered.loc[is_gap, 'date'].head(5)
    gap_steps = step_seconds[is_gap].head(5)
    for gap_date, seconds in zip(gap_dates, gap_steps):
        gap_duration = pd.Timedelta(seconds=seconds)
        print(f" - 在 {gap_date} 之前缺失了 {gap_duration}")
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: the Feather file to analyze is passed
    # through the mandatory --path option.
    cli = argparse.ArgumentParser(description='分析Freqtrade蜡烛图Feather文件')
    cli.add_argument('--path', required=True, help='Feather文件路径')
    options = cli.parse_args()

    analyze_candlestick_data(options.path)
|
||||||
Loading…
x
Reference in New Issue
Block a user