notes_study/qishang/test_debug.py
zhangkun9038@dingtalk.com 4546fdde45 first add
2026-02-24 14:05:38 +08:00

147 lines
8.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# 测试脚本,用于调试词形变化标记问题
# Separator between inflection forms: the fullwidth (Chinese) comma U+FF0C.
# It is written as an escape so the character cannot be silently dropped or
# mistaken for an ASCII comma by editors and code-hosting tools.
_FORM_SEPARATOR = '\uff0c'

# Short words that end in "ed" but must not be tagged by the "-ed" rule.
_ED_EXCEPTIONS = frozenset({'led', 'fed', 'bed', 'red', 'wed'})

# Words ending in "en" that skip the generic "-en" rule and fall through to
# the later, more specific checks.
_EN_EXCEPTIONS = frozenset({
    'been', 'seen', 'given', 'taken', 'eaten', 'broken', 'spoken', 'chosen',
    'hidden', 'ridden', 'written', 'driven', 'forgotten', 'risen', 'fallen',
})

_AUXILIARIES = frozenset(
    {'did', 'had', 'would', 'could', 'should', 'might', 'must'}
)

# 'had' is deliberately absent: it is always claimed by the auxiliary check,
# which runs first.
_PAST_PARTICIPLES = frozenset({'done', 'been'})

# Every member of an irregular-verb triple receives this same combined tag.
_IRREGULAR_TAG = '[原形/过去式/过去分词]'

# (base form, past tense, past participle); "a/b" lists alternate spellings.
_IRREGULAR_VERBS = (
    ('begin', 'began', 'begun'),
    ('break', 'broke', 'broken'),
    ('choose', 'chose', 'chosen'),
    ('drink', 'drank', 'drunk'),
    ('drive', 'drove', 'driven'),
    ('eat', 'ate', 'eaten'),
    ('fall', 'fell', 'fallen'),
    ('fly', 'flew', 'flown'),
    ('go', 'went', 'gone'),
    ('know', 'knew', 'known'),
    ('see', 'saw', 'seen'),
    ('take', 'took', 'taken'),
    ('write', 'wrote', 'written'),
    ('sing', 'sang', 'sung'),
    ('swim', 'swam', 'swum'),
    ('run', 'ran', 'run'),
    ('cut', 'cut', 'cut'),
    ('put', 'put', 'put'),
    ('read', 'read', 'read'),  # special case: same spelling, different sound
    ('buy', 'bought', 'bought'),
    ('catch', 'caught', 'caught'),
    ('fight', 'fought', 'fought'),
    ('think', 'thought', 'thought'),
    ('bring', 'brought', 'brought'),
    ('teach', 'taught', 'taught'),
    ('sell', 'sold', 'sold'),
    ('tell', 'told', 'told'),
    ('feel', 'felt', 'felt'),
    ('keep', 'kept', 'kept'),
    ('sleep', 'slept', 'slept'),
    ('speak', 'spoke', 'spoken'),
    ('steal', 'stole', 'stolen'),
    ('wear', 'wore', 'worn'),
    ('wake', 'woke', 'woken'),
    ('awake', 'awoke', 'awoken'),
    ('become', 'became', 'become'),
    ('come', 'came', 'come'),
    ('arise', 'arose', 'arisen'),
    ('arouse', 'aroused', 'aroused'),
    ('bear', 'bore', 'borne/born'),
    ('beat', 'beat', 'beaten'),
    ('bend', 'bent', 'bent'),
    ('bet', 'bet', 'bet'),
    ('bind', 'bound', 'bound'),
    ('bite', 'bit', 'bitten'),
    ('bleed', 'bled', 'bled'),
    ('blow', 'blew', 'blown'),
    ('breed', 'bred', 'bred'),
    ('build', 'built', 'built'),
    ('burn', 'burnt/burned', 'burnt/burned'),
    ('burst', 'burst', 'burst'),
    ('cast', 'cast', 'cast'),
    ('cling', 'clung', 'clung'),
    ('cost', 'cost', 'cost'),
    ('creep', 'crept', 'crept'),
    ('deal', 'dealt', 'dealt'),
    ('dig', 'dug', 'dug'),
    ('do', 'did', 'done'),
    ('draw', 'drew', 'drawn'),
    ('dream', 'dreamt/dreamed', 'dreamt/dreamed'),
    ('forget', 'forgot', 'forgotten'),
)


def _build_irregular_index(verbs):
    """Map each irregular form to the first verb triple that lists it."""
    index = {}
    for triple in verbs:
        for member in triple:
            # setdefault keeps the earliest triple, like a top-to-bottom scan.
            index.setdefault(member, triple)
            # Index "a/b" alternates individually; a plain membership test on
            # the triple could never match 'burnt' against 'burnt/burned'.
            for variant in member.split('/'):
                index.setdefault(variant, triple)
    return index


# Built once at import time instead of on every loop iteration.
_IRREGULAR_INDEX = _build_irregular_index(_IRREGULAR_VERBS)


def _classify_form(form):
    """Return ``(rule_label, tag)`` for the first rule matching *form*, else ``None``."""
    # Suffix heuristics run first, so they win over the word lists below
    # (for example 'does' is claimed by the "-s" rule, 'doing' by "-ing").
    if form.endswith('ed') and len(form) > 2 and form not in _ED_EXCEPTIONS:
        return ' ed 结尾', '[过去式/过去分词]'
    if form.endswith('ing') and len(form) > 3:
        return ' ing 结尾', '[现在分词/动名词]'
    if form.endswith('s') and len(form) > 1 and not form.endswith('ss'):
        return ' s 结尾', '[第三人称单数]'
    if form.endswith('en') and len(form) > 2 and form not in _EN_EXCEPTIONS:
        return ' en 结尾', '[过去分词]'
    if form in _AUXILIARIES:
        return '助动词', '[助动词]'
    if form in _PAST_PARTICIPLES:
        return '过去分词', '[过去分词]'
    if form == 'do':
        return '原形/第三人称单数', '[原形/第三人称单数]'
    triple = _IRREGULAR_INDEX.get(form)
    if triple is not None:
        return f'不规则动词 {triple}', _IRREGULAR_TAG
    return None


def test_enhance_verb_inflections(inflection):
    """Append a grammatical tag to each verb form in an inflection string.

    Simplified debugging version of the verb-inflection enhancer. It prints a
    trace of every decision to stdout.

    Args:
        inflection: Verb forms separated by the fullwidth comma (U+FF0C).
            A falsy value (``None`` or ``''``) is returned unchanged.

    Returns:
        The forms joined by the same separator. Each form is stripped of
        surrounding whitespace and, when a rule matches, followed by a space
        and a bracketed tag. Forms that match no rule, and empty forms, are
        kept as they are.
    """
    if not inflection:
        return inflection

    forms = [form.strip() for form in inflection.split(_FORM_SEPARATOR)]
    print(f"分割后的forms: {forms}")

    enhanced_forms = []
    for i, form in enumerate(forms):
        print(f"处理第{i+1}个form: '{form}'")
        if not form:
            # Keep empty slots so the number of separators is preserved.
            enhanced_forms.append(form)
            continue

        match = _classify_form(form)
        if match is None:
            print(f" '{form}' 没有匹配任何特殊模式,保持原样")
            enhanced_forms.append(form)
            continue

        rule_label, tag = match
        result = f"{form} {tag}"
        print(f" '{form}' 匹配{rule_label}: {result}")
        enhanced_forms.append(result)

    joined = _FORM_SEPARATOR.join(enhanced_forms)
    print(f"最终结果: {joined}")
    return joined


# The name starts with "test_" but the function needs an argument. Without
# this flag pytest would collect it and fail looking for an "inflection"
# fixture.
test_enhance_verb_inflections.__test__ = False
# Smoke run: three inflections of "forget" separated by the fullwidth comma
# (U+FF0C). The separator is written as an escape so it cannot be silently
# dropped, which would fuse the three forms into a single token.
test_input = 'forgot\uff0cforgotten\uff0cforgetting'
print(f"输入: {test_input}")
output = test_enhance_verb_inflections(test_input)
print(f"\n输出: {output}")