#!/usr/bin/env python3
# Test script for debugging the verb-inflection tagging problem.
# Labels for the three principal parts of an irregular verb, indexed by the
# position of the form inside its (base, past, past participle) tuple.
_PRINCIPAL_PART_LABELS = ('原形', '过去式', '过去分词')

# (base, past, past participle). "a/b" lists accepted spelling variants of
# the same principal part.
_IRREGULAR_VERBS = (
    ('begin', 'began', 'begun'),
    ('break', 'broke', 'broken'),
    ('choose', 'chose', 'chosen'),
    ('drink', 'drank', 'drunk'),
    ('drive', 'drove', 'driven'),
    ('eat', 'ate', 'eaten'),
    ('fall', 'fell', 'fallen'),
    ('fly', 'flew', 'flown'),
    ('go', 'went', 'gone'),
    ('know', 'knew', 'known'),
    ('see', 'saw', 'seen'),
    ('take', 'took', 'taken'),
    ('write', 'wrote', 'written'),
    ('sing', 'sang', 'sung'),
    ('swim', 'swam', 'swum'),
    ('run', 'ran', 'run'),
    ('cut', 'cut', 'cut'),
    ('put', 'put', 'put'),
    ('read', 'read', 'read'),  # special case: only the pronunciation differs
    ('buy', 'bought', 'bought'),
    ('catch', 'caught', 'caught'),
    ('fight', 'fought', 'fought'),
    ('think', 'thought', 'thought'),
    ('bring', 'brought', 'brought'),
    ('teach', 'taught', 'taught'),
    ('sell', 'sold', 'sold'),
    ('tell', 'told', 'told'),
    ('feel', 'felt', 'felt'),
    ('keep', 'kept', 'kept'),
    ('sleep', 'slept', 'slept'),
    ('speak', 'spoke', 'spoken'),
    ('steal', 'stole', 'stolen'),
    ('wear', 'wore', 'worn'),
    ('wake', 'woke', 'woken'),
    ('awake', 'awoke', 'awoken'),
    ('become', 'became', 'become'),
    ('come', 'came', 'come'),
    ('arise', 'arose', 'arisen'),
    ('arouse', 'aroused', 'aroused'),
    ('bear', 'bore', 'borne/born'),
    ('beat', 'beat', 'beaten'),
    ('bend', 'bent', 'bent'),
    ('bet', 'bet', 'bet'),
    ('bind', 'bound', 'bound'),
    ('bite', 'bit', 'bitten'),
    ('bleed', 'bled', 'bled'),
    ('blow', 'blew', 'blown'),
    ('breed', 'bred', 'bred'),
    ('build', 'built', 'built'),
    ('burn', 'burnt/burned', 'burnt/burned'),
    ('burst', 'burst', 'burst'),
    ('cast', 'cast', 'cast'),
    ('cling', 'clung', 'clung'),
    ('cost', 'cost', 'cost'),
    ('creep', 'crept', 'crept'),
    ('deal', 'dealt', 'dealt'),
    ('dig', 'dug', 'dug'),
    ('do', 'did', 'done'),
    ('draw', 'drew', 'drawn'),
    ('dream', 'dreamt/dreamed', 'dreamt/dreamed'),
    ('forget', 'forgot', 'forgotten'),
)

# Words whose label is fixed and takes precedence over every other rule.
# NOTE(review): 'do' keeps the combined label the original code gave it;
# confirm whether a plain base-form label is wanted instead.
_FIXED_LABELS = {
    'did': '助动词',
    'had': '助动词',
    'would': '助动词',
    'could': '助动词',
    'should': '助动词',
    'might': '助动词',
    'must': '助动词',
    'done': '过去分词',
    'been': '过去分词',
    'do': '原形/第三人称单数',
}

# Short words that end in "ed" but are exempt from the "-ed" suffix rule.
_ED_LOOKALIKES = frozenset({'led', 'fed', 'bed', 'red', 'wed'})


def _build_irregular_index(verbs):
    """Map every irregular spelling to ``(verb tuple, position-based label)``."""
    slots_by_form = {}
    verb_by_form = {}
    for parts in verbs:
        for slot, spelling in enumerate(parts):
            # "burnt/burned" contributes both spellings to the same slot.
            for variant in spelling.split('/'):
                slots_by_form.setdefault(variant, set()).add(slot)
                verb_by_form.setdefault(variant, parts)
    return {
        variant: (
            verb_by_form[variant],
            '/'.join(_PRINCIPAL_PART_LABELS[slot] for slot in sorted(slots)),
        )
        for variant, slots in slots_by_form.items()
    }


# Built once at import time instead of once per unmatched form.
_IRREGULAR_INDEX = _build_irregular_index(_IRREGULAR_VERBS)


def _classify_verb_form(form):
    """Return ``(label, debug reason)`` for *form*, or ``None`` if no rule fits."""
    if form in _FIXED_LABELS:
        label = _FIXED_LABELS[form]
        return label, f"匹配{label}"
    # Known irregular forms are looked up before the suffix heuristics so that
    # base forms such as "sing" or "bleed" are not mistaken for "-ing"/"-ed"
    # inflections.
    if form in _IRREGULAR_INDEX:
        parts, label = _IRREGULAR_INDEX[form]
        return label, f"匹配不规则动词 {parts}"
    if form.endswith('ed') and len(form) > 2 and form not in _ED_LOOKALIKES:
        return '过去式/过去分词', "匹配 ed 结尾"
    if form.endswith('ing') and len(form) > 3:
        return '现在分词/动名词', "匹配 ing 结尾"
    if form.endswith('s') and len(form) > 1 and not form.endswith('ss'):
        return '第三人称单数', "匹配 s 结尾"
    if form.endswith('en') and len(form) > 2:
        return '过去分词', "匹配 en 结尾"
    return None


def test_enhance_verb_inflections(inflection=None):
    """Simplified debug version of the verb-inflection tagging function.

    Splits *inflection* on commas, appends a bracketed grammatical label to
    every form that a rule recognises, and joins the forms back together.
    Progress is printed for each form.

    Rules are tried in this order: fixed-label words, the irregular-verb
    table (labelled by the position(s) the form occupies in its verb's
    principal parts), then the ``-ed`` / ``-ing`` / ``-s`` / ``-en`` suffix
    heuristics. Forms that match nothing are kept unchanged.

    Args:
        inflection: Comma-separated verb forms. Both the ASCII comma and the
            full-width comma are accepted as separators. Defaults to ``None``
            so the function can also be invoked without arguments (for
            example by a test runner that collects ``test_*`` callables).

    Returns:
        The tagged forms joined by a comma, or *inflection* itself when it is
        empty or ``None``. The full-width comma is used as the output
        separator only when the input used it exclusively.
    """
    if not inflection:
        return inflection

    uses_only_fullwidth = ',' in inflection and ',' not in inflection
    separator = ',' if uses_only_fullwidth else ','
    forms = [form.strip() for form in inflection.replace(',', ',').split(',')]
    print(f"分割后的forms: {forms}")

    enhanced_forms = []
    for number, form in enumerate(forms, start=1):
        print(f"处理第{number}个form: '{form}'")
        if not form:
            # Keep empty slots so the number of separators is preserved.
            enhanced_forms.append(form)
            continue

        match = _classify_verb_form(form)
        if match is None:
            print(f" '{form}' 没有匹配任何特殊模式,保持原样")
            enhanced_forms.append(form)
            continue

        label, reason = match
        result = f"{form} [{label}]"
        print(f" '{form}' {reason}: {result}")
        enhanced_forms.append(result)

    result = separator.join(enhanced_forms)
    print(f"最终结果: {result}")
    return result
# Demo run: tag a sample inflection list and print the input and the result.
# Guarded so that importing this module (for example during test collection)
# does not execute the demo.
if __name__ == "__main__":
    test_input = 'forgot,forgotten,forgetting'
    print(f"输入: {test_input}")
    output = test_enhance_verb_inflections(test_input)
    print(f"\n输出: {output}")