#!/usr/bin/env python3
"""Debug script for the verb-inflection tagging heuristics.

Contains a simplified, heavily traced copy of the verb-inflection
"enhancement" routine: every comma-separated form is annotated with a
bracketed grammatical tag, and each decision is printed so that mis-tagged
forms can be investigated.
"""

from typing import Optional

_ASCII_COMMA = ','
_FULLWIDTH_COMMA = '\uff0c'  # the Chinese (full-width) comma

# Short words that end in "ed" but must not be tagged by the "ed" rule.
_ED_EXCLUSIONS = frozenset({'led', 'fed', 'bed', 'red', 'wed'})

# Words ending in "en" that the suffix rule deliberately skips so that a later
# rule (explicit participle list / irregular table) can handle them.  The
# original list also carried duplicates and words not ending in "en"
# ('gone', 'drawn', 'worn', ...); those could never reach this check and were
# dropped without changing behaviour.
# NOTE(review): skipped words that no later rule knows ('given', 'hidden',
# 'ridden', 'risen') end up untagged -- confirm whether that is intended.
_EN_EXCLUSIONS = frozenset({
    'been', 'seen', 'given', 'taken', 'eaten', 'broken', 'spoken', 'chosen',
    'hidden', 'ridden', 'written', 'driven', 'forgotten', 'risen', 'fallen',
})

_AUXILIARIES = frozenset({
    'did', 'had', 'would', 'could', 'should', 'might', 'must',
})

# 'had' used to be listed here as well, but the auxiliary rule runs first and
# always claimed it, so the entry was dead.
_PAST_PARTICIPLES = frozenset({'done', 'been'})

_IRREGULAR_TAG = '[原形/过去式/过去分词]'

# Principal parts (base form, past tense, past participle).  A slash inside
# one entry separates alternative spellings of the same part.
_IRREGULAR_VERBS = (
    ('begin', 'began', 'begun'),
    ('break', 'broke', 'broken'),
    ('choose', 'chose', 'chosen'),
    ('drink', 'drank', 'drunk'),
    ('drive', 'drove', 'driven'),
    ('eat', 'ate', 'eaten'),
    ('fall', 'fell', 'fallen'),
    ('fly', 'flew', 'flown'),
    ('go', 'went', 'gone'),
    ('know', 'knew', 'known'),
    ('see', 'saw', 'seen'),
    ('take', 'took', 'taken'),
    ('write', 'wrote', 'written'),
    ('sing', 'sang', 'sung'),
    ('swim', 'swam', 'swum'),
    ('run', 'ran', 'run'),
    ('cut', 'cut', 'cut'),
    ('put', 'put', 'put'),
    ('read', 'read', 'read'),  # special case: same spelling, different sound
    ('buy', 'bought', 'bought'),
    ('catch', 'caught', 'caught'),
    ('fight', 'fought', 'fought'),
    ('think', 'thought', 'thought'),
    ('bring', 'brought', 'brought'),
    ('teach', 'taught', 'taught'),
    ('sell', 'sold', 'sold'),
    ('tell', 'told', 'told'),
    ('feel', 'felt', 'felt'),
    ('keep', 'kept', 'kept'),
    ('sleep', 'slept', 'slept'),
    ('speak', 'spoke', 'spoken'),
    ('steal', 'stole', 'stolen'),
    ('wear', 'wore', 'worn'),
    ('wake', 'woke', 'woken'),
    ('awake', 'awoke', 'awoken'),
    ('become', 'became', 'become'),
    ('come', 'came', 'come'),
    ('arise', 'arose', 'arisen'),
    ('arouse', 'aroused', 'aroused'),
    ('bear', 'bore', 'borne/born'),
    ('beat', 'beat', 'beaten'),
    ('bend', 'bent', 'bent'),
    ('bet', 'bet', 'bet'),
    ('bind', 'bound', 'bound'),
    ('bite', 'bit', 'bitten'),
    ('bleed', 'bled', 'bled'),
    ('blow', 'blew', 'blown'),
    ('breed', 'bred', 'bred'),
    ('build', 'built', 'built'),
    ('burn', 'burnt/burned', 'burnt/burned'),
    ('burst', 'burst', 'burst'),
    ('cast', 'cast', 'cast'),
    ('cling', 'clung', 'clung'),
    ('cost', 'cost', 'cost'),
    ('creep', 'crept', 'crept'),
    ('deal', 'dealt', 'dealt'),
    ('dig', 'dug', 'dug'),
    ('do', 'did', 'done'),
    ('draw', 'drew', 'drawn'),
    ('dream', 'dreamt/dreamed', 'dreamt/dreamed'),
    ('forget', 'forgot', 'forgotten'),
)


def _build_irregular_lookup(verbs):
    """Map every known spelling to the principal-parts tuple it belongs to.

    Both the raw entry (e.g. ``'burnt/burned'``) and each slash-separated
    alternative (``'burnt'``, ``'burned'``) are registered.  The first tuple
    to claim a spelling wins, which mirrors a first-match scan of the table.
    """
    lookup = {}
    for principal_parts in verbs:
        for part in principal_parts:
            lookup.setdefault(part, principal_parts)
            for variant in part.split('/'):
                lookup.setdefault(variant, principal_parts)
    return lookup


_IRREGULAR_LOOKUP = _build_irregular_lookup(_IRREGULAR_VERBS)
_IRREGULAR_BASE_FORMS = frozenset(parts[0] for parts in _IRREGULAR_VERBS)


def _classify(form):
    """Return ``(tag, trace)`` for *form*, or ``None`` when no rule applies.

    ``tag`` is the bracketed label appended to the form; ``trace`` is the
    description of the matching rule used in the debug output.
    """
    # A known irregular base form ('sing', 'bring', 'bleed', ...) is never a
    # regular inflection, so the suffix heuristics must not claim it.
    if form not in _IRREGULAR_BASE_FORMS:
        if form.endswith('ed') and len(form) > 2 and form not in _ED_EXCLUSIONS:
            return '[过去式/过去分词]', '匹配 ed 结尾'
        if form.endswith('ing') and len(form) > 3:
            return '[现在分词/动名词]', '匹配 ing 结尾'
        if form.endswith('s') and len(form) > 1 and not form.endswith('ss'):
            return '[第三人称单数]', '匹配 s 结尾'
        if form.endswith('en') and len(form) > 2 and form not in _EN_EXCLUSIONS:
            return '[过去分词]', '匹配 en 结尾'

    if form in _AUXILIARIES:
        return '[助动词]', '匹配助动词'
    if form in _PAST_PARTICIPLES:
        return '[过去分词]', '匹配过去分词'
    # The former 'doing'/'being' rule and the 'does' entry of this rule were
    # unreachable: the "ing" and "s" suffix rules above always matched first.
    if form == 'do':
        return '[原形/第三人称单数]', '匹配原形/第三人称单数'

    principal_parts = _IRREGULAR_LOOKUP.get(form)
    if principal_parts is not None:
        return _IRREGULAR_TAG, f"匹配不规则动词 {principal_parts}"
    return None


def test_enhance_verb_inflections(inflection: Optional[str]) -> Optional[str]:
    """Annotate each comma-separated verb form with a grammatical tag.

    Simplified, traced version of the verb-inflection enhancement routine,
    intended for debugging: every step is printed to stdout.

    Args:
        inflection: Verb forms separated by ASCII (",") or full-width
            Chinese commas.  A falsy value (``None``, ``""``) is returned
            unchanged.

    Returns:
        The forms in their original order, each followed by a bracketed tag
        when a rule matched, re-joined with a comma.  The full-width comma
        is used only when the input used it exclusively; otherwise the ASCII
        comma is used.  Empty segments are preserved as empty strings.
    """
    if not inflection:
        return inflection

    uses_only_fullwidth = (
        _FULLWIDTH_COMMA in inflection and _ASCII_COMMA not in inflection
    )
    delimiter = _FULLWIDTH_COMMA if uses_only_fullwidth else _ASCII_COMMA

    # Normalise the delimiter first so both comma styles split identically.
    normalised = inflection.replace(_FULLWIDTH_COMMA, _ASCII_COMMA)
    forms = [part.strip() for part in normalised.split(_ASCII_COMMA)]
    print(f"分割后的forms: {forms}")

    enhanced_forms = []
    for position, form in enumerate(forms, start=1):
        print(f"处理第{position}个form: '{form}'")
        if not form:
            enhanced_forms.append(form)
            continue

        match = _classify(form)
        if match is None:
            print(f" '{form}' 没有匹配任何特殊模式,保持原样")
            enhanced_forms.append(form)
            continue

        tag, trace = match
        result = f"{form} {tag}"
        print(f" '{form}' {trace}: {result}")
        enhanced_forms.append(result)

    result = delimiter.join(enhanced_forms)
    print(f"最终结果: {result}")
    return result


# Despite its ``test_`` prefix this is a helper that needs an argument, not a
# pytest test case; opt it out of test collection.
test_enhance_verb_inflections.__test__ = False

# Sample input exercised when the file is run as a script.
test_input = 'forgot,forgotten,forgetting'

if __name__ == "__main__":
    print(f"输入: {test_input}")
    output = test_enhance_verb_inflections(test_input)
    print(f"\n输出: {output}")