431 lines
24 KiB
Python
431 lines
24 KiB
Python
import re
|
||
|
||
def enhance_inflections(input_path='外研社七年级上.txt',
                        output_path='enhanced_外研社七年级上.txt'):
    """Annotate the inflection column of a tab-separated vocabulary file.

    Lines starting with ``#`` are treated as header lines and are written
    first, in their original relative order. Blank data lines are dropped.
    Data lines with fewer than 10 tab-separated columns are copied through
    unchanged. For every other line, column 5 (index 4) is replaced by the
    result of ``add_inflection_tags(word, pos, inflection)``, where the
    word is column 1 and the part-of-speech text is column 3.

    Args:
        input_path: Path of the UTF-8 source file. Defaults to the file
            name the script was originally hard-wired to.
        output_path: Path of the UTF-8 file to write. Defaults to the
            original hard-wired output name.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    header_lines = []
    processed_lines = []
    for raw_line in lines:
        # Strip the newline up front so a final line without a trailing
        # newline cannot end up glued to whatever is written after it.
        line = raw_line.rstrip('\n')

        if line.startswith('#'):
            header_lines.append(line)
            continue
        if not line.strip():
            continue

        parts = line.split('\t')
        if len(parts) >= 10:  # only full rows carry an inflection column
            parts[4] = add_inflection_tags(parts[0], parts[2], parts[4])
            line = '\t'.join(parts)
        processed_lines.append(line)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(entry + '\n' for entry in header_lines + processed_lines)
|
||
|
||
def add_inflection_tags(word, pos, inflection):
    """Dispatch an inflection string to the tagger matching its part of speech.

    Args:
        word: The headword; used only to detect an inflection field that
            merely repeats it.
        pos: Free-form part-of-speech text that may mix Chinese glosses with
            English abbreviations, e.g. "醒着的;v. 唤醒" or "v. 忘记,遗忘".
        inflection: The raw inflection field.

    Returns:
        ``inflection`` unchanged when it is empty or equal to ``word``;
        otherwise the output of the verb / noun / adjective / adverb /
        general tagger, tried in that order of precedence.
    """
    if not inflection or inflection == word:
        return inflection

    def has_marker(abbreviation):
        # A plain substring test is wrong here: 'v.' is a suffix of 'adv.'
        # and 'n.' is a suffix of 'pron.'. Require that the abbreviation is
        # not preceded by a Latin letter so only the stand-alone marker hits.
        pattern = r'(?<![A-Za-z])' + re.escape(abbreviation)
        return re.search(pattern, pos) is not None

    if has_marker('v.'):
        return enhance_verb_inflections(inflection)
    if has_marker('n.'):
        return enhance_noun_inflections(inflection)
    if has_marker('adj.'):
        # Some adjectives also list verb forms (e.g. awake: awoke, awoken),
        # so look at the forms themselves before choosing a tagger.
        if contains_verb_forms(inflection):
            return enhance_verb_inflections(inflection)
        return enhance_adjective_inflections(inflection)
    if has_marker('adv.'):
        return enhance_adverb_inflections(inflection)
    return enhance_general_inflections(inflection)
|
||
|
||
# Irregular past-tense / past-participle forms recognised as verb forms.
_IRREGULAR_VERB_FORMS = frozenset({
    'awoke', 'awoken', 'arose', 'arisen', 'bore', 'born', 'borne',
    'beat', 'beaten', 'became', 'been', 'began', 'begun', 'bent',
    'bit', 'bitten', 'bled', 'blew', 'blown', 'bought', 'brought',
    'caught', 'chose', 'chosen', 'did', 'done', 'drew', 'drawn',
    'drank', 'drunk', 'drove', 'driven', 'ate', 'eaten', 'fell',
    'fallen', 'fed', 'felt', 'fought', 'found', 'fled', 'flew',
    'flown', 'forgot', 'forgotten', 'froze', 'frozen', 'gave',
    'given', 'went', 'gone', 'grew', 'grown', 'hid', 'hidden',
    'hung', 'knelt', 'kept', 'knew', 'known', 'laid', 'led',
    'left', 'lent', 'lay', 'lain', 'lost', 'made', 'meant',
    'met', 'paid', 'ran', 'rang', 'rung', 'rose', 'risen',
    'said', 'saw', 'seen', 'sold', 'sent', 'shook', 'shaken',
    'shone', 'shot', 'showed', 'shown', 'shrank', 'shrunk',
    'sang', 'sung', 'sat', 'sank', 'sunk', 'slid', 'spoke',
    'spoken', 'spent', 'spun', 'spread', 'sprang', 'sprung',
    'stood', 'stole', 'stolen', 'stuck', 'stung', 'stank',
    'stunk', 'swam', 'swum', 'swung', 'took', 'taken', 'taught',
    'tore', 'torn', 'told', 'thought', 'threw', 'thrown',
    'understood', 'woke', 'woken', 'wore', 'worn', 'wove',
    'woven', 'won', 'wound', 'withdrew', 'withdrawn', 'wrote',
    'written',
})

# Suffixes that suggest a regular verb form.
_VERB_SUFFIXES = ('ed', 'ing', 's', 'en')

# Common non-verb words that would otherwise match the 'ed' suffix.
_SUFFIX_FALSE_POSITIVES = frozenset({'bed', 'red', 'wed'})


def contains_verb_forms(inflection):
    """Return True if any listed form looks like a verb inflection.

    The field is split on ASCII or full-width commas. A form counts as a
    verb form when, compared case-insensitively, it is a known irregular
    past/participle form, or it is longer than and ends with one of the
    suffixes 'ed', 'ing', 's', 'en' and is not one of 'bed', 'red', 'wed'.

    Args:
        inflection: Raw inflection field; may be empty or None.

    Returns:
        bool: True on the first matching form, False otherwise.
    """
    if not inflection:
        return False

    # Normalise the full-width comma so both separators are handled.
    pieces = inflection.replace(',', ',').split(',')
    for piece in pieces:
        form = piece.strip().lower()
        if not form:
            continue
        if form in _IRREGULAR_VERB_FORMS:
            return True
        if form in _SUFFIX_FALSE_POSITIVES:
            continue
        if any(form.endswith(suffix) and len(form) > len(suffix)
               for suffix in _VERB_SUFFIXES):
            return True
    return False
|
||
|
||
# Principal parts (base, past tense, past participle) of irregular verbs.
# A cell may list alternatives separated by '/'.
_IRREGULAR_VERBS = (
    ('begin', 'began', 'begun'),
    ('break', 'broke', 'broken'),
    ('choose', 'chose', 'chosen'),
    ('drink', 'drank', 'drunk'),
    ('drive', 'drove', 'driven'),
    ('eat', 'ate', 'eaten'),
    ('fall', 'fell', 'fallen'),
    ('fly', 'flew', 'flown'),
    ('go', 'went', 'gone'),
    ('know', 'knew', 'known'),
    ('see', 'saw', 'seen'),
    ('take', 'took', 'taken'),
    ('write', 'wrote', 'written'),
    ('sing', 'sang', 'sung'),
    ('swim', 'swam', 'swum'),
    ('run', 'ran', 'run'),
    ('cut', 'cut', 'cut'),
    ('put', 'put', 'put'),
    ('read', 'read', 'read'),  # same spelling, different pronunciation
    ('buy', 'bought', 'bought'),
    ('catch', 'caught', 'caught'),
    ('fight', 'fought', 'fought'),
    ('think', 'thought', 'thought'),
    ('bring', 'brought', 'brought'),
    ('teach', 'taught', 'taught'),
    ('sell', 'sold', 'sold'),
    ('tell', 'told', 'told'),
    ('feel', 'felt', 'felt'),
    ('keep', 'kept', 'kept'),
    ('sleep', 'slept', 'slept'),
    ('speak', 'spoke', 'spoken'),
    ('steal', 'stole', 'stolen'),
    ('wear', 'wore', 'worn'),
    ('wake', 'woke', 'woken'),
    ('awake', 'awoke', 'awoken'),
    ('become', 'became', 'become'),
    ('come', 'came', 'come'),
    ('arise', 'arose', 'arisen'),
    ('arouse', 'aroused', 'aroused'),
    ('bear', 'bore', 'borne/born'),
    ('beat', 'beat', 'beaten'),
    ('bend', 'bent', 'bent'),
    ('bet', 'bet', 'bet'),
    ('bind', 'bound', 'bound'),
    ('bite', 'bit', 'bitten'),
    ('bleed', 'bled', 'bled'),
    ('blow', 'blew', 'blown'),
    ('breed', 'bred', 'bred'),
    ('build', 'built', 'built'),
    ('burn', 'burnt/burned', 'burnt/burned'),
    ('burst', 'burst', 'burst'),
    ('cast', 'cast', 'cast'),
    ('cling', 'clung', 'clung'),
    ('cost', 'cost', 'cost'),
    ('creep', 'crept', 'crept'),
    ('deal', 'dealt', 'dealt'),
    ('dig', 'dug', 'dug'),
    ('do', 'did', 'done'),
    ('draw', 'drew', 'drawn'),
    ('dream', 'dreamt/dreamed', 'dreamt/dreamed'),
    ('forget', 'forgot', 'forgotten'),
    ('forgive', 'forgave', 'forgiven'),
    ('freeze', 'froze', 'frozen'),
    ('get', 'got', 'gotten/got'),
    ('give', 'gave', 'given'),
    ('grind', 'ground', 'ground'),
    ('grow', 'grew', 'grown'),
    ('hang', 'hung', 'hung'),
    ('have', 'had', 'had'),
    ('hear', 'heard', 'heard'),
    ('hide', 'hid', 'hidden'),
    ('hit', 'hit', 'hit'),
    ('hold', 'held', 'held'),
    ('hurt', 'hurt', 'hurt'),
    ('kneel', 'knelt', 'knelt'),
    ('lay', 'laid', 'laid'),
    ('lead', 'led', 'led'),
    ('lean', 'leant/leaned', 'leant/leaned'),
    ('learn', 'learnt/learned', 'learnt/learned'),
    ('leave', 'left', 'left'),
    ('lend', 'lent', 'lent'),
    ('let', 'let', 'let'),
    ('lie', 'lay', 'lain'),
    ('light', 'lit/lighted', 'lit/lighted'),
    ('lose', 'lost', 'lost'),
    ('make', 'made', 'made'),
    ('mean', 'meant', 'meant'),
    ('meet', 'met', 'met'),
    ('overcome', 'overcame', 'overcome'),
    ('pay', 'paid', 'paid'),
    ('quit', 'quit', 'quit'),
    ('ride', 'rode', 'ridden'),
    ('ring', 'rang', 'rung'),
    ('rise', 'rose', 'risen'),
    ('say', 'said', 'said'),
    ('seek', 'sought', 'sought'),
    ('send', 'sent', 'sent'),
    ('set', 'set', 'set'),
    ('shake', 'shook', 'shaken'),
    ('shine', 'shone', 'shone'),
    ('shoot', 'shot', 'shot'),
    ('show', 'showed', 'shown'),
    ('shrink', 'shrank', 'shrunk'),
    ('sink', 'sank', 'sunk'),
    ('slide', 'slid', 'slid'),
    ('speed', 'sped', 'sped'),
    ('spell', 'spelt/spelled', 'spelt/spelled'),
    ('spend', 'spent', 'spent'),
    ('spill', 'spilt/spilled', 'spilt/spilled'),
    ('spin', 'span/spun', 'spun'),
    ('spit', 'spat', 'spat'),
    ('spread', 'spread', 'spread'),
    ('spring', 'sprang', 'sprung'),
    ('stand', 'stood', 'stood'),
    ('stick', 'stuck', 'stuck'),
    ('sting', 'stung', 'stung'),
    ('stink', 'stank', 'stunk'),
    ('stride', 'strode', 'stridden'),
    ('strike', 'struck', 'stricken'),
    ('string', 'strung', 'strung'),
    ('strive', 'strove', 'striven'),
    ('swear', 'swore', 'sworn'),
    ('sweep', 'swept', 'swept'),
    ('swing', 'swung', 'swung'),
    ('tear', 'tore', 'torn'),
    ('throw', 'threw', 'thrown'),
    ('thrust', 'thrust', 'thrust'),
    ('tread', 'trod', 'trodden'),
    ('understand', 'understood', 'understood'),
    ('upset', 'upset', 'upset'),
    ('weave', 'wove', 'woven'),
    ('win', 'won', 'won'),
    ('wind', 'wound', 'wound'),
    ('withdraw', 'withdrew', 'withdrawn'),
    ('wring', 'wrung', 'wrung'),
)

# Labels for the three principal-part positions, in tuple order.
_PRINCIPAL_PART_LABELS = ('原形', '过去式', '过去分词')

# Words ending in 'ed' that are not regular past forms.
_NOT_REGULAR_PAST = frozenset({'led', 'fed', 'bed', 'red', 'wed'})

# Forms tagged as auxiliaries rather than by their principal-part role.
_AUXILIARY_FORMS = frozenset(
    {'did', 'had', 'would', 'could', 'should', 'might', 'must'}
)


def _build_irregular_verb_tags():
    """Map each irregular form to a tag naming every role it fills."""
    tags = {}
    for principal_parts in _IRREGULAR_VERBS:
        roles = {}
        for label, cell in zip(_PRINCIPAL_PART_LABELS, principal_parts):
            # Register the literal cell and each '/'-separated alternative.
            variants = [cell] + cell.split('/') if '/' in cell else [cell]
            for variant in variants:
                roles.setdefault(variant, []).append(label)
        for form, labels in roles.items():
            # The first verb listing a form wins, matching table order.
            tags.setdefault(form, '[' + '/'.join(labels) + ']')
    return tags


_IRREGULAR_VERB_TAGS = _build_irregular_verb_tags()


def enhance_verb_inflections(inflection):
    """Append a grammatical tag to each verb form in an inflection field.

    The field is split on ASCII or full-width commas. Each form is tagged
    by the first rule that applies:

    1. ends in 'ed' (and is not led/fed/bed/red/wed): past / past participle
    2. ends in 'ing': present participle / gerund
    3. ends in 's' but not 'ss': third-person singular
    4. ends in 'en': past participle
    5. did/had/would/could/should/might/must: auxiliary
    6. a form in the irregular-verb table: every principal-part role it
       fills in the first verb that lists it (e.g. 'bought' is both past
       tense and past participle)

    Forms matching no rule, and empty entries, are kept untagged.

    Args:
        inflection: Raw inflection field; may be empty or None.

    Returns:
        The tagged forms joined with ', ', or ``inflection`` itself when it
        is empty or None.
    """
    if not inflection:
        return inflection

    enhanced_forms = []
    for piece in inflection.replace(',', ',').split(','):
        form = piece.strip()
        if not form:
            enhanced_forms.append(form)
            continue

        if (form.endswith('ed') and len(form) > 2
                and form not in _NOT_REGULAR_PAST):
            tag = '[过去式/过去分词]'
        elif form.endswith('ing') and len(form) > 3:
            tag = '[现在分词/动名词]'
        elif (form.endswith('s') and len(form) > 1
                and not form.endswith('ss')):
            tag = '[第三人称单数]'
        elif form.endswith('en') and len(form) > 2:
            tag = '[过去分词]'
        elif form in _AUXILIARY_FORMS:
            tag = '[助动词]'
        else:
            tag = _IRREGULAR_VERB_TAGS.get(form)

        enhanced_forms.append(f"{form} {tag}" if tag else form)

    return ', '.join(enhanced_forms)
|
||
|
||
def enhance_noun_inflections(inflection):
    """Tag plural noun forms in an inflection field.

    The field is split on ASCII or full-width commas. Any form longer than
    one character that ends in 's' is tagged as plural; every other form,
    including empty entries, is kept as is.

    Args:
        inflection: Raw inflection field; may be empty or None.

    Returns:
        The forms joined with ', ', or ``inflection`` itself when it is
        empty or None.
    """
    if not inflection:
        return inflection

    # Normalise the full-width comma so both separators are handled.
    forms = [piece.strip() for piece in inflection.replace(',', ',').split(',')]

    # The -ies / -es / -ves / -s endings all get the same tag, so a single
    # "ends in s" test covers every case.
    return ', '.join(
        f"{form} [复数]" if len(form) > 1 and form.endswith('s') else form
        for form in forms
    )
|
||
|
||
# Comparative / superlative forms that the suffix rules cannot recognise.
_IRREGULAR_DEGREE_TAGS = {
    'worse': '[比较级]',
    'worst': '[最高级]',
    'less': '[比较级]',
    'least': '[最高级]',
    'more': '[比较级]',
    'most': '[最高级]',
}


def _tag_degree_forms(inflection):
    """Tag comparative and superlative forms in an inflection field."""
    if not inflection:
        return inflection

    tagged = []
    # Normalise the full-width comma so both separators are handled.
    for piece in inflection.replace(',', ',').split(','):
        form = piece.strip()
        if form in _IRREGULAR_DEGREE_TAGS:
            tag = _IRREGULAR_DEGREE_TAGS[form]
        # Periphrastic forms are checked before the suffixes so that
        # e.g. "most clever" is not mistaken for an "-er" comparative.
        elif form.startswith('more '):
            tag = '[比较级]'
        elif form.startswith('most '):
            tag = '[最高级]'
        elif form.endswith('er') and len(form) > 2:
            tag = '[比较级]'
        elif form.endswith('est') and len(form) > 3:
            tag = '[最高级]'
        else:
            tag = None
        tagged.append(f"{form} {tag}" if tag else form)

    return ', '.join(tagged)


def enhance_adjective_inflections(inflection):
    """Tag comparative and superlative adjective forms.

    The field is split on ASCII or full-width commas. Forms ending in 'er'
    or starting with 'more ' are tagged comparative; forms ending in 'est'
    or starting with 'most ' are tagged superlative; worse/less/more and
    worst/least/most are recognised as irregular. Other forms, including
    empty entries, are kept untagged.

    Args:
        inflection: Raw inflection field; may be empty or None.

    Returns:
        The forms joined with ', ', or ``inflection`` itself when it is
        empty or None.
    """
    return _tag_degree_forms(inflection)


def enhance_adverb_inflections(inflection):
    """Tag comparative and superlative adverb forms.

    Uses the same rules as ``enhance_adjective_inflections``: 'er' suffix
    or 'more ' prefix is comparative, 'est' suffix or 'most ' prefix is
    superlative, plus the irregular worse/worst, less/least, more/most.

    Args:
        inflection: Raw inflection field; may be empty or None.

    Returns:
        The forms joined with ', ', or ``inflection`` itself when it is
        empty or None.
    """
    return _tag_degree_forms(inflection)
|
||
|
||
def enhance_general_inflections(inflection):
    """Fallback tagger for parts of speech without a dedicated handler.

    No tagging is performed yet: the argument is handed back untouched.
    """
    untouched = inflection
    return untouched
|
||
|
||
if __name__ == '__main__':
    # Script entry point: run the conversion with its default file names,
    # then report where the result was written.
    enhance_inflections()
    completion_message = "文件已处理完成,输出保存到 'enhanced_外研社七年级上.txt'"
    print(completion_message)