from typing import Callable, List

from labml.configs import BaseConfigs, option


class TokenizerConfigs(BaseConfigs):
    """Configuration holder for choosing a tokenizer.

    The single config item, ``tokenizer``, is a callable produced by one of
    the ``@option(TokenizerConfigs.tokenizer)`` functions in this module.
    """

    # NOTE(review): the string default presumably selects the option function
    # named `character` registered in this module; confirm against labml's
    # option-resolution rules.
    tokenizer: Callable = 'character'

    def __init__(self):
        # Tell the base class that `tokenizer` is this config's primary item.
        super().__init__(_primary='tokenizer')


@option(TokenizerConfigs.tokenizer)
def basic_english():
    """Return torchtext's ``'basic_english'`` tokenizer.

    Registered as an option for ``TokenizerConfigs.tokenizer``.
    """
    # Imported inside the function so torchtext is only loaded when this
    # option is actually evaluated.
    from torchtext.data import get_tokenizer
    return get_tokenizer('basic_english')


def character_tokenizer(x: str) -> List[str]:
    """Split *x* into a list of its individual characters."""
    return list(x)


# Character-level tokenizer configuration
@option(TokenizerConfigs.tokenizer)
def character():
    """Return the character-level tokenizer function.

    Registered as an option for ``TokenizerConfigs.tokenizer``; the returned
    callable is ``character_tokenizer`` itself, not the result of calling it.
    """
    return character_tokenizer