Wed, 29 Jul 2020 23:45:53 +0300
begin work on bus compiler
#!/usr/bin/env python3
"""Transliterate Finnish text into katakana.

The pipeline has three stages:

1. ``finnish_to_romaji`` rewrites Finnish letters into the romaji alphabet
   used by the kana table and drops every other character.
2. ``splice_romaji`` cuts the romaji string into table keys, longest match
   first, padding a stray consonant with ``'u'``.
3. ``splices_to_katakana`` looks every piece up in the kana table.

``transliterate`` is a ready-made callable that runs all three stages.
"""

import functools
import re


# why isn't this in functools...
# https://www.geeksforgeeks.org/function-composition-in-python/
def compose(*func):
    """Return the right-to-left composition of the given one-argument callables.

    ``compose(f, g)(x)`` evaluates ``f(g(x))``; ``compose()`` is the identity.
    """
    return functools.reduce(
        lambda f, g: lambda x: f(g(x)),
        func,
        lambda x: x,
    )


# mapping of romaji to katakana
RAW_KATAKANA_TABLE = {
    'a': 'ア',
    'ba': 'バ',
    'be': 'ベ',
    'bi': 'ビ',
    'bo': 'ボ',
    'bu': 'ブ',
    'bya': 'ビャ',
    'byo': 'ビョ',
    'byu': 'ビュ',
    'cha': 'チャ',
    'che': 'チェ',
    'chi': 'チ',
    'cho': 'チョ',
    'chu': 'チュ',
    'da': 'ダ',
    'de': 'デ',
    'di': 'ディ',
    'do': 'ド',
    'du': 'ドゥ',
    'dyu': 'デュ',
    'e': 'エ',
    'fa': 'ファ',
    'fe': 'フェ',
    'fi': 'フィ',
    'fo': 'フォ',
    'fu': 'フ',
    'fyu': 'フュ',
    'ga': 'ガ',
    'ge': 'ゲ',
    'gi': 'ギ',
    'go': 'ゴ',
    'gu': 'グ',
    'gya': 'ギャ',
    'gyo': 'ギョ',
    'gyu': 'ギュ',
    'ha': 'ハ',
    'he': 'ヘ',
    'hi': 'ヒ',
    'ho': 'ホ',
    'hya': 'ヒャ',
    'hyo': 'ヒョ',
    'hyu': 'ヒュ',
    'i': 'イ',
    'ja': 'ジャ',
    'je': 'ジェ',
    'ji': 'ジ',
    'jo': 'ジョ',
    'ju': 'ジュ',
    'ka': 'カ',
    'ke': 'ケ',
    'ki': 'キ',
    'ko': 'コ',
    'ku': 'ク',
    'kya': 'キャ',
    'kyo': 'キョ',
    'kyu': 'キュ',
    'ma': 'マ',
    'me': 'メ',
    'mi': 'ミ',
    'mo': 'モ',
    'mu': 'ム',
    'mya': 'ミャ',
    'myo': 'ミョ',
    'myu': 'ミュ',
    'na': 'ナ',
    'ne': 'ネ',
    'ni': 'ニ',
    'no': 'ノ',
    'nu': 'ヌ',
    'nya': 'ニャ',
    'nyo': 'ニョ',
    'nyu': 'ニュ',
    'n': 'ン',
    'o': 'オ',
    'pa': 'パ',
    'pe': 'ペ',
    'pi': 'ピ',
    'po': 'ポ',
    'pu': 'プ',
    'pya': 'ピャ',
    'pyo': 'ピョ',
    'pyu': 'ピュ',
    'ra': 'ラ',
    're': 'レ',
    'ri': 'リ',
    'ro': 'ロ',
    'ru': 'ル',
    'rya': 'リャ',
    'ryo': 'リョ',
    'ryu': 'リュ',
    'sa': 'サ',
    'se': 'セ',
    'so': 'ソ',
    'su': 'ス',
    'sha': 'シャ',
    'she': 'シェ',
    'shi': 'シ',
    'sho': 'ショ',
    'shu': 'シュ',
    'ta': 'タ',
    'te': 'テ',
    'ti': 'ティ',
    'to': 'ト',
    'tu': 'トゥ',
    'tsa': 'ツァ',
    'tse': 'ツェ',
    'tso': 'ツォ',
    'tsu': 'ツ',
    'tyu': 'テュ',
    'u': 'ウ',
    'va': 'ヴァ',
    've': 'ヴェ',
    'vi': 'ヴィ',
    'vo': 'ヴォ',
    'vu': 'ヴ',
    'wa': 'ワ',
    'we': 'ウェ',
    'wi': 'ウィ',
    'wo': 'ウォ',
    'ya': 'ヤ',
    'ye': 'イェ',
    # NOTE(review): 'yi' is kept exactly as it was; the value looks unusual
    # next to 'ye' -- confirm it is intended.
    'yi': 'ヤィ',
    'yo': 'ヨ',
    'yu': 'ユ',
    'za': 'ザ',
    'ze': 'ゼ',
    'zo': 'ゾ',
    'zu': 'ズ',
}

# Single-pass letter substitutions applied by finnish_to_romaji.  No
# replacement output is itself a substitution source, so one simultaneous
# pass gives the same result as applying the rules one after another.
_FINNISH_TO_ROMAJI = str.maketrans({
    'y': 'u',
    'w': 'v',
    'j': 'y',
    'l': 'r',
    'ä': 'a',
    'ö': 'o',
    'x': 'ks',
    'c': 'k',
    # 'q' has no kana entry (not even 'qu'), so without this rule any input
    # containing it ends in a KeyError during the table lookup.
    'q': 'k',
    'å': 'oo',
})

# Everything that is not a lowercase letter the pipeline understands.
_NON_ROMAJI = re.compile(r'[^a-zåäö]')


def full_katakana_table(raw_table):
    """Return a copy of *raw_table* extended with derived spellings.

    Added on top of the raw entries:

    * the aliases ``'hu'`` (same as ``'fu'``) and ``'si'`` (same as ``'shi'``);
    * a small-tsu variant for every multi-letter key, i.e. the key with its
      first letter doubled mapped to ``'ッ'`` plus the original kana;
    * a long-vowel variant for every key, i.e. the key with its last letter
      doubled mapped to the original kana plus ``'ー'``.

    *raw_table* itself is not modified.  It must contain ``'fu'`` and
    ``'shi'``; a ``KeyError`` is raised otherwise.
    """
    katakana = dict(raw_table)
    katakana['hu'] = katakana['fu']
    katakana['si'] = katakana['shi']

    # add small tsu versions (iterate over a snapshot: the dict grows)
    for latin in list(katakana):
        # we do not need a small tsu version for n because n is its own kana
        if len(latin) > 1 and latin[0] != 'n':
            katakana[latin[0] + latin] = 'ッ' + katakana[latin]

    # add long vowel versions, including for the small tsu keys added above
    for latin in list(katakana):
        if latin != 'n':
            katakana[latin + latin[-1]] = katakana[latin] + 'ー'

    return katakana


def katakana_keys(kana_table):
    """Return the keys of *kana_table* as a list, longest key first."""
    return sorted(kana_table, key=len)[::-1]


def finnish_to_romaji(finnish):
    """Translate Finnish text to Japanese romaji.

    The text is lowercased, Finnish letters are rewritten to their romaji
    counterparts and every remaining character outside ``a-z`` (spaces,
    digits, punctuation, ...) is removed.

    This does not fill in 'u' vowels after bare consonants; that is done by
    ``splice_romaji``.
    """
    return _NON_ROMAJI.sub('', finnish.lower().translate(_FINNISH_TO_ROMAJI))


def splice_romaji(romaji, keys):
    """Yield the kana-table keys that spell out *romaji*, in order.

    At each position the first entry of *keys* that matches is taken, so
    *keys* should be ordered longest first (see ``katakana_keys``).  When
    nothing matches, the current character is emitted with ``'u'`` appended
    and scanning continues after it.
    """
    position = 0
    while position < len(romaji):
        match = next(
            (key for key in keys if romaji.startswith(key, position)),
            None,
        )
        if match is None:
            # bare consonant: pad it with 'u' and consume one character
            yield romaji[position] + 'u'
            position += 1
        else:
            yield match
            position += len(match)


def splices_to_katakana(splices, katakana_table):
    """Join the kana for every key in *splices*.

    Raises ``KeyError`` if a splice is missing from *katakana_table*.
    """
    return ''.join(katakana_table[romaji] for romaji in splices)


class Transliterator:
    """Callable that turns Finnish text into katakana.

    The extended kana table and its length-ordered key list are built once
    in the constructor and reused by every call.
    """

    def __init__(self):
        self.cached_katakana_table = full_katakana_table(RAW_KATAKANA_TABLE)
        self.cached_katakana_keys = katakana_keys(self.cached_katakana_table)

    def __call__(self, finnish):
        """Return the katakana transliteration of *finnish*."""
        romaji = finnish_to_romaji(finnish)
        splices = splice_romaji(romaji, keys=self.cached_katakana_keys)
        return splices_to_katakana(
            splices,
            katakana_table=self.cached_katakana_table,
        )

    def __repr__(self):
        return 'Transliterator()'


transliterate = Transliterator()