Fri, 05 Feb 2021 12:16:29 +0200
update
0 | 1 | #!/usr/bin/env python3 |
2 | ||
# why isn't this in functools...
# https://www.geeksforgeeks.org/function-composition-in-python/
def compose(*func):
    '''
    composes the given callables right-to-left

    compose(f, g, h)(x) evaluates f(g(h(x))). Called without arguments it
    returns the identity function. The last callable is the first one to
    run and may take any positional and keyword arguments; every other
    callable receives the single value returned by the one after it.
    '''
    import functools
    if not func:
        return lambda x: x
    def chain(outer, inner):
        # forward every argument to the inner callable, then feed its
        # result to the outer one
        return lambda *args, **kwargs: outer(inner(*args, **kwargs))
    return functools.reduce(chain, func)
8 | ||
# mapping of romaji to katakana
# Only the plain syllables are listed here; the doubled-consonant (small tsu)
# and long vowel spellings are derived from these entries at run time.
RAW_KATAKANA_TABLE = {
    'a': 'ア',
    'ba': 'バ',
    'be': 'ベ',
    'bi': 'ビ',
    'bo': 'ボ',
    'bu': 'ブ',
    'bya': 'ビャ',
    'byo': 'ビョ',
    'byu': 'ビュ',
    'cha': 'チャ',
    'che': 'チェ',
    'chi': 'チ',
    'cho': 'チョ',
    'chu': 'チュ',
    'da': 'ダ',
    'de': 'デ',
    'di': 'ディ',
    'do': 'ド',
    'du': 'ドゥ',
    'dyu': 'デュ',
    'e': 'エ',
    'fa': 'ファ',
    'fe': 'フェ',
    'fi': 'フィ',
    'fo': 'フォ',
    'fu': 'フ',
    'fyu': 'フュ',
    'ga': 'ガ',
    'ge': 'ゲ',
    'gi': 'ギ',
    'go': 'ゴ',
    'gu': 'グ',
    'gya': 'ギャ',
    'gyo': 'ギョ',
    'gyu': 'ギュ',
    'ha': 'ハ',
    'he': 'ヘ',
    'hi': 'ヒ',
    'ho': 'ホ',
    'hya': 'ヒャ',
    'hyo': 'ヒョ',
    'hyu': 'ヒュ',
    'i': 'イ',
    'ja': 'ジャ',
    'je': 'ジェ',
    'ji': 'ジ',
    'jo': 'ジョ',
    'ju': 'ジュ',
    'ka': 'カ',
    'ke': 'ケ',
    'ki': 'キ',
    'ko': 'コ',
    'ku': 'ク',
    'kya': 'キャ',
    'kyo': 'キョ',
    'kyu': 'キュ',
    'ma': 'マ',
    'me': 'メ',
    'mi': 'ミ',
    'mo': 'モ',
    'mu': 'ム',
    'mya': 'ミャ',
    'myo': 'ミョ',
    'myu': 'ミュ',
    'na': 'ナ',
    'ne': 'ネ',
    'ni': 'ニ',
    'no': 'ノ',
    'nu': 'ヌ',
    'nya': 'ニャ',
    'nyo': 'ニョ',
    'nyu': 'ニュ',
    'n': 'ン',
    'o': 'オ',
    'pa': 'パ',
    'pe': 'ペ',
    'pi': 'ピ',
    'po': 'ポ',
    'pu': 'プ',
    'pya': 'ピャ',
    'pyo': 'ピョ',
    'pyu': 'ピュ',
    'ra': 'ラ',
    're': 'レ',
    'ri': 'リ',
    'ro': 'ロ',
    'ru': 'ル',
    'rya': 'リャ',
    'ryo': 'リョ',
    'ryu': 'リュ',
    'sa': 'サ',
    'se': 'セ',
    'so': 'ソ',
    'su': 'ス',
    'sha': 'シャ',
    'she': 'シェ',
    'shi': 'シ',
    'sho': 'ショ',
    'shu': 'シュ',
    'ta': 'タ',
    'te': 'テ',
    'ti': 'ティ',
    'to': 'ト',
    'tu': 'トゥ',
    'tsa': 'ツァ',
    'tse': 'ツェ',
    'tso': 'ツォ',
    'tsu': 'ツ',
    'tyu': 'テュ',
    'u': 'ウ',
    'va': 'ヴァ',
    've': 'ヴェ',
    'vi': 'ヴィ',
    'vo': 'ヴォ',
    'vu': 'ヴ',
    'wa': 'ワ',
    'we': 'ウェ',
    'wi': 'ウィ',
    'wo': 'ウォ',
    'ya': 'ヤ',
    'ye': 'イェ',
    # NOTE(review): 'yi' is spelled ya + small i here, while 'ye' uses
    # i + small e; possibly meant to be 'イィ' - confirm before changing
    'yi': 'ヤィ',
    'yo': 'ヨ',
    'yu': 'ユ',
    'za': 'ザ',
    'ze': 'ゼ',
    'zo': 'ゾ',
    'zu': 'ズ',
}
140 | ||
def full_katakana_table(raw_table):
    '''
    adds small tsu and long vowel variants to the katakana table

    raw_table maps romaji syllables to katakana. A shallow copy is extended
    and returned, raw_table itself is left untouched. The copy gains:
    - the aliases 'hu' (same as 'fu') and 'si' (same as 'shi'), when the
      syllable they point to is present
    - for every key longer than one letter that does not start with 'n',
      a key with the first letter doubled, mapped to 'ッ' + the kana
    - for every key other than 'n' (including the ones added above),
      a key with the last letter doubled, mapped to the kana + 'ー'
    '''
    from copy import copy
    katakana = copy(raw_table)
    # alternative spellings; skipped when the table has nothing to point at
    for alias, source in (('hu', 'fu'), ('si', 'shi')):
        if source in katakana:
            katakana[alias] = katakana[source]
    # add small tsu versions
    # list() takes a snapshot of the keys so the table can grow in the loop
    for latin in list(katakana):
        if len(latin) > 1 and latin[0] != 'n':
            # we do not need a small tsu version for n because n is its own kana
            katakana[latin[0] + latin] = 'ッ' + katakana[latin]
    # add long vowel versions
    for latin in list(katakana):
        # an empty key has no last letter to double
        if latin and latin != 'n':
            katakana[latin + latin[-1]] = katakana[latin] + 'ー'
    return katakana
159 | ||
def katakana_keys(kana_table):
    '''
    returns the keys of kana_table as a list, longest keys first

    The keys are sorted by length and the sorted list is then reversed, so
    keys of equal length come out in the reverse of their order in the table.
    '''
    return sorted(kana_table, key = len)[::-1]
162 | ||
# letter substitutions applied by finnish_to_romaji, all in a single pass
_FINNISH_TO_ROMAJI = str.maketrans({
    'y': 'u',
    'w': 'v',
    'j': 'y',
    'l': 'r',
    'ä': 'a',
    'ö': 'o',
    'x': 'ks',
    'c': 'k',
    # 'q' has no syllable in the katakana table; left as it is, the
    # fallback splice 'qu' cannot be looked up
    'q': 'k',
    'å': 'oo',
})

def finnish_to_romaji(finnish):
    '''
    translates finnish text to Japanese romaji

    The text is lowercased, the letters listed in _FINNISH_TO_ROMAJI are
    substituted and everything that is not a letter a-z afterwards
    (spaces, digits, punctuation, other accented letters) is dropped.
    '''
    # does not, however, fill in 'u' vowels to consonants, that is done
    # by the splice_romaji function
    from re import sub
    substituted = finnish.lower().translate(_FINNISH_TO_ROMAJI)
    # å, ä and ö have been substituted above, so only a-z can remain
    return sub(r'[^a-z]', '', substituted)
180 | ||
def splice_romaji(romaji, keys):
    '''
    splits romaji into pieces, yielding them one by one

    At each position the first key in keys that the remaining text starts
    with is yielded, so keys has to list longer keys before their prefixes
    for the longest match to win. When no key matches, the next character
    is yielded with a 'u' appended and the scan moves one character on.
    '''
    # an empty key matches everywhere without consuming anything, which
    # would keep the loop from ever advancing
    keys = [key for key in keys if key]
    position = 0
    while position < len(romaji):
        for key in keys:
            if romaji.startswith(key, position):
                yield key
                position += len(key)
                break
        else:
            # no syllable starts here: fill in a 'u' after the character
            yield romaji[position] + 'u'
            position += 1
191 | ||
def splices_to_katakana(splices, katakana_table):
    '''
    looks every splice up in katakana_table and joins the results

    Raises KeyError for a splice that is not in the table.
    '''
    return ''.join(katakana_table[romaji] for romaji in splices)
195 | ||
class Transliterator:
    '''
    callable that transliterates finnish text to katakana

    The full katakana table and its key list are built once when the
    object is created and reused by every call.
    '''
    def __init__(self):
        self.cached_katakana_table = full_katakana_table(RAW_KATAKANA_TABLE)
        self.cached_katakana_keys = katakana_keys(self.cached_katakana_table)
    def __call__(self, finnish):
        # finnish text -> romaji -> romaji splices -> katakana
        romaji = finnish_to_romaji(finnish)
        splices = splice_romaji(romaji, keys = self.cached_katakana_keys)
        return splices_to_katakana(splices, katakana_table = self.cached_katakana_table)
    def __repr__(self):
        return 'Transliterator()'
208 | ||
# module-level instance, so that transliterate(text) can be called directly
transliterate = Transliterator()