|
1 #!/usr/bin/env python3 |
|
2 |
|
3 # why isn't this in functools... |
|
4 # https://www.geeksforgeeks.org/function-composition-in-python/ |
|
def compose(*func):
    '''
    Compose the given one-argument callables right-to-left.

    compose(f, g, h)(x) evaluates f(g(h(x))); compose() with no
    arguments returns an identity function.
    '''
    from functools import reduce

    def identity(x):
        return x

    def chain(outer, inner):
        # wrap the pair so that `inner` runs first and `outer` sees its result
        def chained(x):
            return outer(inner(x))
        return chained

    return reduce(chain, func, identity)
|
8 |
|
9 # mapping of romaji to katakana |
|
# Base romaji -> katakana syllables, grouped by leading consonant.
# Small-tsu (doubled consonant) and long-vowel variants are not listed here.
RAW_KATAKANA_TABLE = {
    'a': 'ア',
    'ba': 'バ', 'be': 'ベ', 'bi': 'ビ', 'bo': 'ボ', 'bu': 'ブ',
    'bya': 'ビャ', 'byo': 'ビョ', 'byu': 'ビュ',
    'cha': 'チャ', 'che': 'チェ', 'chi': 'チ', 'cho': 'チョ', 'chu': 'チュ',
    'da': 'ダ', 'de': 'デ', 'di': 'ディ', 'do': 'ド', 'du': 'ドゥ',
    'dyu': 'デュ',
    'e': 'エ',
    'fa': 'ファ', 'fe': 'フェ', 'fi': 'フィ', 'fo': 'フォ', 'fu': 'フ',
    'fyu': 'フュ',
    'ga': 'ガ', 'ge': 'ゲ', 'gi': 'ギ', 'go': 'ゴ', 'gu': 'グ',
    'gya': 'ギャ', 'gyo': 'ギョ', 'gyu': 'ギュ',
    'ha': 'ハ', 'he': 'ヘ', 'hi': 'ヒ', 'ho': 'ホ',
    'hya': 'ヒャ', 'hyo': 'ヒョ', 'hyu': 'ヒュ',
    'i': 'イ',
    'ja': 'ジャ', 'je': 'ジェ', 'ji': 'ジ', 'jo': 'ジョ', 'ju': 'ジュ',
    'ka': 'カ', 'ke': 'ケ', 'ki': 'キ', 'ko': 'コ', 'ku': 'ク',
    'kya': 'キャ', 'kyo': 'キョ', 'kyu': 'キュ',
    'ma': 'マ', 'me': 'メ', 'mi': 'ミ', 'mo': 'モ', 'mu': 'ム',
    'mya': 'ミャ', 'myo': 'ミョ', 'myu': 'ミュ',
    'na': 'ナ', 'ne': 'ネ', 'ni': 'ニ', 'no': 'ノ', 'nu': 'ヌ',
    'nya': 'ニャ', 'nyo': 'ニョ', 'nyu': 'ニュ',
    'n': 'ン',
    'o': 'オ',
    'pa': 'パ', 'pe': 'ペ', 'pi': 'ピ', 'po': 'ポ', 'pu': 'プ',
    'pya': 'ピャ', 'pyo': 'ピョ', 'pyu': 'ピュ',
    'ra': 'ラ', 're': 'レ', 'ri': 'リ', 'ro': 'ロ', 'ru': 'ル',
    'rya': 'リャ', 'ryo': 'リョ', 'ryu': 'リュ',
    'sa': 'サ', 'se': 'セ', 'so': 'ソ', 'su': 'ス',
    'sha': 'シャ', 'she': 'シェ', 'shi': 'シ', 'sho': 'ショ', 'shu': 'シュ',
    'ta': 'タ', 'te': 'テ', 'ti': 'ティ', 'to': 'ト', 'tu': 'トゥ',
    'tsa': 'ツァ', 'tse': 'ツェ', 'tso': 'ツォ', 'tsu': 'ツ',
    'tyu': 'テュ',
    'u': 'ウ',
    'va': 'ヴァ', 've': 'ヴェ', 'vi': 'ヴィ', 'vo': 'ヴォ', 'vu': 'ヴ',
    'wa': 'ワ', 'we': 'ウェ', 'wi': 'ウィ', 'wo': 'ウォ',
    # 'yi' is written i + small i, parallel to 'ye' (i + small e);
    # the previous value 'ヤィ' would read as "ya" followed by a small "i".
    'ya': 'ヤ', 'ye': 'イェ', 'yi': 'イィ', 'yo': 'ヨ', 'yu': 'ユ',
    'za': 'ザ', 'ze': 'ゼ', 'zo': 'ゾ', 'zu': 'ズ',
}
|
140 |
|
def full_katakana_table(raw_table):
    '''
    Build the complete romaji -> katakana lookup table from a raw table.

    Works on a shallow copy, so raw_table itself is left untouched.
    On top of the raw entries the result contains:

    * the aliases 'hu' (same kana as 'fu') and 'si' (same kana as 'shi');
    * a small-tsu variant of every multi-letter key that does not start
      with 'n': the first letter is doubled and 'ッ' is prepended
      ('ka' -> 'kka': 'ッカ');
    * a long-vowel variant of every key that ends in a vowel: the last
      letter is doubled and 'ー' is appended ('ka' -> 'kaa': 'カー').

    Raises KeyError if raw_table has no 'fu' or no 'shi' entry.
    '''
    from copy import copy

    vowels = ('a', 'e', 'i', 'o', 'u')

    katakana = copy(raw_table)
    katakana['hu'] = katakana['fu']
    katakana['si'] = katakana['shi']

    # add small tsu versions (iterate over a snapshot: the dict grows in the loop)
    for latin in list(katakana):
        if len(latin) > 1 and latin[0] != 'n':
            # we do not need a small tsu version for n because n is its own kana
            katakana[latin[0] + latin] = 'ッ' + katakana[latin]

    # add long vowel versions
    for latin in list(katakana):
        # Only vowel-final syllables can be lengthened. Without this check the
        # bare 'n' produced a bogus key 'nn' -> 'ンー', which the longest-match
        # splitter then preferred over 'n' + 'na' (e.g. "anna" -> アンーア).
        if latin.endswith(vowels):
            katakana[latin + latin[-1]] = katakana[latin] + 'ー'

    return katakana
|
158 |
|
def katakana_keys(kana_table):
    '''
    Return the table's keys as a list ordered from longest to shortest.

    Keys of equal length come out in the reverse of their order in
    kana_table.
    '''
    ordered = sorted(kana_table.keys(), key=len)
    ordered.reverse()
    return ordered
|
161 |
|
# Module-level lookup table, built once at import time from RAW_KATAKANA_TABLE.
katakana_table = full_katakana_table(RAW_KATAKANA_TABLE)
|
163 |
|
def finnish_to_romaji(finnish):
    # Converts Finnish text into the romaji alphabet used by the kana table:
    # the text is lower-cased, Finnish letters are swapped for their romaji
    # counterparts, and everything that is not a letter is dropped
    # (spaces, digits and punctuation included).
    # Bare consonants are NOT given a 'u' vowel here; splice_romaji does that.
    # Other ASCII letters (c, q, x, ...) pass through unchanged.
    import re

    # single-pass substitution; none of the replacement letters is itself a
    # source letter that would need a second replacement
    letter_map = str.maketrans({
        'y': 'u',
        'w': 'v',
        'j': 'y',
        'l': 'r',
        'ä': 'a',
        'ö': 'o',
        'å': 'oo',
    })
    replaced = finnish.lower().translate(letter_map)
    return re.sub(r'[^a-zåäö]', '', replaced)
|
179 |
|
def splice_romaji(romaji, keys):
    # Generator that cuts romaji into splices from left to right.
    # At each position the first entry of keys that the remaining text starts
    # with is emitted (so keys should be ordered with the preferred, i.e.
    # longest, candidates first). When no key fits, the single leading letter
    # is emitted with a filler 'u' appended; that fallback is not checked
    # against keys.
    rest = romaji
    while rest:
        hit = next((candidate for candidate in keys if rest.startswith(candidate)), None)
        if hit is None:
            yield rest[0] + 'u'
            rest = rest[1:]
        else:
            yield hit
            rest = rest[len(hit):]
|
190 |
|
def splices_to_katakana(splices, katakana_table):
    # Looks every romaji splice up in katakana_table and concatenates the
    # resulting kana into one string. A splice that is missing from the
    # table raises KeyError.
    return ''.join(katakana_table[piece] for piece in splices)
|
194 |
|
class Transliterator:
    '''
    Callable that turns Finnish text into katakana.

    The expanded kana table and its key list (longest keys first) are
    built once per instance and reused for every call.
    '''

    def __init__(self):
        table = full_katakana_table(RAW_KATAKANA_TABLE)
        self.cached_katakana_table = table
        self.cached_katakana_keys = katakana_keys(table)

    def __call__(self, finnish):
        # three stages: Finnish -> romaji -> romaji splices -> katakana
        romaji = finnish_to_romaji(finnish)
        splices = splice_romaji(romaji, keys=self.cached_katakana_keys)
        return splices_to_katakana(splices, katakana_table=self.cached_katakana_table)

    def __repr__(self):
        return 'Transliterator()'
|
207 |
|
# Shared, ready-to-use Transliterator instance; call it as transliterate(text).
transliterate = Transliterator()