katakana.py

changeset 0
659ab465152e
child 1
f9788970fa46
equal deleted inserted replaced
-1:000000000000 0:659ab465152e
1 #!/usr/bin/env python3
2
3 # why isn't this in functools...
4 # https://www.geeksforgeeks.org/function-composition-in-python/
def compose(*func):
    '''
    Chain the given one-argument callables into a single callable.

    The functions are applied right to left, so compose(f, g)(x) is
    f(g(x)). With no functions at all the result is the identity.
    '''
    def composed(value):
        # the last function listed is the innermost one, so it runs first
        for step in reversed(func):
            value = step(value)
        return value
    return composed
8
9 # mapping of romaji to katakana
RAW_KATAKANA_TABLE = {
    # entries are grouped by their leading latin letter; plain syllables
    # only, the doubled-consonant and long-vowel forms are derived later
    'a': 'ア',
    # b
    'ba': 'バ', 'be': 'ベ', 'bi': 'ビ', 'bo': 'ボ', 'bu': 'ブ',
    'bya': 'ビャ', 'byo': 'ビョ', 'byu': 'ビュ',
    # ch
    'cha': 'チャ', 'che': 'チェ', 'chi': 'チ', 'cho': 'チョ', 'chu': 'チュ',
    # d
    'da': 'ダ', 'de': 'デ', 'di': 'ディ', 'do': 'ド', 'du': 'ドゥ',
    'dyu': 'デュ',
    'e': 'エ',
    # f
    'fa': 'ファ', 'fe': 'フェ', 'fi': 'フィ', 'fo': 'フォ', 'fu': 'フ',
    'fyu': 'フュ',
    # g
    'ga': 'ガ', 'ge': 'ゲ', 'gi': 'ギ', 'go': 'ゴ', 'gu': 'グ',
    'gya': 'ギャ', 'gyo': 'ギョ', 'gyu': 'ギュ',
    # h
    'ha': 'ハ', 'he': 'ヘ', 'hi': 'ヒ', 'ho': 'ホ',
    'hya': 'ヒャ', 'hyo': 'ヒョ', 'hyu': 'ヒュ',
    'i': 'イ',
    # j
    'ja': 'ジャ', 'je': 'ジェ', 'ji': 'ジ', 'jo': 'ジョ', 'ju': 'ジュ',
    # k
    'ka': 'カ', 'ke': 'ケ', 'ki': 'キ', 'ko': 'コ', 'ku': 'ク',
    'kya': 'キャ', 'kyo': 'キョ', 'kyu': 'キュ',
    # m
    'ma': 'マ', 'me': 'メ', 'mi': 'ミ', 'mo': 'モ', 'mu': 'ム',
    'mya': 'ミャ', 'myo': 'ミョ', 'myu': 'ミュ',
    # n (the bare 'n' is a kana of its own)
    'na': 'ナ', 'ne': 'ネ', 'ni': 'ニ', 'no': 'ノ', 'nu': 'ヌ',
    'nya': 'ニャ', 'nyo': 'ニョ', 'nyu': 'ニュ',
    'n': 'ン',
    'o': 'オ',
    # p
    'pa': 'パ', 'pe': 'ペ', 'pi': 'ピ', 'po': 'ポ', 'pu': 'プ',
    'pya': 'ピャ', 'pyo': 'ピョ', 'pyu': 'ピュ',
    # r
    'ra': 'ラ', 're': 'レ', 'ri': 'リ', 'ro': 'ロ', 'ru': 'ル',
    'rya': 'リャ', 'ryo': 'リョ', 'ryu': 'リュ',
    # s / sh
    'sa': 'サ', 'se': 'セ', 'so': 'ソ', 'su': 'ス',
    'sha': 'シャ', 'she': 'シェ', 'shi': 'シ', 'sho': 'ショ', 'shu': 'シュ',
    # t / ts
    'ta': 'タ', 'te': 'テ', 'ti': 'ティ', 'to': 'ト', 'tu': 'トゥ',
    'tsa': 'ツァ', 'tse': 'ツェ', 'tso': 'ツォ', 'tsu': 'ツ',
    'tyu': 'テュ',
    'u': 'ウ',
    # v
    'va': 'ヴァ', 've': 'ヴェ', 'vi': 'ヴィ', 'vo': 'ヴォ', 'vu': 'ヴ',
    # w
    'wa': 'ワ', 'we': 'ウェ', 'wi': 'ウィ', 'wo': 'ウォ',
    # y
    # NOTE(review): 'yi' is spelled ヤィ here; イィ would parallel 'ye'
    # (イェ) -- confirm which spelling is intended
    'ya': 'ヤ', 'ye': 'イェ', 'yi': 'ヤィ', 'yo': 'ヨ', 'yu': 'ユ',
    # z
    'za': 'ザ', 'ze': 'ゼ', 'zo': 'ゾ', 'zu': 'ズ',
}
140
141 def full_katakana_table(raw_table):
142 '''
143 adds small tsu and long vowel variants to the katakana table
144 '''
145 from copy import copy
146 katakana = copy(raw_table)
147 katakana['hu'] = katakana['fu']
148 katakana['si'] = katakana['shi']
149 # add small tsu versions
150 for latin in copy(list(katakana.keys())):
151 if len(latin) > 1 and latin[0] != 'n':
152 # we do not need a small tsu version for n because n is its own kana
153 katakana[latin[0] + latin] = 'ッ' + katakana[latin]
154 # add long vowel versions
155 for latin in copy(list(katakana.keys())):
156 katakana[latin + latin[-1]] = katakana[latin] + 'ー'
157 return katakana
158
def katakana_keys(kana_table):
    '''
    Return the table's keys as a list ordered from longest to shortest.

    The list is the ascending length-sorted key list turned around, so
    keys of equal length come out in the reverse of their table order.
    '''
    ordered = sorted(kana_table.keys(), key = len)
    ordered.reverse()
    return ordered
161
# module-level expanded table, built once at import time from the raw table
# NOTE(review): none of the other definitions visible in this file read this
# global (Transliterator builds its own copy) -- presumably kept for
# importers; confirm before removing
katakana_table = full_katakana_table(RAW_KATAKANA_TABLE)
163
def finnish_to_romaji(finnish):
    '''
    Translate Finnish text to Japanese romaji.

    The text is lower-cased, the letters y, w, j, l, ä, ö and å are
    respelled, and every character that is not a lower-case letter is
    removed (spaces, digits and punctuation included).

    This does not fill in 'u' vowels after lone consonants; that is
    done by the splice_romaji function.
    '''
    import re
    # every respelling is applied to the original letters in one pass,
    # so the 'y' produced from 'j' is not turned into 'u' afterwards
    respell = str.maketrans({
        'y': 'u',
        'w': 'v',
        'j': 'y',
        'l': 'r',
        'ä': 'a',
        'ö': 'o',
        'å': 'oo',
    })
    respelled = finnish.lower().translate(respell)
    return re.sub(r'[^a-zåäö]', '', respelled)
179
def splice_romaji(romaji, keys):
    '''
    Generator that cuts a romaji string into syllable-sized pieces.

    At each position the first entry of keys that the remaining text
    starts with is yielded and consumed; keys is searched in the order
    given. When nothing matches, the next single character is consumed
    and yielded with a 'u' appended. That fallback piece is not checked
    against keys.
    '''
    remaining = romaji
    while remaining:
        match = next((key for key in keys if remaining.startswith(key)), None)
        if match is None:
            # lone consonant: pad it with the filler vowel
            yield remaining[0] + 'u'
            remaining = remaining[1:]
        else:
            yield match
            remaining = remaining[len(match):]
190
def splices_to_katakana(splices, katakana_table):
    '''
    Look every romaji piece up in katakana_table and concatenate the
    results into one string. A piece missing from the table raises
    KeyError.
    '''
    return ''.join(katakana_table[piece] for piece in splices)
194
class Transliterator:
    '''
    Callable object that turns Finnish text into katakana.

    The expanded kana table and its longest-first key list are built
    once in the constructor and reused by every call.
    '''
    def __init__(self):
        table = full_katakana_table(RAW_KATAKANA_TABLE)
        self.cached_katakana_table = table
        self.cached_katakana_keys = katakana_keys(table)

    def __call__(self, finnish):
        # three stages: respell as romaji, cut into syllables, map to kana
        romaji = finnish_to_romaji(finnish)
        pieces = splice_romaji(romaji, keys = self.cached_katakana_keys)
        return splices_to_katakana(pieces, katakana_table = self.cached_katakana_table)

    def __repr__(self):
        return 'Transliterator()'
207
# ready-made module-level instance, so the module can be used as
# transliterate(finnish_text); its tables are built once, at import time
transliterate = Transliterator()

mercurial