katakana.py

Thu, 05 Nov 2020 14:52:50 +0200

author
Teemu Piippo <teemu@hecknology.net>
date
Thu, 05 Nov 2020 14:52:50 +0200
changeset 3
10ce28475e9c
parent 1
f9788970fa46
permissions
-rw-r--r--

things

0
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
1 #!/usr/bin/env python3
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
2
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
3 # why isn't this in functools...
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
4 # https://www.geeksforgeeks.org/function-composition-in-python/
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
def compose(*func):
    '''
    Compose the given callables right-to-left into a single function.

    compose(f, g, h)(x) evaluates f(g(h(x))). With no arguments the result
    is the identity function.

    The functions are applied in a plain loop instead of through nested
    lambdas, so the call depth stays constant no matter how many functions
    are composed.
    '''
    def composed(x):
        # the last function given is the innermost one, so it runs first
        for function in reversed(func):
            x = function(x)
        return x
    return composed
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
8
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
# mapping of romaji to katakana
# Only the base syllables are listed here; full_katakana_table() derives the
# doubled-consonant (small tsu) and long-vowel variants from these entries.
RAW_KATAKANA_TABLE = {
    'a': 'ア',
    'ba': 'バ',
    'be': 'ベ',
    'bi': 'ビ',
    'bo': 'ボ',
    'bu': 'ブ',
    'bya': 'ビャ',
    'byo': 'ビョ',
    'byu': 'ビュ',
    'cha': 'チャ',
    'che': 'チェ',
    'chi': 'チ',
    'cho': 'チョ',
    'chu': 'チュ',
    'da': 'ダ',
    'de': 'デ',
    'di': 'ディ',
    'do': 'ド',
    'du': 'ドゥ',
    'dyu': 'デュ',
    'e': 'エ',
    'fa': 'ファ',
    'fe': 'フェ',
    'fi': 'フィ',
    'fo': 'フォ',
    'fu': 'フ',
    'fyu': 'フュ',
    'ga': 'ガ',
    'ge': 'ゲ',
    'gi': 'ギ',
    'go': 'ゴ',
    'gu': 'グ',
    'gya': 'ギャ',
    'gyo': 'ギョ',
    'gyu': 'ギュ',
    'ha': 'ハ',
    'he': 'ヘ',
    'hi': 'ヒ',
    'ho': 'ホ',
    'hya': 'ヒャ',
    'hyo': 'ヒョ',
    'hyu': 'ヒュ',
    'i': 'イ',
    'ja': 'ジャ',
    'je': 'ジェ',
    'ji': 'ジ',
    'jo': 'ジョ',
    'ju': 'ジュ',
    'ka': 'カ',
    'ke': 'ケ',
    'ki': 'キ',
    'ko': 'コ',
    'ku': 'ク',
    'kya': 'キャ',
    'kyo': 'キョ',
    'kyu': 'キュ',
    'ma': 'マ',
    'me': 'メ',
    'mi': 'ミ',
    'mo': 'モ',
    'mu': 'ム',
    'mya': 'ミャ',
    'myo': 'ミョ',
    'myu': 'ミュ',
    'na': 'ナ',
    'ne': 'ネ',
    'ni': 'ニ',
    'no': 'ノ',
    'nu': 'ヌ',
    'nya': 'ニャ',
    'nyo': 'ニョ',
    'nyu': 'ニュ',
    'n': 'ン',
    'o': 'オ',
    'pa': 'パ',
    'pe': 'ペ',
    'pi': 'ピ',
    'po': 'ポ',
    'pu': 'プ',
    'pya': 'ピャ',
    'pyo': 'ピョ',
    'pyu': 'ピュ',
    'ra': 'ラ',
    're': 'レ',
    'ri': 'リ',
    'ro': 'ロ',
    'ru': 'ル',
    'rya': 'リャ',
    'ryo': 'リョ',
    'ryu': 'リュ',
    'sa': 'サ',
    'se': 'セ',
    'so': 'ソ',
    'su': 'ス',
    'sha': 'シャ',
    'she': 'シェ',
    'shi': 'シ',
    'sho': 'ショ',
    'shu': 'シュ',
    'ta': 'タ',
    'te': 'テ',
    'ti': 'ティ',
    'to': 'ト',
    'tu': 'トゥ',
    'tsa': 'ツァ',
    'tse': 'ツェ',
    'tso': 'ツォ',
    'tsu': 'ツ',
    'tyu': 'テュ',
    'u': 'ウ',
    'va': 'ヴァ',
    've': 'ヴェ',
    'vi': 'ヴィ',
    'vo': 'ヴォ',
    'vu': 'ヴ',
    'wa': 'ワ',
    'we': 'ウェ',
    'wi': 'ウィ',
    'wo': 'ウォ',
    'ya': 'ヤ',
    'ye': 'イェ',
    # NOTE(review): 'yi' is more commonly written イィ - confirm ヤィ is intended
    'yi': 'ヤィ',
    'yo': 'ヨ',
    'yu': 'ユ',
    'za': 'ザ',
    'ze': 'ゼ',
    'zo': 'ゾ',
    'zu': 'ズ',
}
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
140
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
def full_katakana_table(raw_table):
    '''
    adds small tsu and long vowel variants to the katakana table

    Returns a new table; raw_table itself is left untouched. The result
    contains, in addition to the raw entries:

    - 'hu' and 'si' as alternative spellings of 'fu' and 'shi' (each only
      when the raw table has the entry being aliased)
    - a doubled-consonant key for every multi-letter key not starting
      with 'n', e.g. 'kka', mapped to 'ッ' + the original kana
    - a long vowel key for every key except 'n', made by repeating the
      last letter, e.g. 'kaa', mapped to the original kana + 'ー'
    '''
    from copy import copy
    katakana = copy(raw_table)
    # alternative spellings, skipped if the table lacks the kana they alias
    for alias, original in (('hu', 'fu'), ('si', 'shi')):
        if original in katakana:
            katakana[alias] = katakana[original]
    # add small tsu versions
    # (iterate over a snapshot of the keys because the table grows in the loop)
    for latin in list(katakana):
        if len(latin) > 1 and latin[0] != 'n':
            # we do not need a small tsu version for n because n is its own kana
            katakana[latin[0] + latin] = 'ッ' + katakana[latin]
    # add long vowel versions, including for the small tsu keys added above
    for latin in list(katakana):
        if latin != 'n':
            katakana[latin + latin[-1]] = katakana[latin] + 'ー'
    return katakana
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
159
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
def katakana_keys(kana_table):
    '''
    Returns the keys of the kana table as a list, longest keys first.

    Keys of equal length come out in the reverse of their order in the
    table, because the list is sorted by ascending length and then
    reversed as a whole.
    '''
    return sorted(kana_table, key=len)[::-1]
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
162
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
# Letter substitutions applied by finnish_to_romaji(). They are applied in a
# single pass, so the 'y' produced from 'j' is not turned into 'u' again.
_FINNISH_TO_ROMAJI = str.maketrans({
    'y': 'u',
    'w': 'v',
    'j': 'y',
    'l': 'r',
    'ä': 'a',
    'ö': 'o',
    'x': 'ks',
    'c': 'k',
    # no kana in the table starts with q, so fold it into k
    'q': 'k',
    'å': 'oo',
})


def finnish_to_romaji(finnish):
    '''
    translates finnish text to Japanese romaji

    The text is lowercased, the letter substitutions in _FINNISH_TO_ROMAJI
    are applied, and every character other than a-z is then dropped
    (including spaces and punctuation).

    does not, however, fill in 'u' vowels to consonants, that is done
    by the splice_romaji function
    '''
    from re import sub
    substituted = finnish.lower().translate(_FINNISH_TO_ROMAJI)
    return sub(r'[^a-zåäö]', '', substituted)
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
180
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
def splice_romaji(romaji, keys):
    '''
    Splits a romaji string into pieces, yielding one piece at a time.

    At each position the keys are tried in the given order and the first
    one the remaining text starts with is yielded, so pass the keys
    longest first to get the longest possible match. If no key matches,
    the single character at that position is yielded with 'u' appended
    (this is how a consonant without a vowel gets its 'u') and splitting
    continues after it.

    Empty keys are ignored: they would match everywhere without consuming
    any input.
    '''
    candidates = [key for key in keys if key]
    position = 0
    while position < len(romaji):
        for key in candidates:
            if romaji.startswith(key, position):
                yield key
                position += len(key)
                break
        else:
            # nothing in the table starts here, fill in the 'u' vowel
            yield romaji[position] + 'u'
            position += 1
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
191
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
def splices_to_katakana(splices, katakana_table):
    '''
    Looks up each romaji piece in the katakana table and joins the
    results into one string.

    Raises KeyError if a piece is not in the table.
    '''
    return ''.join(katakana_table[romaji] for romaji in splices)
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
195
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
class Transliterator:
    '''
    Callable that transliterates finnish text into katakana.

    The full katakana table and its key list are built once when the
    object is created and reused for every call.
    '''
    def __init__(self):
        self.cached_katakana_table = full_katakana_table(RAW_KATAKANA_TABLE)
        self.cached_katakana_keys = katakana_keys(self.cached_katakana_table)

    def __call__(self, finnish):
        # finnish text -> romaji -> romaji pieces -> katakana
        romaji = finnish_to_romaji(finnish)
        splices = splice_romaji(romaji, keys=self.cached_katakana_keys)
        return splices_to_katakana(splices, katakana_table=self.cached_katakana_table)

    def __repr__(self):
        return 'Transliterator()'
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
208
659ab465152e initial commit
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
# module-level instance, so the tables are built once at import time;
# call it as transliterate(text)
transliterate = Transliterator()

mercurial