# Old Hungarian script (ISO 15924 code: Hung)
# Transliterate numbers and words
#
# Conventions used in this file:
# - Rules are tried from top to bottom; the first matching pattern wins.
# - In a replacement, \1 copies a captured group verbatim, while $1 runs the
#   whole rule set again on that group.
# - The input of a word is expected as "word 0" (see the LibreOffice note in
#   the number section), hence the trailing " 0" in the whole-word patterns.
# - Several patterns contain an invisible U+200B ZERO WIDTH SPACE (ZWSP) at
#   syllable boundaries. Every one of them is followed by "?" so that the
#   word matches with or without the ZWSP.

# convert words with traditional or foreign "i" written as "y"
# e.g. Áprily, Champs-Élysées, Élysée-palota, Dolly, Folly, Hollywood, jolly...
"^(Áp​?ri​?l|Champs-Él|[cC]i​?t|Do​?lák-Sa​?l|[dfhjDFHJ]ol​?l|Él|Fesz​?t|[gG]rizz​?l|Ha​?rasz​?t|Hat​?va​?n|Husz​?t|[iI]n​?ter​?ci​?t|Kéth​?l|Ku​?ko​?r​?el​?l|Mind​?szen​?t|Nosz​?t|[pP]en​?n|Pes​?t|Re​?gu​?l|So​?n|Szi​?l|Szte​?va​?no​?vi​?t|Thö​?kö​?l|Vö​?rös​?mar​?t|[zZ][lł]ot)y(.*) 0$" $1𐳐$2
"^(ÁP​?RI​?L|CHAMPS-ÉL|CI​?T|DO​?LÁK-SA​?L|[DFHJ]OL​?L|ÉL|FESZ​?T|GRIZZ​?L|HA​?RASZ​?T|HAT​?VA​?N|HUSZ​?T|IN​?TER​?CI​?T|KÉTH​?L|KU​?KO​?R​?EL​?L|MIND​?SZEN​?T|NOSZ​?T|PEN​?N|PES​?T|RE​?GU​?L|SO​?N|SZI​?L|SZTE​?VA​?NO​?VI​?T|THÖ​?KÖ​?L|VÖ​?RÖS​?MAR​?T|Z[LŁ]OT)Y(.*) 0$" $1𐲐$2

# if the original word ends in an unknown character, return it without
# modification (the character class is tested against the last character
# only; every letter that has a rule below, including í/Í, must be listed)
"^(.*[^-0-9a-zA-ZáéëíóöőúüűÁÉËÍÓÖŐÚÜŰ​–,„”\?\;]) 0$" \1

# words with y
# word-initial y pronounced as j (yard..., Yard..., Yucon...)
"^y(ard.*) 0$" 𐳒$1
"^Y([aA][rR][dD].*|[uU]​?[cC][oO][nN].*) 0$" 𐲒$1
# word-initial Y written as long Í (Ybl...)
"^Y([bB][lL].*) 0$" 𐲑$1
# word-initial Y written as I (Yvette...)
"^Y(vet​?te.*) 0$" 𐲐$1
# y pronounced as j after a vowel (boy, cowboy, display, gray, play...)
"^([bB]o|[cC]ow​?bo|[dD]isp​?la|[gG]ra|[pP]la)y(.*) 0$" $1𐳒$2
"^(BO|COW​?BO|DISP​?LA|GRA|PLA)Y(.*) 0$" $1𐲒$2

# don't transliterate other words with starting y
"(^[yY].*) 0$" \1

# don't transliterate words with q, but not with qu
"(^.*[qQ][^uU].*) 0$" \1

# avoid exceeding the recursion depth:
# convert long input in 200-character parts
(.{200})(.+) $1$2

# numbers

# remove space separated zero (in LibreOffice integration)
"(\d+) 0" $1

# helper: "N: a b" writes the digit N additively, where a is the sign of
# the unit and b is the sign of five units (0 gives an empty string)
"0: (.*) (.*)"
"1: (.*) (.*)" \1
"2: (.*) (.*)" \1\1
"3: (.*) (.*)" \1\1\1
"4: (.*) (.*)" \1\1\1\1
"5: (.*) (.*)" \2
"6: (.*) (.*)" \2\1
"7: (.*) (.*)" \2\1\1
"8: (.*) (.*)" \2\1\1\1
"9: (.*) (.*)" \2\1\1\1\1

# ones (signs for 1 and 5)
(\d) $(\1: 𐳺 𐳻)
# drop a leading zero, so that an empty digit group (e.g. the 005 of 2005
# or the 000 of 1000) does not emit a hundred or thousand sign
0(\d+) $1
# tens (signs for 10 and 50)
(\d)(\d) $(\1: 𐳼 𐳽)$2
# hundreds: the multiplier 1 is not written, so the hundred sign comes
# first and the remainder follows it; otherwise multiplier, sign, remainder
1(\d\d) 𐳾$1
(\d)(\d\d) $1𐳾$2
# thousands
1(\d\d\d)$ 𐳿$1
(\d{1,3})(\d\d\d) $1𐳿$2
# millions (two thousand signs)
1(\d{6})$ 𐳿𐳿$1
(\d{1,3})(\d{6}) $1𐳿𐳿$2
# billions (three thousand signs)
1(\d{9})$ 𐳿𐳿𐳿$1
(\d{1,3})(\d{9}) $1𐳿𐳿𐳿$2

# numbers with letters, for example dates with affixes
"(\d+)([^ ]+)" $1$2

# letters
# Each rule converts the first letter (or digraph/trigraph) and calls the
# rule set again on the rest. Longer letter groups must stay above the
# shorter ones they start with (ccs before cs before c, and so on).

# strip the space separated zero from a word
"^(.*) 0$" $1

a(.*) 𐳀$1
A(.*) 𐲀$1
á(.*) 𐳁$1
Á(.*) 𐲁$1
b(.*) 𐳂$1
B(.*) 𐲂$1
ccs(.*) 𐳆𐳆$1
CCS(.*) 𐲆𐲆$1
cs(.*) 𐳆$1
C[sS](.*) 𐲆$1
c(.*) 𐳄$1
C(.*) 𐲄$1
d(.*) 𐳇$1
D(.*) 𐲇$1
e(.*) 𐳉$1
E(.*) 𐲉$1
é(.*) 𐳋$1
É(.*) 𐲋$1
ä(.*) 𐳋$1
Ä(.*) 𐲋$1
ë(.*) 𐳊$1
Ë(.*) 𐲊$1
f(.*) 𐳌$1
F(.*) 𐲌$1
ggy(.*) 𐳎𐳎$1
GGY(.*) 𐲎𐲎$1
gy(.*) 𐳎$1
G[yY](.*) 𐲎$1
g(.*) 𐳍$1
G(.*) 𐲍$1
h(.*) 𐳏$1
H(.*) 𐲏$1
i(.*) 𐳐$1
I(.*) 𐲐$1
í(.*) 𐳑$1
Í(.*) 𐲑$1
j(.*) 𐳒$1
J(.*) 𐲒$1
k(.*) 𐳓$1
K(.*) 𐲓$1
lly(.*) 𐳗𐳗$1
LLY(.*) 𐲗𐲗$1
ly(.*) 𐳗$1
L[yY](.*) 𐲗$1
l(.*) 𐳖$1
L(.*) 𐲖$1
m(.*) 𐳘$1
M(.*) 𐲘$1
nny(.*) 𐳚𐳚$1
NNY(.*) 𐲚𐲚$1
ny(.*) 𐳚$1
N[yY](.*) 𐲚$1
n(.*) 𐳙$1
N(.*) 𐲙$1
o(.*) 𐳛$1
O(.*) 𐲛$1
ó(.*) 𐳜$1
Ó(.*) 𐲜$1
ö(.*) 𐳞$1
Ö(.*) 𐲞$1
ő(.*) 𐳟$1
Ő(.*) 𐲟$1
p(.*) 𐳠$1
P(.*) 𐲠$1
qu(.*) 𐳓𐳮$1 # qu->kv
Qu(.*) 𐲓𐳮$1 # Qu->Kv
QU(.*) 𐲓𐲮$1 # QU->KV
r(.*) 𐳢$1
R(.*) 𐲢$1
ssz(.*) 𐳥𐳥$1
SSZ(.*) 𐲥𐲥$1
sz(.*) 𐳥$1
S[zZ](.*) 𐲥$1
sch(.*) 𐳤$1
Sch(.*) 𐲤$1
s(.*) 𐳤$1
S(.*) 𐲤$1
tty(.*) 𐳨𐳨$1
TTY(.*) 𐲨𐲨$1
ty(.*) 𐳨$1
T[yY](.*) 𐲨$1
t(.*) 𐳦$1
T(.*) 𐲦$1
u(.*) 𐳪$1
U(.*) 𐲪$1
ú(.*) 𐳫$1
Ú(.*) 𐲫$1
ü(.*) 𐳭$1
Ü(.*) 𐲭$1
ű(.*) 𐳬$1
Ű(.*) 𐲬$1
[vw](.*) 𐳮$1
[VW](.*) 𐲮$1
x(.*) 𐳓𐳥$1 # x->ksz
# NOTE(review): the next two patterns have no capture group, so $1 has
# nothing to refer to - confirm how the Soros implementation in use expands
# it. Presumably the first handles a word that is a single X and the second
# an X that ends a word.
^X 𐲓𐳥$1 # X->KSz
X 𐲓𐲥$1 # X->KSZ
X([A-ZÁÉËÍÓÖŐÚÜŰ].*) 𐲓𐲥$1 # X->KSZ
X(.*) 𐲓𐳥$1 # X->Ksz
y(.*) 𐳐$1 # .+y->i
Y(.*) 𐲐$1 # .+Y->I
zzs(.*) 𐳰𐳰$1
ZZS(.*) 𐲰𐲰$1
zs(.*) 𐳰$1
Z[sS](.*) 𐲰$1
z(.*) 𐳯$1
Z(.*) 𐲯$1

# remove ZWSP (used for consonant disambiguation)
# (the pattern below starts with an invisible U+200B character)
​(.*) $1

# punctuation
”(.*) ‟$1
\;(.*) ⁏$1
\?(.*) ⸮$1
,(.*) ⹁$1
„(.*) ⹂$1

# don't modify unknown characters: copy one character and continue with
# the rest; the last rule returns whatever is left (including empty input)
(.)(.*) \1$2
(.*) \1