181 lines
4.1 KiB
Java
181 lines
4.1 KiB
Java
![]() |
# Old Hungarian script (ISO 15924 code: Hung)
|
|||
|
|
|||
|
# Transliterate numbers and words
|
|||
|
|
|||
|
# convert words with traditional or foreign "i" written as "y"
|
|||
|
# e.g. Áprily, Champs-Élysées, Élysée-palota, Dolly, Folly, Hollywood, jolly...
|
|||
|
"^(Áp?ri?l|Champs-Él|[cC]i?t|Do?lák-Sa?l|[dfhjDFHJ]ol?l|Él|Fesz?t|[gG]rizz?l|Ha?rasz?t|Hat?va?n|Husz?t|[iI]n?ter?ci?t|Kéth?l|Ku?ko?r?el?l|Mind?szen?t|Nosz?t|[pP]enn|Pes?t|Re?gu?l|So?n|Szi?l|Szte?va?no?vi?t|Thö?kö?l|Vö?rös?mar?t|[zZ][lł]ot)y(.*) 0$" $1𐳐$2
|
|||
|
"^(ÁP?RI?L|CHAMPS-ÉL|CI?T|DO?LÁK-SA?L|[DFHJ]OL?L|ÉL|FESZ?T|GRIZZ?L|HA?RASZ?T|HAT?VA?N|HUSZ?T|IN?TER?CI?T|KÉTH?L|KU?KO?R?EL?L|MIND?SZEN?T|NOSZ?T|PEN?N|PES?T|REGU?L|SON|SZI?L|SZTE?VA?NO?VI?T|THÖ?KÖ?L|VÖ?RÖS?MAR?T|Z[LŁ]OT)Y(.*) 0$" $1𐲐$2
|
|||
|
# if the original word contains an unknown character, return without modification
|
|||
|
"^(.*[^-0-9a-zA-ZáéëóöőúüűÁÉËÓÖŐÚÜŰ–,„”\?\;]) 0$" \1
|
|||
|
# words with y
|
|||
|
"^y(ard.*) 0$" 𐳒$1
|
|||
|
"^Y([aA][rR][dD].*|[uU][cC][oO][nN].*) 0$" 𐲒$1
|
|||
|
"^Y([bB][lL].*) 0$" 𐲑$1
|
|||
|
"^Y(vet?te.*) 0$" 𐲐$1
|
|||
|
"^([bB]o|[cC]owbo|[dD]ispla|[gG]ra|[pP]la)y(.*) 0$" $1𐳒$2
|
|||
|
"^(BO|COWBO|DISPLA|GRA|PLA)Y(.*) 0$" $1𐲒$2
|
|||
|
# don't transliterate other words starting with y
|
|||
|
"(^[yY].*) 0$" \1
|
|||
|
# don't transliterate words containing a q that is not followed by u
|
|||
|
"(^.*[qQ][^uU].*) 0$" \1
|
|||
|
|
|||
|
# avoid exceeding the recursion depth
|
|||
|
# convert by 200-character parts
|
|||
|
(.{200})(.+) $1$2
|
|||
|
|
|||
|
# numbers
|
|||
|
|
|||
|
# remove space separated zero (in LibreOffice integration)
|
|||
|
"(\d+) 0" $1
|
|||
|
|
|||
|
"0: (.*) (.*)"
|
|||
|
"1: (.*) (.*)" \1
|
|||
|
"2: (.*) (.*)" \1\1
|
|||
|
"3: (.*) (.*)" \1\1\1
|
|||
|
"4: (.*) (.*)" \1\1\1\1
|
|||
|
"5: (.*) (.*)" \2
|
|||
|
"6: (.*) (.*)" \2\1
|
|||
|
"7: (.*) (.*)" \2\1\1
|
|||
|
"8: (.*) (.*)" \2\1\1\1
|
|||
|
"9: (.*) (.*)" \2\1\1\1\1
|
|||
|
|
|||
|
# strip leading zeros, so that zero groups produced by the recursive calls of
# the hundred/thousand rules below (e.g. the "000" of 1000) emit no signs
"0+"
0+([1-9]\d*) $1
# units: repeat the sign of one, with the sign of five for 5-9
(\d) $(\1: 𐳺 𐳻)
|
|||
|
(\d)(\d) $(\1: 𐳼 𐳽)$2
|
|||
|
1(\d\d) $1𐳾
|
|||
|
(\d)(\d\d) $1𐳾$2
|
|||
|
1(\d\d\d)$ $1𐳿
|
|||
|
(\d{1,3})(\d\d\d) $1𐳿$2
|
|||
|
1(\d{6})$ $1𐳿𐳿
|
|||
|
(\d{1,3})(\d{6}) $1𐳿𐳿$2
|
|||
|
1(\d{9})$ $1𐳿𐳿𐳿
|
|||
|
(\d{1,3})(\d{9}) $1𐳿𐳿𐳿$2
|
|||
|
|
|||
|
# numbers with letters, for example dates with affixes
|
|||
|
|
|||
|
"(\d+)([^ ]+)" $1$2
|
|||
|
|
|||
|
# letters
|
|||
|
|
|||
|
"^(.*) 0$" $1
|
|||
|
a(.*) 𐳀$1
|
|||
|
A(.*) 𐲀$1
|
|||
|
á(.*) 𐳁$1
|
|||
|
Á(.*) 𐲁$1
|
|||
|
b(.*) 𐳂$1
|
|||
|
B(.*) 𐲂$1
|
|||
|
ccs(.*) 𐳆𐳆$1
|
|||
|
CCS(.*) 𐲆𐲆$1
|
|||
|
cs(.*) 𐳆$1
|
|||
|
C[sS](.*) 𐲆$1
|
|||
|
c(.*) 𐳄$1
|
|||
|
C(.*) 𐲄$1
|
|||
|
d(.*) 𐳇$1
|
|||
|
D(.*) 𐲇$1
|
|||
|
e(.*) 𐳉$1
|
|||
|
E(.*) 𐲉$1
|
|||
|
é(.*) 𐳋$1
|
|||
|
É(.*) 𐲋$1
|
|||
|
ä(.*) 𐳋$1
|
|||
|
Ä(.*) 𐲋$1
|
|||
|
ë(.*) 𐳊$1
|
|||
|
Ë(.*) 𐲊$1
|
|||
|
f(.*) 𐳌$1
|
|||
|
F(.*) 𐲌$1
|
|||
|
ggy(.*) 𐳎𐳎$1
|
|||
|
GGY(.*) 𐲎𐲎$1
|
|||
|
gy(.*) 𐳎$1
|
|||
|
G[yY](.*) 𐲎$1
|
|||
|
g(.*) 𐳍$1
|
|||
|
G(.*) 𐲍$1
|
|||
|
h(.*) 𐳏$1
|
|||
|
H(.*) 𐲏$1
|
|||
|
i(.*) 𐳐$1
|
|||
|
I(.*) 𐲐$1
|
|||
|
í(.*) 𐳑$1
|
|||
|
Í(.*) 𐲑$1
|
|||
|
j(.*) 𐳒$1
|
|||
|
J(.*) 𐲒$1
|
|||
|
k(.*) 𐳓$1
|
|||
|
K(.*) 𐲓$1
|
|||
|
lly(.*) 𐳗𐳗$1
|
|||
|
LLY(.*) 𐲗𐲗$1
|
|||
|
ly(.*) 𐳗$1
|
|||
|
L[yY](.*) 𐲗$1
|
|||
|
l(.*) 𐳖$1
|
|||
|
L(.*) 𐲖$1
|
|||
|
m(.*) 𐳘$1
|
|||
|
M(.*) 𐲘$1
|
|||
|
nny(.*) 𐳚𐳚$1
|
|||
|
NNY(.*) 𐲚𐲚$1
|
|||
|
ny(.*) 𐳚$1
|
|||
|
# capital digraph NY (Ny or NY), matching the G[yY], L[yY] and T[yY] rules
N[yY](.*) 𐲚$1
|
|||
|
n(.*) 𐳙$1
|
|||
|
N(.*) 𐲙$1
|
|||
|
o(.*) 𐳛$1
|
|||
|
O(.*) 𐲛$1
|
|||
|
ó(.*) 𐳜$1
|
|||
|
Ó(.*) 𐲜$1
|
|||
|
ö(.*) 𐳞$1
|
|||
|
Ö(.*) 𐲞$1
|
|||
|
ő(.*) 𐳟$1
|
|||
|
Ő(.*) 𐲟$1
|
|||
|
p(.*) 𐳠$1
|
|||
|
P(.*) 𐲠$1
|
|||
|
qu(.*) 𐳓𐳮$1 # qu->kv
|
|||
|
Qu(.*) 𐲓𐳮$1 # Qu->Kv
|
|||
|
QU(.*) 𐲓𐲮$1 # QU->KV
|
|||
|
r(.*) 𐳢$1
|
|||
|
R(.*) 𐲢$1
|
|||
|
ssz(.*) 𐳥𐳥$1
|
|||
|
SSZ(.*) 𐲥𐲥$1
|
|||
|
sz(.*) 𐳥$1
|
|||
|
S[zZ](.*) 𐲥$1
|
|||
|
sch(.*) 𐳤$1
|
|||
|
Sch(.*) 𐲤$1
|
|||
|
s(.*) 𐳤$1
|
|||
|
S(.*) 𐲤$1
|
|||
|
tty(.*) 𐳨𐳨$1
|
|||
|
TTY(.*) 𐲨𐲨$1
|
|||
|
ty(.*) 𐳨$1
|
|||
|
T[yY](.*) 𐲨$1
|
|||
|
t(.*) 𐳦$1
|
|||
|
T(.*) 𐲦$1
|
|||
|
u(.*) 𐳪$1
|
|||
|
U(.*) 𐲪$1
|
|||
|
ú(.*) 𐳫$1
|
|||
|
Ú(.*) 𐲫$1
|
|||
|
ü(.*) 𐳭$1
|
|||
|
Ü(.*) 𐲭$1
|
|||
|
ű(.*) 𐳬$1
|
|||
|
Ű(.*) 𐲬$1
|
|||
|
[vw](.*) 𐳮$1
|
|||
|
[VW](.*) 𐲮$1
|
|||
|
x(.*) 𐳓𐳥$1 # x->ksz
|
|||
|
# single X at the beginning of the input: nothing follows, so there is no group to recurse on
^X 𐲓𐳥 # X->KSz
|
|||
|
# X as the last remaining character: nothing follows, so there is no group to recurse on
X 𐲓𐲥 # X->KSZ
|
|||
|
X([A-ZÁÉËÍÓÖŐÚÜŰ].*) 𐲓𐲥$1 # X->KSZ
|
|||
|
X(.*) 𐲓𐳥$1 # X->Ksz
|
|||
|
y(.*) 𐳐$1 # .+y->i
|
|||
|
Y(.*) 𐲐$1 # .+Y->I
|
|||
|
zzs(.*) 𐳰𐳰$1
|
|||
|
ZZS(.*) 𐲰𐲰$1
|
|||
|
zs(.*) 𐳰$1
|
|||
|
Z[sS](.*) 𐲰$1
|
|||
|
z(.*) 𐳯$1
|
|||
|
Z(.*) 𐲯$1
|
|||
|
|
|||
|
# remove ZWSP (used for consonant disambiguation)
|
|||
|
(.*) $1
|
|||
|
|
|||
|
# punctuation
|
|||
|
”(.*) ‟$1
|
|||
|
\;(.*) ⁏$1
|
|||
|
\?(.*) ⸮$1
|
|||
|
,(.*) ⹁$1
|
|||
|
„(.*) ⹂$1
|
|||
|
|
|||
|
# don't modify unknown characters
|
|||
|
(.)(.*) \1$2
|
|||
|
(.*) \1
|