file-online-preview/server/libreoffice/share/numbertext/hu_Hung.sor

181 lines
4.1 KiB
Java
Raw Normal View History

2021-06-23 02:26:22 +00:00
# Old Hungarian script (ISO 15924 code: Hung)
# Transliterate numbers and words
# convert words with traditional or foreign "i" written as "y"
# e.g. Áprily, Champs-Élysées, Élysée-palota, Dolly, Folly, Hollywood, jolly...
# The listed stem ($1) and the rest of the word ($2) are converted further;
# the y/Y between them is written with the sign that the letter rules use
# for i/I. The first rule covers lower-case and capitalized spellings, the
# second one all-capital spellings.
# NOTE(review): many "?" quantifiers follow syllable boundaries (Áp?ri?l,
# Hat?va?n, ...); presumably each one makes an invisible hyphenation
# character optional. Verify that character is still present in the file,
# otherwise the "?" makes the preceding letter optional instead.
"^(Áp?ri?l|Champs-Él|[cC]i?t|Do?lák-Sa?l|[dfhjDFHJ]ol?l|Él|Fesz?t|[gG]rizz?l|Ha?rasz?t|Hat?va?n|Husz?t|[iI]n?ter?ci?t|Kéth?l|Ku?ko?r?el?l|Mind?szen?t|Nosz?t|[pP]enn|Pes?t|Re?gu?l|So?n|Szi?l|Szte?va?no?vi?t|Thö?kö?l|Vö?rös?mar?t|[zZ][lł]ot)y(.*) 0$" $1𐳐$2
"^(ÁP?RI?L|CHAMPS-ÉL|CI?T|DO?LÁK-SA?L|[DFHJ]OL?L|ÉL|FESZ?T|GRIZZ?L|HA?RASZ?T|HAT?VA?N|HUSZ?T|IN?TER?CI?T|KÉTH?L|KU?KO?R?EL?L|MIND?SZEN?T|NOSZ?T|PEN?N|PES?T|REGU?L|SON|SZI?L|SZTE?VA?NO?VI?T|THÖ?KÖ?L|VÖ?RÖS?MAR?T|Z[LŁ]OT)Y(.*) 0$" $1𐲐$2
# If the original word ends in a character outside the supported set, return
# it without modification (\1 copies the matched text as is; it is not
# converted again the way $1 would be).
# Only the last character before the trailing " 0" is tested by this pattern.
# í/Í and ä/Ä belong to the supported set because the letter rules further
# down transliterate them; without them a word such as "sí" was returned
# untransliterated.
"^(.*[^-0-9a-zA-ZáäéëíóöőúüűÁÄÉËÍÓÖŐÚÜŰ,„”\?\;]) 0$" \1
# words with y
# word-initial y/Y written with the sign that the letter rules use for j/J
# (yard..., Yard..., Yucon...)
"^y(ard.*) 0$" 𐳒$1
"^Y([aA][rR][dD].*|[uU][cC][oO][nN].*) 0$" 𐲒$1
# word-initial Y before "bl" written with the sign used for Í
"^Y([bB][lL].*) 0$" 𐲑$1
# word-initial Y before "vette" written with the sign used for I
# NOTE(review): the "?" in "vet?te" presumably follows an invisible
# hyphenation character; confirm it is present in the file
"^Y(vet?te.*) 0$" 𐲐$1
# y after bo, cowbo, displa, gra, pla written with the sign used for j/J
"^([bB]o|[cC]owbo|[dD]ispla|[gG]ra|[pP]la)y(.*) 0$" $1𐳒$2
"^(BO|COWBO|DISPLA|GRA|PLA)Y(.*) 0$" $1𐲒$2
# don't transliterate any other word starting with y:
# \1 returns the word unchanged (without the trailing " 0")
"(^[yY].*) 0$" \1
# don't transliterate words that contain a q which is not followed by u
"(^.*[qQ][^uU].*) 0$" \1
# avoid exceeding the recursion depth:
# convert long input in 200-character parts
(.{200})(.+) $1$2
# numbers
# remove space separated zero (in LibreOffice integration)
"(\d+) 0" $1
# Helper rules, called below as $(digit: one-sign five-sign):
# \1 is the sign worth one unit of the position, \2 the sign worth five.
# The digit 0 produces nothing, 1-4 repeat the one-sign, 5-9 start with the
# five-sign followed by the remaining one-signs.
"0: (.*) (.*)"
"1: (.*) (.*)" \1
"2: (.*) (.*)" \1\1
"3: (.*) (.*)" \1\1\1
"4: (.*) (.*)" \1\1\1\1
"5: (.*) (.*)" \2
"6: (.*) (.*)" \2\1
"7: (.*) (.*)" \2\1\1
"8: (.*) (.*)" \2\1\1\1
"9: (.*) (.*)" \2\1\1\1\1
# units (signs for 1 and 5) and tens (signs for 10 and 50)
(\d) $(\1: 𐳺 𐳻)
(\d)(\d) $(\1: 𐳼 𐳽)$2
# Drop leading zeros of longer digit groups before the hundred/thousand
# rules see them. Without this rule a group such as "000" matched the
# hundreds rule and produced a bare hundred sign, so 1000 and 1100 gave the
# same output and 2000 ended in a spurious hundred sign.
0(\d+) $1
# Hundreds: multiplier, hundred sign, remainder. For 100-199 the multiplier 1
# is omitted, so the hundred sign comes first, followed by the remainder
# (previously the sign was emitted after the remainder, which made 115 look
# like "15 x 100" under the multiplier-sign-remainder order used below).
1(\d\d) 𐳾$1
(\d)(\d\d) $1𐳾$2
# Thousands, millions (thousand sign twice) and milliards (thousand sign
# three times) follow the same scheme. The rules with the omitted multiplier
# 1 keep their trailing "$", which limits them to the end of the input.
1(\d\d\d)$ 𐳿$1
(\d{1,3})(\d\d\d) $1𐳿$2
1(\d{6})$ 𐳿𐳿$1
(\d{1,3})(\d{6}) $1𐳿𐳿$2
1(\d{9})$ 𐳿𐳿𐳿$1
(\d{1,3})(\d{9}) $1𐳿𐳿𐳿$2
# numbers with letters, for example dates with affixes
"(\d+)([^ ]+)" $1$2
# letters
# strip the trailing " 0" and convert the word itself
"^(.*) 0$" $1
# Each rule below consumes the leading letter (or letter group) of the
# remaining text, writes its Old Hungarian sign and passes the rest of the
# text on through $1. Every letter has a lower-case rule followed by an
# upper-case rule.
a(.*) 𐳀$1
A(.*) 𐲀$1
á(.*) 𐳁$1
Á(.*) 𐲁$1
b(.*) 𐳂$1
B(.*) 𐲂$1
# c: the doubled digraph (ccs) and the digraph (cs) are listed before the
# single letter so that the longer spelling is tried first
ccs(.*) 𐳆𐳆$1
CCS(.*) 𐲆𐲆$1
cs(.*) 𐳆$1
C[sS](.*) 𐲆$1
c(.*) 𐳄$1
C(.*) 𐲄$1
d(.*) 𐳇$1
D(.*) 𐲇$1
e(.*) 𐳉$1
E(.*) 𐲉$1
é(.*) 𐳋$1
É(.*) 𐲋$1
# ä/Ä are written with the same sign as é/É
ä(.*) 𐳋$1
Ä(.*) 𐲋$1
ë(.*) 𐳊$1
Ë(.*) 𐲊$1
f(.*) 𐳌$1
F(.*) 𐲌$1
# g: ggy and gy before the single letter
ggy(.*) 𐳎𐳎$1
GGY(.*) 𐲎𐲎$1
gy(.*) 𐳎$1
G[yY](.*) 𐲎$1
g(.*) 𐳍$1
G(.*) 𐲍$1
h(.*) 𐳏$1
H(.*) 𐲏$1
i(.*) 𐳐$1
I(.*) 𐲐$1
í(.*) 𐳑$1
Í(.*) 𐲑$1
j(.*) 𐳒$1
J(.*) 𐲒$1
k(.*) 𐳓$1
K(.*) 𐲓$1
# l: lly and ly before the single letter
lly(.*) 𐳗𐳗$1
LLY(.*) 𐲗𐲗$1
ly(.*) 𐳗$1
L[yY](.*) 𐲗$1
l(.*) 𐳖$1
L(.*) 𐲖$1
m(.*) 𐳘$1
M(.*) 𐲘$1
# n: nny and ny are listed before the single letter so that the digraph is
# tried first. The upper-case digraph accepts both "Ny" and "NY", like the
# G[yY], L[yY] and T[yY] rules of the other y-digraphs (the class used to be
# [ny], which missed "NY" and turned a leading "Nn" into the ny sign).
nny(.*) 𐳚𐳚$1
NNY(.*) 𐲚𐲚$1
ny(.*) 𐳚$1
N[yY](.*) 𐲚$1
n(.*) 𐳙$1
N(.*) 𐲙$1
# Same scheme as for the preceding letters: write the sign of the leading
# letter (or letter group) and continue with the rest of the text in $1.
o(.*) 𐳛$1
O(.*) 𐲛$1
ó(.*) 𐳜$1
Ó(.*) 𐲜$1
ö(.*) 𐳞$1
Ö(.*) 𐲞$1
ő(.*) 𐳟$1
Ő(.*) 𐲟$1
p(.*) 𐳠$1
P(.*) 𐲠$1
# q is handled only as part of qu, which is written with the signs of k and v
qu(.*) 𐳓𐳮$1 # qu->kv
Qu(.*) 𐲓𐳮$1 # Qu->Kv
QU(.*) 𐲓𐲮$1 # QU->KV
r(.*) 𐳢$1
R(.*) 𐲢$1
# s: ssz and sz before the single letter; sch/Sch is written with the sign of s
ssz(.*) 𐳥𐳥$1
SSZ(.*) 𐲥𐲥$1
sz(.*) 𐳥$1
S[zZ](.*) 𐲥$1
sch(.*) 𐳤$1
Sch(.*) 𐲤$1
s(.*) 𐳤$1
S(.*) 𐲤$1
# t: tty and ty before the single letter
tty(.*) 𐳨𐳨$1
TTY(.*) 𐲨𐲨$1
ty(.*) 𐳨$1
T[yY](.*) 𐲨$1
t(.*) 𐳦$1
T(.*) 𐲦$1
u(.*) 𐳪$1
U(.*) 𐲪$1
ú(.*) 𐳫$1
Ú(.*) 𐲫$1
ü(.*) 𐳭$1
Ü(.*) 𐲭$1
ű(.*) 𐳬$1
Ű(.*) 𐲬$1
# v and w share one sign
[vw](.*) 𐳮$1
[VW](.*) 𐲮$1
# x is written with the signs of k and sz.
x(.*) 𐳓𐳥$1 # x->ksz
# A lone X: these two patterns have no capture group, so the replacement must
# not refer to $1 (it used to, which is an invalid group reference).
# At the beginning of the input the sz sign is lower-case, elsewhere (the
# last letter of a longer word) it is upper-case.
^X 𐲓𐳥 # X->KSz
X 𐲓𐲥 # X->KSZ
# X followed by an upper-case letter: both signs upper-case
X([A-ZÁÉËÍÓÖŐÚÜŰ].*) 𐲓𐲥$1 # X->KSZ
# X followed by anything else: upper-case k sign, lower-case sz sign
X(.*) 𐲓𐳥$1 # X->Ksz
# a y/Y that was not consumed by an earlier digraph or exception rule is
# written with the sign of i/I
y(.*) 𐳐$1 # .+y->i
Y(.*) 𐲐$1 # .+Y->I
# z: zzs and zs are listed before the single letter so that the longer
# spelling is tried first
zzs(.*) 𐳰𐳰$1
ZZS(.*) 𐲰𐲰$1
zs(.*) 𐳰$1
Z[sS](.*) 𐲰$1
z(.*) 𐳯$1
Z(.*) 𐲯$1
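# remove ZWSP (zero-width space, U+200B; used for consonant disambiguation)
# NOTE(review): the pattern of the following rule is expected to begin with
# the invisible U+200B character. Verify that the character is really present
# in the file: a bare (.*) pattern would match every input and hide all the
# rules after it.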
(.*) $1
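# punctuation
# NOTE(review): the first and the last rule of this group show no visible
# character in front of (.*), and none of the rules shows a character in front
# of $1. Confirm that no (possibly invisible) character was lost in copying.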
(.*) $1
\;(.*) $1
\?(.*) $1
,(.*) $1
(.*) $1
# don't modify unknown characters:
# copy the first character unchanged (\1) and continue converting the rest ($2)
(.)(.*) \1$2
# last rule: matches whatever the rule above could not (in practice the empty
# remainder at the end of the input) and returns it unchanged
(.*) \1