181 lines
4.2 KiB
Java
181 lines
4.2 KiB
Java
# This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to
|
|
# include country and encoding
|
|
#
|
|
# TO-DO: convert to BCP-47
|
|
#
|
|
# Guess strings are formed as follows: language-country-encoding (the country part may be empty, e.g. en--utf8)
|
|
#
|
|
# Based on a sample config file for the language models provided with Gertjan
|
|
# van Noord's language guesser (http://odur.let.rug.nl/~vannoord/TextCat/)
|
|
#
|
|
# Notes:
|
|
# - Putting the most probable languages at the top of the list
|
|
# improves performance, because this will raise the threshold for
|
|
# likely candidates more quickly.
|
|
#
|
|
# Top 10 http://www.ethnologue.com/ethno_docs/distribution.asp?by=size
|
|
zh-Hans.lm zh-CN-utf8 #zh-Hans
|
|
es.lm es--utf8
|
|
en.lm en--utf8
|
|
ar.lm ar--utf8
|
|
hi.lm hi--utf8
|
|
bn.lm bn--utf8
|
|
pt.lm pt--utf8
|
|
ru.lm ru--utf8
|
|
ja.lm ja--utf8
|
|
de.lm de--utf8
|
|
|
|
ab.lm ab--utf8
|
|
ace.lm ace--utf8
|
|
ada.lm ada--utf8
|
|
af.lm af--utf8
|
|
ak.lm ak--utf8
|
|
alt.lm alt--utf8
|
|
am.lm am--utf8
|
|
arn.lm arn--utf8
|
|
ast.lm ast--utf8
|
|
az.lm az--utf8 #az-Latn
|
|
az-Cyrl.lm az-cyrillic-utf8
|
|
ay.lm ay--utf8
|
|
ban.lm ban--utf8
|
|
be.lm be--utf8
|
|
bem.lm bem--utf8
|
|
bg.lm bg--utf8
|
|
bho.lm bho--utf8
|
|
bi.lm bi--utf8
|
|
bik.lm bik--utf8
|
|
bm.lm bm--utf8
|
|
bo.lm bo--utf8
|
|
br.lm br--utf8
|
|
bs.lm bs--utf8 #Suppress-Script: Latn
|
|
ca.lm ca--utf8
|
|
ckb.lm ckb--utf8
|
|
cs.lm cs--utf8
|
|
cy.lm cy--utf8
|
|
da.lm da--utf8
|
|
dv.lm dv--utf8
|
|
ee.lm ee--utf8
|
|
el.lm el--utf8
|
|
emk-Latn.lm emk-Latn-utf8
|
|
eo.lm eo--utf8
|
|
et.lm et--utf8
|
|
eu.lm eu--utf8
|
|
fa.lm fa--utf8
|
|
fi.lm fi--utf8
|
|
fj.lm fj--utf8
|
|
fo.lm fo--utf8
|
|
fr.lm fr--utf8
|
|
fur.lm fur--utf8
|
|
fy.lm fy--utf8
|
|
ga.lm ga--utf8
|
|
gd.lm gd--utf8
|
|
gl.lm gl--utf8
|
|
grc.lm grc--utf8
|
|
gu.lm gu--utf8
|
|
gug.lm gug--utf8
|
|
gv.lm gv--utf8
|
|
ha-NG.lm ha-NG-utf8
|
|
haw.lm haw--utf8
|
|
he.lm he--utf8
|
|
hil.lm hil--utf8
|
|
hr.lm hr--utf8 #Suppress-Script: Latn
|
|
hsb.lm hsb--utf8
|
|
ht.lm ht--utf8
|
|
hu.lm hu--utf8
|
|
hy.lm hy--utf8
|
|
ia.lm ia--utf8
|
|
id.lm id--utf8
|
|
is.lm is--utf8
|
|
it.lm it--utf8
|
|
ka.lm ka--utf8
|
|
kk.lm kk--utf8
|
|
kl.lm kl--utf8
|
|
km.lm km--utf8
|
|
kn.lm kn--utf8
|
|
kng.lm kng--utf8
|
|
ko.lm ko--utf8
|
|
ktu.lm ktu--utf8
|
|
ky.lm ky--utf8
|
|
la.lm la--utf8
|
|
lb.lm lb--utf8
|
|
lg.lm lg--utf8
|
|
ln.lm ln--utf8
|
|
lo.lm lo--utf8
|
|
lt.lm lt--utf8
|
|
lv.lm lv--utf8
|
|
mai.lm mai--utf8
|
|
mi.lm mi--utf8
|
|
mk.lm mk--utf8
|
|
ml.lm ml--utf8
|
|
mn.lm mn--utf8 #mn-Cyrl
|
|
mos.lm mos--utf8
|
|
mr.lm mr--utf8
|
|
ms.lm ms--utf8 #ms-Latn
|
|
mt.lm mt--utf8
|
|
my.lm my--utf8
|
|
nb.lm nb--utf8
|
|
nds.lm nds--utf8
|
|
ne.lm ne--utf8
|
|
nl.lm nl--utf8
|
|
nn.lm nn--utf8
|
|
nr.lm nr--utf8
|
|
nso.lm nso--utf8
|
|
ny.lm ny--utf8
|
|
oc.lm oc--utf8
|
|
om.lm om--utf8
|
|
pa.lm pa--utf8
|
|
pl.lm pl--utf8
|
|
plt.lm plt--utf8
|
|
quz.lm quz--utf8
|
|
qxa.lm qxa--utf8
|
|
rm.lm rm--utf8
|
|
ro.lm ro--utf8
|
|
rue.lm rue--utf8
|
|
rw.lm rw--utf8
|
|
sa.lm sa--utf8
|
|
sc.lm sc--utf8
|
|
sco.lm sco--utf8
|
|
sd.lm sd--utf8 #sd-Arab
|
|
se.lm se--utf8
|
|
sg.lm sg--utf8
|
|
shs.lm shs--utf8
|
|
si.lm si--utf8
|
|
sk.lm sk--utf8
|
|
sl.lm sl--utf8
|
|
so.lm so--utf8
|
|
sq.lm sq--utf8
|
|
sr-Cyrl.lm sr--utf8 #sr-Cyrl
|
|
sr-Latn.lm sh--utf8 #sr-Latn
|
|
ss.lm ss--utf8
|
|
st.lm st--utf8
|
|
sv.lm sv--utf8
|
|
sw.lm sw--utf8
|
|
ta.lm ta--utf8
|
|
tet.lm tet--utf8
|
|
tg.lm tg--utf8
|
|
th.lm th--utf8
|
|
ti.lm ti--utf8
|
|
tk.lm tk--utf8 #tk-Latn
|
|
tl.lm tl--utf8
|
|
tn.lm tn--utf8
|
|
tpi.lm tpi--utf8
|
|
tr.lm tr--utf8
|
|
ts.lm ts--utf8
|
|
tt.lm tt--utf8
|
|
ty.lm ty--utf8
|
|
tzm-Latn.lm tzm-Latn-utf8
|
|
ug.lm ug--utf8 #ug-Arab
|
|
uk.lm uk--utf8
|
|
ur.lm ur--utf8
|
|
uz.lm uz--utf8 #uz-Latn
|
|
uz-Cyrl.lm uz-Cyrl-utf8
|
|
ve.lm ve--utf8
|
|
vep.lm vep--utf8
|
|
vi.lm vi--utf8
|
|
wa.lm wa--utf8
|
|
xh.lm xh--utf8
|
|
yi.lm yi--utf8
|
|
yo.lm yo--utf8
|
|
zh-Hant.lm zh-TW-utf8 #zh-Hant
|
|
zu.lm zu--utf8
|