Module:languages/data/2: Difference between revisions

From Linguifex
Jump to navigation Jump to search
m 1 revision imported
No edit summary
Tag: Manual revert
 
Line 20: Line 20:
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz"
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz"


s["ka-stripdiacritics"] = {remove_diacritics = c.circ}
s["ka-entryname"] = {remove_diacritics = c.circ}


s["no-sortkey"] = {
s["no-sortkey"] = {
Line 31: Line 31:
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc


s["sa-Deva-stripdiacritics"] = { -- Don't use remove_diacritics for accent marks, as १ and ३ should also be removed if (and only if) they carry any.
s["tg-entryname"] = {remove_diacritics = c.grave .. c.acute}
from = {"[१३]?[" .. c.anudatta .. c.udatta .. c.dsvarita .. c.tsvarita .. "]+"},
to = {""},
}


s["tg-stripdiacritics"] = {remove_diacritics = c.grave .. c.acute}
s["tk-entryname"] = {remove_diacritics = c.macron}
 
s["tk-stripdiacritics"] = {remove_diacritics = c.macron}


local m = {}
local m = {}
Line 47: Line 42:
"cus-eas",
"cus-eas",
"Latn, Ethi",
"Latn, Ethi",
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.acute},
Latn = {remove_diacritics = c.acute},
},
},
Line 59: Line 54:
translit = {
translit = {
Cyrl = "ab-translit",
Cyrl = "ab-translit",
-- Geor translit in [[Module:scripts/data]]
Geor = "Geor-translit",
},
},
override_translit = true,
override_translit = true,
Line 65: Line 60:
Cyrl = s["cau-Cyrl-displaytext"]
Cyrl = s["cau-Cyrl-displaytext"]
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.acute,
remove_diacritics = c.acute,
Line 71: Line 66:
to = {"а"},
to = {"а"},
},
},
Latn = s["cau-Latn-stripdiacritics"],
Latn = s["cau-Latn-entryname"],
},
},
sort_key = {
sort_key = {
Line 95: Line 90:
29572,
29572,
"ira-cen",
"ira-cen",
"Avst, Gujr, Deva",
"Avst, Gujr",
translit = {
translit = {
Avst = "Avst-translit"
Avst = "Avst-translit"
Line 142: Line 137:
13955,
13955,
"sem-arb",
"sem-arb",
"Arab, Hebr, Syrc, Brai, Nbat",
"Arab, Hebr, Syrc, Brai",
translit = {
translit = {
Arab = "ar-translit"
Arab = "ar-translit"
},
},
strip_diacritics = {
display_text = {
Arab = "ar-stripdiacritics",
Hebr = "Hebr-common",
},
entry_name = {
Arab = "ar-entryname",
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 175: Line 176:
Cyrl = s["cau-Cyrl-displaytext"],
Cyrl = s["cau-Cyrl-displaytext"],
},
},
strip_diacritics = {
entry_name = {
Cyrl = s["cau-Cyrl-stripdiacritics"],
Cyrl = s["cau-Cyrl-entryname"],
Latn = s["cau-Latn-stripdiacritics"],
Latn = s["cau-Latn-entryname"],
},
},
sort_key = {
sort_key = {
Line 201: Line 202:
ancestors = "trk-oat",
ancestors = "trk-oat",
dotted_dotless_i = true,
dotted_dotless_i = true,
strip_diacritics = {
entry_name = {
Latn = {
Latn = {
from = {"ʼ"},
from = {"ʼ"},
Line 207: Line 208:
},
},
["fa-Arab"] = {
["fa-Arab"] = {
module = "ar-stripdiacritics",
module = "ar-entryname",
["from"] = {
["from"] = {
"ۆ",
"ۆ",
Line 266: Line 267:
"zle",
"zle",
"Cyrl, Latn",
"Cyrl, Latn",
ancestors = "zle-mbe",
ancestors = "zle-obe",
translit = {
translit = {
Cyrl = "be-translit",
Cyrl = "be-translit",
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 292: Line 293:
},
},
},
},
standard_chars = {
standardChars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",
Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",
Line 306: Line 307:
ancestors = "cu-bgm",
ancestors = "cu-bgm",
translit = "bg-translit",
translit = "bg-translit",
strip_diacritics = {
entry_name = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
Line 314: Line 315:
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
},
},
standard_chars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc,
standardChars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc,
}
}


Line 362: Line 363:
"Tibt", -- sometimes Deva?
"Tibt", -- sometimes Deva?
ancestors = "xct",
ancestors = "xct",
translit = "Tibt-translit",
override_translit = true,
override_translit = true,
-- Tibt translit, display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = s["Tibt-displaytext"],
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
}
}


Line 385: Line 389:
ancestors = "roa-oca",
ancestors = "roa-oca",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"},
standard_chars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
standardChars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
}
}


Line 401: Line 405:
Cyrl = s["cau-Cyrl-displaytext"]
Cyrl = s["cau-Cyrl-displaytext"]
},
},
strip_diacritics = {
entry_name = {
Cyrl = s["cau-Cyrl-stripdiacritics"],
Cyrl = s["cau-Cyrl-entryname"],
Latn = s["cau-Latn-stripdiacritics"],
Latn = s["cau-Latn-entryname"],
},
},
sort_key = {
sort_key = {
Line 434: Line 438:
to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]}
to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]}
},
},
standard_chars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc,
standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc,
}
}


Line 457: Line 461:
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]}
},
},
standard_chars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc,
standardChars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc,
}
}


Line 464: Line 468:
35499,
35499,
"zls",
"zls",
"Cyrs, Glag, Zname",
"Cyrs, Glag",
translit = {
translit = {
Cyrs = "Cyrs-translit",
Cyrs = "Cyrs-translit",
Glag = "Glag-translit"
Glag = "Glag-translit"
},
},
-- Cyrs strip_diacritics, sort_key in [[Module:scripts/data]]
entry_name = {
Cyrs = s["Cyrs-entryname"]
},
sort_key = {
Cyrs = s["Cyrs-sortkey"]
},
}
}


Line 497: Line 506:
to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]}
to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]}
},
},
standard_chars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc,
standardChars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc,
}
}


Line 512: Line 521:
to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]}
to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]}
},
},
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc,
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc,
}
}


Line 520: Line 529:
"gmw-hgm",
"gmw-hgm",
"Latn, Latf, Brai",
"Latn, Latf, Brai",
ancestors = "de-ear",
ancestors = "gmh",
sort_key = {
sort_key = {
Latn = s["de-Latn-sortkey"],
Latn = s["de-Latn-sortkey"],
Latf = s["de-Latn-sortkey"],
Latf = s["de-Latn-sortkey"],
},
},
standard_chars = {
standardChars = {
Latn = s["de-Latn-standardchars"],
Latn = s["de-Latn-standardchars"],
Latf = s["de-Latn-standardchars"],
Latf = s["de-Latn-standardchars"],
Line 542: Line 551:
Diak = "Diak-translit",
Diak = "Diak-translit",
},
},
    ancestors = "dv-old",
override_translit = true,
override_translit = true,
}
}
Line 552: Line 560:
"Tibt",
"Tibt",
ancestors = "xct",
ancestors = "xct",
translit = "Tibt-translit",
override_translit = true,
override_translit = true,
-- Tibt translit, display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = s["Tibt-displaytext"],
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
}
}


Line 574: Line 585:
"Grek, Polyt, Brai",
"Grek, Polyt, Brai",
ancestors = "el-kth",
ancestors = "el-kth",
translit = "el-translit",
translit = {
Grek = "el-translit",
Polyt = "grc-translit",
},
override_translit = true,
override_translit = true,
-- Grek and Polyt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = {
standard_chars = {
Grek = s["Grek-displaytext"],
Polyt = s["Polyt-displaytext"],
},
entry_name = {
Grek = s["Grek-entryname"],
Polyt = s["Polyt-entryname"],
},
sort_key = {
Grek = s["Grek-sortkey"],
Polyt = s["Polyt-sortkey"],
},
standardChars = {
Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",
Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",
Brai = c.braille,
Brai = c.braille,
Line 595: Line 620:
-- Many of these are needed for sorting language names.
-- Many of these are needed for sorting language names.
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
-- These are found in pagenames.
-- These are found in entry names.
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"},
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"},
to = {{
to = {{
Line 607: Line 632:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 624: Line 649:
to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]}
to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]}
},
},
standard_chars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc,
standardChars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc,
}
}


Line 641: Line 666:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 663: Line 688:
}
}
},
},
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc,
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc,
}
}


Line 675: Line 700:
to = {"c" .. p[1], "n" .. p[1]}
to = {"c" .. p[1], "n" .. p[1]}
},
},
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc,
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc,
}
}


Line 684: Line 709:
"fa-Arab, Hebr",
"fa-Arab, Hebr",
ancestors = "fa-cls",
ancestors = "fa-cls",
strip_diacritics = {
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
["fa-Arab"] = {
["fa-Arab"] = {
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
-- character "ۂ" code U+06C2 to "ه" and "هٔ"(U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif
to = {"ه", "ا"},
to = {"ه", "ا"},
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
},
},
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 711: Line 742:
to = {"’"}
to = {"’"}
},
},
strip_diacritics = { -- used to indicate gemination of the next consonant
entry_name = { -- used to indicate gemination of the next consonant
remove_diacritics = "ˣ",
remove_diacritics = "ˣ",
from = {"’"},
from = {"’"},
Line 717: Line 748:
},
},
sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö".
sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö".
remove_diacritics = "':" .. c.diacritics,
remove_diacritics = "':" .. c.diacritics,
remove_exceptions = {
remove_exceptions = {
"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ
"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ
Line 726: Line 757:
to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"}
to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"}
},
},
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc,
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc,
}
}


Line 745: Line 776:
to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]}
to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]}
},
},
standard_chars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc,
standardChars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc,
}
}


Line 753: Line 784:
"roa-oil",
"roa-oil",
"Latn, Brai",
"Latn, Brai",
display_text = {
Latn = {
from = {"'"},
to = {"’"}
},
},
entry_name = {
Latn = {
from = {"’"},
to = {"'"},
},
},
ancestors = "frm",
ancestors = "frm",
sort_key = {
sort_key = {
Latn = s["roa-oil-sortkey"]
Latn = s["roa-oil-sortkey"]
},
},
standard_chars = {
standardChars = {
Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",
Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 774: Line 817:
to = {"i"}
to = {"i"}
},
},
standard_chars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc,
standardChars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc,
}
}


Line 788: Line 831:
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}
},
},
standard_chars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc,
standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc,
}
}


Line 798: Line 841:
ancestors = "mga",
ancestors = "mga",
sort_key = {remove_diacritics = c.grave .. c.acute},
sort_key = {remove_diacritics = c.grave .. c.acute},
standard_chars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc,
standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc,
}
}


Line 811: Line 854:
to = {"n" .. p[1]}
to = {"n" .. p[1]}
},
},
standard_chars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc,
standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc,
}
 
m["gn"] = {
"Guaraní",
35876,
"tup-gua",
"Latn",
}
}


Line 823: Line 873:
Gujr = "gu-translit",
Gujr = "gu-translit",
},
},
strip_diacritics = {
entry_name = {
Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun},
Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun},
Gujr = {remove_diacritics = "઼"},
Gujr = {remove_diacritics = "઼"},
Line 836: Line 886:
ancestors = "mga",
ancestors = "mga",
sort_key = {remove_diacritics = c.cedilla .. "-"},
sort_key = {remove_diacritics = c.cedilla .. "-"},
standard_chars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc,
standardChars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc,
}
}


Line 844: Line 894:
"cdc-wst",
"cdc-wst",
"Latn, Arab",
"Latn, Arab",
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron}
},
},
Line 861: Line 911:
"Hebr, Phnx, Brai, Samr",
"Hebr, Phnx, Brai, Samr",
ancestors = "he-med",
ancestors = "he-med",
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = {
-- Samr strip_diacritics, sort_key in [[Module:scripts/data]]
Hebr = "Hebr-common",
-- Phnx translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
},
entry_name = {
Hebr = "Hebr-common",
Samr = s["Samr-entryname"],
},
sort_key = {
Hebr = "Hebr-common",
Samr = s["Samr-sortkey"],
},
}
}


Line 874: Line 932:
Deva = "hi-translit"
Deva = "hi-translit"
},
},
standard_chars = {
standardChars = {
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",
c.punc
c.punc
Line 924: Line 982:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",
c.punc
c.punc
Line 936: Line 994:
"Armn, Brai",
"Armn, Brai",
ancestors = "axm",
ancestors = "axm",
-- Armn translit in [[Module:scripts/data]]
translit = {
Armn = "Armn-translit"
},
override_translit = true,
override_translit = true,
strip_diacritics = {
entry_name = {
Armn = {
Armn = {
remove_diacritics = "՛՜՞՟",
remove_diacritics = "՛՜՞՟",
Line 979: Line 1,039:
"Latn",
"Latn",
ancestors = "ms",
ancestors = "ms",
standard_chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc,
standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc,
}
}


Line 988: Line 1,048:
"Latn",
"Latn",
type = "appendix-constructed",
type = "appendix-constructed",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ},
}
}


Line 996: Line 1,056:
"alv-igb",
"alv-igb",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.macron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.macron},
sort_key = {
sort_key = {
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
Line 1,044: Line 1,104:
to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]}
to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]}
},
},
standard_chars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc,
standardChars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc,
}
}


Line 1,054: Line 1,114:
ancestors = "roa-oit",
ancestors = "roa-oit",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove},
standard_chars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc,
standardChars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc,
}
}


Line 1,077: Line 1,137:
link_tr = true,
link_tr = true,
display_text = s["jpx-displaytext"],
display_text = s["jpx-displaytext"],
strip_diacritics = s["jpx-stripdiacritics"],
entry_name = s["jpx-entryname"],
sort_key = s["jpx-sortkey"],
sort_key = s["jpx-sortkey"],
}
}
Line 1,085: Line 1,145:
33549,
33549,
"poz",
"poz",
"Latn, Java, Arab",
"Latn, Java",
ancestors = "kaw",
ancestors = "kaw",
translit = {
translit = {
Line 1,091: Line 1,151:
},
},
link_tr = true,
link_tr = true,
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê
Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê
},
},
Line 1,108: Line 1,168:
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian
ancestors = "ka-mid",
ancestors = "ka-mid",
-- Geor, Geok translit in [[Module:scripts/data]]
translit = {
Geor = "Geor-translit",
Geok = "Geok-translit",
},
override_translit = true,
override_translit = true,
strip_diacritics = {
display_text = {
Geor = s["ka-stripdiacritics"],
Hebr = "Hebr-common",
Geok = s["ka-stripdiacritics"],
},
entry_name = {
Geor = s["ka-entryname"],
Geok = s["ka-entryname"],
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
sort_key = {
Hebr = "Hebr-common",
}
}
}


Line 1,162: Line 1,231:
},
},
},
},
standard_chars = {
standardChars = {
Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",
Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",
c.punc
c.punc
Line 1,194: Line 1,263:
"Knda, Tutg",
"Knda, Tutg",
ancestors = "dra-mkn",
ancestors = "dra-mkn",
-- Knda translit in [[Module:scripts/data]]
translit = {
Knda = "kn-translit",
},
}
}


Line 1,206: Line 1,277:
Kore = "ko-translit",
Kore = "ko-translit",
},
},
-- Kore strip_diacritics in [[Module:scripts/data]]
entry_name = {
Kore = s["Kore-entryname"],
},
}
}


Line 1,214: Line 1,287:
"ssa-sah",
"ssa-sah",
"Latn, Arab",
"Latn, Arab",
-- the sortkey and strip_diacritics are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
-- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve}
},
},
Line 1,234: Line 1,307:
["ks-Arab"] = "ks-Arab-translit",
["ks-Arab"] = "ks-Arab-translit",
Deva = "ks-Deva-translit",
Deva = "ks-Deva-translit",
-- Shrd translit in [[Module:scripts/data]]
Shrd = "Shrd-translit",
},
},
}
}


-- "kv" is treated as "koi", "kpv", see [[WT:LT]]
-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT


m["kw"] = {
m["kw"] = {
Line 1,273: Line 1,346:
397,
397,
"itc-laf",
"itc-laf",
"Latn, Ital",
"Latn",
ancestors = "itc-ola",
ancestors = "itc-ola",
-- Ital translit in [[Module:scripts/data]] (NOTE: formerly not present, probably an accidental omission)
display_text = {
display_text = {
Latn = s["itc-Latn-displaytext"]
Latn = s["itc-Latn-displaytext"]
},
},
strip_diacritics = {
entry_name = {
Latn = s["itc-Latn-stripdiacritics"]
Latn = s["itc-Latn-entryname"]
},
},
sort_key = {
sort_key = {
Latn = s["itc-Latn-sortkey"]
Latn = s["itc-Latn-sortkey"]
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx",
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx",
c.punc
c.punc
Line 1,310: Line 1,382:
"bnt-nyg",
"bnt-nyg",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute .. c.circ},
entry_name = {remove_diacritics = c.acute .. c.circ},
sort_key = {
sort_key = {
from = {"ŋ"},
from = {"ŋ"},
Line 1,341: Line 1,413:
9211,
9211,
"tai-swe",
"tai-swe",
"Laoo", -- also Tai Noi/Lao Buhan script
"Laoo",
translit = "lo-translit",
translit = "lo-translit",
sort_key = "Laoo-sortkey",
sort_key = "Laoo-sortkey",
standard_chars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc,
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc,
}
}


Line 1,354: Line 1,426:
ancestors = "olt",
ancestors = "olt",
display_text = "lt-common",
display_text = "lt-common",
strip_diacritics = "lt-common",
entry_name = "lt-common",
sort_key = "lt-common",
sort_key = "lt-common",
standard_chars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
standardChars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
}
}


Line 1,371: Line 1,443:
"bat-eas",
"bat-eas",
"Latn",
"Latn",
strip_diacritics = {
entry_name = {
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient.
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient.
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde},
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde},
Line 1,380: Line 1,452:
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]}
},
},
standard_chars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc,
standardChars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc,
}
}


Line 1,402: Line 1,474:


m["mi"] = {
m["mi"] = {
"Māori",
"Maori",
36451,
36451,
"poz-pep",
"poz-pep",
Line 1,409: Line 1,481:
remove_diacritics = c.macron,
remove_diacritics = c.macron,
from = {"ng", "wh"},
from = {"ng", "wh"},
to = {"n" .. p[1], "w" .. p[1]}
to = {"z" .. p[1], "z" .. p[2]}
},
},
}
}
Line 1,420: Line 1,492:
ancestors = "cu",
ancestors = "cu",
translit = {
translit = {
Cyrl = "mk-translit",
Cyrl = "mk-translit"
-- FIXME: formerly no translit specified for Polyt; unclear if the default [[Module:grc-translit]] is
},
-- acceptable, so we disable it for now
display_text = {
Polyt = false,
Polyt = s["Polyt-displaytext"]
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.acute,
remove_diacritics = c.acute,
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}
},
},
Polyt = s["Polyt-entryname"],
},
},
sort_key = {
sort_key = {
Line 1,438: Line 1,511:
to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]}
to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]}
},
},
Polyt = s["Polyt-sortkey"],
},
},
-- Polyt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
standardChars = {
standard_chars = {
Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш",
Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш",
c.punc
c.punc
Line 1,451: Line 1,524:
"dra-mal",
"dra-mal",
"Mlym",
"Mlym",
translit = "ml-translit",
override_translit = true,
override_translit = true,
-- Mlym translit in [[Module:scripts/data]]
}
}


Line 1,463: Line 1,536:
translit = {
translit = {
Cyrl = "mn-translit",
Cyrl = "mn-translit",
-- Mong translit in [[Module:scripts/data]]
Mong = "Mong-translit",
},
},
override_translit = true,
override_translit = true,
-- Mong display_text and strip_diacritics in [[Module:scripts/data]]
display_text = {
strip_diacritics = {
Mong = s["Mong-displaytext"]
},
entry_name = {
Cyrl = {remove_diacritics = c.grave .. c.acute},
Cyrl = {remove_diacritics = c.grave .. c.acute},
Mong = s["Mong-entryname"],
},
},
sort_key = {
sort_key = {
Line 1,477: Line 1,553:
},
},
},
},
standard_chars = {
standardChars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",
Brai = c.braille,
Brai = c.braille,
Line 1,484: Line 1,560:
}
}


-- "mo" is treated as "ro", see [[WT:LT]]
-- "mo" IS TREATED AS "ro", SEE WT:LT


m["mr"] = {
m["mr"] = {
Line 1,496: Line 1,572:
Modi = "mr-Modi-translit",
Modi = "mr-Modi-translit",
},
},
strip_diacritics = {
entry_name = {
Deva = {
Deva = {
from = {"च़", "ज़", "झ़"},
from = {"च़", "ज़", "झ़"},
Line 1,510: Line 1,586:
"Latn, ms-Arab",
"Latn, ms-Arab",
ancestors = "ms-cla",
ancestors = "ms-cla",
standard_chars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
c.punc
c.punc
Line 1,525: Line 1,601:
to = {"’"}
to = {"’"}
},
},
strip_diacritics = {
entry_name = {
from = {"’"},
from = {"’"},
to = {"'"},
to = {"'"},
Line 1,575: Line 1,651:
ancestors = "gmq-mno, da",  -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion
ancestors = "gmq-mno, da",  -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standard_chars = s["no-standardchars"],
standardChars = s["no-standardchars"],
}
}


Line 1,583: Line 1,659:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 1,612: Line 1,688:
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"},
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"},
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZzÄäËëÏïÖöÜü",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Brai = c.braille,
Brai = c.braille,
c.punc
c.punc
Line 1,625: Line 1,701:
"Latn",
"Latn",
ancestors = "gmq-mno",
ancestors = "gmq-mno",
strip_diacritics = {
entry_name = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
},
},
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standard_chars = s["no-standardchars"],
standardChars = s["no-standardchars"],
}
}


Line 1,639: Line 1,715:
ancestors = "gmq-mno",
ancestors = "gmq-mno",
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standard_chars = s["no-standardchars"],
standardChars = s["no-standardchars"],
}
}


Line 1,647: Line 1,723:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 1,675: Line 1,751:
"bnt-nys",
"bnt-nys",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute .. c.circ},
entry_name = {remove_diacritics = c.acute .. c.circ},
sort_key = {
sort_key = {
from = {"ng'"},
from = {"ng'"},
Line 1,688: Line 1,764:
"Latn, Hebr",
"Latn, Hebr",
ancestors = "pro",
ancestors = "pro",
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
Hebr = "Hebr-common",
},
sort_key = {
sort_key = {
Latn = {
Latn = {
Line 1,694: Line 1,776:
to = {"%1h"}
to = {"%1h"}
},
},
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 1,735: Line 1,817:
translit = {
translit = {
Cyrl = "os-translit",
Cyrl = "os-translit",
-- Geor translit in [[Module:scripts/data]]
Geor = "Geor-translit",
},
},
override_translit = true,
override_translit = true,
Line 1,748: Line 1,830:
},
},
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 1,772: Line 1,854:
"inc-pan",
"inc-pan",
"Guru, pa-Arab",
"Guru, pa-Arab",
ancestors = "inc-opa",
translit = {
translit = {
Guru = "Guru-translit",
Guru = "Guru-translit",
["pa-Arab"] = "pa-Arab-translit",
["pa-Arab"] = "pa-Arab-translit",
},
},
strip_diacritics = {
entry_name = {
["pa-Arab"] = {
["pa-Arab"] = {
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
Line 1,792: Line 1,875:
ancestors = "sa",
ancestors = "sa",
translit = {
translit = {
-- Brah translit in [[Module:scripts/data]]
Brah = "Brah-translit",
Deva = "sa-translit",
Deva = "sa-translit",
Beng = "pi-translit",
Beng = "pi-translit",
Line 1,803: Line 1,886:
Cakm = "Cakm-translit",
Cakm = "Cakm-translit",
},
},
strip_diacritics = {
entry_name = {
Thai = {
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
to = {"ิํ", "ฐ", "ญ"}
},
},
Mymr = {
remove_diacritics = c.VS01
remove_diacritics = c.VS01,
},
},
},
sort_key = { -- FIXME: This needs to be converted into the current standardized format.
sort_key = { -- FIXME: This needs to be converted into the current standardized format.
Line 1,828: Line 1,909:
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]}
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]}
},
},
standard_chars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc,
standardChars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc,
}
}


Line 1,836: Line 1,917:
"ira-pat",
"ira-pat",
"ps-Arab",
"ps-Arab",
strip_diacritics = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef},
entry_name = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef},
}
}


Line 1,851: Line 1,932:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",
Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",
Brai = c.braille,
Brai = c.braille,
Line 1,866: Line 1,947:


m["rm"] = {
m["rm"] = {
"Romansh",
"Romansch",
13199,
13199,
"roa-rhe",
"roa-rhe",
ancestors = "rm-old",
"Latn",
"Latn",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e},
Line 1,893: Line 1,973:
},
},
},
},
-- Cyrs strip_diacritics, sort_key in [[Module:scripts/data]]; presumably not present
standardChars = {
standard_chars = {
Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",
Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",
Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Line 1,916: Line 1,995:
},
},
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_diacritics = c.grave .. c.acute .. c.diaer,
Line 1,927: Line 2,006:
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_exceptions = {"ё", "ѣ̈", "я̈"},
from = {
from = {
"і", "ѣ", "ѳ", "ѵ"
"ё", "ѣ̈", "я̈", -- 2 chars
"і", "ѣ", "ѳ", "ѵ" -- 1 char
},
},
to = {
to = {
"е" .. p[1], "ь" .. p[2], "я" .. p[1],
"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3]
"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3]
}
}
},
},
},
},
standard_chars = {
standardChars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",
Brai = c.braille,
Brai = c.braille,
Line 1,947: Line 2,029:
"bnt-glb",
"bnt-glb",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 1,958: Line 2,040:
Beng = "sa-Beng-translit",
Beng = "sa-Beng-translit",
["as-Beng"] = "sa-Beng-translit",
["as-Beng"] = "sa-Beng-translit",
-- Brah translit in [[Module:scripts/data]]
Brah = "Brah-translit",
Deva = "sa-translit",
Deva = "sa-translit",
Gujr = "sa-Gujr-translit",
Gujr = "sa-Gujr-translit",
Line 1,970: Line 2,052:
Mlym = "sa-Mlym-translit",
Mlym = "sa-Mlym-translit",
Modi = "sa-Modi-translit",
Modi = "sa-Modi-translit",
-- Mong, mnc-Mong, xwo-Mong translit in [[Module:scripts/data]]
Mong = "Mong-translit",
-- NOTE: Formerly used xal-translit for transliterating xwo-Mong but that only handles Cyrillic; it has
["mnc-Mong"] = "mnc-translit",
-- code to transliterate xwo-Mong but it's broken so I've replaced it with the default xwo-translit.
["xwo-Mong"] = "xal-translit",
Mymr = "pi-translit",
Mymr = "pi-translit",
Orya = "sa-Orya-translit",
Orya = "sa-Orya-translit",
-- Shrd translit in [[Module:scripts/data]]
Shrd = "Shrd-translit",
-- Sidd translit in [[Module:scripts/data]]
Sidd = "Sidd-translit",
Sinh = "si-translit",
Sinh = "si-translit",
Taml = "sa-Taml-translit",
Taml = "sa-Taml-translit",
Telu = "sa-Telu-translit",
Telu = "sa-Telu-translit",
Thai = "pi-translit",
Thai = "pi-translit",
-- Tibt translit in [[Module:scripts/data]]
Tibt = "Tibt-translit",
},
display_text = {
Mong = s["Mong-displaytext"],
Tibt = s["Tibt-displaytext"],
},
},
-- Mong display_text and strip_diacritics in [[Module:scripts/data]]
entry_name = {
-- Tibt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
Mong = s["Mong-entryname"],
strip_diacritics = {
Tibt = s["Tibt-entryname"],
Deva = s["sa-Deva-stripdiacritics"],
Mymr = {
remove_diacritics = c.VS01,
},
Thai = {
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
to = {"ิํ", "ฐ", "ญ"}
},
},
remove_diacritics = c.VS01 .. c.udatta .. c.anudatta
},
},
sort_key = {
sort_key = {
Deva = s["sa-Deva-stripdiacritics"], -- until we have a proper Sanskrit sorting algorithm.
Tibt = "Tibt-sortkey",
Lana = { -- Tai Tham
{ -- FIXME: This needs to be converted into the current standardized format.
from = {"", "", "", "", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", ""},
from = {"ā", "ī", "ū", "", "", "", "m[" .. c.dotabove .. c.dotbelow .. "]", "", "ñ", "", "", "", "ś", "", "", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "", "", "", "", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "", u(0xFE00), u(0x200D)},
to = {"ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", ""},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"},
},
Laoo = "Laoo-sortkey",
Latn = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "", "ñ", "", "", "", "ś", "", "ṭ"},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~"},
},
Mymr = {
remove_diacritics = c.VS01,
},
},
Thai = "Thai-sortkey",
-- FIXME: The previous sort key which mixed all scripts removed ZWJ; I don't know which script(s) this was
-- intended for and there are no other languages which remove it in the sort key AFAIK. If it needs to be
-- removed, specify the script(s) it needs to be removed under or add handling for the "all" script that applies
-- regardless of script.
--all = {
-- remove_diacritics = c.ZWJ,
--},
},
},
}
}
Line 2,025: Line 2,092:
"roa-sou",
"roa-sou",
"Latn",
"Latn",
ancestors = "sc-old",
}
}


Line 2,034: Line 2,100:
"sd-Arab, Deva, Sind, Khoj",
"sd-Arab, Deva, Sind, Khoj",
translit = {
translit = {
Sind = "Sind-translit",
Sind = "Sind-translit"
["sd-Arab"] = "sd-Arab-translit"
},
},
strip_diacritics = {
entry_name = {
["sd-Arab"] = {
["sd-Arab"] = {
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
Line 2,055: Line 2,120:
to = {"ˈ"}
to = {"ˈ"}
},
},
strip_diacritics = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"},
entry_name = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"},
sort_key = {
sort_key = {
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]}
},
},
standard_chars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc,
standardChars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc,
}
}


Line 2,075: Line 2,140:
9301,
9301,
"zls",
"zls",
"Latn, Cyrl, Glag, Arab",
"Latn, Cyrl, Glag",
ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1
ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1
wikimedia_codes = "sh, bs, hr, sr",
wikimedia_codes = "sh, bs, hr, sr",
strip_diacritics = {
entry_name = {
Latn = {
Latn = {
remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
Line 2,102: Line 2,167:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",
Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",
Line 2,125: Line 2,190:
ancestors = "zlw-osk",
ancestors = "zlw-osk",
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
standard_chars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
standardChars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
}
}


Line 2,133: Line 2,198:
"zls",
"zls",
"Latn",
"Latn",
strip_diacritics = {
entry_name = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow,
remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow,
remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"},
remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"},
Line 2,145: Line 2,210:
to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]},
to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]},
},
},
standard_chars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc,
standardChars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc,
}
}


Line 2,160: Line 2,225:
"bnt-sho",
"bnt-sho",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute},
entry_name = {remove_diacritics = c.acute},
}
}


Line 2,168: Line 2,233:
"cus-som",
"cus-som",
"Latn, Arab, Osma",
"Latn, Arab, Osma",
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
},
},
Line 2,180: Line 2,245:
translit = {
translit = {
Elba = "Elba-translit",
Elba = "Elba-translit",
Vith = "Vith-translit",
},
},
-- Grek display_text, sort_key in [[Module:scripts/data]]
display_text = {
strip_diacritics = {
Grek = s["Grek-displaytext"],
},
entry_name = {
Latn = {
Latn = {
remove_diacritics = c.acute .. c.circ .. c.macron,
remove_diacritics = c.acute,
from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'},
from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'},
},
},
Grek = { -- Diacritic removal from Grek-stripdiacritics excluded.
Grek = { -- Diacritic removal from Grek-entryname excluded.
from = m_langdata.chars_substitutions["Grek-stripdiacritics"].from,
from = s["Grek-entryname"].from,
to = m_langdata.chars_substitutions["Grek-stripdiacritics"].to,
to = s["Grek-entryname"].to,
},
},
},
},
sort_key = {
sort_key = {
Latn = {
Latn = {
remove_diacritics = c.acute .. c.circ .. c.macron .. c.tilde .. c.breve .. c.caron,
remove_diacritics = c.acute .. c.circ .. c.tilde .. c.breve .. c.caron,
from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'},
from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'},
to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]},
to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]},
}
}
-- TODO: Grek if the default sort key is unsuitable
-- TODO: Grek
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz",
Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz",
c.punc
c.punc
Line 2,212: Line 2,278:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,220: Line 2,286:
"bnt-sts",
"bnt-sts",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,230: Line 2,296:
ancestors = "osn",
ancestors = "osn",
translit = {
translit = {
Sund = "Sund-translit"
Sund = "su-translit"
},
},
}
}
Line 2,246: Line 2,312:
to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"}
to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"}
},
},
standard_chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc,
standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc,
}
}


Line 2,291: Line 2,357:
},
},
override_translit = true,
override_translit = true,
strip_diacritics = {
entry_name = {
Cyrl = s["tg-stripdiacritics"],
Cyrl = s["tg-entryname"],
Latn = s["tg-stripdiacritics"],
Latn = s["tg-entryname"],
},
},
sort_key = {
sort_key = {
Line 2,329: Line 2,395:
"trk-ogz",
"trk-ogz",
"Latn, Cyrl, Arab",
"Latn, Cyrl, Arab",
strip_diacritics = {
entry_name = {
Latn = s["tk-stripdiacritics"],
Latn = s["tk-entryname"],
Cyrl = s["tk-stripdiacritics"],
Cyrl = s["tk-entryname"],
},
},
sort_key = {
sort_key = {
Line 2,343: Line 2,409:
},
},
},
},
ancestors = "trk-eog",
}
}


Line 2,355: Line 2,420:
},
},
override_translit = true,
override_translit = true,
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
},
},
standard_chars = {
standardChars = {
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
c.punc
c.punc
Line 2,379: Line 2,444:
"poz-ton",
"poz-ton",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute},
entry_name = {remove_diacritics = c.acute},
sort_key = {remove_diacritics = c.macron},
sort_key = {remove_diacritics = c.macron},
}
}
Line 2,403: Line 2,468:
}
}
},
},
standard_chars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc,
standardChars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc,
}
}


Line 2,419: Line 2,484:
"Cyrl, Latn, tt-Arab",
"Cyrl, Latn, tt-Arab",
translit = {
translit = {
Cyrl = "tt-translit",
Cyrl = "tt-translit"
["tt-Arab"] = "tt-translit"
},
},
--override_translit = true, -- enable override until Module code can detect Russian loans such as [[аэропорт]]
override_translit = true,
dotted_dotless_i = true,
dotted_dotless_i = true,
sort_key = {
sort_key = {
Line 2,442: Line 2,506:
}
}


-- "tw" is treated as "ak", see [[WT:LT]]
-- "tw" IS TREATED AS "ak", SEE WT:LT


m["ty"] = {
m["ty"] = {
Line 2,469: Line 2,533:
"zle",
"zle",
"Cyrl",
"Cyrl",
ancestors = "zle-muk",
ancestors = "zle-ouk",
translit = "uk-translit",
translit = "uk-translit",
strip_diacritics = {remove_diacritics = c.grave .. c.acute},
entry_name = {remove_diacritics = c.grave .. c.acute},
sort_key = {
sort_key = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 2,483: Line 2,547:
}
}
},
},
standard_chars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя" .. c.punc:gsub("'", ""),  -- Exclude apostrophe.
standardChars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя" .. c.punc:gsub("'", ""),  -- Exclude apostrophe.
}
}


Line 2,494: Line 2,558:
["ur-Arab"] = "ur-translit"
["ur-Arab"] = "ur-translit"
},
},
strip_diacritics = {
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
["ur-Arab"] = {
["ur-Arab"] = {
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
-- character "ۂ" code U+06C2 to "ه" and "هٔ"(U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ۂ", "ٱ"},
from = {"هٔ", "ۂ", "ٱ"},
to = {"ہ", "ہ", "ا"},
to = {"ہ", "ہ", "ا"},
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
},
},
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
standardChars = {
standard_chars = {
["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے",
["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے",
c.punc,
c.punc,
Line 2,527: Line 2,597:
to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]}
to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]}
},
},
},
strip_diacritics = {
["fa-Arab"] = "ar-stripdiacritics",
},
},
}
}
Line 2,579: Line 2,646:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,591: Line 2,658:
Hebr = "yi-translit",
Hebr = "yi-translit",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
}
}


Line 2,599: Line 2,674:
"alv-yor",
"alv-yor",
"Latn, Arab",
"Latn, Arab",
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.macron}
Latn = {remove_diacritics = c.grave .. c.acute .. c.macron}
},
},
Line 2,642: Line 2,717:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


return require("Module:languages").finalizeData(m, "language")
return require("Module:languages").finalizeData(m, "language")

Latest revision as of 11:30, 21 April 2026

Documentation for this module may be created at Module:languages/data/2/doc

local m_langdata = require("Module:languages/data")

-- Lazy accessor for the `char` function of [[Module:string utilities]].
-- The module is only required on the first call (the data below may never
-- need it); at that point the local `u` is rebound to the real function, so
-- every later call goes straight to it.
local u
u = function(...)
	local char = require("Module:string utilities").char
	u = char
	return char(...)
end

-- Short aliases for tables exported by [[Module:languages/data]].
local c = m_langdata.chars -- named characters, used below as c.acute, c.grave, c.punc, ...
local p = m_langdata.puaChars -- indexed as p[1], p[2], ... when building sort-key replacements
local s = m_langdata.shared -- shared substitution data, looked up by string key

-- Ideally, we want to move these into [[Module:languages/data]], but because (a) it's necessary to use require on that module, and (b) they're only used in this data module, it's less memory-efficient to do that at the moment. If it becomes possible to use mw.loadData, then these should be moved there.
-- German (Latin script) sort-key data: lists the diacritics to remove and
-- rewrites the ligatures æ/œ and ß as "ae"/"oe"/"ss". `from` and `to` are
-- paired by position.
s["de-Latn-sortkey"] = {
	remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove,
	from = {"æ", "œ", "ß"},
	to = {"ae", "oe", "ss"}
}

-- Standard characters for German in Latin script: A–Z plus Ää, Öö, Üü and ẞß.
-- Note that c.punc is not appended to this string.
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz"

-- Entry-name rule stored under the key "ka-entryname": remove the circumflex (c.circ).
s["ka-entryname"] = {remove_diacritics = c.circ}

-- Shared sort-key data stored under "no-sortkey".
-- Lists the diacritics to remove, with "å" named as an exception, and maps
-- æ, ø, å to "z" followed by p[1], p[2], p[3] respectively — presumably so
-- that they sort after z, in that order (confirm against Module:languages).
s["no-sortkey"] = {
	remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron .. c.dacute .. c.caron .. c.cedilla,
	remove_exceptions = {"å"},
	from = {"æ", "ø", "å"},
	to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]}
}

-- Standard characters stored under "no-standardchars": the listed Latin
-- letters plus Ææ, Øø, Åå, followed by the shared punctuation string c.punc.
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc

-- Entry-name rule stored under "tg-entryname": remove grave and acute accents.
s["tg-entryname"] = {remove_diacritics = c.grave .. c.acute}

-- Entry-name rule stored under "tk-entryname": remove the macron.
s["tk-entryname"] = {remove_diacritics = c.macron}

-- Language data table, keyed by language code; each m[code] assignment below adds one entry.
local m = {}

-- Afar. Entry names in Latin script have the acute accent removed; no
-- script-specific rule is given for Ethi.
m["aa"] = {
	"Afar",
	27811,
	"cus-eas",
	"Latn, Ethi",
	entry_name = {
		Latn = {remove_diacritics = c.acute},
	},
}

-- Abkhaz. Carries per-script transliteration, display-text, entry-name and
-- sort-key rules; the Cyrl display text and Latn entry-name rules come from
-- the shared table `s`.
m["ab"] = {
	"Abkhaz",
	5111,
	"cau-abz",
	"Cyrl, Geor, Latn",
	-- Transliteration module per script; none is listed for Latn.
	translit = {
		Cyrl = "ab-translit",
		Geor = "Geor-translit",
	},
	override_translit = true,
	display_text = {
		Cyrl = s["cau-Cyrl-displaytext"]
	},
	entry_name = {
		Cyrl = {
			remove_diacritics = c.acute,
			-- The pattern "^а%-" matches a leading "а-"; it is replaced by plain
			-- "а", i.e. the hyphen after an initial а is dropped.
			from =  {"^а%-"},
			to = {"а"},
		},
		Latn = s["cau-Latn-entryname"],
	},
	sort_key = {
		Cyrl = {
			-- `from` and `to` are paired by position. Longer sequences are listed
			-- before shorter ones (see the "N chars" notes), presumably so that
			-- multi-letter sequences are handled before their component letters.
			from = {
				"х'ә", -- 3 chars
				"гь", "гә", "ӷь", "ҕь", "ӷә", "ҕә", "дә", "ё", "жь", "жә", "ҙә", "ӡә", "ӡ'", "кь", "кә", "қь", "қә", "ҟь", "ҟә", "ҫә", "тә", "ҭә", "ф'", "хь", "хә", "х'", "ҳә", "ць", "цә", "ц'", "ҵә", "ҵ'", "шь", "шә", "џь", -- 2 chars
				"ӷ", "ҕ", "ҙ", "ӡ", "қ", "ҟ", "ԥ", "ҧ", "ҫ", "ҭ", "ҳ", "ҵ", "ҷ", "ҽ", "ҿ", "ҩ", "џ", "ә", -- 1 char
				"^а",
			},
			-- Each replacement is a base letter followed by one of the p[n]
			-- characters from m_langdata.puaChars.
			to = {
				"х" .. p[4],
				"г" .. p[1], "г" .. p[2], "г" .. p[5], "г" .. p[6], "г" .. p[7], "г" .. p[8], "д" .. p[1], "е" .. p[1], "ж" .. p[1], "ж" .. p[2], "з" .. p[2], "з" .. p[4], "з" .. p[5], "к" .. p[1], "к" .. p[2], "к" .. p[4], "к" .. p[5], "к" .. p[7], "к" .. p[8], "с" .. p[2], "т" .. p[1], "т" .. p[3], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[3], "х" .. p[6], "ц" .. p[1], "ц" .. p[2], "ц" .. p[3], "ц" .. p[5], "ц" .. p[6], "ш" .. p[1], "ш" .. p[2], "ы" .. p[3],
				"г" .. p[3], "г" .. p[4], "з" .. p[1], "з" .. p[3], "к" .. p[3], "к" .. p[6], "п" .. p[1], "п" .. p[2], "с" .. p[1], "т" .. p[2], "х" .. p[5], "ц" .. p[4], "ч" .. p[1], "ч" .. p[2], "ч" .. p[3], "ы" .. p[1], "ы" .. p[2], "ь" .. p[1],
				"", -- pairs with "^а" above: a leading а is removed from the key
			}
		},
	},
}

-- Avestan. A transliteration module is given for the Avst script only;
-- none is listed for Gujr.
m["ae"] = {
	"Avestan",
	29572,
	"ira-cen",
	"Avst, Gujr",
	translit = {
		Avst = "Avst-translit"
	},
}

-- Afrikaans, with "nl" as its ancestor. Only the Latin script has a sort-key rule.
m["af"] = {
	"Afrikaans",
	14196,
	"gmw-frk",
	"Latn, Arab",
	ancestors = "nl",
	sort_key = {
		Latn = {
			-- The ASCII apostrophe is included in the list of characters to remove.
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'",
			-- An apostrophe (' or ʼ) followed by n is keyed as "n" plus p[1].
			from = {"['ʼ]n"},
			to = {"n" .. p[1]}
		}
	},
}

-- Akan. Only the positional fields are set (presumably name, numeric ID,
-- family code and script list — confirm against Module:languages); there are
-- no transliteration, entry-name or sort-key rules.
m["ak"] = {
	"Akan",
	28026,
	"alv-ctn",
	"Latn",
}

-- Amharic. `translit` is a plain string here rather than a per-script table;
-- the entry lists a single script (Ethi).
m["am"] = {
	"Amharic",
	28244,
	"sem-eth",
	"Ethi",
	translit = "Ethi-translit",
}

-- Aragonese. Only the positional fields are set; no extra rules.
m["an"] = {
	"Aragonese",
	8765,
	"roa-nar",
	"Latn",
}

-- Arabic. Rules are given per script for Arab and Hebr; Syrc and Brai are in
-- the script list but have no script-specific rules here.
-- NOTE(review): the rule values are strings ("ar-entryname", "Hebr-common")
-- rather than inline tables — presumably names resolved elsewhere by
-- Module:languages; confirm.
m["ar"] = {
	"Arabic",
	13955,
	"sem-arb",
	"Arab, Hebr, Syrc, Brai",
	translit = {
		Arab = "ar-translit"
	},
	display_text = {
		Hebr = "Hebr-common",
	},
	entry_name = {
		Arab = "ar-entryname",
		Hebr = "Hebr-common",
	},
	sort_key = {
		Hebr = "Hebr-common",
	},
}

-- Assamese. Uses the "as-Beng" script code, names "inc-mas" as ancestor and
-- "as-translit" as its transliteration module.
m["as"] = {
	"Assamese",
	29401,
	"inc-bas",
	"as-Beng",
	ancestors = "inc-mas",
	translit = "as-translit",
}

-- Avar. Display-text and entry-name rules are taken from the shared table
-- `s`; only the Cyrillic sort key is defined inline.
m["av"] = {
	"Avar",
	29561,
	"cau-ava",
	"Cyrl, Latn, Arab",
	ancestors = "oav",
	-- Transliteration module per script; none is listed for Latn.
	translit = {
		Cyrl = "cau-nec-translit",
		Arab = "ar-translit",
	},
	override_translit = true,
	display_text = {
		Cyrl = s["cau-Cyrl-displaytext"],
	},
	entry_name = {
		Cyrl = s["cau-Cyrl-entryname"],
		Latn = s["cau-Latn-entryname"],
	},
	sort_key = {
		Cyrl = {
			-- Parallel lists: each digraph (and ё) maps to a base letter followed
			-- by one of the p[n] characters.
			from = {"гъ", "гь", "гӏ", "ё", "кк", "къ", "кь", "кӏ", "лъ", "лӏ", "тӏ", "хх", "хъ", "хь", "хӏ", "цӏ", "чӏ"},
			to = {"г" .. p[1], "г" .. p[2], "г" .. p[3], "е" .. p[1], "к" .. p[1], "к" .. p[2], "к" .. p[3], "к" .. p[4], "л" .. p[1], "л" .. p[2], "т" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[3], "х" .. p[4], "ц" .. p[1], "ч" .. p[1]}
		},
	},
}

-- Aymara. Only the positional fields are set; no extra rules.
m["ay"] = {
	"Aymara",
	4627,
	"sai-aym",
	"Latn",
}

-- Azerbaijani. Has entry-name rules for Latn and fa-Arab, a display-text rule
-- for Latn, and sort keys for Latn and Cyrl.
m["az"] = {
	"Azerbaijani",
	9292,
	"trk-ogz",
	"Latn, Cyrl, fa-Arab",
	ancestors = "trk-oat",
	-- Presumably signals that dotted "i" and dotless "ı" are distinct letters
	-- (compare the Latn sort key below) — confirm how Module:languages uses it.
	dotted_dotless_i = true,
	entry_name = {
		-- Entry names use the ASCII apostrophe; display_text below does the
		-- reverse mapping (' -> ʼ).
		Latn = {
			from = {"ʼ"},
			to = {"'"},
		},
		["fa-Arab"] = {
			-- NOTE(review): `module` presumably applies the "ar-entryname" handler
			-- alongside the from/to substitutions below — confirm.
			module = "ar-entryname",
			-- ["from"]/["to"] are the same keys as the bare from/to used elsewhere
			-- in this file; the lists are paired by position.
			["from"] = {
				"ۆ",
				"ۇ",
				"وْ",
				"ڲ",
				"ؽ",
			},
			["to"] = {
				"و",
				"و",
				"و",
				"گ",
				"ی",
			},
		},
	},
	display_text = {
		Latn = {
			from = {"'"},
			to = {"ʼ"}
		}
	},
	sort_key = {
		Latn = {
			from = {
				"i", -- Ensure "i" comes after "ı".
				"ç", "ə", "ğ", "x", "ı", "q", "ö", "ş", "ü", "w"
			},
			-- "i" becomes "i" .. p[1] while "ı" becomes plain "i"; the other
			-- letters map to a base letter plus p[1].
			to = {
				"i" .. p[1],
				"c" .. p[1], "e" .. p[1], "g" .. p[1], "h" .. p[1], "i", "k" .. p[1], "o" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]
			}
		},
		Cyrl = {
			from = {"ғ", "ә", "ы", "ј", "ҝ", "ө", "ү", "һ", "ҹ"},
			to = {"г" .. p[1], "е" .. p[1], "и" .. p[1], "и" .. p[2], "к" .. p[1], "о" .. p[1], "у" .. p[1], "х" .. p[1], "ч" .. p[1]}
		},
	},
}

-- Bashkir. Single script (Cyrl), so translit and sort_key are given directly
-- rather than per script.
m["ba"] = {
	"Bashkir",
	13389,
	"trk-kbu",
	"Cyrl",
	translit = "ba-translit",
	override_translit = true,
	-- Parallel lists: each extra letter maps to a base letter plus p[1].
	sort_key = {
		from = {"ғ", "ҙ", "ё", "ҡ", "ң", "ө", "ҫ", "ү", "һ", "ә"},
		to = {"г" .. p[1], "д" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "с" .. p[1], "у" .. p[1], "х" .. p[1], "э" .. p[1]}
	},
}

-- Belarusian. Has rules for both Cyrillic and Latin script; a transliteration
-- module is listed for Cyrl only.
m["be"] = {
	"Belarusian",
	9091,
	"zle",
	"Cyrl, Latn",
	ancestors = "zle-obe",
	translit = {
		Cyrl = "be-translit",
	},
	entry_name = {
		Cyrl = {
			remove_diacritics = c.grave .. c.acute,
		},
		Latn = {
			remove_diacritics = c.grave .. c.acute,
			-- Letters listed as exceptions to the grave/acute removal above.
			remove_exceptions = {"Ć", "ć", "Ń", "ń", "Ś", "ś", "Ź", "ź"},
		},
	},
	sort_key = {
		Cyrl = {
			remove_diacritics = c.grave .. c.acute,
			from = {"ґ", "ё", "і", "ў"},
			to = {"г" .. p[1], "е" .. p[1], "и" .. p[1], "у" .. p[1]}
		},
		Latn = {
			remove_diacritics = c.grave .. c.acute,
			remove_exceptions = {"Ć", "ć", "Ń", "ń", "Ś", "ś", "Ź", "ź"},
			-- Parallel lists covering the extra letters and the digraphs
			-- dz, dź, dž and ch.
			from = {"ć", "č", "dz", "dź", "dž", "ch", "ł", "ń", "ś", "š", "ŭ", "ź", "ž"},
			to = {"c" .. p[1], "c" .. p[2], "d" .. p[1], "d" .. p[2], "d" .. p[3], "h" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]}
		},
	},
	standardChars = {
		Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",
		Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",
		-- Positional element: c.punc with the apostrophe removed. The outer
		-- parentheses keep only gsub's first result (the string) and drop the
		-- replacement count it also returns.
		(c.punc:gsub("'", "")) -- Exclude apostrophe.
	},
}

-- Bulgarian. Entry names and sort keys both remove grave and acute accents,
-- with one pattern-based exception.
m["bg"] = {
	"Bulgarian",
	7918,
	"zls",
	"Cyrl",
	ancestors = "cu-bgm",
	translit = "bg-translit",
	entry_name = {
		remove_diacritics = c.grave .. c.acute,
		-- Frontier pattern: matches "ѝ" only when it has start-of-string or
		-- whitespace before it and end-of-string or whitespace after it, i.e.
		-- when it stands alone as a word.
		remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
	},
	sort_key = {
		remove_diacritics = c.grave .. c.acute,
		remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
	},
	standardChars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc,
}

-- Bihari. Only the positional fields are set; no extra rules.
m["bh"] = {
	"Bihari",
	135305,
	"inc-eas",
	"Deva",
}

-- Bislama. Names "en" as its ancestor; no other rules.
m["bi"] = {
	"Bislama",
	35452,
	"crp",
	"Latn",
	ancestors = "en",
}

-- Bambara. Only the Latin script has a sort-key rule; nothing script-specific
-- is given for Nkoo.
m["bm"] = {
	"Bambara",
	33243,
	"dmn-emn",
	"Latn, Nkoo",
	sort_key = {
		Latn = {
			-- `from` and `to` are paired by position: each extra letter maps to a
			-- base letter followed by one of the p[n] characters.
			from = {
				"ɛ",
				"ɲ",
				"ŋ",
				"ɔ",
			},
			to = {
				"e" .. p[1],
				"n" .. p[1],
				"n" .. p[2],
				"o" .. p[1],
			},
		},
	},
}

-- Bengali. A transliteration module is given for the Beng script only; none
-- is listed for Newa.
m["bn"] = {
	"Bengali",
	9610,
	"inc-bas",
	"Beng, Newa",
	ancestors = "inc-mbn",
	translit = {
		Beng = "bn-translit"
	},
}

-- Tibetan. Single script, so every rule is given directly rather than per
-- script. display_text and entry_name are looked up in the shared table `s`,
-- whereas sort_key is the plain string "Tibt-sortkey".
m["bo"] = {
	"Tibetan",
	34271,
	"sit-tib",
	"Tibt", -- sometimes Deva?
	ancestors = "xct",
	translit = "Tibt-translit",
	override_translit = true,
	display_text = s["Tibt-displaytext"],
	entry_name = s["Tibt-entryname"],
	sort_key = "Tibt-sortkey",
}

-- Breton. The sort key treats the digraph "ch" and the trigraph "c'h" as
-- separate units keyed under c.
m["br"] = {
	"Breton",
	12107,
	"cel-brs",
	"Latn",
	ancestors = "xbm",
	sort_key = {
		-- The second pattern accepts any of three apostrophe characters
		-- (' ʼ ’) between c and h.
		from = {"ch", "c['ʼ’]h"},
		to = {"c" .. p[1], "c" .. p[2]}
	},
}

-- Catalan. The sort key removes the listed diacritics and the middle dot "·";
-- the standard characters include the middle dot and end with c.punc.
m["ca"] = {
	"Catalan",
	7026,
	"roa-ocr",
	"Latn",
	ancestors = "roa-oca",
	sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"},
	standardChars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
}

-- Chechen. Display-text and entry-name rules come from the shared table `s`;
-- only the Cyrillic sort key is defined inline.
m["ce"] = {
	"Chechen",
	33350,
	"cau-vay",
	"Cyrl, Latn, Arab",
	-- Transliteration module per script; none is listed for Latn.
	translit = {
		Cyrl = "cau-nec-translit",
		Arab = "ar-translit",
	},
	override_translit = true,
	display_text = {
		Cyrl = s["cau-Cyrl-displaytext"]
	},
	entry_name = {
		Cyrl = s["cau-Cyrl-entryname"],
		Latn = s["cau-Latn-entryname"],
	},
	sort_key = {
		Cyrl = {
			-- Parallel lists: each digraph (and ё) maps to a base letter followed
			-- by one of the p[n] characters.
			from = {"аь", "гӏ", "ё", "кх", "къ", "кӏ", "оь", "пӏ", "тӏ", "уь", "хь", "хӏ", "цӏ", "чӏ", "юь", "яь"},
			to = {"а" .. p[1], "г" .. p[1], "е" .. p[1], "к" .. p[1], "к" .. p[2], "к" .. p[3], "о" .. p[1], "п" .. p[1], "т" .. p[1], "у" .. p[1], "х" .. p[1], "х" .. p[2], "ц" .. p[1], "ч" .. p[1], "ю" .. p[1], "я" .. p[1]}
		},
	},
}

-- Chamorro. Sort key strips the apostrophe and rewrites å, ch, ñ, ng to a
-- base letter plus p[n].
m["ch"] = {
	"Chamorro", 33262, "poz", "Latn",
	sort_key = {
		from = {"å", "ch", "ñ", "ng"},
		to = {"a" .. p[1], "c" .. p[1], "n" .. p[1], "n" .. p[2]},
		remove_diacritics = "'",
	},
}

-- Corsican. Sort key rewrites chj, ghj, sc, sg to a base letter plus p[n].
m["co"] = {
	"Corsican", 33111, "roa-itr", "Latn",
	standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc,
	sort_key = {
		from = {"chj", "ghj", "sc", "sg"},
		to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]},
	},
}

-- Cree. Only the Cans script has a transliteration module listed.
m["cr"] = {
	"Cree", 33390, "alg", "Latn, Cans",
	translit = {Cans = "cr-translit"},
}

-- Czech. Sort key rewrites each accented letter to its base letter plus p[n];
-- the digraph "ch" is rewritten to "h" plus p[1].
m["cs"] = {
	"Czech", 9056, "zlw", "Latn",
	ancestors = "cs-ear",
	standardChars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc,
	sort_key = {
		from = {"á", "č", "ď", "é", "ě", "ch", "í", "ň", "ó", "ř", "š", "ť", "ú", "ů", "ý", "ž"},
		to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]},
	},
}

-- Old Church Slavonic. Cyrs reuses the shared entry-name and sort-key tables
-- from `s`; Glag has only a transliteration module.
m["cu"] = {
	"Old Church Slavonic", 35499, "zls", "Cyrs, Glag",
	translit = {
		Glag = "Glag-translit",
		Cyrs = "Cyrs-translit",
	},
	sort_key = {Cyrs = s["Cyrs-sortkey"]},
	entry_name = {Cyrs = s["Cyrs-entryname"]},
}

-- Chuvash. Sort key rewrites ӑ, ё, ӗ, ҫ, ӳ to a base letter plus p[n].
m["cv"] = {
	"Chuvash", 33348, "trk-ogr", "Cyrl",
	ancestors = "cv-mid",
	override_translit = true,
	translit = "cv-translit",
	sort_key = {
		from = {"ӑ", "ё", "ӗ", "ҫ", "ӳ"},
		to = {"а" .. p[1], "е" .. p[1], "е" .. p[2], "с" .. p[1], "у" .. p[1]},
	},
}

-- Welsh. Sort key strips c.grave, c.acute, c.circ, c.diaer and the apostrophe,
-- and rewrites the listed digraphs to their first letter plus p[1].
m["cy"] = {
	"Welsh", 9309, "cel-brw", "Latn",
	ancestors = "wlm",
	standardChars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc,
	sort_key = {
		from = {"ch", "dd", "ff", "ng", "ll", "ph", "rh", "th"},
		to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]},
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. "'",
	},
}

-- Danish. Sort key strips the listed diacritics (with "å" listed as an
-- exception) and rewrites æ, ø, å to "z" plus p[1..3].
m["da"] = {
	"Danish", 9035, "gmq-eas", "Latn",
	ancestors = "gmq-oda",
	standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc,
	sort_key = {
		from = {"æ", "ø", "å"},
		to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]},
		remove_exceptions = {"å"},
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron .. c.dacute .. c.caron .. c.cedilla,
	},
}

-- German. Latn and Latf share the same sort-key and standard-character data
-- from `s`; Brai uses c.braille. c.punc is the positional entry of standardChars.
m["de"] = {
	"German", 188, "gmw-hgm", "Latn, Latf, Brai",
	ancestors = "gmh",
	standardChars = {
		c.punc,
		Brai = c.braille,
		Latf = s["de-Latn-standardchars"],
		Latn = s["de-Latn-standardchars"],
	},
	sort_key = {
		Latf = s["de-Latn-sortkey"],
		Latn = s["de-Latn-sortkey"],
	},
}

-- Dhivehi. Both scripts have transliteration modules; override_translit is set.
m["dv"] = {
	"Dhivehi", 32656, "inc-ins", "Thaa, Diak",
	override_translit = true,
	translit = {
		Diak = "Diak-translit",
		Thaa = "dv-translit",
	},
}

-- Dzongkha. Same Tibt display-text, entry-name, sort-key and translit settings
-- as the Tibetan ("bo") entry.
m["dz"] = {
	"Dzongkha", 33081, "sit-tib", "Tibt",
	ancestors = "xct",
	display_text = s["Tibt-displaytext"],
	entry_name = s["Tibt-entryname"],
	sort_key = "Tibt-sortkey",
	translit = "Tibt-translit",
	override_translit = true,
}

-- Ewe. Sort key strips c.tilde and rewrites the listed letters and digraphs
-- to a base letter plus p[n].
m["ee"] = {
	"Ewe", 30005, "alv-gbe", "Latn",
	sort_key = {
		from = {"ɖ", "dz", "ɛ", "ƒ", "gb", "ɣ", "kp", "ny", "ŋ", "ɔ", "ts", "ʋ"},
		to = {"d" .. p[1], "d" .. p[2], "e" .. p[1], "f" .. p[1], "g" .. p[1], "g" .. p[2], "k" .. p[1], "n" .. p[1], "n" .. p[2], "o" .. p[1], "t" .. p[1], "v" .. p[1]},
		remove_diacritics = c.tilde,
	},
}

-- Greek. Grek and Polyt each reuse shared display-text, entry-name and
-- sort-key tables from `s`; Polyt is transliterated with "grc-translit".
m["el"] = {
	"Greek", 9129, "grk", "Grek, Polyt, Brai",
	ancestors = "el-kth",
	translit = {
		Polyt = "grc-translit",
		Grek = "el-translit",
	},
	override_translit = true,
	display_text = {
		Polyt = s["Polyt-displaytext"],
		Grek = s["Grek-displaytext"],
	},
	entry_name = {
		Polyt = s["Polyt-entryname"],
		Grek = s["Grek-entryname"],
	},
	sort_key = {
		Polyt = s["Polyt-sortkey"],
		Grek = s["Grek-sortkey"],
	},
	standardChars = {
		c.punc,
		Brai = c.braille,
		Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",
	},
}

-- English. The Latin sort key strips punctuation, whitespace and c.diacritics,
-- then maps each special letter matched by the single `from` character class
-- through the lookup table in `to`.
m["en"] = {
	"English", 1860, "gmw-ang",
	"Latn, Brai, Shaw, Dsrt", -- entries in Shaw or Dsrt might require prior discussion
	ancestors = "en-ear",
	wikimedia_codes = "en, simple",
	standardChars = {
		c.punc,
		Brai = c.braille,
		Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
	},
	sort_key = {
		Latn = {
			-- Many of these are needed for sorting language names.
			remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
			-- These are found in entry names.
			from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"},
			to = {{
				["ɒ"] = "a", ["æ"] = "ae", ["🅱"] = "b", ["¢"] = "c", ["©"] = "c",
				["ᴄ"] = "c", ["ð"] = "d", ["đ"] = "d", ["ə"] = "e", ["ǝ"] = "e",
				["ɜ"] = "e", ["ɡ"] = "g", ["ħ"] = "h", ["ʜ"] = "h", ["ı"] = "i",
				["ɨ"] = "i", ["ł"] = "l", ["ŋ"] = "n", ["ɲ"] = "n", ["ø"] = "o",
				["ɔ"] = "o", ["œ"] = "oe", ["ꝑ"] = "p", ["ꝓ"] = "p", ["ꝕ"] = "p",
				["ß"] = "ss", ["ʋ"] = "v",
			}},
		},
	},
}

-- Esperanto. Sort key strips c.grave and c.acute and rewrites the six
-- accented letters to a base letter plus p[1].
m["eo"] = {
	"Esperanto", 143, "art", "Latn",
	standardChars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc,
	sort_key = {
		from = {"ĉ", "ĝ", "ĥ", "ĵ", "ŝ", "ŭ"},
		to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]},
		remove_diacritics = c.grave .. c.acute,
	},
}

-- Spanish. The Latin sort key strips the listed diacritics (with "ñ" as an
-- exception), expands ª, æ, º, œ and rewrites ñ to "n" plus p[1].
m["es"] = {
	"Spanish", 1321, "roa-cas", "Latn, Brai",
	ancestors = "es-ear",
	standardChars = {
		c.punc,
		Brai = c.braille,
		Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",
	},
	sort_key = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron .. c.diaer .. c.cedilla,
			remove_exceptions = {"ñ"},
			from = {"ª", "æ", "ñ", "º", "œ"},
			to = {"a", "ae", "n" .. p[1], "o", "oe"},
		},
	},
}

-- Estonian. Sort key rewrites š, z, ž to "s" plus p[1], p[2], p[3] and
-- õ, ä, ö, ü to "w" plus p[1..4]. The accented letters come before plain "z"
-- in `from`.
m["et"] = {
	"Estonian", 9072, "urj-fin", "Latn",
	standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc,
	sort_key = {
		from = {"š", "ž", "õ", "ä", "ö", "ü", "z"},
		to = {"s" .. p[1], "s" .. p[3], "w" .. p[1], "w" .. p[2], "w" .. p[3], "w" .. p[4], "s" .. p[2]},
	},
}

-- Basque. Sort key rewrites ç and ñ to a base letter plus p[1].
m["eu"] = {
	"Basque", 8752, "euq", "Latn",
	standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc,
	sort_key = {
		from = {"ç", "ñ"},
		to = {"c" .. p[1], "n" .. p[1]},
	},
}

-- Persian. Hebr uses "Hebr-common" for display text, entry names and sort
-- keys; fa-Arab entry names are normalised as described below.
m["fa"] = {
	"Persian", 9168, "ira-swi", "fa-Arab, Hebr",
	ancestors = "fa-cls",
	display_text = {Hebr = "Hebr-common"},
	sort_key = {Hebr = "Hebr-common"},
	entry_name = {
		Hebr = "Hebr-common",
		["fa-Arab"] = {
			-- Strip the short-vowel and related marks listed here.
			remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
			-- "هٔ" (U+0647 + U+0654) becomes "ه"; hamzatu l-waṣli becomes a regular alif.
			-- NOTE(review): the precomposed "ۂ" (U+06C2) is not listed in `from`;
			-- presumably it is covered by normalisation elsewhere -- confirm.
			from = {"هٔ", "ٱ"},
			to = {"ه", "ا"},
		},
	},
}

-- Fula: no custom text-processing rules.
m["ff"] = {"Fula", 33454, "alv-fwo", "Latn, Adlm"}

-- Finnish. Display text turns the straight apostrophe into "’" and entry
-- names turn it back; the sort key is described inline below.
m["fi"] = {
	"Finnish",
	1412,
	"urj-fin",
	"Latn",
	display_text = {
		from = {"'"},
		to = {"’"}
	},
	entry_name = { -- used to indicate gemination of the next consonant
		remove_diacritics = "ˣ",
		from = {"’"},
		to = {"'"},
	},
	sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö".
		-- Strip apostrophe, colon and everything in c.diacritics ...
		remove_diacritics = "':" .. c.diacritics,
		-- ... except on the base+mark sequences matched by these patterns.
		remove_exceptions = {
			"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ
			"o[" .. c.diaer .. c.tilde .. c.dacute .. c.small_e .. "]", -- öõőoͤ
			"u[" .. c.diaer .. c.dacute .. "]" -- üű
		},
		-- `from` and `to` are parallel lists (13 entries each): foreign letters
		-- are expanded, ü/ű become "y", å becomes "z" .. p[1], the remaining
		-- a/o variants fold to ä/ö, and the final pattern keeps the character
		-- before an apostrophe or hyphen while dropping the mark itself.
		from = {"æ", "[ðđ]", "ł", "ŋ", "œ", "ß", "þ", "u[" .. c.diaer .. c.dacute .. "]", "å", "aͤ", "o[" .. c.tilde .. c.dacute .. c.small_e .. "]", "ø", "(.)['%-]"},
		to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"}
	},
	standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc,
}

-- Fijian: no custom text-processing rules.
m["fj"] = {"Fijian", 33295, "poz-pcc", "Latn"}

-- Faroese. Sort key rewrites accented vowels and ð to a base letter plus
-- p[1], and æ, ø to "z" plus p[1], p[2].
m["fo"] = {
	"Faroese", 25258, "gmq-ins", "Latn",
	standardChars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc,
	sort_key = {
		from = {"á", "ð", "í", "ó", "ú", "ý", "æ", "ø"},
		to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]},
	},
}

-- French. For Latn, display text turns "'" into "’" and entry names turn it
-- back; the sort key is the shared "roa-oil-sortkey" table from `s`.
m["fr"] = {
	"French", 150, "roa-oil", "Latn, Brai",
	ancestors = "frm",
	display_text = {
		Latn = {from = {"'"}, to = {"’"}},
	},
	entry_name = {
		Latn = {from = {"’"}, to = {"'"}},
	},
	sort_key = {Latn = s["roa-oil-sortkey"]},
	standardChars = {
		c.punc,
		Brai = c.braille,
		Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",
	},
}

-- West Frisian. Sort key strips c.grave, c.acute, c.circ, c.diaer and
-- rewrites "y" to "i".
m["fy"] = {
	"West Frisian", 27175, "gmw-fri", "Latn",
	standardChars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc,
	sort_key = {
		from = {"y"},
		to = {"i"},
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer,
	},
}

-- Irish. Sort key strips c.acute and expands each dotted consonant to the
-- consonant followed by "h".
m["ga"] = {
	"Irish", 9142, "cel-gae", "Latn, Latg",
	ancestors = "mga",
	standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc,
	sort_key = {
		from = {"ḃ", "ċ", "ḋ", "ḟ", "ġ", "ṁ", "ṗ", "ṡ", "ṫ"},
		to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"},
		remove_diacritics = c.acute,
	},
}

-- Scottish Gaelic. Sort key strips c.grave and c.acute.
m["gd"] = {
	"Scottish Gaelic", 9314, "cel-gae", "Latn, Latg",
	ancestors = "mga",
	standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc,
	sort_key = {
		remove_diacritics = c.grave .. c.acute,
	},
}

-- Galician. Sort key strips c.acute and rewrites ñ to "n" plus p[1].
m["gl"] = {
	"Galician", 9307, "roa-gap", "Latn",
	standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc,
	sort_key = {
		from = {"ñ"},
		to = {"n" .. p[1]},
		remove_diacritics = c.acute,
	},
}

-- Guaraní: no custom text-processing rules.
m["gn"] = {"Guaraní", 35876, "tup-gua", "Latn"}

-- Gujarati. Only Gujr has a transliteration module. Entry names strip the
-- listed vowel marks in Arabic script and the nukta in Gujarati script.
m["gu"] = {
	"Gujarati",
	5137,
	"inc-wes",
	"Arab, Gujr",
	ancestors = "inc-mgu",
	translit = {
		Gujr = "gu-translit",
	},
	entry_name = {
		-- Each mark is listed exactly once, matching the equivalent lists in
		-- the other Arabic-script entries of this file.
		Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun},
		Gujr = {remove_diacritics = "઼"},
	},
}

-- Manx. Sort key strips c.cedilla and the hyphen.
m["gv"] = {
	"Manx", 12175, "cel-gae", "Latn",
	ancestors = "mga",
	standardChars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc,
	sort_key = {
		remove_diacritics = c.cedilla .. "-",
	},
}

-- Hausa. Latin entry names strip tone and length marks; the Latin sort key
-- rewrites hooked letters, apostrophe digraphs and "sh" to a base letter plus p[n].
m["ha"] = {
	"Hausa", 56475, "cdc-wst", "Latn, Arab",
	sort_key = {
		Latn = {
			from = {"ɓ", "b'", "ɗ", "d'", "ƙ", "k'", "sh", "ƴ", "'y"},
			to = {"b" .. p[1], "b" .. p[2], "d" .. p[1], "d" .. p[2], "k" .. p[1], "k" .. p[2], "s" .. p[1], "y" .. p[1], "y" .. p[2]},
		},
	},
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron,
		},
	},
}

-- Hebrew. Hebr uses "Hebr-common" throughout; Samr reuses the shared
-- entry-name and sort-key tables from `s`.
m["he"] = {
	"Hebrew", 9288, "sem-can", "Hebr, Phnx, Brai, Samr",
	ancestors = "he-med",
	display_text = {Hebr = "Hebr-common"},
	sort_key = {
		Samr = s["Samr-sortkey"],
		Hebr = "Hebr-common",
	},
	entry_name = {
		Samr = s["Samr-entryname"],
		Hebr = "Hebr-common",
	},
}

m["hi"] = {
	"Hindi",
	1568,
	"inc-hnd",
	"Deva, Kthi, Newa",
	translit = {
		Deva = "hi-translit"
	},
	standardChars = {
		Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",
		c.punc
	},
}

-- Hiri Motu: only an ancestor is declared.
m["ho"] = {
	"Hiri Motu", 33617, "crp", "Latn",
	ancestors = "meu",
}

-- Haitian Creole. Sort key rewrites multigraphs and accented vowels to a base
-- letter plus p[n]. The three-character "oun" comes first in `from`, ahead of
-- the two-character sequences.
m["ht"] = {
	"Haitian Creole", 33491, "crp", "Latn",
	ancestors = "ht-sdm",
	sort_key = {
		from = {"oun", "an", "ch", "è", "en", "ng", "ò", "on", "ou", "ui"},
		to = {"o" .. p[4], "a" .. p[1], "c" .. p[1], "e" .. p[1], "e" .. p[2], "n" .. p[1], "o" .. p[1], "o" .. p[2], "o" .. p[3], "u" .. p[1]},
	},
}

-- Hungarian. The Latin sort key rewrites multigraphs and accented vowels to
-- a base letter plus p[n]. The trigraph "dzs" comes first in `from`, ahead of
-- the shorter sequences.
m["hu"] = {
	"Hungarian", 9067, "urj-ugr", "Latn, Hung",
	ancestors = "ohu",
	standardChars = {
		c.punc,
		Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",
	},
	sort_key = {
		Latn = {
			from = {"dzs", "á", "cs", "dz", "é", "gy", "í", "ly", "ny", "ó", "ö", "ő", "sz", "ty", "ú", "ü", "ű", "zs"},
			to = {"d" .. p[2], "a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "o" .. p[2], "o" .. p[3], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "u" .. p[3], "z" .. p[1]},
		},
	},
}

-- Armenian. Entry names strip four punctuation marks and normalise ligature,
-- superscript and apostrophe variants; the sort key rewrites "ու" to "ւ" and
-- both "եւ" and "և" to "եվ" (two-character sequences come first).
m["hy"] = {
	"Armenian", 8785, "hyx", "Armn, Brai",
	ancestors = "axm",
	override_translit = true,
	translit = {Armn = "Armn-translit"},
	entry_name = {
		Armn = {
			from = {"եւ", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>", "յ̵", "ՙ", "՚"},
			to = {"և", "յ", "ի", "է", "ֈ", "ʻ", "’"},
			remove_diacritics = "՛՜՞՟",
		},
	},
	sort_key = {
		Armn = {
			from = {"ու", "եւ", "և"},
			to = {"ւ", "եվ", "եվ"},
		},
	},
}

-- Herero: no custom text-processing rules.
m["hz"] = {"Herero", 33315, "bnt-swb", "Latn"}

-- Interlingua: no custom text-processing rules.
m["ia"] = {"Interlingua", 35934, "art", "Latn"}

-- Indonesian. Standard characters are the basic Latin alphabet plus c.punc.
m["id"] = {
	"Indonesian", 9240, "poz-mly", "Latn",
	standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc,
	ancestors = "ms",
}

-- Interlingue. Typed "appendix-constructed"; entry names strip c.grave,
-- c.acute and c.circ.
m["ie"] = {
	"Interlingue", 35850, "art", "Latn",
	type = "appendix-constructed",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ,
	},
}

-- Igbo. Entry names strip c.grave, c.acute, c.macron; the sort key rewrites
-- digraphs and dotted letters to a base letter plus p[n].
m["ig"] = {
	"Igbo", 33578, "alv-igb", "Latn",
	sort_key = {
		from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
		to = {"g" .. p[1], "g" .. p[2], "g" .. p[3], "i" .. p[1], "k" .. p[1], "k" .. p[2], "n" .. p[1], "n" .. p[2], "n" .. p[3], "o" .. p[1], "s" .. p[1], "u" .. p[1]},
	},
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.macron,
	},
}

-- Nuosu: only a transliteration module is declared.
m["ii"] = {
	"Nuosu", 34235, "tbq-nlo", "Yiii",
	translit = "ii-translit",
}

-- Inupiaq. Sort key rewrites the listed letters and digraphs to a base letter
-- plus p[n]. Two-character sequences come first in `from`, then the single
-- characters; "ng" and "ŋ" share one key.
m["ik"] = {
	"Inupiaq", 27183, "esx-inu", "Latn",
	sort_key = {
		from = {"ch", "ġ", "dj", "ḷ", "ł̣", "ñ", "ng", "r̂", "sr", "zr", "ł", "ŋ", "ʼ"},
		to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "l" .. p[1], "l" .. p[3], "n" .. p[1], "n" .. p[2], "r" .. p[1], "s" .. p[1], "z" .. p[1], "l" .. p[2], "n" .. p[2], "z" .. p[2]},
	},
}

-- Ido: no custom text-processing rules.
m["io"] = {"Ido", 35224, "art", "Latn"}

-- Icelandic. Sort key rewrites accented vowels and ð to a base letter plus
-- p[1], and þ, æ, ö to "z" plus p[1..3].
m["is"] = {
	"Icelandic", 294, "gmq-ins", "Latn",
	standardChars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc,
	sort_key = {
		from = {"á", "ð", "é", "í", "ó", "ú", "ý", "þ", "æ", "ö"},
		to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]},
	},
}

-- Italian. Sort key strips c.grave, c.acute, c.circ, c.diaer, c.ringabove.
m["it"] = {
	"Italian", 652, "roa-itr", "Latn",
	ancestors = "roa-oit",
	standardChars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc,
	sort_key = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove,
	},
}

-- Inuktitut. Cans is transliterated with "cr-translit"; override_translit is set.
m["iu"] = {
	"Inuktitut", 29921, "esx-inu", "Cans, Latn",
	override_translit = true,
	translit = {Cans = "cr-translit"},
}

-- Japanese. Transliteration, display text, entry names and sort keys all
-- reuse the shared "jpx-*" values from `s`.
m["ja"] = {
	"Japanese", 5287, "jpx", "Jpan, Latn, Brai",
	ancestors = "ja-ear",
	display_text = s["jpx-displaytext"],
	entry_name = s["jpx-entryname"],
	sort_key = s["jpx-sortkey"],
	translit = s["jpx-translit"],
	link_tr = true,
}

-- Javanese. Java has a transliteration module; Latin entry names strip
-- c.circ and the Latin sort key rewrites the listed letters and digraphs to
-- a base letter plus p[n].
m["jv"] = {
	"Javanese", 33549, "poz", "Latn, Java",
	ancestors = "kaw",
	link_tr = true,
	translit = {Java = "jv-translit"},
	entry_name = {
		-- Modern Javanese does not use ê.
		Latn = {remove_diacritics = c.circ},
	},
	sort_key = {
		Latn = {
			from = {"å", "dh", "é", "è", "ng", "ny", "th"},
			to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "n" .. p[1], "n" .. p[2], "t" .. p[1]},
		},
	},
}

-- Georgian. Geor and Geok share the "ka-entryname" table from `s`; Hebr uses
-- "Hebr-common" for display text, entry names and sort keys.
m["ka"] = {
	"Georgian", 8108, "ccs-gzn",
	"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian
	ancestors = "ka-mid",
	override_translit = true,
	translit = {
		Geok = "Geok-translit",
		Geor = "Geor-translit",
	},
	display_text = {Hebr = "Hebr-common"},
	sort_key = {Hebr = "Hebr-common"},
	entry_name = {
		Hebr = "Hebr-common",
		Geok = s["ka-entryname"],
		Geor = s["ka-entryname"],
	},
}

-- Kongo: no custom text-processing rules.
m["kg"] = {"Kongo", 33702, "bnt-kng", "Latn"}

-- Kikuyu: no custom text-processing rules.
m["ki"] = {"Kikuyu", 33587, "bnt-kka", "Latn"}

-- Kwanyama: no custom text-processing rules.
m["kj"] = {"Kwanyama", 1405077, "bnt-ova", "Latn"}

m["kk"] = {
	"Kazakh",
	9252,
	"trk-kno",
	"Cyrl, Latn, kk-Arab",
	translit = {
		Cyrl = {
			from = {
				"Ё", "ё", "Й", "й", "Нг", "нг", "Ӯ", "ӯ", -- 2 chars; are "Ӯ" and "ӯ" actually used?
				"А", "а", "Ә", "ә", "Б", "б", "В", "в", "Г", "г", "Ғ", "ғ", "Д", "д", "Е", "е", "Ж", "ж", "З", "з", "И", "и", "К", "к", "Қ", "қ", "Л", "л", "М", "м", "Н", "н", "Ң", "ң", "О", "о", "Ө", "ө", "П", "п", "Р", "р", "С", "с", "Т", "т", "У", "у", "Ұ", "ұ", "Ү", "ү", "Ф", "ф", "Х", "х", "Һ", "һ", "Ц", "ц", "Ч", "ч", "Ш", "ш", "Щ", "щ", "Ъ", "ъ", "Ы", "ы", "І", "і", "Ь", "ь", "Э", "э", "Ю", "ю", "Я", "я", -- 1 char
			},
			to = {
				"E", "e", "İ", "i", "Ñ", "ñ", "U", "u",
				"A", "a", "Ä", "ä", "B", "b", "V", "v", "G", "g", "Ğ", "ğ", "D", "d", "E", "e", "J", "j", "Z", "z", "İ", "i", "K", "k", "Q", "q", "L", "l", "M", "m", "N", "n", "Ñ", "ñ", "O", "o", "Ö", "ö", "P", "p", "R", "r", "S", "s", "T", "t", "U", "u", "Ū", "ū", "Ü", "ü", "F", "f", "X", "x", "H", "h", "S", "s", "Ç", "ç", "Ş", "ş", "Ş", "ş", "", "", "Y", "y", "I", "ı", "", "", "É", "é", "Ü", "ü", "Ä", "ä",
			}
		}
	},
--	override_translit = true,
	sort_key = {
		Cyrl = {
			from = {"ә", "ғ", "ё", "қ", "ң", "ө", "ұ", "ү", "һ", "і"},
			to = {"а" .. p[1], "г" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1], "у" .. p[2], "х" .. p[1], "ы" .. p[1]}
		},
	},
	standardChars = {
		Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",
		c.punc
	},
}

-- Greenlandic. Sort key rewrites æ, ø, å to "z" plus p[1..3].
m["kl"] = {
	"Greenlandic", 25355, "esx-inu", "Latn",
	sort_key = {
		from = {"æ", "ø", "å"},
		to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]},
	},
}

-- Khmer: an ancestor and a transliteration module are declared.
m["km"] = {
	"Khmer", 9205, "mkh-kmr", "Khmr",
	translit = "km-translit",
	ancestors = "xhm",
}

-- Kannada. Only the Knda script has a transliteration module listed.
m["kn"] = {
	"Kannada", 33673, "dra-kan", "Knda, Tutg",
	ancestors = "dra-mkn",
	translit = {Knda = "kn-translit"},
}

-- Korean. Kore has a transliteration module and reuses the shared
-- "Kore-entryname" table from `s`.
m["ko"] = {
	"Korean", 9176, "qfa-kor", "Kore, Brai",
	ancestors = "ko-ear",
	entry_name = {Kore = s["Kore-entryname"]},
	translit = {Kore = "ko-translit"},
}

-- Kanuri. Latin entry names strip c.grave, c.acute, c.circ, c.breve; the
-- Latin sort key rewrites ǝ, ny, ɍ, sh to a base letter plus p[1].
m["kr"] = {
	"Kanuri", 36094, "ssa-sah", "Latn, Arab",
	-- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
	sort_key = {
		Latn = {
			from = {"ǝ", "ny", "ɍ", "sh"},
			to = {"e" .. p[1], "n" .. p[1], "r" .. p[1], "s" .. p[1]},
		},
	},
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve,
		},
	},
}

-- Kashmiri. Three of the four scripts have transliteration modules (not Latn).
m["ks"] = {
	"Kashmiri", 33552, "inc-kas", "ks-Arab, Deva, Shrd, Latn",
	translit = {
		Shrd = "Shrd-translit",
		Deva = "ks-Deva-translit",
		["ks-Arab"] = "ks-Arab-translit",
	},
}

-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT

-- Cornish. Sort key rewrites "ch" to "c" plus p[1].
m["kw"] = {
	"Cornish", 25289, "cel-brs", "Latn",
	ancestors = "cnx",
	sort_key = {
		from = {"ch"},
		to = {"c" .. p[1]},
	},
}

-- Kyrgyz. Cyrl has a transliteration module; the Cyrillic sort key rewrites
-- ё, ң, ө, ү to a base letter plus p[1].
m["ky"] = {
	"Kyrgyz", 9255, "trk-kkp", "Cyrl, Latn, Arab",
	override_translit = true,
	translit = {Cyrl = "ky-translit"},
	sort_key = {
		Cyrl = {
			from = {"ё", "ң", "ө", "ү"},
			to = {"е" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1]},
		},
	},
}

-- Latin. Display text, entry names and sort keys for Latn reuse the shared
-- "itc-Latn-*" tables from `s`.
m["la"] = {
	"Latin", 397, "itc-laf", "Latn",
	ancestors = "itc-ola",
	display_text = {Latn = s["itc-Latn-displaytext"]},
	entry_name = {Latn = s["itc-Latn-entryname"]},
	sort_key = {Latn = s["itc-Latn-sortkey"]},
	standardChars = {
		c.punc,
		Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx",
	},
}

-- Luxembourgish. The Latin sort key rewrites ä, ë, é to "z" plus p[1..3].
m["lb"] = {
	"Luxembourgish", 9051, "gmw-hgm", "Latn, Brai",
	ancestors = "gmw-cfr",
	sort_key = {
		Latn = {
			from = {"ä", "ë", "é"},
			to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]},
		},
	},
}

-- Luganda. Entry names strip c.acute and c.circ; the sort key rewrites ŋ to
-- "n" plus p[1].
m["lg"] = {
	"Luganda", 33368, "bnt-nyg", "Latn",
	sort_key = {
		from = {"ŋ"},
		to = {"n" .. p[1]},
	},
	entry_name = {
		remove_diacritics = c.acute .. c.circ,
	},
}

-- Limburgish: only an ancestor is declared.
m["li"] = {
	"Limburgish", 102172, "gmw-frk", "Latn",
	ancestors = "dum",
}

-- Lingala. Sort key strips c.acute, c.circ, c.caron and rewrites the listed
-- letters and digraphs to a base letter plus p[n].
m["ln"] = {
	"Lingala", 36217, "bnt-bmo", "Latn",
	sort_key = {
		from = {"ɛ", "gb", "mb", "mp", "nd", "ng", "nk", "ns", "nt", "ny", "nz", "ɔ"},
		to = {"e" .. p[1], "g" .. p[1], "m" .. p[1], "m" .. p[2], "n" .. p[1], "n" .. p[2], "n" .. p[3], "n" .. p[4], "n" .. p[5], "n" .. p[6], "n" .. p[7], "o" .. p[1]},
		remove_diacritics = c.acute .. c.circ .. c.caron,
	},
}

-- Lao. Uses the named "Laoo-sortkey" and "lo-translit" modules.
m["lo"] = {
	"Lao", 9211, "tai-swe", "Laoo",
	standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc,
	sort_key = "Laoo-sortkey",
	translit = "lo-translit",
}

-- Lithuanian. Display text, entry names and sort keys all use "lt-common".
m["lt"] = {
	"Lithuanian", 9083, "bat-eas", "Latn",
	ancestors = "olt",
	standardChars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
	sort_key = "lt-common",
	entry_name = "lt-common",
	display_text = "lt-common",
}

-- Luba-Katanga: no custom text-processing rules.
m["lu"] = {"Luba-Katanga", 36157, "bnt-lub", "Latn"}

-- Latvian. Entry names go through the multi-stage tone-mark conversion
-- described below; the sort key rewrites each accented letter to its base
-- letter plus p[1].
m["lv"] = {
	"Latvian",
	9078,
	"bat-eas",
	"Latn",
	entry_name = {
		-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient.
		-- `from` has seven patterns but `to` has six replacements: the final
		-- bare-tilde pattern has no counterpart, which matches the "delete any
		-- other tilde" step above (presumably a missing replacement is treated
		-- as the empty string by the consumer -- confirm).
		from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde},
		to = {"%1", c.tilde, "%1%2%3", "%1%2", "%1%2", "%1" .. c.macron}
	},
	sort_key = {
		from = {"ā", "č", "ē", "ģ", "ī", "ķ", "ļ", "ņ", "š", "ū", "ž"},
		to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]}
	},
	standardChars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc,
}

-- Malagasy: no custom text-processing rules.
m["mg"] = {"Malagasy", 7930, "poz-bre", "Latn, Arab"}

-- Marshallese. Sort key rewrites the listed accented letters to a base
-- letter plus p[n].
m["mh"] = {
	"Marshallese", 36280, "poz-mic", "Latn",
	sort_key = {
		from = {"ā", "ļ", "m̧", "ņ", "n̄", "o̧", "ō", "ū"},
		to = {"a" .. p[1], "l" .. p[1], "m" .. p[1], "n" .. p[1], "n" .. p[2], "o" .. p[1], "o" .. p[2], "u" .. p[1]},
	},
}

-- Maori. Sort key strips c.macron and rewrites "ng", "wh" to "z" plus p[1], p[2].
m["mi"] = {
	"Maori", 36451, "poz-pep", "Latn",
	sort_key = {
		from = {"ng", "wh"},
		to = {"z" .. p[1], "z" .. p[2]},
		remove_diacritics = c.macron,
	},
}

-- Macedonian. Cyrillic entry names strip c.acute except on Ѓ/ѓ/Ќ/ќ; the
-- Cyrillic sort key strips c.grave and rewrites the seven listed letters to
-- a base letter plus p[1]. Polyt reuses the shared tables from `s`.
m["mk"] = {
	"Macedonian", 9296, "zls", "Cyrl, Polyt",
	ancestors = "cu",
	translit = {Cyrl = "mk-translit"},
	display_text = {Polyt = s["Polyt-displaytext"]},
	entry_name = {
		Polyt = s["Polyt-entryname"],
		Cyrl = {
			remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"},
			remove_diacritics = c.acute,
		},
	},
	sort_key = {
		Polyt = s["Polyt-sortkey"],
		Cyrl = {
			remove_exceptions = {"ѓ", "ќ"},
			remove_diacritics = c.grave,
			from = {"ѓ", "ѕ", "ј", "љ", "њ", "ќ", "џ"},
			to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]},
		},
	},
	standardChars = {
		c.punc,
		Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш",
	},
}

-- Malayalam: a transliteration module with override_translit set.
m["ml"] = {
	"Malayalam", 36236, "dra-mal", "Mlym",
	override_translit = true,
	translit = "ml-translit",
}

-- Mongolian. Cyrillic entry names strip c.grave and c.acute; the Cyrillic
-- sort key strips c.grave and rewrites ё, ө, ү to a base letter plus p[1].
-- Mong reuses the shared display-text and entry-name tables from `s`.
m["mn"] = {
	"Mongolian", 9246, "xgn-cen", "Cyrl, Mong, Latn, Brai",
	ancestors = "cmg",
	override_translit = true,
	translit = {
		Mong = "Mong-translit",
		Cyrl = "mn-translit",
	},
	display_text = {Mong = s["Mong-displaytext"]},
	entry_name = {
		Mong = s["Mong-entryname"],
		Cyrl = {remove_diacritics = c.grave .. c.acute},
	},
	sort_key = {
		Cyrl = {
			from = {"ё", "ө", "ү"},
			to = {"е" .. p[1], "о" .. p[1], "у" .. p[1]},
			remove_diacritics = c.grave,
		},
	},
	standardChars = {
		c.punc,
		Brai = c.braille,
		Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",
	},
}

-- "mo" IS TREATED AS "ro", SEE WT:LT

-- Marathi. Both scripts have transliteration modules; Devanagari entry names
-- replace the three listed nukta forms with the plain consonants.
m["mr"] = {
	"Marathi", 1571, "inc-sou", "Deva, Modi",
	ancestors = "omr",
	translit = {
		Modi = "mr-Modi-translit",
		Deva = "mr-translit",
	},
	entry_name = {
		Deva = {
			from = {"च़", "ज़", "झ़"},
			to = {"च", "ज", "झ"},
		},
	},
}

-- Malay. Standard characters for Latn are the basic Latin alphabet; c.punc is
-- the positional entry.
m["ms"] = {
	"Malay", 9237, "poz-mly", "Latn, ms-Arab",
	ancestors = "ms-cla",
	standardChars = {
		c.punc,
		Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
	},
}

-- Maltese. Display text turns "'" into "’" and entry names turn it back. The
-- sort key is a staged rewrite: `from` and `to` are parallel lists (ten
-- entries each) and later entries depend on the output of earlier ones, so
-- their order must not change.
m["mt"] = {
	"Maltese",
	9166,
	"sem-arb",
	"Latn",
	display_text = {
		from = {"'"},
		to = {"’"}
	},
	entry_name = {
		from = {"’"},
		to = {"'"},
	},
	ancestors = "sqr",
	sort_key = {
		from = {
			"ċ", "ġ", "ż", -- Convert into PUA so that decomposed form does not get caught by the next step.
			"([cgz])", -- Ensure "c" comes after "ċ", "g" comes after "ġ" and "z" comes after "ż".
			"g" .. p[1] .. "ħ", -- "għ" after initial conversion of "g".
			p[3], p[4], "ħ", "ie", p[5] -- Convert "ċ", "ġ", "ħ", "ie", "ż" into final output.
		},
		to = {
			p[3], p[4], p[5], -- temporary placeholders for ċ, ġ, ż
			"%1" .. p[1], -- plain c/g/z gain a p[1] suffix
			"g" .. p[2], -- għ
			"c", "g", "h" .. p[1], "i" .. p[1], "z" -- placeholders resolve to bare letters; ħ and ie gain p[1]
		}
	},
}

-- Burmese. Sort key replaces the four medial signs and "ဿ" with the
-- sequences listed in `to`.
m["my"] = {
	"Burmese", 9228, "tbq-brm", "Mymr",
	ancestors = "obr",
	override_translit = true,
	translit = "my-translit",
	sort_key = {
		from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},
		to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"},
	},
}

-- Nauruan: no custom text-processing rules.
m["na"] = {"Nauruan", 13307, "poz-mic", "Latn"}

-- Norwegian Bokmål. Sort key and standard characters reuse the shared "no-*"
-- values from `s`.
m["nb"] = {
	"Norwegian Bokmål", 25167, "gmq", "Latn",
	ancestors = "gmq-mno, da",  -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion
	wikimedia_codes = "no",
	standardChars = s["no-standardchars"],
	sort_key = s["no-sortkey"],
}

-- Northern Ndebele. Entry names strip c.grave, c.acute, c.circ, c.macron, c.caron.
m["nd"] = {
	"Northern Ndebele", 35613, "bnt-ngu", "Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
}

-- Nepali. Only the Deva script has a transliteration module listed.
m["ne"] = {
	"Nepali", 33823, "inc-pah", "Deva, Newa",
	translit = {Deva = "ne-translit"},
}

-- Ndonga: no custom text-processing rules.
m["ng"] = {"Ndonga", 33900, "bnt-ova", "Latn"}

-- Dutch. The Latin sort key strips the listed diacritics and the apostrophe.
m["nl"] = {
	"Dutch", 7411, "gmw-frk", "Latn, Brai",
	ancestors = "dum",
	standardChars = {
		c.punc,
		Brai = c.braille,
		Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
	},
	sort_key = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'",
		},
	},
}

-- Norwegian Nynorsk. Entry names strip c.grave and c.acute; sort key and
-- standard characters reuse the shared "no-*" values from `s`.
m["nn"] = {
	"Norwegian Nynorsk", 25164, "gmq-wes", "Latn",
	ancestors = "gmq-mno",
	standardChars = s["no-standardchars"],
	sort_key = s["no-sortkey"],
	entry_name = {remove_diacritics = c.grave .. c.acute},
}

-- Norwegian. Sort key and standard characters reuse the shared "no-*" values
-- from `s`.
m["no"] = {
	"Norwegian", 9043, "gmq-wes", "Latn",
	ancestors = "gmq-mno",
	standardChars = s["no-standardchars"],
	sort_key = s["no-sortkey"],
}

-- Southern Ndebele. Entry names strip c.grave, c.acute, c.circ, c.macron, c.caron.
m["nr"] = {
	"Southern Ndebele", 36785, "bnt-ngu", "Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
}

-- Navajo. Sort key strips c.acute and c.ogonek, then rewrites multigraphs to
-- a base letter plus p[n]. `from` lists three-character sequences first, then
-- two-character ones, then the single characters.
m["nv"] = {
	"Navajo", 13310, "apa", "Latn, Brai",
	sort_key = {
		from = {"chʼ", "tłʼ", "tsʼ", "ch", "dl", "dz", "gh", "hw", "kʼ", "kw", "sh", "tł", "ts", "zh", "ł", "ʼ"},
		to = {"c" .. p[2], "t" .. p[2], "t" .. p[4], "c" .. p[1], "d" .. p[1], "d" .. p[2], "g" .. p[1], "h" .. p[1], "k" .. p[1], "k" .. p[2], "s" .. p[1], "t" .. p[1], "t" .. p[3], "z" .. p[1], "l" .. p[1], "z" .. p[2]},
		remove_diacritics = c.acute .. c.ogonek,
	},
}

-- Chichewa. Entry names strip c.acute and c.circ; the sort key rewrites
-- "ng'" to "ng".
m["ny"] = {
	"Chichewa", 33273, "bnt-nys", "Latn",
	sort_key = {
		from = {"ng'"},
		to = {"ng"},
	},
	entry_name = {
		remove_diacritics = c.acute .. c.circ,
	},
}

-- Occitan. Hebr uses "Hebr-common" throughout; the Latin sort key strips the
-- listed diacritics and drops the "·" in l·h / n·h / s·h.
m["oc"] = {
	"Occitan", 14185, "roa-ocr", "Latn, Hebr",
	ancestors = "pro",
	display_text = {Hebr = "Hebr-common"},
	entry_name = {Hebr = "Hebr-common"},
	sort_key = {
		Hebr = "Hebr-common",
		Latn = {
			from = {"([lns])·h"},
			to = {"%1h"},
			remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla,
		},
	},
}

-- Ojibwe. The Latin sort key rewrites long vowels, ʼ, sh and zh to a base
-- letter plus p[1].
m["oj"] = {
	"Ojibwe", 33875, "alg", "Cans, Latn",
	sort_key = {
		Latn = {
			from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},
			to = {"a" .. p[1], "h" .. p[1], "i" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1]},
		},
	},
}

-- Oromo: no custom text-processing rules.
m["om"] = {"Oromo", 33864, "cus-eas", "Latn, Ethi"}

-- Odia: an ancestor and a transliteration module are declared.
m["or"] = {
	"Odia", 33810, "inc-eas", "Orya",
	translit = "or-translit",
	ancestors = "inc-mor",
}

-- Ossetian. Display text and entry names swap Latin "æ" and Cyrillic "ӕ" to
-- match the script; Cyrillic entry names also strip c.grave and c.acute. The
-- Cyrillic sort key rewrites ӕ, ё and the listed digraphs to a base letter plus p[n].
m["os"] = {
	"Ossetian", 33968, "xsc-sar", "Cyrl, Geor, Latn",
	ancestors = "oos",
	override_translit = true,
	translit = {
		Geor = "Geor-translit",
		Cyrl = "os-translit",
	},
	display_text = {
		Latn = {from = {"ӕ"}, to = {"æ"}},
		Cyrl = {from = {"æ"}, to = {"ӕ"}},
	},
	entry_name = {
		Latn = {from = {"ӕ"}, to = {"æ"}},
		Cyrl = {
			from = {"æ"},
			to = {"ӕ"},
			remove_diacritics = c.grave .. c.acute,
		},
	},
	sort_key = {
		Cyrl = {
			from = {"ӕ", "гъ", "дж", "дз", "ё", "къ", "пъ", "тъ", "хъ", "цъ", "чъ"},
			to = {"а" .. p[1], "г" .. p[1], "д" .. p[1], "д" .. p[2], "е" .. p[1], "к" .. p[1], "п" .. p[1], "т" .. p[1], "х" .. p[1], "ц" .. p[1], "ч" .. p[1]},
		},
	},
}

-- Punjabi. Both scripts have transliteration modules; pa-Arab entry names
-- strip the listed marks and replace two letters with "ن" and "ل".
m["pa"] = {
	"Punjabi", 58635, "inc-pan", "Guru, pa-Arab",
	ancestors = "inc-opa",
	translit = {
		["pa-Arab"] = "pa-Arab-translit",
		Guru = "Guru-translit",
	},
	entry_name = {
		["pa-Arab"] = {
			from = {"ݨ", "ࣇ"},
			to = {"ن", "ل"},
			remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
		},
	},
}

-- Pali. Written in many scripts, each with a transliteration module. The
-- entry_name table mixes a Thai-specific rule set with a top-level
-- remove_diacritics, and the sort key is a single script-independent list.
m["pi"] = {
	"Pali",
	36727,
	"inc-mid",
	"Latn, Brah, Deva, Beng, Sinh, Mymr, Thai, Lana, Laoo, Khmr, Cakm", --and also Khom
	ancestors = "sa",
	translit = {
		Brah = "Brah-translit",
		Deva = "sa-translit",
		Beng = "pi-translit",
		Sinh = "si-translit",
		Mymr = "pi-translit",
		Thai = "pi-translit",
		Lana = "pi-translit",
		Laoo = "pi-translit",
		Khmr = "pi-translit",
		Cakm = "Cakm-translit",
	},
	entry_name = {
		Thai = {
			from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
			to = {"ิํ", "ฐ", "ญ"}
		},
		-- NOTE(review): this key sits beside the script-keyed table above;
		-- presumably it applies to scripts without their own entry -- confirm.
		remove_diacritics = c.VS01
	},
	sort_key = { -- FIXME: This needs to be converted into the current standardized format.
		-- `from` has 21 patterns and `to` has 19 replacements: the last two
		-- patterns, u(0xFE00) and u(0x200D), have no counterpart (presumably
		-- they are deleted by the consumer -- confirm).
		from = {"ā", "ī", "ū", "ḍ", "ḷ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
		to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}
	},
}

-- Polish. Sort key rewrites each accented letter to its base letter plus p[n].
m["pl"] = {
	"Polish", 809, "zlw-lch", "Latn",
	ancestors = "zlw-mpl",
	standardChars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc,
	sort_key = {
		from = {"ą", "ć", "ę", "ł", "ń", "ó", "ś", "ź", "ż"},
		to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]},
	},
}

-- Pashto. Entry names strip the listed vowel and related marks.
m["ps"] = {
	"Pashto", 58680, "ira-pat", "ps-Arab",
	entry_name = {
		remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef,
	},
}

-- pt: Portuguese, listed with the Latn and Brai scripts.
m["pt"] = {
	"Portuguese", 5146, "roa-gap", "Latn, Brai",
	standardChars = {
		c.punc, -- positional entry, alongside the per-script sets
		Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",
		Brai = c.braille,
	},
	sort_key = {
		Latn = {
			-- Ordinal indicators and ligatures are spelled out as plain letters.
			from = {"ª", "æ", "º", "œ"},
			to = {"a", "ae", "o", "oe"},
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde
				.. c.macron .. c.diaer .. c.cedilla,
		},
	},
}

-- qu: Quechua; Latn only, no extra processing tables.
m["qu"] = {
	"Quechua", 5218, "qwe", "Latn",
}

-- rm: Romansch; the sort key only lists marks for removal.
m["rm"] = {
	"Romansch", 13199, "roa-rhe", "Latn",
	sort_key = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e,
	},
}

-- ro: Romanian, listed with Latn, Cyrl and Cyrs; only Cyrl has a
-- transliteration module here.
m["ro"] = {
	"Romanian", 7913, "roa-eas", "Latn, Cyrl, Cyrs",
	translit = {Cyrl = "ro-translit"},
	standardChars = {
		c.punc, -- positional entry, alongside the per-script sets
		Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",
		Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",
	},
	sort_key = {
		Cyrl = {
			from = {"ӂ"},
			to = {"ж" .. p[1]},
		},
		Latn = {
			-- Letters with their own place in the ordering: base letter plus a
			-- p[...] marker. `from` and `to` are paired by position.
			from = {
				"ă", "â",
				"î",
				"ș",
				"ț",
			},
			to = {
				"a" .. p[1], "a" .. p[2],
				"i" .. p[1],
				"s" .. p[1],
				"t" .. p[1],
			},
			remove_diacritics = c.grave .. c.acute,
		},
	},
}

-- ru: Russian, listed with the Cyrl and Brai scripts.
m["ru"] = {
	"Russian", 7737, "zle", "Cyrl, Brai",
	ancestors = "zle-mru",
	translit = {Cyrl = "ru-translit"},
	-- Display text and entry names swap the two apostrophe forms in opposite
	-- directions.
	display_text = {
		Cyrl = {from = {"'"}, to = {"’"}},
	},
	entry_name = {
		Cyrl = {
			from = {"’"},
			to = {"'"},
			remove_diacritics = c.grave .. c.acute .. c.diaer,
			remove_exceptions = {"Ё", "ё", "Ѣ̈", "ѣ̈", "Я̈", "я̈"},
		},
	},
	sort_key = {
		Cyrl = {
			remove_diacritics = c.grave .. c.acute .. c.diaer,
			remove_exceptions = {"ё", "ѣ̈", "я̈"},
			-- `from` and `to` are paired by position.
			from = {
				-- 2 chars
				"ё", "ѣ̈", "я̈",
				-- 1 char
				"і", "ѣ", "ѳ", "ѵ",
			},
			to = {
				"е" .. p[1], "ь" .. p[2], "я" .. p[1],
				"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3],
			},
		},
	},
	standardChars = {
		-- Exclude apostrophe. The parentheses keep only the first gsub result.
		(c.punc:gsub("'", "")),
		Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",
		Brai = c.braille,
	},
}

-- rw: Rwanda-Rundi; entry names list four marks for removal.
m["rw"] = {
	"Rwanda-Rundi", 3217514, "bnt-glb", "Latn",
	entry_name = {
		remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron,
	},
}

-- sa: Sanskrit. Listed with a long set of scripts; the translit table below
-- covers only some of them.
m["sa"] = {
	"Sanskrit",
	11059,
	"inc",
	"as-Beng, Bali, Beng, Bhks, Brah, Mymr, xwo-Mong, Deva, Gujr, Guru, Gran, Hani, Java, Kthi, Knda, Kawi, Khar, Khmr, Laoo, Mlym, mnc-Mong, Marc, Modi, Mong, Nand, Newa, Orya, Phag, Ranj, Saur, Shrd, Sidd, Sinh, Soyo, Lana, Takr, Taml, Tang, Telu, Thai, Tibt, Tutg, Tirh, Zanb", --and also Khom; script codes sorted by canonical name rather than code for [[MOD:sa-convert]]
	translit = {
		Beng = "sa-Beng-translit",
		["as-Beng"] = "sa-Beng-translit",
		Brah = "Brah-translit",
		Deva = "sa-translit",
		Gujr = "sa-Gujr-translit",
		Guru = "sa-Guru-translit",
		Java = "sa-Java-translit",
		Kthi = "sa-Kthi-translit",
		Khmr = "pi-translit",
		Knda = "sa-Knda-translit",
		Lana = "pi-translit",
		Laoo = "pi-translit",
		Mlym = "sa-Mlym-translit",
		Modi = "sa-Modi-translit",
		Mong = "Mong-translit",
		["mnc-Mong"] = "mnc-translit",
		["xwo-Mong"] = "xal-translit",
		Mymr = "pi-translit",
		Orya = "sa-Orya-translit",
		Shrd = "Shrd-translit",
		Sidd = "Sidd-translit",
		Sinh = "si-translit",
		Taml = "sa-Taml-translit",
		Telu = "sa-Telu-translit",
		Thai = "pi-translit",
		Tibt = "Tibt-translit",
	},
	-- Mong and Tibt reuse the shared tables from `s`.
	display_text = {
		Mong = s["Mong-displaytext"],
		Tibt = s["Tibt-displaytext"],
	},
	entry_name = {
		Mong = s["Mong-entryname"],
		Tibt = s["Tibt-entryname"],
		-- Thai-specific substitutions; `from` and `to` are paired by position.
		Thai = {
			from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
			to = {"ิํ", "ฐ", "ญ"}
		},
		-- NOTE(review): this key sits beside the script keys rather than under
		-- one; presumably it applies regardless of script -- confirm with the consumer.
		remove_diacritics = c.VS01 .. c.udatta .. c.anudatta
	},
	sort_key = {
		Tibt = "Tibt-sortkey",
		-- The table below is a positional entry (index 1), not keyed by script.
		-- NOTE(review): `from` has 26 patterns but `to` has only 24 replacements,
		-- so the last two (u(0xFE00), u(0x200D)) have no counterpart; presumably
		-- they are replaced by nothing -- confirm with the consumer.
		{ -- FIXME: This needs to be converted into the current standardized format.
			from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
			to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"},
		},
	},
}

-- sc: Sardinian; Latn only, no extra processing tables.
m["sc"] = {
	"Sardinian", 33976, "roa-sou", "Latn",
}

-- sd: Sindhi, listed with four scripts; only Sind has a transliteration
-- module here.
m["sd"] = {
	"Sindhi", 33997, "inc-snd", "sd-Arab, Deva, Sind, Khoj",
	translit = {Sind = "Sind-translit"},
	entry_name = {
		-- sd-Arab only: one letter substitution plus the marks listed for removal.
		["sd-Arab"] = {
			from = {"ٱ"},
			to = {"ا"},
			remove_diacritics = c.kashida
				.. c.fathatan .. c.dammatan .. c.kasratan
				.. c.fatha .. c.damma .. c.kasra
				.. c.shadda .. c.sukun .. c.superalef,
		},
	},
}

-- se: Northern Sami.
m["se"] = {
	"Northern Sami", 33947, "smi", "Latn",
	standardChars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc,
	-- Display text turns the plain apostrophe into "ˈ"; entry names list both
	-- characters for removal, together with macron and dot below.
	display_text = {from = {"'"}, to = {"ˈ"}},
	entry_name = {
		remove_diacritics = c.macron .. c.dotbelow .. "'ˈ",
	},
	sort_key = {
		-- Base letter plus a p[...] marker; `from` and `to` are paired by position.
		from = {
			"á", "č", "đ", "ŋ",
			"š", "ŧ", "ž",
		},
		to = {
			"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1],
			"s" .. p[1], "t" .. p[1], "z" .. p[1],
		},
	},
}

-- sg: Sango; Latn only, with "ngb" as ancestor.
m["sg"] = {
	"Sango", 33954, "crp", "Latn",
	ancestors = "ngb",
}

-- sh: Serbo-Croatian, listed with Latn, Cyrl and Glag. The same set of accent
-- marks is listed for removal in entry names and sort keys of both Latn and
-- Cyrl; the acute-bearing letters in `remove_exceptions` are exempted.
m["sh"] = {
	"Serbo-Croatian", 9301, "zls", "Latn, Cyrl, Glag",
	-- ISO 639-3 code, since "sh" is deprecated from ISO 639-1
	ietf_subtag = "hbs",
	wikimedia_codes = "sh, bs, hr, sr",
	standardChars = {
		c.punc, -- positional entry, alongside the per-script sets
		Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
		Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",
	},
	entry_name = {
		Cyrl = {
			remove_exceptions = {"З́", "з́", "С́", "с́"},
			remove_diacritics = c.grave .. c.acute .. c.tilde
				.. c.macron .. c.dgrave .. c.invbreve,
		},
		Latn = {
			remove_exceptions = {"Ć", "ć", "Ś", "ś", "Ź", "ź"},
			remove_diacritics = c.grave .. c.acute .. c.tilde
				.. c.macron .. c.dgrave .. c.invbreve,
		},
	},
	sort_key = {
		Cyrl = {
			remove_exceptions = {"з́", "с́"},
			remove_diacritics = c.grave .. c.acute .. c.tilde
				.. c.macron .. c.dgrave .. c.invbreve,
			-- `from` and `to` are paired by position.
			from = {"ђ", "з́", "ј", "љ", "њ", "с́", "ћ", "џ"},
			to = {
				"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1],
				"н" .. p[1], "с" .. p[1], "т" .. p[1], "ч" .. p[1],
			},
		},
		Latn = {
			remove_exceptions = {"ć", "ś", "ź"},
			remove_diacritics = c.grave .. c.acute .. c.tilde
				.. c.macron .. c.dgrave .. c.invbreve,
			-- `from` and `to` are paired by position; the list order is kept as is.
			from = {"č", "ć", "dž", "đ", "lj", "nj", "š", "ś", "ž", "ź"},
			to = {
				"c" .. p[1], "c" .. p[2],
				"d" .. p[1], "d" .. p[2],
				"l" .. p[1],
				"n" .. p[1],
				"s" .. p[1], "s" .. p[2],
				"z" .. p[1], "z" .. p[2],
			},
		},
	},
}

-- si: Sinhalese; one script, one transliteration module, override enabled.
m["si"] = {
	"Sinhalese", 13267, "inc-ins", "Sinh",
	override_translit = true,
	translit = "si-translit",
}

-- sk: Slovak; Latn only. The sort key lists acute, circumflex, diaeresis and
-- caron for removal.
m["sk"] = {
	"Slovak",
	9058,
	"zlw",
	"Latn",
	ancestors = "zlw-osk",
	sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
	-- The L group must read "LlĹĺĽľ": the previous text had a stray CJK
	-- character ("弾") in place of "ĺĽľ", which dropped those three Slovak
	-- letters from the standard set and added a non-Slovak character.
	standardChars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹĺĽľMmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
}

-- sl: Slovene; Latn only. The sort key lists more marks for removal than the
-- entry name does.
m["sl"] = {
	"Slovene", 9063, "zls", "Latn",
	standardChars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc,
	entry_name = {
		from = {"Ə", "ə", "Ł", "ł"},
		to = {"E", "e", "L", "l"},
		remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"},
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron
			.. c.dgrave .. c.invbreve .. c.dotbelow,
	},
	sort_key = {
		remove_exceptions = {"ć", "ǵ", "ś", "ź"},
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron
			.. c.dotabove .. c.ringabove .. c.dgrave .. c.invbreve
			.. c.dotbelow .. c.ringbelow .. c.ogonek,
		-- `from` and `to` are paired by position; "ə" and "ł" map to the bare
		-- base letter, everything else gets a p[...] marker.
		from = {
			"ä",
			"č", "ć",
			"đ",
			"ə", "ë",
			"ǧ", "ǵ",
			"ï",
			"ł",
			"ö",
			"š", "ś",
			"ü",
			"ž", "ź",
		},
		to = {
			"a" .. p[1],
			"c" .. p[1], "c" .. p[2],
			"d" .. p[1],
			"e", "e" .. p[1],
			"g" .. p[1], "g" .. p[2],
			"i" .. p[1],
			"l",
			"o" .. p[1],
			"s" .. p[1], "s" .. p[2],
			"u" .. p[1],
			"z" .. p[1], "z" .. p[2],
		},
	},
}

-- sm: Samoan; Latn only, no extra processing tables.
m["sm"] = {
	"Samoan", 34011, "poz-pnp", "Latn",
}

-- sn: Shona; entry names list the acute accent for removal.
m["sn"] = {
	"Shona", 34004, "bnt-sho", "Latn",
	entry_name = {
		remove_diacritics = c.acute,
	},
}

-- so: Somali, listed with Latn, Arab and Osma; only Latn entry names have
-- marks listed for removal.
m["so"] = {
	"Somali", 13275, "cus-som", "Latn, Arab, Osma",
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ,
		},
	},
}

-- sq: Albanian, listed with six scripts.
m["sq"] = {
	"Albanian", 8748, "sqj", "Latn, Grek, ota-Arab, Elba, Todr, Vith",
	translit = {Elba = "Elba-translit"},
	display_text = {Grek = s["Grek-displaytext"]},
	standardChars = {
		c.punc, -- positional entry, alongside the per-script set
		Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz",
	},
	entry_name = {
		-- Diacritic removal from Grek-entryname excluded: only its from/to
		-- lists are reused.
		Grek = {
			from = s["Grek-entryname"].from,
			to = s["Grek-entryname"].to,
		},
		Latn = {
			remove_diacritics = c.acute,
			-- Both patterns match a leading "i ", "e " or "të " followed by a
			-- word character and keep only that character.
			from = {"^[ie] (%w)", "^të (%w)"},
			to = {"%1", "%1"},
		},
	},
	sort_key = {
		Latn = {
			remove_diacritics = c.acute .. c.circ .. c.tilde .. c.breve .. c.caron,
			-- `from` and `to` are paired by position: the two leading-particle
			-- patterns first, then letters and digraphs that get a p[1] marker.
			from = {
				"^[ie] (%w)", "^të (%w)",
				"ç", "dh", "ë", "gj", "ll", "nj", "rr", "sh", "th", "xh", "zh",
			},
			to = {
				"%1", "%1",
				"c" .. p[1], "d" .. p[1], "e" .. p[1], "g" .. p[1], "l" .. p[1], "n" .. p[1],
				"r" .. p[1], "s" .. p[1], "t" .. p[1], "x" .. p[1], "z" .. p[1],
			},
		},
		-- TODO: Grek
	},
}

-- ss: Swazi; entry names list five marks for removal.
m["ss"] = {
	"Swazi", 34014, "bnt-ngu", "Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
}

-- st: Sotho; entry names list five marks for removal.
m["st"] = {
	"Sotho", 34340, "bnt-sts", "Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
}

-- su: Sundanese, listed with Latn, Sund and Arab; only Sund has a
-- transliteration module here.
m["su"] = {
	"Sundanese", 34002, "poz-msa", "Latn, Sund, Arab",
	ancestors = "osn",
	translit = {Sund = "su-translit"},
}

-- sv: Swedish; Latn only.
m["sv"] = {
	"Swedish", 9027, "gmq-eas", "Latn",
	ancestors = "gmq-osw-lat",
	standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc,
	sort_key = {
		-- "å" is exempt from removal; it is rewritten below as "z" plus a
		-- p[1] marker.
		remove_exceptions = {"å"},
		-- The apostrophe and colon are listed for removal along with the marks.
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron
			.. c.dacute .. c.caron .. c.cedilla .. "':",
		-- `from` and `to` are paired by position.
		from = {
			"ø", "æ", "œ", "ß",
			"å",
			"aͤ", "oͤ",
		},
		to = {
			"o", "ae", "oe", "ss",
			"z" .. p[1],
			"ä", "ö",
		},
	},
}

-- sw: Swahili, listed with Latn and Arab. In the Latn sort key, "ng'" becomes
-- "ng" plus a p[1] marker.
m["sw"] = {
	"Swahili", 7838, "bnt-swh", "Latn, Arab",
	sort_key = {
		Latn = {from = {"ng'"}, to = {"ng" .. p[1]}},
	},
}

-- ta: Tamil; one script, one transliteration module, override enabled.
m["ta"] = {
	"Tamil", 5885, "dra-tam", "Taml",
	ancestors = "ta-mid",
	override_translit = true,
	translit = "ta-translit",
}

-- te: Telugu; one script, one transliteration module, override enabled.
m["te"] = {
	"Telugu", 8097, "dra-tel", "Telu",
	override_translit = true,
	translit = "te-translit",
}

-- tg: Tajik, listed with Cyrl, fa-Arab and Latn. Cyrl and Latn share the same
-- entry-name table from `s`.
m["tg"] = {
	"Tajik", 9260, "ira-swi", "Cyrl, fa-Arab, Latn",
	ancestors = "fa-cls",
	override_translit = true,
	translit = {Cyrl = "tg-translit"},
	entry_name = {
		Latn = s["tg-entryname"],
		Cyrl = s["tg-entryname"],
	},
	sort_key = {
		Cyrl = {
			-- Base letter plus a p[1] marker; `from` and `to` are paired by position.
			from = {
				"ғ", "ё", "ӣ", "қ",
				"ӯ", "ҳ", "ҷ",
			},
			to = {
				"г" .. p[1], "е" .. p[1], "и" .. p[1], "к" .. p[1],
				"у" .. p[1], "х" .. p[1], "ч" .. p[1],
			},
		},
	},
}

-- th: Thai, listed with Thai, Khomt and Brai; only the Thai script has
-- transliteration and sort-key modules here.
m["th"] = {
	"Thai", 9217, "tai-swe", "Thai, Khomt, Brai",
	translit = {Thai = "th-translit"},
	sort_key = {Thai = "Thai-sortkey"},
}

-- ti: Tigrinya; Ethi only, with the Ethi transliteration module.
m["ti"] = {
	"Tigrinya", 34124, "sem-eth", "Ethi",
	translit = "Ethi-translit",
}

-- tk: Turkmen, listed with Latn, Cyrl and Arab. Latn and Cyrl share the same
-- entry-name table from `s`.
m["tk"] = {
	"Turkmen", 9267, "trk-ogz", "Latn, Cyrl, Arab",
	entry_name = {
		Cyrl = s["tk-entryname"],
		Latn = s["tk-entryname"],
	},
	-- In both sort keys `from` and `to` are paired by position.
	sort_key = {
		Cyrl = {
			from = {
				"ё", "җ", "ң",
				"ө", "ү", "ә",
			},
			to = {
				"е" .. p[1], "ж" .. p[1], "н" .. p[1],
				"о" .. p[1], "у" .. p[1], "э" .. p[1],
			},
		},
		Latn = {
			from = {
				"ç", "ä", "ž", "ň",
				"ö", "ş", "ü", "ý",
			},
			to = {
				"c" .. p[1], "e" .. p[1], "j" .. p[1], "n" .. p[1],
				"o" .. p[1], "s" .. p[1], "u" .. p[1], "y" .. p[1],
			},
		},
	},
}

-- tl: Tagalog, listed with Latn and Tglg.
m["tl"] = {
	"Tagalog", 34057, "phi", "Latn, Tglg",
	override_translit = true,
	translit = {Tglg = "tl-translit"},
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ,
		},
	},
	sort_key = {Latn = "tl-sortkey"},
	standardChars = {
		c.punc, -- positional entry, alongside the per-script set
		Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
	},
}

-- tn: Tswana; Latn only, no extra processing tables.
m["tn"] = {
	"Tswana", 34137, "bnt-sts", "Latn",
}

-- to: Tongan. Entry names list the acute for removal; the sort key lists the
-- macron.
m["to"] = {
	"Tongan", 34094, "poz-ton", "Latn",
	entry_name = {
		remove_diacritics = c.acute,
	},
	sort_key = {
		remove_diacritics = c.macron,
	},
}

-- tr: Turkish; Latn only, with the dotted/dotless I flag set.
m["tr"] = {
	"Turkish", 256, "trk-ogz", "Latn",
	ancestors = "ota",
	dotted_dotless_i = true,
	standardChars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc,
	-- `from` and `to` are paired by position.
	sort_key = {
		from = {
			-- Capital Î wrongly becomes ı + circumflex due to the dotted
			-- dotless I logic, so that pair is handled before the bare
			-- circumflex, which is otherwise ignored.
			"ı" .. c.circ,
			c.circ,
			-- Ensure "i" comes after "ı".
			"i",
			"ç", "ğ", "ı", "ö", "ş", "ü",
		},
		to = {
			"i",
			"",
			"i" .. p[1],
			"c" .. p[1], "g" .. p[1], "i", "o" .. p[1], "s" .. p[1], "u" .. p[1],
		},
	},
}

-- ts: Tsonga; Latn only, no extra processing tables.
m["ts"] = {
	"Tsonga", 34327, "bnt-tsr", "Latn",
}

-- tt: Tatar, listed with Cyrl, Latn and tt-Arab; the dotted/dotless I flag is
-- set. Only Cyrl has a transliteration module here.
m["tt"] = {
	"Tatar", 25285, "trk-kbu", "Cyrl, Latn, tt-Arab",
	dotted_dotless_i = true,
	override_translit = true,
	translit = {Cyrl = "tt-translit"},
	-- In both sort keys `from` and `to` are paired by position.
	sort_key = {
		Latn = {
			from = {
				-- Ensure "i" comes after "ı".
				"i",
				"ä", "ə",
				"ç", "ğ", "ı",
				"ñ", "ŋ",
				"ö", "ɵ",
				"ş", "ü",
			},
			to = {
				"i" .. p[1],
				"a" .. p[1], "a" .. p[2],
				"c" .. p[1], "g" .. p[1], "i",
				"n" .. p[1], "n" .. p[2],
				"o" .. p[1], "o" .. p[2],
				"s" .. p[1], "u" .. p[1],
			},
		},
		Cyrl = {
			from = {
				"ә", "ў", "ғ", "ё", "җ",
				"қ", "ң", "ө", "ү", "һ",
			},
			to = {
				"а" .. p[1], "в" .. p[1], "г" .. p[1], "е" .. p[1], "ж" .. p[1],
				"к" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1], "х" .. p[1],
			},
		},
	},
}

-- "tw" IS TREATED AS "ak", SEE WT:LT

-- ty: Tahitian; Latn only, no extra processing tables.
m["ty"] = {
	"Tahitian", 34128, "poz-pep", "Latn",
}

-- ug: Uyghur, listed with ug-Arab, Latn and Cyrl. ug-Arab and Cyrl use the
-- same transliteration module; override enabled.
m["ug"] = {
	"Uyghur", 13263, "trk-kar", "ug-Arab, Latn, Cyrl",
	ancestors = "chg",
	override_translit = true,
	translit = {
		Cyrl = "ug-translit",
		["ug-Arab"] = "ug-translit",
	},
}

-- uk: Ukrainian; Cyrl only. Entry names and sort keys both list grave and
-- acute for removal.
m["uk"] = {
	"Ukrainian", 8798, "zle", "Cyrl",
	ancestors = "zle-ouk",
	translit = "uk-translit",
	entry_name = {
		remove_diacritics = c.grave .. c.acute,
	},
	sort_key = {
		remove_diacritics = c.grave .. c.acute,
		-- `from` and `to` are paired by position.
		from = {
			-- 2 chars
			"ї",
			-- 1 char
			"ґ", "є", "і",
		},
		to = {
			"и" .. p[2],
			"г" .. p[1], "е" .. p[1], "и" .. p[1],
		},
	},
	-- Exclude apostrophe. The parentheses keep only the first gsub result,
	-- which is what the concatenation used anyway.
	standardChars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя"
		.. (c.punc:gsub("'", "")),
}

-- ur: Urdu, listed with ur-Arab and Hebr. Hebr uses the "Hebr-common" module
-- for display text, entry names and sort keys.
m["ur"] = {
	"Urdu", 1617, "inc-hnd", "ur-Arab, Hebr",
	translit = {["ur-Arab"] = "ur-translit"},
	display_text = {Hebr = "Hebr-common"},
	sort_key = {Hebr = "Hebr-common"},
	entry_name = {
		Hebr = "Hebr-common",
		["ur-Arab"] = {
			remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan
				.. c.fatha .. c.damma .. c.kasra
				.. c.shadda .. c.sukun .. c.nunghunna .. c.superalef,
			-- Paired by position: the heh + U+0654 sequence and the single
			-- character U+06C2 both become the bare letter given in `to`;
			-- hamzatu l-waṣli becomes a regular alif.
			from = {"هٔ", "ۂ", "ٱ"},
			to = {"ہ", "ہ", "ا"},
		},
	},
	standardChars = {
		c.punc, -- positional entry, alongside the per-script set
		["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے",
	},
}

-- uz: Uzbek, listed with Latn, Cyrl and fa-Arab; only Cyrl has a
-- transliteration module here.
m["uz"] = {
	"Uzbek", 9264, "trk-kar", "Latn, Cyrl, fa-Arab",
	ancestors = "chg",
	translit = {Cyrl = "uz-translit"},
	-- In both sort keys `from` and `to` are paired by position.
	sort_key = {
		Cyrl = {
			-- "ё" becomes "е" + marker; the other four become "я" + marker.
			from = {
				"ё",
				"ў", "қ", "ғ", "ҳ",
			},
			to = {
				"е" .. p[1],
				"я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4],
			},
		},
		Latn = {
			-- All five sequences become "z" plus a marker, in this order.
			from = {
				"oʻ", "gʻ",
				"sh", "ch", "ng",
			},
			to = {
				"z" .. p[1], "z" .. p[2],
				"z" .. p[3], "z" .. p[4], "z" .. p[5],
			},
		},
	},
}

-- ve: Venda; Latn only, no extra processing tables.
m["ve"] = {
	"Venda", 32704, "bnt-bso", "Latn",
}

-- vi: Vietnamese, listed with Latn and Hani; each has its own sort-key module.
m["vi"] = {
	"Vietnamese", 9199, "mkh-vie", "Latn, Hani",
	ancestors = "mkh-mvi",
	sort_key = {
		Hani = "Hani-sortkey",
		Latn = "vi-sortkey",
	},
}

-- vo: Volapük; Latn only, no extra processing tables.
m["vo"] = {
	"Volapük", 36986, "art", "Latn",
}

-- wa: Walloon; reuses the shared "roa-oil-sortkey" table from `s`.
m["wa"] = {
	"Walloon", 34219, "roa-oil", "Latn",
	sort_key = s["roa-oil-sortkey"],
}

-- wo: Wolof, listed with Latn, Arab and Gara; no extra processing tables.
m["wo"] = {
	"Wolof", 34257, "alv-fwo", "Latn, Arab, Gara",
}

-- xh: Xhosa; entry names list five marks for removal.
m["xh"] = {
	"Xhosa", 13218, "bnt-ngu", "Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
}

-- yi: Yiddish, listed with Hebr and Latn. Hebr uses "Hebr-common" for display
-- text, entry names and sort keys.
m["yi"] = {
	"Yiddish", 8641, "gmw-hgm", "Hebr, Latn",
	ancestors = "gmh",
	translit = {Hebr = "yi-translit"},
	display_text = {Hebr = "Hebr-common"},
	entry_name = {Hebr = "Hebr-common"},
	sort_key = {Hebr = "Hebr-common"},
}

-- yo: Yoruba, listed with Latn and Arab.
m["yo"] = {
	"Yoruba", 34311, "alv-yor", "Latn, Arab",
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.macron,
		},
	},
	sort_key = {
		Latn = {
			-- `from` and `to` are paired by position; several `from` entries
			-- share the same replacement.
			from = {
				"ẹ", "ɛ",
				"gb",
				"ị",
				"kp",
				"ọ", "ɔ",
				"ṣ", "sh",
				"ụ",
			},
			to = {
				"e" .. p[1], "e" .. p[1],
				"g" .. p[1],
				"i" .. p[1],
				"k" .. p[1],
				"o" .. p[1], "o" .. p[1],
				"s" .. p[1], "s" .. p[1],
				"u" .. p[1],
			},
		},
	},
}

-- za: Zhuang, listed with Latn and Hani; each has its own sort-key module.
m["za"] = {
	"Zhuang", 13216, "tai", "Latn, Hani",
	sort_key = {
		Hani = "Hani-sortkey",
		Latn = "za-sortkey",
	},
}

-- zh: Chinese. Transliteration and sort-key tables are keyed by "Hani" and
-- "Bopo", while the script list names "Hants".
m["zh"] = {
	"Chinese", 7850, "zhx", "Hants, Latn, Bopo, Nshu, Brai",
	ancestors = "ltc",
	generate_forms = "zh-generateforms",
	sort_key = {Hani = "Hani-sortkey"},
	translit = {
		Bopo = "zh-translit",
		Hani = "zh-translit",
	},
}

-- zu: Zulu; entry names list five marks for removal.
m["zu"] = {
	"Zulu", 10179, "bnt-ngu", "Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
}

-- Pass the completed table through the shared finalizer, tagged as
-- "language" data, and export whatever it returns.
local finalize_data = require("Module:languages").finalizeData
return finalize_data(m, "language")