Module:mdf-translit

From Linguifex
Jump to navigation Jump to search

This module transliterates Moksha-language text. It should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:mdf-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

local gsub = mw.ustring.gsub
local lower = mw.ustring.lower
local trim = mw.text.trim

--- Run the same substitution over a string until it stops changing.
-- A single gsub pass only rewrites non-overlapping matches, so a replacement
-- may leave (or expose) further matches; this keeps going until a pass is a
-- no-op.
-- @param term string to rewrite
-- @param pattern pattern handed to gsub
-- @param repl replacement handed to gsub
-- @return the first result that another gsub pass would leave unchanged
local function gsub_repeatedly(term, pattern, repl)
	local previous
	repeat
		previous = term
		-- only the first result of gsub (the string) is kept by the assignment
		term = gsub(previous, pattern, repl)
	until term == previous
	return term
end

-- Context-free Cyrillic -> Latin mapping, used as the replacement table for the
-- final per-character gsub in export.tr. Context-sensitive cases (j-insertion,
-- palatalised consonants, ä/ö after certain consonants) are rewritten before
-- that pass runs. Ь/ь map to the empty string, so any soft sign still present
-- at that point is dropped. Characters without an entry are left unchanged.
local letters = {
	["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="O", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J",
	["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F",
	["Х"]="H", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="Ə", ["Ы"]="Ï", ["Ь"]="", ["Э"]="Ë", ["Ю"]="U", ["Я"]="A",
	["Ӕ"]="Ä", ["Ѣ"]="E", ["Ҥ"]="Ŋ", ["І"]="I", ["Ѳ"]="Kv",
	["а"]="a", ["б"]="b", ["в"]="v", ["г"]="g", ["д"]="d", ["е"]="e", ["ё"]="o", ["ж"]="ž", ["з"]="z", ["и"]="i", ["й"]="j",
	["к"]="k", ["л"]="l", ["м"]="m", ["н"]="n", ["о"]="o", ["п"]="p", ["р"]="r", ["с"]="s", ["т"]="t", ["у"]="u", ["ф"]="f",
	["х"]="h", ["ц"]="c", ["ч"]="č", ["ш"]="š", ["щ"]="šč", ["ъ"]="ə", ["ы"]="ï", ["ь"]="", ["э"]="ë", ["ю"]="u", ["я"]="a",
	["ӕ"]="ä", ["ѣ"]="e", ["ҥ"]="ŋ", ["і"]="i", ["ѳ"]="kv"
}

-- Palatalised Latin counterparts of the consonants that export.tr palatalises.
-- The key is the Cyrillic consonant; the value replaces it when the consonant
-- is followed by ь or by one of the characters listed in `front`.
local palatals = {
	["Д"]="Ď", ["З"] = "Ź", ["Л"] = "Ľ", ["Н"] = "Ń", ["Р"] = "Ŕ", ["С"] = "Ś", ["Т"] = "Ť", ["Ц"] = "Ć",
	["д"]="ď", ["з"] = "ź", ["л"] = "ľ", ["н"] = "ń", ["р"] = "ŕ", ["с"] = "ś", ["т"] = "ť", ["ц"] = "ć",
}

-- Character sets below are spliced into [...] pattern classes by export.tr.

-- Characters after which е, ё, и, ю, я receive a j prefix. Despite the name,
-- the set also contains ь/Ь.
local vowels = "аоуыэяёюиеьАОУЫЭЯЁЮИЕЬ"
-- Characters that cause a preceding consonant from `palatals` to be palatalised.
-- NOTE(review): Ӓ/ӓ are listed here but have no entry in `letters`, so they
-- would pass through the final mapping unchanged — confirm this is intended.
local front = "ЕЁИЮЯІӔӒеёиюяіӕӓ"
-- Combining accent marks; one may optionally sit between a character from
-- `vowels` and the following front vowel in the j-insertion patterns.
local accents = "́̀"
-- Consonants after which я and ё are written ä and ö.
local nonpalatals = "бвгжкпмшБВГЖКПМШ"

--- Transliterate Moksha Cyrillic text into Latin script.
-- Context-dependent rewrites (j-insertion before iotated vowels, ä/ö after
-- certain consonants, palatalised consonants, hard sign) are applied first, in
-- the order written below; whatever Cyrillic remains is then mapped character
-- by character through `letters`. Characters with no entry in `letters` are
-- passed through unchanged.
-- @param text string to transliterate
-- @param lang language code; not used by this implementation
-- @param sc script code; not used by this implementation
-- @return the transliterated string, with surrounding whitespace trimmed
function export.tr(text, lang, sc)
	-- make all word borders have a space, so that "start of word" can be
	-- matched as "after whitespace or punctuation"
	text = " " .. text .. " "

	-- front vowels after another vowel (or ь, which is part of `vowels`) are
	-- written with initial j; repeated because a single gsub pass cannot
	-- rewrite overlapping matches such as three such characters in a row
	text = gsub_repeatedly(text, "([" .. vowels .. "][" .. accents .. "]?)([еёию])", "%1j%2")
	text = gsub_repeatedly(text, "([" .. vowels .. "][" .. accents .. "]?)([ЕЁИЮ])", "%1J%2")
	-- я in the same position is emitted directly as Latin jä / JÄ
	text = gsub_repeatedly(text, "([" .. vowels .. "][" .. accents .. "]?)([я])", "%1jä")
	text = gsub_repeatedly(text, "([" .. vowels .. "][" .. accents .. "]?)([Я])", "%1JÄ")

	-- е, ё, ю, я at the beginning of a word are also written with initial j
	text = gsub(text, "([%s%p])([еёюя])", "%1j%2")
	text = gsub(text, "([%s%p])([ЕЁЮЯ])", "%1J%2")

	-- э at the beginning of a word is written as e
	text = gsub(text, "([%s%p])э", "%1e")
	text = gsub(text, "([%s%p])Э", "%1E")

	-- я after non-palatal consonants becomes ä
	text = gsub(text, "([" .. nonpalatals .. "])я", "%1ä")
	text = gsub(text, "([" .. nonpalatals .. "])Я", "%1Ä")

	-- ё after non-palatal consonants becomes ö
	text = gsub(text, "([" .. nonpalatals .. "])ё", "%1ö")
	text = gsub(text, "([" .. nonpalatals .. "])Ё", "%1Ö")

	-- make Е, Ё, Ю, Я lowercase when the next character is not an uppercase
	-- letter (this includes spaces and punctuation), so that a capitalised
	-- word that received a J prefix above comes out as "Je…" rather than "JE…"
	text = gsub(text, "([ЕЁЮЯ])([^%u])", function(vowel, following)
		return lower(vowel) .. following
	end)

	-- consonants before ь and certain vowels are palatalised; each entry only
	-- touches its own consonant, so the unspecified order of pairs() is harmless
	for cyrillic, latin in pairs(palatals) do
		text = gsub(text, cyrillic .. "ь", latin)

		-- special handling of я: written as plain a after a palatalised consonant
		-- (the ь? can no longer match here, the substitution above has already
		-- consumed any ь directly following this consonant)
		text = gsub(text, cyrillic .. "ь?я", latin .. "a")
		text = gsub(text, cyrillic .. "ь?Я", latin .. "A")

		-- other front vowels are kept and transliterated by `letters` later
		text = gsub(text, cyrillic .. "ь?([" .. front .. "])", latin .. "%1")
	end

	-- ъ before е, ё, ю, я acts as a separator and is written as j. Both cases
	-- of the vowel are accepted so that all-caps text (e.g. ЪЁ) does not fall
	-- through to the Ъ -> Ə entry in `letters`. (ь in this position already got
	-- its j from the vowel rules at the top, since ь is listed in `vowels`.)
	text = gsub(text, "ъ([еёюяЕЁЮЯ])", "j%1")
	text = gsub(text, "Ъ([еёюяЕЁЮЯ])", "J%1")

	-- Map the remaining Cyrillic one character at a time. gsub returns the
	-- string AND a match count; keep only the string, otherwise the count is
	-- forwarded to trim() as its charset argument and the padding spaces added
	-- at the top are not stripped.
	local result = gsub(text, ".", letters)
	return trim(result)
end

return export