Module:myv-translit

From Linguifex
Jump to navigation Jump to search

This module will transliterate Erzya language text. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:myv-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Short local names for the MediaWiki (Scribunto) string helpers used below.
local gsub = mw.ustring.gsub
local lower = mw.ustring.lower
local trim = mw.text.trim

-- Run gsub(term, foo, bar) again and again, feeding each result back in, and
-- return the first result that one more pass leaves unchanged.
-- A single pass can miss matches that overlap: a character consumed as the end
-- of one match cannot also start another match within the same pass.
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		-- the parentheses keep only the resulting string (gsub may return more)
		term = (gsub(previous, foo, bar))
	until term == previous
	return term
end

-- Character-by-character map from Cyrillic to Latin. export.tr passes it as
-- the replacement table of its final gsub over every character, after all the
-- context-dependent rules have run.
-- ъ/ь (and Ъ/Ь) map to the empty string, i.e. they are dropped at that stage.
-- ё/ю/я map to plain o/u/a here; the preceding j or the palatalised consonant
-- is produced by the earlier steps in export.tr, not by this table.
local letters = {
	["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="O", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J",
	["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F",
	["Х"]="H", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="", ["Ы"]="Ï", ["Ь"]="", ["Э"]="Ë", ["Ю"]="U", ["Я"]="A",
	["Ӕ"]="Ä", ["Ӓ"]="Ä", ["Ѣ"]="E", ["Ҥ"]="Ŋ", ["І"]="I", ["Ѳ"]="Kv",
	["а"]="a", ["б"]="b", ["в"]="v", ["г"]="g", ["д"]="d", ["е"]="e", ["ё"]="o", ["ж"]="ž", ["з"]="z", ["и"]="i", ["й"]="j",
	["к"]="k", ["л"]="l", ["м"]="m", ["н"]="n", ["о"]="o", ["п"]="p", ["р"]="r", ["с"]="s", ["т"]="t", ["у"]="u", ["ф"]="f",
	["х"]="h", ["ц"]="c", ["ч"]="č", ["ш"]="š", ["щ"]="šč", ["ь"]="", ["ъ"]="", ["ы"]="ï", ["э"]="ë", ["ю"]="u", ["я"]="a",
	["ӕ"]="ä", ["ӓ"]="ä", ["ѣ"]="e", ["ҥ"]="ŋ", ["і"]="i", ["ѳ"]="kv"
}

-- Consonants that have a separate palatalised Latin letter.
-- export.tr loops over this table and replaces each key with its value when
-- the key is followed by a soft sign (ь) or by a character listed in `front`
-- (the following vowel itself is kept).
local palatals = {
	["Д"]="Ď", ["З"] = "Ź", ["Л"] = "Ľ", ["Н"] = "Ń", ["Р"] = "Ŕ", ["С"] = "Ś", ["Т"] = "Ť", ["Ц"] = "Ć",
	["д"]="ď", ["з"] = "ź", ["л"] = "ľ", ["н"] = "ń", ["р"] = "ŕ", ["с"] = "ś", ["т"] = "ť", ["ц"] = "ć",
}

-- The strings below are spliced into [...] character classes by export.tr.

-- Characters after which е/ё/и/ю/я get a j inserted in front of them; note
-- that the hard and soft signs (ъ, ь) are listed here alongside the vowels.
local vowels = "АОУЫЭЯЁЮИЕЪЬІӔӒаоуыэяёюиеъьіӕӓ"
-- Characters that palatalise an immediately preceding consonant from `palatals`.
local front = "ЕЁИЮЯІӔӒеёиюяіӕӓ"
-- Combining accent marks; at most one of them is allowed between a vowel and
-- the following е/ё/и/ю/я in the j-insertion rule.
-- NOTE(review): presumably U+0301 (acute) and U+0300 (grave) — confirm.
local accents = "́̀"
-- Consonants after which я and ё are rewritten as ä and ö.
local nonpalatals = "бвгжкпмшБВГЖКПМШ"

--- Transliterate Erzya Cyrillic text into Latin script.
-- The context-dependent rules (j insertion, word-initial э, ä/ö after certain
-- consonants, palatalisation) run first, while the text is still Cyrillic;
-- the remaining characters are then converted one by one through `letters`.
-- @param text the string to transliterate
-- @param lang language code; not used by this function
-- @param sc script code; not used by this function
-- @return the transliterated string, with surrounding whitespace trimmed
function export.tr(text, lang, sc)
	-- make all word borders have a space, so that the first word is also
	-- preceded by a character matching [%s%p]; the padding is trimmed at the end
	text = " " .. text .. " "

	-- е, ё, и, ю, я after another vowel are written with initial j
	-- (`vowels` also contains ъ/ь, so the same applies after those signs);
	-- repeated because in a run of vowels the matches overlap
	text = gsub_repeatedly(text, "([" .. vowels .. "][" .. accents .. "]?)([еёиюя])", "%1j%2")
	text = gsub_repeatedly(text, "([" .. vowels .. "][" .. accents .. "]?)([ЕЁИЮЯ])", "%1J%2")

	-- ё, ю, я at the beginning of a word are also written with initial j
	text = gsub(text, "([%s%p])([ёюя])", "%1j%2")
	text = gsub(text, "([%s%p])([ЁЮЯ])", "%1J%2")

	-- э at the beginning of the word is written as e
	text = gsub(text, "([%s%p])э", "%1e")
	text = gsub(text, "([%s%p])Э", "%1E")

	-- я after non-palatal consonants becomes ä
	text = gsub(text, "([" .. nonpalatals .. "])я", "%1ä")
	text = gsub(text, "([" .. nonpalatals .. "])Я", "%1Ä")
	-- ё after non-palatal consonants becomes ö
	text = gsub(text, "([" .. nonpalatals .. "])ё", "%1ö")
	text = gsub(text, "([" .. nonpalatals .. "])Ё", "%1Ö")

	-- make Ё, Ю, Я lowercase if preceding a non-capital letter, so that a
	-- capital that received an inserted J comes out as e.g. "Ja" and not "JA"
	-- NOTE(review): [^%u] also matches the space or punctuation after a
	-- word-final Ё/Ю/Я, so an all-capitals word ending in one of these letters
	-- gets that letter lowercased — confirm whether this is intended.
	text = gsub(text, "([ЁЮЯ])([^%u])", function(vowel, following)
		return lower(vowel) .. following
	end)

	-- consonants before the soft sign and certain vowels are palatalised;
	-- the soft sign is matched in both cases so that all-capitals text is
	-- palatalised as well (iteration order is irrelevant: every key is a
	-- different consonant)
	for cyrillic, latin in pairs(palatals) do
		text = gsub(text, cyrillic .. "[ьЬ]", latin)
		text = gsub(text, cyrillic .. "([" .. front .. "])", latin .. "%1")
	end

	-- convert everything that is left, character by character.
	-- The parentheses drop gsub's second return value (the match count);
	-- without them it would be handed to trim as its second argument (the set
	-- of characters to strip) and the padding spaces would not be removed.
	return trim((gsub(text, ".", letters)))
end

return export