Module:tevo-translit: Difference between revisions

Revision as of 16:41, 3 June 2026

The following documentation is generated by Module:documentation/functions/translit. ^[edit]

Useful links: subpage list • links • transclusions • testcases • sandbox

This module will transliterate Ancient Tevvic language text. It is also used to transliterate Middle Tevvic and Tevvic. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:tevo-translit/testcases.

Functions

tr(text, lang, sc): Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.

local export = {}

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local toNFC = mw.ustring.toNFC
local U = m_str_utils.char

-- Single-character strings built from Unicode code points via U (m_str_utils.char).
local grave = U(0x300) -- U+0300 COMBINING GRAVE ACCENT
local acute = U(0x301) -- U+0301 COMBINING ACUTE ACCENT
local diaeresis = U(0x308) -- U+0308 COMBINING DIAERESIS
-- U+0951 DEVANAGARI STRESS SIGN UDATTA; NOTE(review): the name `svar` suggests it is treated as the svarita mark here - confirm.
local svar = U(0x951)
local anud = U(0x952) -- U+0952 DEVANAGARI STRESS SIGN ANUDATTA
local d_svar = U(0x1CDA) -- double svarita, sometimes used for long vowel with svarita

-- Devanagari consonant letters mapped to their bare Latin value
-- (the inherent vowel is not part of the mapping).
local consonants = {
	-- velars
	["क"] = "k", ["ख"] = "kh", ["ग"] = "g", ["घ"] = "gh", ["ङ"] = "ṅ",
	-- palatals
	["च"] = "c", ["छ"] = "ch", ["ज"] = "j", ["झ"] = "jh", ["ञ"] = "ñ",
	-- retroflexes
	["ट"] = "ṭ", ["ठ"] = "ṭh", ["ड"] = "ḍ", ["ढ"] = "ḍh", ["ण"] = "ṇ",
	-- dentals
	["त"] = "t", ["थ"] = "th", ["द"] = "d", ["ध"] = "dh", ["न"] = "n",
	-- labials
	["प"] = "p", ["फ"] = "ph", ["ब"] = "b", ["भ"] = "bh", ["म"] = "m",
	-- semivowels and liquids
	["य"] = "y", ["र"] = "r", ["ल"] = "l", ["व"] = "v", ["ळ"] = "ḷ",
	-- sibilants and h
	["श"] = "ś", ["ष"] = "ṣ", ["स"] = "s", ["ह"] = "h",
}

-- Dependent signs that attach to a consonant, mapped to their Latin value:
-- vowel signs, the virama (mapped to the empty string) and two accent signs.
-- NOTE(review): the e/o signs below (ei, ēi, e, ē, ou, ōu, o, ō) do not line up
-- with the independent vowels in `tt` (ए = e, ऐ = ai, ओ = o, औ = au), and no
-- signs for vocalic r/l are listed although `tt` has the independent letters -
-- confirm this is intended.
local diacritics = {
	['ा']='ā', 
	['ि']='i', 
	['ी']='ī', 
	['ु']='u', 
	['ू']='ū', 
	['ॆ']='ei', -- U+0946 vowel sign short e
	['े']='ēi', 
	['ॅ']='e', -- U+0945 vowel sign candra e
	['ै']='ē', 
	['ॊ']='ou', -- U+094A vowel sign short o
	['ो']='ōu', 
	['ॉ']='o', -- U+0949 vowel sign candra o
	['ौ']='ō',  
	['्']='', -- virama: suppresses the inherent vowel, contributes nothing itself
	['॔']='́', -- U+0954 Devanagari acute accent
	-- U+0953 Devanagari grave accent. NOTE(review): this appears to map to the
	-- same combining mark as the acute sign above - confirm a grave is not meant.
	['॓']='́',
}

-- Characters that are transliterated on their own, independent of any
-- consonant: independent vowels, nasalisation and other signs, digits.
local tt = {
	-- independent vowels
	["अ"] = "a", ["आ"] = "ā", ["इ"] = "i", ["ई"] = "ī",
	["उ"] = "u", ["ऊ"] = "ū", ["ऋ"] = "ṛ", ["ॠ"] = "ṝ",
	["ऌ"] = "ḷ", ["ॡ"] = "ḹ", ["ए"] = "e", ["ऐ"] = "ai",
	["ओ"] = "o", ["औ"] = "au",
	-- chandrabindu (provisional, until a better method is found)
	["ँ"] = "m̐",
	-- anusvara (provisional, until a better method is found)
	["ं"] = "ṃ",
	-- candrabindu virama
	["ꣳ"] = "ṃ",
	-- visarga
	["ः"] = "ḥ",
	-- avagraha
	["ऽ"] = "ʼ",
	-- numerals
	["०"] = "0", ["१"] = "1", ["२"] = "2", ["३"] = "3", ["४"] = "4",
	["५"] = "5", ["६"] = "6", ["७"] = "7", ["८"] = "8", ["९"] = "9",
	-- punctuation (entries currently disabled)
	-- ["॥"] = ".", -- double danda
	-- ["।"] = ".", -- danda
	-- Vedic extensions
	["ᳵ"] = "x", ["ᳶ"] = "f",
	-- Om
	["ॐ"] = "oṃ",
	-- reconstructed-form marker (entry currently disabled)
	-- ["*"] = "",
}

--- Transliterates Devanagari text into Latin script.
-- Previously the function fell off the end for "Deva" input and therefore
-- returned nil for every call; it now returns the transliterated string.
-- Vedic accent marks (svarita/anudatta notation) are not interpreted here.
-- @param text string to transliterate
-- @param lang language code (not consulted by this function)
-- @param sc script code; only "Deva" is supported
-- @return the transliteration, or nil when sc is not "Deva"
function export.tr(text, lang, sc)
	if sc ~= "Deva" then
		return nil
	end
	-- Give every consonant its inherent "a" unless a dependent vowel sign or a
	-- virama follows it. The range क-ह also spans a few code points that have
	-- no entry in `consonants`; those are left untouched.
	text = gsub(text, '([क-ह])([ा-्]?)', function(c, d)
		if d == "" and consonants[c] then
			return c .. 'a'
		end
		return c .. d
	end)
	-- Per-character table lookups; characters without an entry are kept as they are.
	text = gsub(text, '.', consonants)
	text = gsub(text, '.', diacritics)
	-- Mark hiatus a+i / a+u with a diaeresis so it stays distinct from ऐ / औ.
	text = gsub(text, '([aअ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis)
	text = gsub(text, '.', tt)
	-- In the diphthongs ai/au, an accent belongs on the first element.
	text = gsub(text, 'a([iu])([' .. acute .. grave .. '])', 'a%2%1')
	-- Danda and double danda (with an optional preceding space) become a full stop.
	text = gsub(text, " ?[।॥]", ".")
	return toNFC(text)
end
return export

@@ Line 25: / Line 25: @@
 local diacritics = {
-	['ा']='ā', ['ि']='i', ['ी']='ī', ['ु']='u', ['ू']='ū', ['ृ']='ṛ', ['ॄ']='ṝ',
+	['ा']='ā',
-	['ॢ']='ḷ', ['ॣ']='ḹ', ['े']='e', ['ै']='ai', ['ो']='o', ['ौ']='au',  ['्']='',
+	['ि']='i',
+	['ी']='ī',
+	['ु']='u',
+	['ू']='ū',
+	['ॆ']='ei',
+	['े']='ēi',
+	['ॅ']='e',
+	['ै']='ē',
+	['ॊ']='ou',
+	['ो']='ōu',
+	['ॉ']='o',
+	['ौ']='ō',
+	['्']='',
+	['॔']='́',
+	['॓']='́',
 }
@@ Line 59: / Line 73: @@
 		return nil
 	end
-	-- Vedic accent handling
-	if text:match(anud) or text:match(svar) or text:match(d_svar) then
-		-- insert 'a' after consonants without vowel diacritic or virama
-		text = gsub(text, '([क-ह])([ा-्ॢॣ]?)',
-			function(c,d)
-				if d == "" then return c .. 'a' else return c .. d end
-			end)
-		local vow_list = "aअ-औा-ौॠ-ॣ"
-		local vow = "[" .. vow_list .. "]"
-		local extra_list = "ःंँ" -- visarga, anusvara, candrabindu
-		local extra = "[" .. extra_list .. "]"
-		local acc_list = grave .. acute .. svar .. anud .. d_svar
---		local cons_list = "क-हᳵᳶऽ् \'" -- consonants + avagraha + virama + space + apostrophe (from e.g. bold formatting)
-		-- Workaround: the consonants (plus a few other signs, see outcommented 'local cons_list')
-		-- are defined by negating the non-consonants, so as to include
-		-- the munged versions of formatting characters (e.g. bold formatting)
-        local cons = "[^" .. vow_list .. acc_list .. extra_list .. "।॥१३ॐ]"
-        -- independent svarita before udatta or other independent svarita (indicated by १/३ with both svarita and anudatta sign)
-		text = gsub(text, "(" .. extra .. "?)" .. anud .. "?[१३][" .. anud .. svar .. d_svar .. "]+(" ..
-			cons .. "*" .. vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)",
-			function(a,b,c,d)
-				if d ~= "" then
-					return grave .. a .. b .. grave .. c	-- 2 × independent svarita
-				else
-					return grave .. a .. b .. acute .. c	-- independent svarita + udatta
-				end
-			end)
-		-- optional: a few non-Rigvedic ways to mark the independent svarita (but compatible with Rigvedic system)
-		-- 1) ᳡ (U+1CE1) used by Atharvavedic Śaunakīya Saṃhitā
-		-- 2) ᳖ (U+1CD6) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'standard' independent svarita
-		-- 3) ᳕ (U+1CD5) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'aggravated' independent svarita (before udatta)
-		-- note that the Rigvedic system doesn't distinguish between dependent vs. independendent
-		-- svarita after udatta (the latter needs manual addition of grave `, see documentation)
-		text = gsub(text, "(" .. extra .. "?)[᳡᳖`]", grave .. "%1")
-		text = gsub(text, "(" .. extra .. "?)᳕(" .. cons .. "*" .. vow ..")", grave .. "%1%2" .. acute)
-		-- initial udatta/svarita
-		text = gsub(text, "^(" .. cons .. "*" .. vow .. ")(३?" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
-			function(a,b,c)
-				if c == svar or c == d_svar then
-					return a .. grave .. b -- initial svarita
-				else
-					return a .. acute .. b .. c -- initial udatta
-				end
-			end)
-		-- the same, after (double) danda or 'om'
-		text = gsub(text, "([।॥ॐ]" .. cons .. "*" .. vow .. ")(३?" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
-			function(a,b,c)
-				if c == svar or c == d_svar then
-					return a .. grave .. b -- initial svarita
-				else
-					return a .. acute .. b .. c -- initial udatta
-				end
-			end)
-		-- in case of anudatta sign not before other anudatta sign (nor before grave accent from १/३)
-		text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
-			function(a,b,c)
-				if c == svar or c == d_svar then
-					return a .. grave .. b -- independent svarita
-				else
-					return a .. acute .. b .. c -- udatta
-				end
-			end)
-		-- and again (excluding acute on next vowel), in case of overlapping patterns (if 'c' above happens to be another vowel with anudatta)
-		text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?)([^" .. anud .. grave .. acute .. extra_list .. "])",
-			function(a,b,c)
-				if c == svar or c == d_svar then
-					return a .. grave .. b -- independent svarita
-				else
-					return a .. acute .. b .. c -- udatta
-				end
-			end)
-		-- the same, string final
-		text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)$",
-			function(a,b,c)
-				if c ~= "" then
-					return a .. grave .. b -- independent svarita
-				else
-					return a .. acute .. b -- udatta
-				end
-			end)
-		-- unmarked vowel after udatta is also udatta
-		text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-		-- and again, in case of three udatta's in a row
-		text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-		-- yet again: 4 udatta's in a row occurs in RV.1.164.39
-		text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-		-- the same, string final
-		text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?)$", "%1" .. acute .. "%2")
-		-- remove remaining anudatta and svarita signs
-		text = gsub(text, "[" .. anud .. svar .. d_svar .. "]", "")
-		text = gsub(text, '.', consonants)
-		text = gsub(text, '.', diacritics)
-	else -- no Vedic accents
-		text = gsub(
-		text,
-		'([क-ह])'..
-		'([ािीुूृॄॢॣेैोौ्]?)'..
-		'([अ-औ]?)',
-		function(c, d, e)
-			if d == "" and e ~= "" then
-				if tt[e] == "i" or tt[e] == "u" then return consonants[c] .. 'a' .. tt[e] .. diaeresis
-				else return consonants[c] .. 'a' .. tt[e] end
-            elseif e ~= "" then
-				return consonants[c] .. diacritics[d] .. tt[e]
-			elseif d == "" then
-				return consonants[c] .. 'a'
-			else
-				return consonants[c] .. diacritics[d]
-			end
-		end)
-	end
-	text = gsub(text, '([aअ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis)
-	text = gsub(text, '.', tt)
-	text = gsub(text, 'a([iu])([' .. acute .. grave .. '])', 'a%2%1')
-	text = gsub(text, " ?[।॥]", ".")
-	text = gsub(text, "(ā" .. acute .. "3[iu])" .. acute, "%1") -- for pluti vowels
-	text = toNFC(text)
-	return text
 end
 return export

Module:tevo-translit: Difference between revisions

Revision as of 16:41, 3 June 2026

Functions

Navigation menu

Search