Module:sd-Arab-translit: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
 
m 1 revision imported
 
(No difference)

Latest revision as of 12:46, 21 April 2026

Documentation for this module may be created at Module:sd-Arab-translit/doc

--- Taken from [[Module:pa-Arab-translit]]

local m_str_utils = require("Module:string utilities")

local U = m_str_utils.char
local gsub = m_str_utils.gsub

-- Table returned by this module; the public functions are attached to it.
local export = {}

-- Combining marks. The first four are built from their code points, the
-- rest are written as literals.
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ"
local hamza = 'ء'

-- Letters that get special treatment in export.tr.
local alif = 'ا'
local vav = 'و'
local ye = 'ي'
local he = 'ه'
local nun = 'ن'

-- Letters after which a following he is moved in front of the diacritic.
-- The Latin "n" is included because export.tr rewrites a consonantal nun to
-- "n" before this class is used.
local aspirates = '[ڙمنڻجگلn]'

-- Every character class below starts from the same four marks, so that run
-- is spelled out once and reused.
local marks = zabar .. zer .. pesh .. jazm
local matres = alif .. vav .. ye

local diacritics = '[' .. marks .. ']'
local diacritics_or_alif = '[' .. marks .. alif .. ']'
local diacritics_or_matres = '[' .. marks .. matres .. ']'
local not_diacritics = '[^' .. marks .. ']'

-- Per-character lookup applied by export.tr once its context-sensitive rules
-- have run. A character with no entry here is left unchanged by that lookup.
-- One entry per line keeps each right-to-left key next to its own value.
local mapping = {
	-- letters
	["آ"] = "ā",
	["ب"] = "b",
	["ٻ"] = "ḇ",
	["ڀ"] = "bh",
	["ت"] = "t",
	["ٿ"] = "th",
	["ٽ"] = "ṭ",
	["ٺ"] = "ṭh",
	["ث"] = "s̱",
	["پ"] = "p",
	["ج"] = "j",
	["ڄ"] = "j̄",
	["ڃ"] = "ñ",
	["چ"] = "c",
	["ڇ"] = "ch",
	["ح"] = "ḥ",
	["خ"] = "x",
	["د"] = "d",
	["ڌ"] = "dh",
	["ڏ"] = "ḏ",
	["ڊ"] = "ḍ",
	["ڍ"] = "ḍh",
	["ذ"] = "ẕ",
	["ر"] = "r",
	["ڙ"] = "ṛ",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "ś",
	["ص"] = "ṣ",
	["ض"] = "ẓ",
	["ط"] = "t̤",
	["ظ"] = "z̤",
	["ع"] = "ʻ",
	["غ"] = "ġ",
	["ف"] = "f",
	["ڦ"] = "ph",
	["ق"] = "q",
	["ڪ"] = "k",
	["ک"] = "kh",
	["گ"] = "g",
	["ڳ"] = "g̠",
	["ڱ"] = "ṅ",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["ڻ"] = "ṇ",
	["و"] = "v",
	["ہ"] = "h",
	["ي"] = "y",
	["۔"] = ".",
	["ں"] = "ṉ",

	-- single signs that are transliterated as a whole word
	["۾"] = "mẽ",
	["۽"] = "a͠i",

	-- other forms of he, and vav with hamza
	["ھ"] = "h",
	["ه"] = "h",
	["ؤ"] = "'o",

	-- diacritics
	[zabar] = "a",
	[zer] = "i",
	[pesh] = "u",
	[jazm] = "", -- also sukun - no vowel
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)

	-- ligatures
	["ﻻ"] = "lā",
	["ﷲ"] = "allāh",

	-- kashida: no sound of its own, written as a hyphen
	["ـ"] = "-",

	-- hamza: transliterated as nothing
	[hamza] = "",
	["ئ"] = "",

	-- numerals
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",
	["۰"] = "0",

	-- punctuation
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
	["ۓ"] = "-ye",
	["ۀ"] = "h-e", -- he ye (in izafat)
}

--- Transliterate Sindhi text written in Arabic script into Latin script.
--
-- The function is a fixed sequence of substitutions followed by a
-- per-character lookup in `mapping`. The order matters: earlier passes turn
-- letters into Latin placeholders ("n", "h", "v", "y", vowels), and those
-- placeholders have no `mapping` entry, so the final lookup leaves them as
-- they are.
--
-- @param text string to transliterate
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- tashdid (shadda) doubles the consonant it sits on. It appears in no
	-- other rule and is not a key of `mapping`, so without this pass the mark
	-- would be copied into the Latin output unchanged, and it would also keep
	-- a short vowel apart from a following alif.
	local short_vowel = '[' .. zabar .. zer .. pesh .. ']'
	-- accept both mark orders: "vowel, tashdid" is rewritten to
	-- "tashdid, vowel" so that the tashdid directly follows its consonant
	text = gsub(text, '(' .. short_vowel .. ')' .. tashdid, tashdid .. '%1')
	-- a doubled nun / vav / ye is a consonant; emit Latin right away so the
	-- nasalisation and vowel-letter rules below cannot reinterpret it
	text = gsub(text, nun .. tashdid, 'nn')
	text = gsub(text, vav .. tashdid, 'vv')
	text = gsub(text, ye .. tashdid, 'yy')
	-- any other letter is doubled in place; the jazm between the two copies
	-- is transliterated as nothing and stops a doubled he from being
	-- collapsed by the he-he rule further down
	text = gsub(text, '(' .. not_diacritics .. ')' .. tashdid, '%1' .. jazm .. '%1')
	-- a tashdid with nothing to double is dropped
	text = gsub(text, tashdid, '')

	-- nun with diacritics / matres is consonant, else nasalisation
	text = gsub(text, nun .. '(' .. diacritics_or_matres .. ')', 'n%1')
	text = gsub(text, nun, '̃')

	-- alif + vav is read as o, so the alif doesn't later become ā
	-- (the pattern is not anchored: it applies anywhere in the text)
	text = gsub(text, alif .. vav, 'o')

	-- alif carrying a short vowel is that vowel; zabar + alif is long ā
	text = gsub(text, alif .. zabar, 'a')
	text = gsub(text, alif .. zer, 'i')
	text = gsub(text, alif .. pesh, 'u')
	text = gsub(text, zabar .. alif, 'ā')

	-- he followed by a diacritic or alif is the consonant h
	text = gsub(text, he .. '(' .. diacritics_or_alif .. ')', 'h%1')
	-- a double he before a non-diacritic, or at the end of the text, is
	-- reduced to a single he
	text = gsub(text, he .. he .. '(' .. not_diacritics .. ')', he .. '%1')
	text = gsub(text, he .. he .. '$', he)
	-- aspirates should have diacritics moved after
	text = gsub(text, '(' .. aspirates .. ')(' .. diacritics .. ')' .. he, '%1' .. he .. '%2')

	-- vav + ye: vowel rules come first, before the consonant rules below
	text = gsub(text, zabar .. vav, 'au')
	text = gsub(text, pesh .. vav, 'ū')
	text = gsub(text, zabar .. ye, 'ai')
	text = gsub(text, zer .. ye, 'ī')

	-- extra consonant heuristics
	text = gsub(text, vav .. ye, 'v' .. ye)          -- وي... -> ve...
	-- only at the very end of the text, not at the end of every word
	text = gsub(text, ye .. vav .. '$', ye .. 'v')   -- ...يو -> ...ev

	-- now mark consonantal vav/ye when they have their own marks
	text = gsub(text, vav .. '(' .. diacritics_or_alif .. ')', 'v%1')
	text = gsub(text, ye .. '(' .. diacritics_or_alif .. ')', 'y%1')

	-- default case: any vav / ye still left is a vowel
	text = gsub(text, vav, 'o')
	text = gsub(text, ye, 'e')

	-- all rules: look every remaining character up in `mapping`
	text = gsub(text, '.', mapping)

	-- remaining alif (it has no `mapping` entry, so it is still present)
	text = gsub(text, alif, 'ā')

	return text
end

return export