Module:sd-Arab-translit: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
m 1 revision imported |
||
(No difference)
| |||
Latest revision as of 12:46, 21 April 2026
Documentation for this module may be created at Module:sd-Arab-translit/doc
--- Taken from [[Module:pa-Arab-translit]]
local m_str_utils = require("Module:string utilities")
local U = m_str_utils.char
local gsub = m_str_utils.gsub
-- Table returned by this module; the public functions are attached to it.
local export = {}

-- Combining marks.
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
-- NOTE(review): tashdid is declared but not referenced anywhere else in this
-- file, so it is presumably left untransliterated -- confirm whether
-- gemination should be handled.
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ"

-- Letters that the transliteration rules single out.
local hamza = 'ء'
local alif = 'ا'
local vav = 'و'
local ye = 'ي'
local he = 'ه'
local nun = 'ن'

-- Consonants after which a following he is moved in front of the diacritic
-- (see the aspirate rule in export.tr). The Latin 'n' is presumably listed
-- because nun has already been rewritten to 'n' by the time that rule runs.
local aspirates = '[ڙمنڻجگلn]'

-- Character classes, all built on the same run of marks.
local marks = zabar .. zer .. pesh .. jazm
local diacritics = '[' .. marks .. ']'
local diacritics_or_alif = '[' .. marks .. alif .. ']'
local diacritics_or_matres = '[' .. marks .. alif .. vav .. ye .. ']'
local not_diacritics = '[^' .. marks .. ']'
-- Per-character transliteration table. export.tr applies it in a single
-- `gsub(text, '.', mapping)` pass after its context-sensitive rules have run,
-- so every key here is looked up one matched character at a time.
-- By the time export.tr reaches that pass it has already rewritten every nun,
-- vav and ye, so the entries for those three letters are not reached from
-- there. Alif has no entry; export.tr handles it separately.
-- NOTE(review): there is no entry for tashdid (shadda); confirm whether that
-- is intentional.
local mapping = {
-- consonants and standalone letters
["آ"] = 'ā', ["ب"] = 'b', ["ٻ"] = 'ḇ', ["ڀ"] = 'bh', ["ت"] = 't', ["ٿ"] = 'th',
["ٽ"] = 'ṭ', ["ٺ"] = 'ṭh', ["ث"] = 's̱', ["پ"] = 'p', ["ج"] = 'j', ["ڄ"] = 'j̄',
["ڃ"] = "ñ", ["چ"] = 'c', ["ڇ"] = 'ch', ["ح"] = 'ḥ', ["خ"] = 'x',
["د"] = 'd', ["ڌ"] = 'dh', ["ڏ"] = 'ḏ', ["ڊ"] = 'ḍ', ["ڍ"] = 'ḍh', ["ذ"] = 'ẕ',
["ر"] = 'r', ['ڙ'] = "ṛ", ["ز"] = 'z', ["ژ"] = 'ž', ["س"] = 's',
["ش"] = 'ś', ["ص"] = 'ṣ', ["ض"] = 'ẓ', ["ط"] = 't̤', ["ظ"] = 'z̤', ["ع"] = 'ʻ',
["غ"] = 'ġ', ["ف"] = 'f', ["ڦ"] = 'ph', ["ق"] = 'q', ["ڪ"] = 'k', ["ک"] = 'kh',
["گ"] = 'g', ["ڳ"] = 'g̠', ["ڱ"] = 'ṅ', ["ل"] = 'l',
["م"] = 'm', ["ن"] = 'n', ["ڻ"] = 'ṇ',
["و"] = 'v', ["ہ"] = 'h', ["ي"] = 'y', ["۔"] = ".", ["ں"] = 'ṉ',
-- single characters that expand to more than one Latin letter
["۾"] = 'mẽ', ["۽"] = 'a͠i',
-- he variants
["ھ"] = "h", ["ه"] = "h",
["ؤ"] = "'o",
-- diacritics
[zabar] = "a",
[zer] = "i",
[pesh] = "u",
[jazm] = "", -- also sukun - no vowel
[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
-- ligatures
["ﻻ"] = "lā",
["ﷲ"] = "allāh",
-- kashida
["ـ"] = "-", -- kashida, no sound
-- hamza
[hamza] = "", -- nothing
["ئ"] = "",
-- numerals
["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5",
["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0",
-- punctuation (leave on separate lines)
["؟"] = "?", -- question mark
["،"] = ",", -- comma
["؛"] = ";", -- semicolon
["«"] = '“', -- quotation mark
["»"] = '”', -- quotation mark
["٪"] = "%", -- percent
["؉"] = "‰", -- per mille
["٫"] = ".", -- decimals
["٬"] = ",", -- thousand
-- letters rendered with a hyphenated -e / -ye
["ۓ"] = "-ye",
["ۀ"] = "h-e" -- he ye (in izafat)
}
-- Ordered substitution passes used by export.tr. Each entry is
-- { pattern, replacement } and is handed to gsub as-is. The order is
-- significant: later passes depend on what earlier ones have consumed.
local tr_passes = {
	-- nun followed by a diacritic or mater is a consonant; any other nun
	-- becomes a combining tilde (nasalisation)
	{ nun .. '(' .. diacritics_or_matres .. ')', 'n%1' },
	{ nun, '̃' },

	-- alif + vav reads as o; done first so the alif is not later turned into ā.
	-- NOTE(review): the pattern is unanchored, so it applies to every
	-- alif + vav sequence, not only word-initial ones.
	{ alif .. vav, 'o' },

	-- alif carrying a short vowel, then zabar + alif as long ā
	{ alif .. zabar, 'a' },
	{ alif .. zer, 'i' },
	{ alif .. pesh, 'u' },
	{ zabar .. alif, 'ā' },

	-- he: consonantal when it has its own mark or alif; doubled he collapses;
	-- after an aspirable consonant the diacritic is moved behind the he
	{ he .. '(' .. diacritics_or_alif .. ')', 'h%1' },
	{ he .. he .. '(' .. not_diacritics .. ')', he .. '%1' },
	{ he .. he .. '$', he },
	{ '(' .. aspirates .. ')(' .. diacritics .. ')' .. he, '%1' .. he .. '%2' },

	-- vowel + vav / ye combinations, ahead of the consonantal readings
	{ zabar .. vav, 'au' },
	{ pesh .. vav, 'ū' },
	{ zabar .. ye, 'ai' },
	{ zer .. ye, 'ī' },

	-- heuristics: vav before ye is v; ye + vav at the very end of the text
	-- ends in v
	{ vav .. ye, 'v' .. ye },
	{ ye .. vav .. '$', ye .. 'v' },

	-- vav / ye with their own mark (or alif) are consonants
	{ vav .. '(' .. diacritics_or_alif .. ')', 'v%1' },
	{ ye .. '(' .. diacritics_or_alif .. ')', 'y%1' },

	-- whatever vav / ye is left is a vowel
	{ vav, 'o' },
	{ ye, 'e' },

	-- per-character table lookup for everything else
	{ '.', mapping },

	-- any alif still standing is long ā
	{ alif, 'ā' },
}

--- Transliterate text by running it through every pass in tr_passes, in order.
-- @param text  string to transliterate
-- @param lang  accepted but not used in this function
-- @param sc    accepted but not used in this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	for i = 1, #tr_passes do
		local pass = tr_passes[i]
		text = gsub(text, pass[1], pass[2])
	end
	return text
end

return export