Module:siwa-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
 
(577 intermediate revisions by the same user not shown)
Line 1: Line 1:
local export = {}
local m_sm = mw.loadData("Module:siwa-pron/data")
local m_IPA = require("Module:IPA")
local m_su = require("Module:string utilities")
local m_table = require("Module:table")
local m_sm = mw.loadData('Module:siwa-pron/data')


local sub = mw.ustring.sub
local sub = mw.ustring.sub
Line 11: Line 6:
local gsub = mw.ustring.gsub
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit


local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local export = {}
local NASALIZED = u(0x0303) -- COMBINING TILDE. ̃
 
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚


--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥɾlɬłʣjw]" .. UNRELEASED .. "?"
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
local front_vowel = "iɪyeøɛœæa"
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "" .. NASALIZED .. "?ɑʊ"
local back_vowel = "uɔõɑʊɤɯ"
local vowel = "[" .. front_vowel .. back_vowel .. "]"
local vowel = "[" .. front_vowel .. back_vowel .. "a]"
 
local unrelaxed = {
["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o", ["ụ"] = "u", ["ỵ"] = "y",
}
 
function spat(c)
return "(·?ˈ[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*" .. UNR .. "?" .. vowel .. ")" .. c .. "([^ː])"
end
 
local function ncategories(categories)
local out_categories = {}
for key, cat in ipairs(categories) do
out_categories[key] = "[[Category:" .. cat .. "]]"
end
 
return table.concat(out_categories, "")
end


local function open_to_closed(v)
local function open_to_closed(v)
local switch = {
local otc = {}
["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["u"] = "ʊ", ["y"] = "œ", ["ø"] = "œ",
local switch = {["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
}
["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
local tab = mw.text.split(v,"")
["ø"] = "ü", -- dialectal variation, will be changed later
for _, vowel in ipairs(tab) do
["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
for regex, replacement in ipairs(switch) do
["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",}
gsub(tab[v],regex,replacement)
for vc in gmatch(v, ".") do
vc = gsub(vc, vc, switch[vc])
table.insert(otc, vc)
end
return table.concat(otc)
end
 
function export.morphemes(word)
local pss = {}
 
if gmatch(word,"·") then
pss = split(word,"·")
end
for i, m in ipairs(pss) do
if m_sm.suffix[m] and gmatch(table.concat(pss),"[ˈˌ]") then
elseif m_sm.prefix[m] then
pss[i] = "ˌ" .. pss[i]
else
pss[i] = "ˈ" .. pss[i]
end
end
end
end
return table.concat(tab)
local _, n = gsub(table.concat(pss,"·"), "ˈ", "")
return n>=2 and gsub(table.concat(pss,"·"), "ˈ", "", n-1) or table.concat(pss, "·")
end
 
local function detect_dialect(term)
if find(term, "[ṁṅłƛ]") then
return "w"
elseif find(term, "^t[mk]") or find(term, "^sm") or find(term, "^mġ") or find(term,"^ų") or find(term, "̊") then
return "e"
end
end
end


local rules = {
local anaptyctic = {
{ --long consonants
{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
["mm"] = "",
["bb"] = "",
{"([])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"},  
["vv"] = "wː",
{"([])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
["nn"] = "nː",
["dd"] = "tː",
{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
["ḍḍ"] = "ðː",
{"([])đv", "%1ːdɨwː"},
["ss"] = "",
["ṡṡ"] = "ɕː",
{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
["ddį"] = "ʥː",
["rr"] = "",
{"([lr])[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
["ll"] = "lː",
["gg"] = "",
{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
["ġġ"] = "",
["ng"] = "ŋː",
{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%"},
["hh"] = "hː",
["ḥḥ"] = "ʔː",
{"([^ˈ])([msɕl])", "%1ːɡɨ%"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
},
{ --consonants not affected by stress
["ṡ"] = "ɕ",
["tṡ"] = "ʨ",
["dį"] = "ʥ",
["ḍ"] = "ð",
["dl"] = "tɬ",
["ng"] = "ŋː",
["nį"] = "ɲ",
["ġ"] = "x",
},
{ --all vowels as open (open-closed distinctions are computed later)
["a"] = "ɑ", [""] = "æː",
["ę"] = "æ",
[""] = "",
["ỉ"] = "iː",
["o"] = "ɔ", ["ỏ"] = "ʊː",
["ủ"] = "uː",
["ỷ"] = "yː",
["ů"] = "ø", ["ẻu"] = "øː",
["õ"] = "ɔ̃", ["õu"] = "ɔ̃ː̃",
},
{
["^k([" .. front_vowel .. "])"] = "c%1", --word-initial [k] palatalizes before front-vowels
["^([ptkc])"] = "%", --voiceless stops word-initially become aspirated
["^gį([" .. front_vowel .. "])"] = "ʣ%1", --<gį> word-initially and before front vowels is pronounced [d͡z]
},
{
["(" .. vowel .. "*)(" .. consonant .. consonant .. ")"] = open_to_closed("%1") .. "%2",
},
{
["(" .. vowel .. ")t$"] = "%1ʔ%1" -- -Vt becomes -VʔV (or -Vht, not considered)
},
}
}


--[[function export.syllable(frame)
local rules1 = {
local word = mw.title.getCurrentTitle().text
{".", {
local pattern = "^[" .. initial .. "]?[" .. vocalic .. "]{1}[" .. internal .. "]?"
["ḍ"] = "ð", ["ṡ"] = "ɕ",
x = m_su.capturing_split(word, pattern)
["ḥ"] = "ʔ", ["į"] = "j",
return x[1]
["ġ"] = "x", ["g"] = "ɡ", -- IPA g
end]]
["ų"] = "w", ["ł"] = "ɬ",
}},
{"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end},
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
-- long consonants
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs
{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"},
{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels
{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not
{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
-- preaspirated consonants
{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"},
{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"}, 
-- other stem- and/or word-initial configurations
{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}


function export.crux(term)
local rules2 = {
local IPA = {}
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
-- internal consonant clusters
{"[ƀp][ƀp]", "ʔp"},
{"[đt][đt]", "ʔt"},
{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"b([sɕ])", "p%1"},
{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
{"[ꝺd]x", "ðx"},
{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"},
{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
{"mʔk", "mkː"},
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
{"(ⱡ[mnꬶk])", "%1ː"},
{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},
-- closed vowels
{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"ɑ$", "a"},
}
local final = {
{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ
--undo ligatures
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}
function export.crux(term, a, e, w)
term=mw.ustring.lower(term)
term=mw.ustring.lower(term)
term=export.morphemes(term)
for _, rule in ipairs(rules) do
for _, rule in ipairs(rules1) do
for regex, replacement in pairs(rule) do
term = gsub(term, rule[1], rule[2])
term = gsub(term, regex, replacement)
end
 
if w then
term = gsub(term, "ꬶl", "ʔⱡ")
for _, anap in ipairs(anaptyctic) do
term = gsub(term, anap[1], anap[2])
end
end
elseif e then
term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ")
term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ƀʼ", "pʼ"); term = gsub(term, "đʼ", "tʼ")
term = gsub(term, "ƀr", "pʼqʼ")
term = gsub(term, "đr", "tʼqʼ")
term = gsub(term, "ʦx", "ʦʼqʼ")
term = gsub(term, "ꬶl", "klː")
else
term = gsub(term, "ꬶl", "klː")
end
end
if not find(term, "·") then
for _, rrule in ipairs(rules2) do
table.insert(IPA, "ˈ")
term = gsub(term, rrule[1], rrule[2])
end
end
table.insert(IPA, term)
if w then
term = gsub(term, "ü", "ɤ")
else
term = gsub(term, "ü", "œ")
end
--[[if find(term, "·") then
for _, f in ipairs(final) do
morphemes = {}
term = gsub(term, f[1], f[2])
morphemes = mw.text.split(term, "·")
end
for _, morpheme in ipairs(morphemes) do
print(morphemes[morpheme])
return term
end
end
end]]
 
function format_IPA(items)
return "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;" .. IPA_span(items)
end
 
function IPA_span(items)
local bits = {}
for _, item in ipairs(items) do
local bit = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif>" .. item.pron .. "</span>"
table.insert(bits, bit)
end
return table.concat(bits)
end
 
function line_format(pronunciation, dialect)
local full_pronunciations = {}
local IPA_args = {{pron = '[' .. pronunciation .. ']'}}
table.insert(full_pronunciations, format_IPA(IPA_args))
return "(''" .. table.concat(dialect, ", ") .. "'')" .. ' ' .. table.concat(full_pronunciations, ' or ')
end
 
function separate_word(term, a, e, w)
local result = {}
IPA = table.concat(IPA)
for word in gsplit(term, " ") do
table.insert(result, export.crux(word, a, e, w))
end
return IPA
return table.concat(result, " ")
end
end


function export.show(frame)
function export.show(frame)
local parent_args = frame:getParent().args
local params = {
local params = {
[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "uįo·sauṡṡi" or mw.title.getCurrentTitle().text },
[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "uįo·sauṡṡi" or mw.title.getCurrentTitle().text },
["a"] = {type = 'boolean', default = true},
["e"] = {type = 'boolean', default = true},
["w"] = {type = 'boolean', default = true},
["dia"] = {},
["rs"] = {type = 'boolean'},
}
}
local args = require("Module:parameters").process(parent_args, params)
local args = require("Module:parameters").process(frame:getParent().args, params)
local term = args[1]
local term = args[1]
local categories = {}
local ipa = "* "
if detect_dialect(term) == "w" then
args.e = false; args.a = false
ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then
table.insert(categories, "Western Siwa terms")
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
elseif detect_dialect(term) == "e" then
args.w = false; args.a = false
ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then
table.insert(categories, "Eastern Siwa terms")
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
elseif args.a then
ipa = ipa .. line_format(separate_word(term, true, false, false), {args.dia or 'Aingo'})
end
local ipa = export.crux(term)
if args.e and separate_word(term, true, false, false) ~= separate_word(term, false, true, false) then
if args.a  then
ipa = ipa .. "\n* "
end
ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
end
local IPA_key = "IPA for Siwa"
if args.w and separate_word(term, true, false, false) ~= separate_word(term, false, false, true) then
local key_link = "[[".. IPA_key .."|key]]"
if args.a or args.e then
 
ipa = ipa .. "\n* "
local prefix = "[[w:IPA chart|IPA]]<sup>(" .. key_link .. ")</sup>:&#32;"
end
local accent="(\''Aingo\'') "
ipa = ipa .. line_format(separate_word(term, true, false, true), {args.dia or 'Western'})
if find(ipa, "ɤ") then
ipa = ipa .. "\n** "
ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {args.dia or 'Regna'})
end
if mw.title.getCurrentTitle().nsText ~= 'Template' then
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
end
ipa = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif>[" .. ipa .. "]</span>"
if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end
ipa = accent..prefix..ipa
return ipa
return ipa .. ncategories(categories)
end
end


return export
return export

Latest revision as of 13:31, 10 August 2022

spat

function spat(c)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

export.morphemes

function export.morphemes(word)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

export.crux

function export.crux(term, a, e, w)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

format_IPA

function format_IPA(items)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

IPA_span

function IPA_span(items)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

line_format

function line_format(pronunciation, dialect)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

separate_word

function separate_word(term, a, e, w)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

export.show

function export.show(frame)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.


local m_sm = mw.loadData("Module:siwa-pron/data")

local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local export = {}

-- Combining mark appended to a stop in the rule tables below (see the "bm", "ꝺn", "ꬶn" rules in rules1).
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚

--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
-- Pattern fragment: one character from the working consonant inventory, optionally followed by UNR.
-- The set contains single-character placeholders (e.g. ƀ ꝺ đ ꬶ) that the `final` rules turn back into IPA.
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
-- Bare character lists (no brackets) so they can be embedded inside larger character classes.
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
-- Complete character class: front vowels, back vowels, and "a".
local vowel = "[" .. front_vowel .. back_vowel .. "a]"

-- Maps a dotted-below vowel letter to its plain counterpart; rules1 looks up this table
-- and appends the marker "ⱦ" to the result.
local unrelaxed = {
	["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o",	["ụ"] = "u", ["ỵ"] = "y",
}

-- Builds the pattern used by the "stressed consonants" rules in rules2.
-- Capture 1: optional morpheme separator, primary stress mark, any run of onset
-- characters, optional UNR, and one vowel. Then the literal `c`. Capture 2: one
-- following character that is not the length mark.
-- @param c pattern text for the consonant to match after the vowel
-- @return the assembled pattern string
function spat(c)
	local onset = "[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*"
	local stressed_syllable = "(·?ˈ" .. onset .. UNR .. "?" .. vowel .. ")"
	local not_long = "([^ː])"
	return stressed_syllable .. c .. not_long
end

-- Turns a sequence of category names into concatenated wikitext category links.
-- @param categories sequence of category names (without the "Category:" prefix)
-- @return a single string of "[[Category:...]]" links, or "" for an empty sequence
local function ncategories(categories)
	local links = {}
	for _, name in ipairs(categories) do
		links[#links + 1] = "[[Category:" .. name .. "]]"
	end
	return table.concat(links)
end

-- Open vowel -> closed counterpart. Vowels that are already closed, or that have no
-- closed counterpart, map to themselves. Built once rather than on every call.
local closed_counterpart = {
	["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
	["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
	["ø"] = "ü", -- dialectal variation, will be changed later
	["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
	["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",
}

-- Replaces every character of `v` with its closed counterpart.
-- Characters without an entry (e.g. "ɤ", which the `vowel` class matches but the
-- table does not list) are passed through unchanged; previously they produced a
-- nil replacement for gsub and raised an error.
-- @param v string of vowel characters (may be empty)
-- @return the converted string
local function open_to_closed(v)
	local otc = {}
	for vc in gmatch(v, ".") do
		otc[#otc + 1] = closed_counterpart[vc] or vc
	end
	return table.concat(otc)
end

-- Adds stress marks to a word whose morphemes are separated by "·".
-- Each morpheme is looked up in the data module:
--   * a suffix is left unmarked if a stress mark already exists in the word,
--   * a prefix receives secondary stress "ˌ",
--   * anything else receives primary stress "ˈ".
-- If more than one primary stress results, only the last one is kept.
-- @param word the (lower-cased) word, optionally segmented with "·"
-- @return the word with stress marks, morphemes still joined by "·"
function export.morphemes(word)
	-- split() returns the whole word as a single piece when there is no separator,
	-- so no separate "has separator" check is needed.
	local pss = split(word, "·")

	for i, m in ipairs(pss) do
		-- find() is a real predicate; the former gmatch() call returned an iterator
		-- function, which is always truthy, so a suffix-only word never got stress.
		if m_sm.suffix[m] and find(table.concat(pss), "[ˈˌ]") then
			-- unstressed suffix: leave as is
		elseif m_sm.prefix[m] then
			pss[i] = "ˌ" .. m
		else
			pss[i] = "ˈ" .. m
		end
	end

	local joined = table.concat(pss, "·")
	local _, n = gsub(joined, "ˈ", "")

	if n >= 2 then
		-- drop the first n-1 primary stress marks; parentheses discard gsub's count
		return (gsub(joined, "ˈ", "", n - 1))
	end
	return joined
end

-- Guesses a dialect from spelling alone.
-- @param term the term as written
-- @return "w" if the term contains any of ṁ ṅ ł ƛ; otherwise "e" if it matches one
--         of the Eastern spelling patterns below; otherwise nothing (nil)
local function detect_dialect(term)
	if find(term, "[ṁṅłƛ]") then
		return "w"
	end

	local eastern_patterns = {"^t[mk]", "^sm", "^mġ", "^ų", "̊"}
	for _, pattern in ipairs(eastern_patterns) do
		if find(term, pattern) then
			return "e"
		end
	end
end

-- Ordered list of {pattern, replacement} pairs. export.crux applies them one after
-- another with gsub, and only when its `w` flag is set. Every replacement inserts
-- the vowel "ɨ" into the matched consonant cluster. Order matters: earlier rules
-- consume input that later rules would otherwise match.
local anaptyctic = {
	{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
	
	{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"}, 
	{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
	
	{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
	{"([^ˈ])đv", "%1ːdɨwː"},
	
	{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
	
	{"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
	
	{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
	
	{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
	
	{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}

-- First rewriting pass: an ordered list of {pattern, replacement} pairs that
-- export.crux applies with gsub right after lower-casing and stress assignment.
-- A replacement may be a string, a lookup table, or a function.
-- Order matters: later rules match the output of earlier ones.
local rules1 = {
	-- single-character respellings: each character is looked up in the table;
	-- characters without an entry are left unchanged
	{".", {
		["ḍ"] = "ð", ["ṡ"] = "ɕ",
		["ḥ"] = "ʔ", ["į"] = "j",
		["ġ"] = "x", ["g"] = "ɡ", -- IPA g
		["ų"] = "w", ["ł"] = "ɬ",
	}},
	-- two-character sequences collapsed to single placeholder characters
	{"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
	
	-- dotted-below vowels: plain vowel plus the marker "ⱦ" (the marker is deleted by the `final` rules)
	{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end},
	
	-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
	{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
	
	-- long consonants
	{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
	{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
	{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
	{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
	{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
	
	-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
	{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
	{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
	{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs
	
	{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"}, 
	{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels 
	{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
	{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
	{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not
	{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
	{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
	
	-- preaspirated consonants
	{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
	{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"},
	{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
	
	{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
	{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
	{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"},  
	
	-- other stem- and/or word-initial configurations
	{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
	{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}

-- Second rewriting pass: an ordered list of {pattern, replacement} pairs that
-- export.crux applies with gsub after the dialect-specific substitutions.
-- Order matters: cluster rules run before the closed-vowel rules, which inspect
-- the consonants that follow each vowel run.
local rules2 = {
	-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
	{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
	{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
	
	-- internal consonant clusters
	{"[ƀp][ƀp]", "ʔp"},
	{"[đt][đt]", "ʔt"},
	{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
	{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
	{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
	{"b([sɕ])", "p%1"},
	{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
	{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
	{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
	{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
	{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
	{"[ꝺd]x", "ðx"},
	{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
	{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"}, 
	{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
	{"mʔk", "mkː"},
	{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
	{"(ⱡ[mnꬶk])", "%1ː"},
	{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
	{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},

	-- closed vowels
	-- each rule captures a run of vowels (s1) and the consonant context after it (s2),
	-- and rewrites the vowels through open_to_closed while leaving the context intact
	{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"ɑ$", "a"}, -- string-final ɑ becomes a
}

-- Last rewriting pass: an ordered list of {pattern, replacement} pairs that
-- export.crux applies with gsub just before returning. It converts the
-- single-character placeholders used by the earlier passes into their IPA
-- spellings and strips the internal markers.
local final = {
	{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
	{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ

	--undo ligatures
	{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
	{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
	{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
	 
	{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
	-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
	-- (the last rule also turns a hyphen not directly preceded by ˈ into the tie ‿)
	{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}

-- Substitutions used only when the `e` flag is set (and `w` is not), in application order.
local eastern_rules = {
	{"(.)⁽ʰ⁾", "%1ʼ"},
	{"ꬶʼ", "kʼ"}, {"ƀʼ", "pʼ"}, {"đʼ", "tʼ"},
	{"ƀr", "pʼqʼ"},
	{"đr", "tʼqʼ"},
	{"ʦx", "ʦʼqʼ"},
}

-- Runs an ordered list of {pattern, replacement} pairs over `text` with gsub.
local function apply_rules(text, ruleset)
	for _, pair in ipairs(ruleset) do
		text = gsub(text, pair[1], pair[2])
	end
	return text
end

-- Converts one written word into its phonetic transcription.
-- Pipeline: lower-case -> stress assignment (export.morphemes) -> rules1 ->
-- dialect-specific substitutions -> rules2 -> ü resolution -> final.
-- @param term a single word, optionally segmented with "·"
-- @param a accepted for call-site symmetry; not read by this function
-- @param e when truthy (and `w` is not), apply the Eastern substitutions
-- @param w when truthy, apply the Western substitutions (takes precedence over `e`)
-- @return the transcription string (without surrounding brackets)
function export.crux(term, a, e, w)
	term = export.morphemes(mw.ustring.lower(term))
	term = apply_rules(term, rules1)

	if w then
		term = gsub(term, "ꬶl", "ʔⱡ")
		term = apply_rules(term, anaptyctic)
	else
		if e then
			term = apply_rules(term, eastern_rules)
		end
		term = gsub(term, "ꬶl", "klː")
	end

	term = apply_rules(term, rules2)

	-- "ü" is the placeholder open_to_closed emits for closed ø; its value is dialectal
	local closed_oe = w and "ɤ" or "œ"
	term = gsub(term, "ü", closed_oe)

	return apply_rules(term, final)
end

-- Prefixes the formatted pronunciations with the "IPA(key):" label.
-- @param items sequence of tables with a `pron` field, passed through to IPA_span
-- @return wikitext: the label followed by the output of IPA_span
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;"
	return label .. IPA_span(items)
end

-- Wraps each pronunciation in a <span> that selects IPA-friendly fonts.
-- The opening tag now closes its style attribute with a double quote; it was
-- previously left unterminated, which produced malformed markup.
-- @param items sequence of tables, each with a string field `pron`
-- @return the concatenated <span> elements ("" for an empty sequence)
function IPA_span(items)
	local open_tag = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif\">"
	local bits = {}
	for _, item in ipairs(items) do
		bits[#bits + 1] = open_tag .. item.pron .. "</span>"
	end
	return table.concat(bits)
end

-- Formats one output line: an italic dialect label followed by the bracketed,
-- labelled pronunciation.
-- @param pronunciation transcription string (brackets are added here)
-- @param dialect sequence of dialect names, joined with ", " inside the label
-- @return wikitext of the form "(''Dialect'') IPA(key): [pronunciation]"
function line_format(pronunciation, dialect)
	local label = "(''" .. table.concat(dialect, ", ") .. "'')"
	local bracketed = '[' .. pronunciation .. ']'
	return label .. ' ' .. format_IPA({{pron = bracketed}})
end

-- Transcribes a term word by word.
-- The term is split on single spaces, each piece is passed to export.crux with
-- the same dialect flags, and the results are rejoined with single spaces.
-- @param term the term, possibly containing several space-separated words
-- @param a, e, w dialect flags forwarded unchanged to export.crux
-- @return the transcription of the whole term
function separate_word(term, a, e, w)
	local words = split(term, " ")
	for i, word in ipairs(words) do
		words[i] = export.crux(word, a, e, w)
	end
	return table.concat(words, " ")
end

-- Template entry point: renders the pronunciation list for a term.
--
-- Parameters read from the parent frame:
--   1    the term (defaults to the page title, or a sample word in Template namespace)
--   a    show the Aingo pronunciation (default true)
--   e    show the Eastern pronunciation when it differs from Aingo (default true)
--   w    show the Western pronunciation when it differs from Aingo (default true)
--   dia  overrides the dialect label on every line
--   rs   remove stress marks from the output
--
-- If the spelling identifies the term as Western or Eastern (detect_dialect),
-- only that dialect's line is shown. Previously the matching flag stayed set, so
-- the generic "differs from Aingo" block appended the same line a second time
-- with no separator and added its category twice. A differing dialect line that
-- followed a suppressed one also produced an empty "* " bullet; lines are now
-- collected and joined so neither can happen.
--
-- @param frame the Scribunto frame object
-- @return wikitext: a bulleted list of pronunciations followed by category links
function export.show(frame)
	local title = mw.title.getCurrentTitle()
	local in_template_ns = title.nsText == 'Template'

	local params = {
		[1] = { default = in_template_ns and "uįo·sauṡṡi" or title.text },
		["a"] = {type = 'boolean', default = true},
		["e"] = {type = 'boolean', default = true},
		["w"] = {type = 'boolean', default = true},
		["dia"] = {},
		["rs"] = {type = 'boolean'},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]
	local categories = {}
	local lines = {}

	-- Each dialect's transcription is computed at most once, and only if needed.
	-- export.crux does not read its `a` flag, so only `e` and `w` select the result.
	local cache = {}
	local function pron(dialect)
		if cache[dialect] == nil then
			cache[dialect] = separate_word(term, dialect == "a", dialect == "e", dialect == "w")
		end
		return cache[dialect]
	end

	-- Categories are suppressed on template pages.
	local function add_category(name)
		if not in_template_ns then
			table.insert(categories, name)
		end
	end

	local function add_eastern()
		table.insert(lines, line_format(pron("e"), {args.dia or 'Eastern'}))
		add_category("Siwa terms with Eastern IPA pronunciation")
	end

	local function add_western()
		local western = pron("w")
		local line = line_format(western, {args.dia or 'Western'})
		if find(western, "ɤ") then
			-- Regna variant as a sub-bullet; the local assignment drops gsub's count
			local regna = gsub(western, "ɤ", "ɵ")
			line = line .. "\n** " .. line_format(regna, {args.dia or 'Regna'})
		end
		table.insert(lines, line)
		add_category("Siwa terms with Western IPA pronunciation")
	end

	local dialect = detect_dialect(term)
	if dialect == "w" then
		add_category("Western Siwa terms")
		add_western()
	elseif dialect == "e" then
		add_category("Eastern Siwa terms")
		add_eastern()
	else
		if args.a then
			table.insert(lines, line_format(pron("a"), {args.dia or 'Aingo'}))
		end
		if args.e and pron("a") ~= pron("e") then
			add_eastern()
		end
		if args.w and pron("a") ~= pron("w") then
			add_western()
		end
	end

	local ipa = "* " .. table.concat(lines, "\n* ")

	if args.rs then
		ipa = gsub(ipa, "[ˌˈ]", "")
	end

	return ipa .. ncategories(categories)
end

return export