Module:siwa-pron: Difference between revisions
No edit summary |
No edit summary |
||
| (275 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
local m_sm = mw.loadData("Module:siwa-pron/data") | local m_sm = mw.loadData("Module:siwa-pron/data") | ||
| Line 11: | Line 7: | ||
local u = mw.ustring.char | local u = mw.ustring.char | ||
local split = mw.text.split | local split = mw.text.split | ||
local gsplit = mw.text.gsplit | |||
local export = {} | local export = {} | ||
local | local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚ | ||
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later | --obsolete ligatures and L with stroke used to remove two-character hassle. will replace later | ||
local consonant = "[ | local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?" | ||
local front_vowel = " | local front_vowel = "iɪyeøɛœæ" | ||
local back_vowel = " | local back_vowel = "uɔõɑʊɤɯ" | ||
local vowel = "[" .. front_vowel .. back_vowel .. " | local vowel = "[" .. front_vowel .. back_vowel .. "a]" | ||
local | local unrelaxed = { | ||
["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o", ["ụ"] = "u", ["ỵ"] = "y", | |||
} | |||
function spat(c) | |||
return "(·?ˈ[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*" .. UNR .. "?" .. vowel .. ")" .. c .. "([^ː])" | |||
end | |||
local function ncategories(categories) | local function ncategories(categories) | ||
| Line 40: | Line 38: | ||
local function open_to_closed(v) | local function open_to_closed(v) | ||
local otc = {} | local otc = {} | ||
local switch = {["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["u"] = "ʊ", ["y"] = "œ", | local switch = {["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ", | ||
["ø"] = "ü", | ["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ", | ||
["ø"] = "ü", -- dialectal variation, will be changed later | |||
["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ", | |||
["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",} | |||
for vc in gmatch(v, ".") do | for vc in gmatch(v, ".") do | ||
vc = gsub(vc, vc, switch[vc]) | vc = gsub(vc, vc, switch[vc]) | ||
| Line 66: | Line 67: | ||
end | end | ||
return table.concat(pss,"·") | local _, n = gsub(table.concat(pss,"·"), "ˈ", "") | ||
return n>=2 and gsub(table.concat(pss,"·"), "ˈ", "", n-1) or table.concat(pss, "·") | |||
end | end | ||
local | local function detect_dialect(term) | ||
if find(term, "[ṁṅłƛ]") then | |||
return "w" | |||
elseif find(term, "^t[mk]") or find(term, "^sm") or find(term, "^mġ") or find(term,"^ų") or find(term, "̊") then | |||
return "e" | |||
end | |||
end | |||
local anaptyctic = { | |||
{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"}, | |||
{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"}, | |||
{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"}, | |||
{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"}, | |||
{"([^ˈ])đv", "%1ːdɨwː"}, | |||
{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"}, | |||
{"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"}, | |||
{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"}, | |||
{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"}, | |||
{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"}, | |||
} | |||
local rules1 = { | |||
{".", { | {".", { | ||
["ḍ"] = "ð", ["ṡ"] = "ɕ", | ["ḍ"] = "ð", ["ṡ"] = "ɕ", | ||
["ḥ"] = "ʔ", ["į"] = "j", | ["ḥ"] = "ʔ", ["į"] = "j", | ||
["ġ"] = "x", ["g"] = "ɡ", -- IPA g | ["ġ"] = "x", ["g"] = "ɡ", -- IPA g | ||
["ų"] = "w", ["ł"] = "ɬ", | |||
}}, | }}, | ||
{" | {"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"}, {"ts", "ʦ"}, {"o̊", "ɯ"}, | ||
{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end}, | |||
{" | |||
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne | -- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne | ||
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"}, | {"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"}, | ||
-- long consonants | |||
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"}, | |||
{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"}, | |||
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"}, | |||
{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"}, | |||
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"}, | |||
-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts | -- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts | ||
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, {" | {"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters | ||
{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"}, | |||
{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs | |||
{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"}, | |||
{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels | |||
{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated | |||
{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters | |||
{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not | |||
{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z] | |||
{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels | |||
-- preaspirated consonants | |||
{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"}, | |||
{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"}, | |||
{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"}, | |||
{" | {"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"}, | ||
{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"}, | |||
{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"}, | |||
{" | |||
{" | |||
-- other stem- and/or word-initial configurations | -- other stem- and/or word-initial configurations | ||
{"đi", " | {"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"}, | ||
{" | {"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"} | ||
} | } | ||
local | local rules2 = { | ||
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs) | -- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs) | ||
{ | {spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"}, | ||
{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"}, | |||
-- internal consonant clusters | -- internal consonant clusters | ||
{"mn", "mnː"}, {"mʔk", "mkː"}, | {"[ƀp][ƀp]", "ʔp"}, | ||
{" | {"[đt][đt]", "ʔt"}, | ||
{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"}, | |||
{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"}, | |||
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"}, | |||
{"b([sɕ])", "p%1"}, | {"b([sɕ])", "p%1"}, | ||
{" | {"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"}, | ||
{" | {"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"}, | ||
{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"}, | |||
{"ʦxv", "ʦxw"}, {" | {"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"}, | ||
{" | {"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"}, | ||
{" | {"[ꝺd]x", "ðx"}, | ||
{"( | {"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"}, | ||
{"([lr])ʔ([ptk])", "%1%2ː"}, | {"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"}, | ||
{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"}, | |||
{"mʔk", "mkː"}, | |||
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"}, | {"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"}, | ||
{"( | {"(ⱡ[mnꬶk])", "%1ː"}, | ||
{"( | {"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"}, | ||
{" | {"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"}, | ||
-- closed vowels | -- closed vowels | ||
{"(" .. vowel .. "*)(" .. consonant .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end}, | {"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end}, | ||
{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end}, | {"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end}, | ||
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end}, | |||
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end}, | |||
{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end}, | |||
{"ɑ$", "a"}, | {"ɑ$", "a"}, | ||
{"(" .. vowel .. ") | } | ||
local final = { | |||
{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"}, | |||
{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ | |||
--undo ligatures | --undo ligatures | ||
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {" | {"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"}, | ||
{"ƀ", " | {"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"}, | ||
{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"}, | |||
-- remove morpheme separator | |||
{" | {"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"}, | ||
-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers | |||
{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"}, | |||
} | } | ||
function export.crux(term, e, w) | function export.crux(term, a, e, w) | ||
term=mw.ustring.lower(term) | term=mw.ustring.lower(term) | ||
term=export.morphemes(term) | term=export.morphemes(term) | ||
for _, rule in ipairs( | for _, rule in ipairs(rules1) do | ||
term = gsub(term, rule[1], rule[2]) | term = gsub(term, rule[1], rule[2]) | ||
end | end | ||
if w then | if w then | ||
term = gsub(term, "ꬶl", " | term = gsub(term, "ꬶl", "ʔⱡ") | ||
for _, anap in ipairs(anaptyctic) do | |||
term = gsub(term, anap[1], anap[2]) | |||
end | |||
elseif e then | elseif e then | ||
term = gsub(term, "( | term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ") | ||
term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ƀʼ", "pʼ"); term = gsub(term, "đʼ", "tʼ") | |||
term = gsub(term, "ƀr", "pʼqʼ") | |||
term = gsub(term, "đr", "tʼqʼ") | |||
term = gsub(term, "ʦx", "ʦʼqʼ") | term = gsub(term, "ʦx", "ʦʼqʼ") | ||
term = gsub(term, "ꬶl", "klː") | term = gsub(term, "ꬶl", "klː") | ||
| Line 161: | Line 222: | ||
end | end | ||
for _, | for _, rrule in ipairs(rules2) do | ||
term = gsub(term, | term = gsub(term, rrule[1], rrule[2]) | ||
end | end | ||
| Line 170: | Line 231: | ||
term = gsub(term, "ü", "œ") | term = gsub(term, "ü", "œ") | ||
end | end | ||
for _, f in ipairs(final) do | |||
term = gsub(term, f[1], f[2]) | |||
end | |||
return term | return term | ||
| Line 193: | Line 257: | ||
table.insert(full_pronunciations, format_IPA(IPA_args)) | table.insert(full_pronunciations, format_IPA(IPA_args)) | ||
return "(''" .. table.concat(dialect, ", ") .. "'')" .. ' ' .. table.concat(full_pronunciations, ' or ') | return "(''" .. table.concat(dialect, ", ") .. "'')" .. ' ' .. table.concat(full_pronunciations, ' or ') | ||
end | |||
function separate_word(term, a, e, w) | |||
local result = {} | |||
for word in gsplit(term, " ") do | |||
table.insert(result, export.crux(word, a, e, w)) | |||
end | |||
return table.concat(result, " ") | |||
end | end | ||
| Line 201: | Line 275: | ||
["e"] = {type = 'boolean', default = true}, | ["e"] = {type = 'boolean', default = true}, | ||
["w"] = {type = 'boolean', default = true}, | ["w"] = {type = 'boolean', default = true}, | ||
["dia"] = {}, | |||
["rs"] = {type = 'boolean'}, | |||
} | } | ||
| Line 209: | Line 285: | ||
local ipa = "* " | local ipa = "* " | ||
if args.a then | if detect_dialect(term) == "w" then | ||
ipa = ipa .. line_format( | args.e = false; args.a = false | ||
ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'}) | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | |||
table.insert(categories, "Western Siwa terms") | |||
table.insert(categories, "Siwa terms with Western IPA pronunciation") | |||
end | |||
elseif detect_dialect(term) == "e" then | |||
args.w = false; args.a = false | |||
ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'}) | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | |||
table.insert(categories, "Eastern Siwa terms") | |||
table.insert(categories, "Siwa terms with Eastern IPA pronunciation") | |||
end | |||
elseif args.a then | |||
ipa = ipa .. line_format(separate_word(term, true, false, false), {args.dia or 'Aingo'}) | |||
end | end | ||
if args.e | if args.e and separate_word(term, true, false, false) ~= separate_word(term, false, true, false) then | ||
if args.a then | |||
ipa = ipa .. "\n* " | |||
end | |||
ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'}) | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | |||
table.insert(categories, "Siwa | table.insert(categories, "Siwa terms with Eastern IPA pronunciation") | ||
end | end | ||
end | end | ||
if args.w | if args.w and separate_word(term, true, false, false) ~= separate_word(term, false, false, true) then | ||
if args.a or args.e then | |||
ipa = ipa .. "\n* " | |||
end | |||
ipa = ipa .. line_format(separate_word(term, true, false, true), {args.dia or 'Western'}) | |||
ipa = ipa .. line_format( | if find(ipa, "ɤ") then | ||
table.insert(categories, "Siwa | ipa = ipa .. "\n** " | ||
ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {args.dia or 'Regna'}) | |||
end | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | |||
table.insert(categories, "Siwa terms with Western IPA pronunciation") | |||
end | end | ||
end | end | ||
if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end | |||
return ipa .. ncategories(categories) | return ipa .. ncategories(categories) | ||
Latest revision as of 13:31, 10 August 2022
spat
function spat(c)
This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.
export.morphemes
function export.morphemes(word)
This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.
export.crux
function export.crux(term, a, e, w)
This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.
format_IPA
function format_IPA(items)
This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.
IPA_span
function IPA_span(items)
This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.
line_format
function line_format(pronunciation, dialect)
This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.
separate_word
function separate_word(term, a, e, w)
This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.
export.show
function export.show(frame)
This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.
local m_sm = mw.loadData("Module:siwa-pron/data")
local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit
local export = {}
-- Shared building blocks for the patterns used by the rule tables in this module.
-- UNR is one combining character; it is an optional trailing element of the
-- consonant class below and is inserted by some rules1 replacements.
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
-- Pattern fragment: one character from the consonant set, optionally followed by UNR.
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
-- Bare character lists (no brackets) so they can be embedded inside larger character classes.
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
-- Character class matching any single vowel character from the two lists above, plus "a".
local vowel = "[" .. front_vowel .. back_vowel .. "a]"
-- Maps dot-below vowel letters to their plain letters; rules1 uses it to
-- replace the dotted letter with the plain one followed by the marker "ⱦ".
local unrelaxed = {
["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o", ["ụ"] = "u", ["ỵ"] = "y",
}
--- Build the pattern for a consonant `c` that closes a primary-stressed syllable.
-- The pattern has two captures: (1) an optional "·", the "ˈ" mark, any run of
-- onset characters, an optional UNR and one vowel; (2) the single character
-- following `c`, which must not be the length mark "ː".
-- @param c pattern text for the consonant to match between the two captures
-- @return the assembled pattern string
function spat(c)
	local onset = "[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*"
	local stressed_syllable = "(·?ˈ" .. onset .. UNR .. "?" .. vowel .. ")"
	local not_long = "([^ː])"
	return stressed_syllable .. c .. not_long
end
--- Turn a list of category names into concatenated wikitext category links.
-- @param categories sequence of category names (without the "Category:" prefix)
-- @return one string of "[[Category:NAME]]" links, in list order, with no separator
local function ncategories(categories)
	local links = {}
	for _, name in ipairs(categories) do
		links[#links + 1] = "[[Category:" .. name .. "]]"
	end
	return table.concat(links)
end
--- Replace every vowel character in `v` by its closed counterpart.
-- Vowels that have no distinct closed form map to themselves in the table.
-- @param v string of vowel characters (may be empty)
-- @return the string with each character mapped through the table
local function open_to_closed(v)
	local switch = {["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
		["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
		["ø"] = "ü", -- dialectal variation, will be changed later
		["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
		["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",}
	local otc = {}
	for vc in gmatch(v, ".") do
		-- Direct table lookup instead of gsub(vc, vc, switch[vc]): the character
		-- is no longer interpreted as a pattern, and a character without an
		-- entry is passed through unchanged rather than raising an error on a
		-- nil replacement.
		otc[#otc + 1] = switch[vc] or vc
	end
	return table.concat(otc)
end
--- Split a word on the morpheme separator "·" and add stress marks.
-- Each morpheme is looked up in the data module:
--   * listed as a suffix, and a stress mark already exists in the word: left unmarked;
--   * listed as a prefix: prefixed with the secondary stress mark "ˌ";
--   * anything else: prefixed with the primary stress mark "ˈ".
-- If more than one primary stress mark results, only the last one is kept.
-- @param word the term, with morphemes optionally separated by "·"
-- @return the word re-joined with "·", carrying the stress marks
function export.morphemes(word)
	-- split() yields a one-element list when there is no separator.
	local pss = split(word, "·")
	for i, m in ipairs(pss) do
		-- find() returns nil when nothing matches; the previous gmatch() call
		-- returned an iterator function, which is always truthy, so the
		-- "already stressed" test never actually ran.
		if m_sm.suffix[m] and find(table.concat(pss), "[ˈˌ]") then
			-- suffix attached to already-stressed material: no mark
		elseif m_sm.prefix[m] then
			pss[i] = "ˌ" .. m
		else
			pss[i] = "ˈ" .. m
		end
	end
	local joined = table.concat(pss, "·")
	local _, n = gsub(joined, "ˈ", "")
	if n >= 2 then
		-- Strip the first n-1 primary marks; parentheses drop gsub's count result.
		return (gsub(joined, "ˈ", "", n - 1))
	end
	return joined
end
--- Guess a dialect from the spelling of a term.
-- @param term the term as written
-- @return "w" if the term contains any of ṁ ṅ ł ƛ; otherwise "e" if it matches
--         one of the patterns listed below; otherwise nothing
local function detect_dialect(term)
	if find(term, "[ṁṅłƛ]") then
		return "w"
	end
	local eastern_patterns = {"^t[mk]", "^sm", "^mġ", "^ų", "̊"}
	for _, pattern in ipairs(eastern_patterns) do
		if find(term, pattern) then
			return "e"
		end
	end
end
-- Ordered list of {pattern, replacement} pairs. export.crux applies them with
-- gsub, in list order, only when its `w` argument is truthy (after rules1 and
-- before rules2). Every replacement inserts the vowel "ɨ" into the matched
-- consonant cluster.
local anaptyctic = {
{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"},
{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
{"([^ˈ])đv", "%1ːdɨwː"},
{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
{"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}
-- First rewrite pass of export.crux, run on each word after lowercasing and
-- after export.morphemes has added stress marks. Each entry is
-- {pattern, replacement} and is handed to gsub in list order; the replacement
-- may be a string, a lookup table or a function. Later entries match the
-- output of earlier ones, so the order of this list is significant.
local rules1 = {
-- single-character orthography-to-symbol lookup; characters without an entry are kept as they are
{".", {
["ḍ"] = "ð", ["ṡ"] = "ɕ",
["ḥ"] = "ʔ", ["į"] = "j",
["ġ"] = "x", ["g"] = "ɡ", -- IPA g
["ų"] = "w", ["ł"] = "ɬ",
}},
-- two-character sequences collapsed into single placeholder characters
{"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"}, {"ts", "ʦ"}, {"o̊", "ɯ"},
-- dot-below vowels: plain vowel followed by the marker "ⱦ" (the marker is deleted again in `final`)
{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end},
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
-- long consonants
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs
{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"},
{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels
{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not
{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
-- preaspirated consonants
{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"},
{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"},
-- other stem- and/or word-initial configurations
{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}
-- Second rewrite pass of export.crux, applied with gsub in list order after
-- rules1 and the dialect-specific substitutions. Each entry is
-- {pattern, replacement}; the "closed vowels" entries use a function
-- replacement that passes the captured vowel run through open_to_closed.
-- The order of this list is significant.
local rules2 = {
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
-- internal consonant clusters
{"[ƀp][ƀp]", "ʔp"},
{"[đt][đt]", "ʔt"},
{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"b([sɕ])", "p%1"},
{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
{"[ꝺd]x", "ðx"},
{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"},
{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
-- NOTE(review): this pair also appears earlier in this list; the repeat looks redundant — confirm before removing
{"mʔk", "mkː"},
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
{"(ⱡ[mnꬶk])", "%1ː"},
{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},
-- closed vowels
{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
-- word-final ɑ becomes a
{"ɑ$", "a"},
}
-- Last rewrite pass of export.crux, applied with gsub in list order after the
-- ü substitution. It replaces the internal single-character placeholders with
-- their output sequences and deletes the working markers "·" and "ⱦ".
local final = {
{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ
--undo ligatures
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}
--- Convert one written word into its pronunciation string.
-- Pipeline: lowercase -> export.morphemes -> rules1 -> dialect-specific
-- substitutions -> rules2 -> ü substitution -> final.
-- @param term a single word
-- @param a accepted for call compatibility; not read by this function
-- @param e truthy to apply the substitutions of the `e` branch (only consulted when `w` is falsy)
-- @param w truthy to apply the `w` branch, including the anaptyctic rules
-- @return the transformed string
function export.crux(term, a, e, w)
	term = export.morphemes(mw.ustring.lower(term))

	-- Run an ordered list of {pattern, replacement} pairs over the working string.
	local function apply(rules)
		for _, pair in ipairs(rules) do
			term = gsub(term, pair[1], pair[2])
		end
	end

	apply(rules1)

	if w then
		term = gsub(term, "ꬶl", "ʔⱡ")
		apply(anaptyctic)
	else
		if e then
			apply({
				{"(.)⁽ʰ⁾", "%1ʼ"},
				{"ꬶʼ", "kʼ"}, {"ƀʼ", "pʼ"}, {"đʼ", "tʼ"},
				{"ƀr", "pʼqʼ"},
				{"đr", "tʼqʼ"},
				{"ʦx", "ʦʼqʼ"},
			})
		end
		-- shared by the `e` branch and the default branch
		term = gsub(term, "ꬶl", "klː")
	end

	apply(rules2)

	-- ü is the placeholder produced by open_to_closed for ø
	term = gsub(term, "ü", w and "ɤ" or "œ")

	apply(final)
	return term
end
--- Prefix the rendered pronunciation spans with the linked "IPA (key):" label.
-- @param items list of tables with a `pron` field, forwarded to IPA_span
-- @return wikitext string: label followed by the spans
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>: "
	return label .. IPA_span(items)
end
--- Wrap each pronunciation in a styled <span> and concatenate the results.
-- @param items list of tables, each with a string `pron` field
-- @return the concatenated HTML spans (empty string for an empty list)
function IPA_span(items)
	local bits = {}
	for _, item in ipairs(items) do
		-- The style attribute is now closed with a double quote before ">";
		-- it was previously left unterminated, producing malformed markup.
		local bit = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif\">" .. item.pron .. "</span>"
		table.insert(bits, bit)
	end
	return table.concat(bits)
end
--- Format one output line: italic dialect label(s) followed by the IPA markup.
-- @param pronunciation the pronunciation string; it is wrapped in square brackets
-- @param dialect list of dialect names, joined with ", "
-- @return wikitext of the form (''dialects'') <IPA markup>
function line_format(pronunciation, dialect)
	local rendered = format_IPA({{pron = '[' .. pronunciation .. ']'}})
	local label = "(''" .. table.concat(dialect, ", ") .. "'')"
	return label .. ' ' .. rendered
end
--- Run export.crux on each space-separated word of a term.
-- @param term one or more words separated by single spaces
-- @param a, e, w flags forwarded unchanged to export.crux
-- @return the per-word results joined with single spaces
function separate_word(term, a, e, w)
	local pieces = {}
	for word in gsplit(term, " ") do
		pieces[#pieces + 1] = export.crux(word, a, e, w)
	end
	return table.concat(pieces, " ")
end
--- Template entry point: build the bulleted pronunciation list for a term.
-- Parameters read from the parent frame: 1 (the term; defaults to the page
-- title, or to a sample word in the Template namespace), a / e / w (booleans,
-- default true, selecting which lines are wanted), dia (label override) and
-- rs (remove stress marks from the output).
-- @param frame the Scribunto frame object
-- @return wikitext: the pronunciation lines followed by category links
function export.show(frame)
	local in_template = mw.title.getCurrentTitle().nsText == 'Template'
	local params = {
		[1] = { default = in_template and "uįo·sauṡṡi" or mw.title.getCurrentTitle().text },
		["a"] = {type = 'boolean', default = true},
		["e"] = {type = 'boolean', default = true},
		["w"] = {type = 'boolean', default = true},
		["dia"] = {},
		["rs"] = {type = 'boolean'},
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]
	local categories = {}
	local dialect = detect_dialect(term)
	local ipa = "* "

	-- Pronunciation of the whole term for a given flag combination.
	local function pron(a, e, w)
		return separate_word(term, a, e, w)
	end

	-- One formatted line, labelled with args.dia when given, else `fallback`.
	local function labelled(text, fallback)
		return line_format(text, {args.dia or fallback})
	end

	-- Categories are not added when rendering in the Template namespace.
	local function categorise(...)
		if in_template then
			return
		end
		for _, name in ipairs({...}) do
			table.insert(categories, name)
		end
	end

	-- First line: the auto-detected dialect if any, otherwise the default line.
	if dialect == "w" then
		args.e = false; args.a = false
		ipa = ipa .. labelled(pron(false, false, true), 'Western')
		categorise("Western Siwa terms", "Siwa terms with Western IPA pronunciation")
	elseif dialect == "e" then
		args.w = false; args.a = false
		ipa = ipa .. labelled(pron(false, true, false), 'Eastern')
		categorise("Eastern Siwa terms", "Siwa terms with Eastern IPA pronunciation")
	elseif args.a then
		ipa = ipa .. labelled(pron(true, false, false), 'Aingo')
	end

	-- NOTE(review): when a dialect was auto-detected above, its own flag is left
	-- unchanged, so the matching block below can append the same dialect's line
	-- a second time with no line break in between — confirm whether intended.

	-- Eastern line, only when it differs from the default pronunciation.
	if args.e and pron(true, false, false) ~= pron(false, true, false) then
		if args.a then
			ipa = ipa .. "\n* "
		end
		ipa = ipa .. labelled(pron(true, true, false), 'Eastern')
		categorise("Siwa terms with Eastern IPA pronunciation")
	end

	-- Western line, only when it differs from the default pronunciation.
	if args.w and pron(true, false, false) ~= pron(false, false, true) then
		if args.a or args.e then
			ipa = ipa .. "\n* "
		end
		ipa = ipa .. labelled(pron(true, false, true), 'Western')
		if find(ipa, "ɤ") then
			-- nested sub-line with ɤ replaced by ɵ
			ipa = ipa .. "\n** "
			ipa = ipa .. labelled(gsub(pron(false, false, true), "ɤ", "ɵ"), 'Regna')
		end
		categorise("Siwa terms with Western IPA pronunciation")
	end

	if args.rs then
		ipa = gsub(ipa, "[ˌˈ]", "")
	end
	return ipa .. ncategories(categories)
end
return export