<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://linguifex.com/w/index.php?action=history&amp;feed=atom&amp;title=Module%3Agrc-utilities</id>
	<title>Module:grc-utilities - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://linguifex.com/w/index.php?action=history&amp;feed=atom&amp;title=Module%3Agrc-utilities"/>
	<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:grc-utilities&amp;action=history"/>
	<updated>2026-04-06T06:04:02Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.43.6</generator>
	<entry>
		<id>https://linguifex.com/w/index.php?title=Module:grc-utilities&amp;diff=240225&amp;oldid=prev</id>
		<title>Sware: Created page with &quot;local export = {}  local m_script_utils = require(&quot;Module:script utilities&quot;) local m_links = require(&quot;Module:links&quot;) local lang = require(&quot;Module:languages&quot;).getByCode(&quot;grc&quot;)...&quot;</title>
		<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:grc-utilities&amp;diff=240225&amp;oldid=prev"/>
		<updated>2021-10-15T19:38:38Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;local export = {}  local m_script_utils = require(&amp;quot;Module:script utilities&amp;quot;) local m_links = require(&amp;quot;Module:links&amp;quot;) local lang = require(&amp;quot;Module:languages&amp;quot;).getByCode(&amp;quot;grc&amp;quot;)...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;local export = {}&lt;br /&gt;
&lt;br /&gt;
local m_script_utils = require(&amp;quot;Module:script utilities&amp;quot;)&lt;br /&gt;
local m_links = require(&amp;quot;Module:links&amp;quot;)&lt;br /&gt;
local lang = require(&amp;quot;Module:languages&amp;quot;).getByCode(&amp;quot;grc&amp;quot;)&lt;br /&gt;
local sc = require(&amp;quot;Module:scripts&amp;quot;).getByCode(&amp;quot;polytonic&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
local m_data = mw.loadData(&amp;quot;Module:grc-utilities/data&amp;quot;)&lt;br /&gt;
local groups = m_data.groups&lt;br /&gt;
local diacritic_order = m_data.diacritic_order&lt;br /&gt;
local conversions = m_data.conversions&lt;br /&gt;
local diacritics = m_data.diacritics&lt;br /&gt;
local diacritic = m_data.diacritic&lt;br /&gt;
local macron = diacritics.macron&lt;br /&gt;
local breve = diacritics.breve&lt;br /&gt;
local spacing_macron = diacritics.spacing_macron&lt;br /&gt;
local spacing_breve = diacritics.spacing_breve&lt;br /&gt;
local rough = diacritics.rough&lt;br /&gt;
local smooth = diacritics.smooth&lt;br /&gt;
local diaeresis = diacritics.diaeresis&lt;br /&gt;
local acute = diacritics.acute&lt;br /&gt;
local grave = diacritics.grave&lt;br /&gt;
local circumflex = diacritics.circum&lt;br /&gt;
local subscript = diacritics.subscript&lt;br /&gt;
local combining_diacritic = m_data.combining_diacritic&lt;br /&gt;
&lt;br /&gt;
-- Byte-level Lua pattern matching one UTF-8 encoded character: a lead byte&lt;br /&gt;
-- (1-127 or 194-244) followed by any number of continuation bytes (128-191).&lt;br /&gt;
-- Written for the byte-oriented string library rather than for mw.ustring.&lt;br /&gt;
local UTF8_char = &amp;quot;[\1-\127\194-\244][\128-\191]*&amp;quot;&lt;br /&gt;
-- Byte-level pattern for a two-byte UTF-8 sequence whose lead byte is 206 or 207,&lt;br /&gt;
-- that is, the code points U+0380 through U+03FF.&lt;br /&gt;
local basic_Greek = &amp;quot;[\206-\207][\128-\191]&amp;quot; -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ&lt;br /&gt;
&lt;br /&gt;
local find = mw.ustring.find&lt;br /&gt;
local match = mw.ustring.match&lt;br /&gt;
local gmatch = mw.ustring.gmatch&lt;br /&gt;
local sub = mw.ustring.sub&lt;br /&gt;
local gsub = mw.ustring.gsub&lt;br /&gt;
local toNFC = mw.ustring.toNFC&lt;br /&gt;
local decompose = mw.ustring.toNFD&lt;br /&gt;
&lt;br /&gt;
-- Maps a single character (a string) to a property table describing it.&lt;br /&gt;
local info = {}&lt;br /&gt;
-- The tables are shared among different characters so that they can be checked&lt;br /&gt;
-- for equality if needed, and to use less space.&lt;br /&gt;
local vowel_t = { vowel = true }&lt;br /&gt;
-- iota_t and upsilon_t have the same contents but are distinct tables, so an&lt;br /&gt;
-- identity comparison can tell the two letters apart.&lt;br /&gt;
local iota_t = { vowel = true, offglide = true }&lt;br /&gt;
local upsilon_t = { vowel = true, offglide = true }&lt;br /&gt;
-- These do not need any contents; rho_t is only ever compared by identity.&lt;br /&gt;
local rho_t = {}&lt;br /&gt;
-- local consonant_t = {}&lt;br /&gt;
local diacritic_t = { diacritic = true }&lt;br /&gt;
-- Same contents as diacritic_t, but a separate table so that breathings can be&lt;br /&gt;
-- recognised by an identity comparison.&lt;br /&gt;
local breathing_t = { diacritic = true }&lt;br /&gt;
&lt;br /&gt;
-- Registers t as the info entry of every character in characters.&lt;br /&gt;
-- characters: either a string, which is split into UTF-8 characters with the&lt;br /&gt;
--   byte pattern UTF8_char, or an array whose elements are used as keys as-is.&lt;br /&gt;
-- t: the property table to store; the same table object is stored for every key.&lt;br /&gt;
-- An existing entry for a character is overwritten.&lt;br /&gt;
local function add_info(characters, t)&lt;br /&gt;
	if type(characters) == &amp;quot;string&amp;quot; then&lt;br /&gt;
		for character in string.gmatch(characters, UTF8_char) do&lt;br /&gt;
			info[character] = t&lt;br /&gt;
		end&lt;br /&gt;
	else&lt;br /&gt;
		-- Any non-string argument is iterated with ipairs.&lt;br /&gt;
		for i, character in ipairs(characters) do&lt;br /&gt;
			info[character] = t&lt;br /&gt;
		end&lt;br /&gt;
	end&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Populate info. Length marks, diaeresis, accents and the iota subscript share&lt;br /&gt;
-- diacritic_t; the two breathings get their own table, breathing_t.&lt;br /&gt;
add_info({ macron, breve,&lt;br /&gt;
		diaeresis,&lt;br /&gt;
		acute, grave, circumflex,&lt;br /&gt;
		subscript,&lt;br /&gt;
	}, diacritic_t)&lt;br /&gt;
&lt;br /&gt;
add_info({rough, smooth}, breathing_t)&lt;br /&gt;
-- Vowel letters, in both cases. Iota and upsilon are registered separately&lt;br /&gt;
-- because their tables also carry the offglide flag.&lt;br /&gt;
add_info(&amp;quot;ΑΕΗΟΩαεηοω&amp;quot;, vowel_t)&lt;br /&gt;
add_info(&amp;quot;Ιι&amp;quot;, iota_t)&lt;br /&gt;
add_info(&amp;quot;Υυ&amp;quot;, upsilon_t)&lt;br /&gt;
-- add_info(&amp;quot;ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ&amp;quot;, consonant_t)&lt;br /&gt;
-- Rho is the only consonant with an entry of its own.&lt;br /&gt;
add_info(&amp;quot;Ρρ&amp;quot;, rho_t)&lt;br /&gt;
&lt;br /&gt;
-- Fallback entry: looking up a character that was never registered in info&lt;br /&gt;
-- returns this shared empty table instead of nil, so that reading a field such&lt;br /&gt;
-- as .vowel on the result yields nil rather than raising an error.&lt;br /&gt;
local not_recognized = {}&lt;br /&gt;
setmetatable(info, { __index =&lt;br /&gt;
	function(t, key)&lt;br /&gt;
		return not_recognized&lt;br /&gt;
	end&lt;br /&gt;
})&lt;br /&gt;
&lt;br /&gt;
local sparseConcat = require(&amp;quot;Module:table&amp;quot;).sparseConcat&lt;br /&gt;
&lt;br /&gt;
local checkType = require &amp;quot;libraryUtil&amp;quot;.checkType&lt;br /&gt;
&lt;br /&gt;
-- Returns a function that forwards to checkType with funcName already bound,&lt;br /&gt;
-- so a caller only passes argIndex, arg, expectType and nilOk.&lt;br /&gt;
-- NOTE(review): no call to _check is visible in this revision of the module.&lt;br /&gt;
local function _check(funcName)&lt;br /&gt;
	return function(argIndex, arg, expectType, nilOk)&lt;br /&gt;
		return checkType(funcName, argIndex, arg, expectType, nilOk)&lt;br /&gt;
	end&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Perform a function on each Unicode character in a string.&lt;br /&gt;
-- str: the string to walk; it is split with the byte pattern UTF8_char.&lt;br /&gt;
-- func: called once per character, in order, with that character as its only&lt;br /&gt;
--   argument. Its return value is ignored and forEach itself returns nothing.&lt;br /&gt;
local function forEach(str, func)&lt;br /&gt;
	for char in string.gmatch(str, UTF8_char) do&lt;br /&gt;
		func(char)&lt;br /&gt;
	end&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- This concatenates or inserts a character, then removes it from the text.&lt;br /&gt;
-- list: table that receives the characters.&lt;br /&gt;
-- index: key in list; chars is appended to an existing value or stored as new.&lt;br /&gt;
-- chars: string to add; a nil or false value raises an error.&lt;br /&gt;
-- text: string from which the leading #chars bytes are dropped.&lt;br /&gt;
-- Returns the remainder of text. The function does not check that text&lt;br /&gt;
-- actually begins with chars.&lt;br /&gt;
-- NOTE(review): no call to add is visible in this revision of the module.&lt;br /&gt;
local function add(list, index, chars, text)&lt;br /&gt;
	if not chars then&lt;br /&gt;
		error(&amp;quot;The function add cannot act on a nil character.&amp;quot;)&lt;br /&gt;
	end&lt;br /&gt;
	if list[index] then&lt;br /&gt;
		list[index] = list[index] .. chars&lt;br /&gt;
	else&lt;br /&gt;
		list[index] = chars&lt;br /&gt;
	end&lt;br /&gt;
	-- Basic string function works here.&lt;br /&gt;
	-- (#chars is a byte length, which is what the byte-based text:sub expects.)&lt;br /&gt;
	return text:sub(#chars + 1)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Passes term and face to tag_text of Module:script utilities together with the&lt;br /&gt;
-- module-level language object (code grc) and script object (code polytonic),&lt;br /&gt;
-- and returns whatever tag_text returns.&lt;br /&gt;
function export.tag(term, face)&lt;br /&gt;
	return m_script_utils.tag_text(term, lang, sc, face)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Builds a link through full_link of Module:links. term, alt and tr are passed&lt;br /&gt;
-- in the data table together with the module-level language and script objects;&lt;br /&gt;
-- face is passed as the second argument. Returns the result of full_link.&lt;br /&gt;
function export.link(term, face, alt, tr)&lt;br /&gt;
	return m_links.full_link( { term = term, alt = alt, lang = lang, sc = sc, tr = tr }, face)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Calls language_link of Module:links with term, alt and the module-level&lt;br /&gt;
-- language object; no script object is passed. Returns its result.&lt;br /&gt;
-- NOTE(review): no call to linkNoTag is visible in this revision of the module.&lt;br /&gt;
local function linkNoTag(term, alt)&lt;br /&gt;
	return m_links.language_link{ term = term, lang = lang, alt = alt }&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Convert spacing to combining diacritics, and nonstandard to standard polytonic Greek.&lt;br /&gt;
-- text: the string to normalise.&lt;br /&gt;
-- Returns the NFD form of text in which every character that is a key of the&lt;br /&gt;
-- conversions table has been replaced by the corresponding value. The result&lt;br /&gt;
-- stays decomposed; it is not recomposed here.&lt;br /&gt;
function export.standardDiacritics(text)&lt;br /&gt;
	text = decompose(text)&lt;br /&gt;
	&lt;br /&gt;
	-- gsub with a table replacement looks each matched character up in the table;&lt;br /&gt;
	-- characters without an entry are kept unchanged. Assigning to a single&lt;br /&gt;
	-- variable discards the substitution count that gsub also returns.&lt;br /&gt;
	text = text:gsub(UTF8_char, conversions)&lt;br /&gt;
	&lt;br /&gt;
	return text&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
--[=[	This function arranges diacritics in the following order:&lt;br /&gt;
			1. macron or breve&lt;br /&gt;
			2. breathings or diaeresis&lt;br /&gt;
			3. acute, circumflex, or grave&lt;br /&gt;
			4. iota subscript&lt;br /&gt;
		Used by [[Module:typing-aids]].&lt;br /&gt;
		&lt;br /&gt;
		The position of each diacritic is looked up in diacritic_order.&lt;br /&gt;
		When two diacritics map to the same position no error is raised (the&lt;br /&gt;
		error call in the body is commented out); the later one is inserted&lt;br /&gt;
		into the output with table.insert instead, a breve being placed one&lt;br /&gt;
		position later so that it follows a macron.&lt;br /&gt;
		&lt;br /&gt;
		Parameter diacritics: a string of diacritic characters.&lt;br /&gt;
		Returns the reordered string, joined with sparseConcat.&lt;br /&gt;
]=]&lt;br /&gt;
local function reorderDiacriticSequence(diacritics)&lt;br /&gt;
	-- output is filled by position and may therefore have gaps.&lt;br /&gt;
	local output = {}&lt;br /&gt;
	forEach(diacritics,&lt;br /&gt;
		function (diacritic)&lt;br /&gt;
			-- NOTE(review): if diacritic_order has no entry for this character,&lt;br /&gt;
			-- index is nil and the assignment below raises an error. Presumably&lt;br /&gt;
			-- callers only pass characters known to diacritic_order; confirm&lt;br /&gt;
			-- against the data module.&lt;br /&gt;
			local index = diacritic_order[diacritic]&lt;br /&gt;
			if not output[index] then&lt;br /&gt;
				output[index] = diacritic&lt;br /&gt;
			else&lt;br /&gt;
				-- Place breve after macron.&lt;br /&gt;
				if diacritic == breve then&lt;br /&gt;
					index = index + 1&lt;br /&gt;
				end&lt;br /&gt;
				-- The following might have odd results when there&lt;br /&gt;
				-- are three or more diacritics.&lt;br /&gt;
				-- (table.insert shifts the existing entries from index upwards.)&lt;br /&gt;
				table.insert(output, index, diacritic)&lt;br /&gt;
				--[[&lt;br /&gt;
				local m_templates = require(&amp;quot;Module:grc-utilities/templates&amp;quot;)&lt;br /&gt;
				error(&amp;quot;There are two diacritics, &amp;quot; ..&lt;br /&gt;
						m_templates.addDottedCircle(output[index]) .. &amp;quot; and &amp;quot; ..&lt;br /&gt;
						m_templates.addDottedCircle(diacritic) ..&lt;br /&gt;
						&amp;quot; that belong in the same position. There should be only one.&amp;quot;&lt;br /&gt;
				)&lt;br /&gt;
				--]]&lt;br /&gt;
			end&lt;br /&gt;
		end)&lt;br /&gt;
	return sparseConcat(output)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Decomposes text (NFD) and passes every run of two or more characters matching&lt;br /&gt;
-- the combining_diacritic pattern to reorderDiacriticSequence, substituting the&lt;br /&gt;
-- reordered run. Returns the resulting string, still in decomposed form.&lt;br /&gt;
function export.reorderDiacritics(text)&lt;br /&gt;
	-- NOTE(review): d is assigned but never read in this function.&lt;br /&gt;
	local d = diacritics&lt;br /&gt;
	&lt;br /&gt;
	-- The outer parentheses drop the substitution count returned by gsub, so&lt;br /&gt;
	-- only the string is returned.&lt;br /&gt;
	return (gsub(decompose(text),&lt;br /&gt;
		combining_diacritic .. combining_diacritic .. &amp;quot;+&amp;quot;,&lt;br /&gt;
		reorderDiacriticSequence))&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
--[=[&lt;br /&gt;
		This breaks a word into meaningful &amp;quot;tokens&amp;quot;, which are&lt;br /&gt;
		individual letters or diphthongs with their diacritics.&lt;br /&gt;
		Used by [[Module:grc-accent]] and [[Module:grc-pronunciation]].&lt;br /&gt;
		&lt;br /&gt;
		Parameter text: the string to split. It is decomposed (NFD) before&lt;br /&gt;
		it is scanned character by character.&lt;br /&gt;
		Returns an array of token strings in decomposed form.&lt;br /&gt;
		A character that is neither a registered vowel nor a registered&lt;br /&gt;
		diacritic always begins a token of its own; diacritics are appended&lt;br /&gt;
		to the token that is current when they are read.&lt;br /&gt;
--]=]&lt;br /&gt;
local function make_tokens(text)&lt;br /&gt;
	-- prev_info starts as a fresh empty table so that its fields read as nil&lt;br /&gt;
	-- and it is not identical to any of the shared property tables.&lt;br /&gt;
	local tokens, prev_info = {}, {}&lt;br /&gt;
	local token_i, vowel_count = 1, 0 -- vowel_count counts the vowel letters read in a row, to detect two-vowel diphthongs; diacritics and other characters reset it.&lt;br /&gt;
	-- prev holds the previous character and is nil during the first iteration.&lt;br /&gt;
	local prev&lt;br /&gt;
	for character in string.gmatch(decompose(text), UTF8_char) do&lt;br /&gt;
		local curr_info = info[character]&lt;br /&gt;
		-- Split vowels between tokens if not a diphthong.&lt;br /&gt;
		if curr_info.vowel then&lt;br /&gt;
			vowel_count = vowel_count + 1&lt;br /&gt;
			-- A new token is started unless this vowel is an offglide (iota or&lt;br /&gt;
			-- upsilon) that directly follows a vowel as the second vowel of a&lt;br /&gt;
			-- pair. Even in that case a new token is started when the previous&lt;br /&gt;
			-- letter is an offglide and this one is upsilon, or when both letters&lt;br /&gt;
			-- have the same info table.&lt;br /&gt;
			if prev and (not (vowel_count == 2 and curr_info.offglide and prev_info.vowel)&lt;br /&gt;
					-- υυ → υ, υ&lt;br /&gt;
					-- ιυ → ι, υ&lt;br /&gt;
					or prev_info.offglide and curr_info == upsilon_t or curr_info == prev_info) then&lt;br /&gt;
				token_i = token_i + 1&lt;br /&gt;
				-- This vowel may itself be the first vowel of a diphthong.&lt;br /&gt;
				if prev_info.vowel then&lt;br /&gt;
					vowel_count = 1&lt;br /&gt;
				end&lt;br /&gt;
			elseif vowel_count == 2 then&lt;br /&gt;
				-- The diphthong is complete; a following vowel starts a new token.&lt;br /&gt;
				vowel_count = 0&lt;br /&gt;
			end&lt;br /&gt;
			tokens[token_i] = (tokens[token_i] or &amp;quot;&amp;quot;) .. character&lt;br /&gt;
		elseif curr_info.diacritic then&lt;br /&gt;
			vowel_count = 0&lt;br /&gt;
			-- The diacritic is appended to the current token in every case; the&lt;br /&gt;
			-- branches below only split the token or log a message.&lt;br /&gt;
			tokens[token_i] = (tokens[token_i] or &amp;quot;&amp;quot;) .. character&lt;br /&gt;
			if prev_info.diacritic or prev_info.vowel then&lt;br /&gt;
				if character == diaeresis then&lt;br /&gt;
					-- Split the diphthong in the current token if a diaeresis was found:&lt;br /&gt;
					-- the first letter, then the second letter plus any diacritics.&lt;br /&gt;
					-- The match is byte-based and requires the token to begin with&lt;br /&gt;
					-- two basic_Greek characters followed by at least one more byte.&lt;br /&gt;
					local previous_vowel, vowel_with_diaeresis =&lt;br /&gt;
						string.match(tokens[token_i],&lt;br /&gt;
							&amp;quot;^(&amp;quot; .. basic_Greek .. &amp;quot;)(&amp;quot; .. basic_Greek .. &amp;quot;.+)&amp;quot;)&lt;br /&gt;
					if previous_vowel then&lt;br /&gt;
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis&lt;br /&gt;
						token_i = token_i + 1&lt;br /&gt;
					else&lt;br /&gt;
						-- The vowel preceding the vowel with the diaeresis will already be&lt;br /&gt;
						-- placed in the previous token if it has a diacritic:&lt;br /&gt;
						-- Περικλῆῐ̈ → Π ε ρ ι κ λ ῆ ῐ̈&lt;br /&gt;
						--[[&lt;br /&gt;
						mw.log(&amp;#039;Diaeresis was found in &amp;#039; .. text .. &amp;#039;, but the previous token &amp;#039; ..&lt;br /&gt;
							require(&amp;quot;Module:Unicode data&amp;quot;).add_dotted_circle(tokens[token_i]) ..&lt;br /&gt;
							&amp;#039; couldn’t be split because it does not consist of two Basic Greek characters followed by other characters.&amp;#039;)&lt;br /&gt;
						--]]&lt;br /&gt;
					end&lt;br /&gt;
				end&lt;br /&gt;
			elseif prev_info == rho_t then&lt;br /&gt;
				-- Directly after rho only a breathing passes without a log entry.&lt;br /&gt;
				if curr_info ~= breathing_t then&lt;br /&gt;
					mw.log(string.format(&amp;quot;The character %s in %s should not have the accent %s on it.&amp;quot;,&lt;br /&gt;
						prev, text, require(&amp;quot;Module:grc-utilities/templates&amp;quot;).addDottedCircle(character)))&lt;br /&gt;
				end&lt;br /&gt;
			else&lt;br /&gt;
				-- NOTE(review): when the text begins with a diacritic, prev is&lt;br /&gt;
				-- still nil here and the concatenation below raises an error&lt;br /&gt;
				-- instead of logging.&lt;br /&gt;
				mw.log(&amp;quot;The character &amp;quot; .. prev .. &amp;quot; cannot have a diacritic on it.&amp;quot;)&lt;br /&gt;
			end&lt;br /&gt;
		else&lt;br /&gt;
			-- Any other character: it begins a new token unless it is the very&lt;br /&gt;
			-- first character of the text.&lt;br /&gt;
			vowel_count = 0&lt;br /&gt;
			if prev then&lt;br /&gt;
				token_i = token_i + 1&lt;br /&gt;
			end&lt;br /&gt;
			tokens[token_i] = (tokens[token_i] or &amp;quot;&amp;quot;) .. character&lt;br /&gt;
		end&lt;br /&gt;
		prev = character&lt;br /&gt;
		prev_info = curr_info&lt;br /&gt;
	end&lt;br /&gt;
	return tokens&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Memoises the results of make_tokens, keyed by the NFD form of the input.&lt;br /&gt;
local cache = {}&lt;br /&gt;
-- Returns the token array for text, computing it with make_tokens on the first&lt;br /&gt;
-- request only. Inputs with the same NFD form share one cache entry, and the&lt;br /&gt;
-- cached table itself is returned (not a copy), so a caller that modifies it&lt;br /&gt;
-- changes what later calls receive.&lt;br /&gt;
function export.tokenize(text)&lt;br /&gt;
	local decomposed = decompose(text)&lt;br /&gt;
	if not cache[decomposed] then&lt;br /&gt;
		-- make_tokens decomposes its argument itself, so text is passed as given.&lt;br /&gt;
		cache[decomposed] = make_tokens(text)&lt;br /&gt;
	end&lt;br /&gt;
	return cache[decomposed]&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
--[=[	Places diacritics in the following order:&lt;br /&gt;
			1. breathings or diaeresis&lt;br /&gt;
			2. acute, circumflex, or grave&lt;br /&gt;
			3. macron or breve&lt;br /&gt;
			4. iota subscript&lt;br /&gt;
		The text is first passed through export.standardDiacritics. The&lt;br /&gt;
		reordering, and the replacement of the combining macron and breve by&lt;br /&gt;
		their spacing forms, only happen when the text matches groups[1].&lt;br /&gt;
		Returns the NFC form of the result.&lt;br /&gt;
		Used by [[Module:grc-pronunciation]].		]=]&lt;br /&gt;
function export.pronunciationOrder(text)&lt;br /&gt;
	text = export.standardDiacritics(text)&lt;br /&gt;
	&lt;br /&gt;
	-- Going by the inline comment below, groups[1] is the macron and breve group.&lt;br /&gt;
	if find(text, groups[1]) then&lt;br /&gt;
	&lt;br /&gt;
		-- Rewrite every run of two or more diacritics.&lt;br /&gt;
		text = gsub(text,&lt;br /&gt;
			diacritic .. diacritic .. &amp;quot;+&amp;quot;,&lt;br /&gt;
			function(sequence)&lt;br /&gt;
				-- Put breathing and diaeresis first, then accents, then macron or breve&lt;br /&gt;
				-- Only the first match of each group is kept. Assuming each group&lt;br /&gt;
				-- pattern matches a single character (TODO confirm in the data&lt;br /&gt;
				-- module), a second diacritic of the same group, or one that is&lt;br /&gt;
				-- in none of the groups, is dropped from the run.&lt;br /&gt;
				return table.concat{&lt;br /&gt;
					match(sequence, groups[2]) or &amp;quot;&amp;quot;,&lt;br /&gt;
					match(sequence, groups[3]) or &amp;quot;&amp;quot;,&lt;br /&gt;
					match(sequence, groups[1]) or &amp;quot;&amp;quot;,&lt;br /&gt;
					match(sequence, groups[4]) or &amp;quot;&amp;quot;&lt;br /&gt;
				}&lt;br /&gt;
			end)&lt;br /&gt;
		&lt;br /&gt;
		text = gsub(text, macron, spacing_macron) -- combining to spacing macron&lt;br /&gt;
		text = gsub(text, breve, spacing_breve) -- combining to spacing breve&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	return toNFC(text)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
-- Returns a table of any ambiguous vowels in the text, language-tagged.&lt;br /&gt;
-- A vowel is reported when its token consists of a lowercase alpha, iota or&lt;br /&gt;
-- upsilon followed only by diacritics, none of which is a macron, breve,&lt;br /&gt;
-- circumflex or iota subscript.&lt;br /&gt;
-- text: the string to examine; anything but a string raises an error.&lt;br /&gt;
-- noTag: when truthy, the bare vowel letter (without its diacritics) is listed&lt;br /&gt;
--   instead of the tagged vowel with its diacritics.&lt;br /&gt;
-- Returns two values: the array of reported vowels in order of occurrence, and&lt;br /&gt;
-- a set table whose keys are the vowel letters that were reported.&lt;br /&gt;
function export.findAmbig(text, noTag)&lt;br /&gt;
	if (not text) or type(text) ~= &amp;quot;string&amp;quot; then&lt;br /&gt;
		error(&amp;quot;The input to function findAmbig is nonexistent or not a string&amp;quot;)&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	-- Diacritics whose presence means the vowel is not reported.&lt;br /&gt;
	local lengthDiacritic = &amp;quot;[&amp;quot; .. macron .. breve .. circumflex .. subscript .. &amp;quot;]&amp;quot;&lt;br /&gt;
	-- Whole-token pattern: one of the three letters, then zero or more diacritics.&lt;br /&gt;
	-- NOTE(review): only the lowercase letters are listed, so tokens with a&lt;br /&gt;
	-- capital alpha, iota or upsilon never match; confirm that this is intended.&lt;br /&gt;
	local aiu_diacritic = &amp;quot;^([&amp;quot; .. &amp;quot;αιυ&amp;quot; .. &amp;quot;])(&amp;quot; .. diacritic .. &amp;quot;*)$&amp;quot;&lt;br /&gt;
	&lt;br /&gt;
	-- breaks the word into units&lt;br /&gt;
	local output, vowels = {}, {}&lt;br /&gt;
	for _, token in ipairs(export.tokenize(text)) do&lt;br /&gt;
		-- Tokens containing a match for the consonant pattern are skipped.&lt;br /&gt;
		if not find(token, m_data.consonant) then&lt;br /&gt;
			-- This local diacritics shadows the module-level table of that name.&lt;br /&gt;
			local vowel, diacritics = match(&lt;br /&gt;
				token,&lt;br /&gt;
				aiu_diacritic&lt;br /&gt;
			)&lt;br /&gt;
			&lt;br /&gt;
			if vowel and (diacritics == &amp;quot;&amp;quot; or&lt;br /&gt;
					not find(diacritics, lengthDiacritic)) then&lt;br /&gt;
				local diacriticked_vowel&lt;br /&gt;
				if not noTag then&lt;br /&gt;
					diacriticked_vowel = export.tag(vowel .. diacritics)&lt;br /&gt;
				else&lt;br /&gt;
					diacriticked_vowel = vowel&lt;br /&gt;
				end&lt;br /&gt;
				&lt;br /&gt;
				table.insert(output, diacriticked_vowel)&lt;br /&gt;
				&lt;br /&gt;
				-- Lists the vowel letters that are ambiguous, for categorization purposes.&lt;br /&gt;
				-- (vowel is already lowercase here, since the pattern above only&lt;br /&gt;
				-- matches lowercase letters.)&lt;br /&gt;
				vowels[mw.ustring.lower(vowel)] = true&lt;br /&gt;
			end&lt;br /&gt;
		end&lt;br /&gt;
	end&lt;br /&gt;
		&lt;br /&gt;
	return output, vowels&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
return export&lt;/div&gt;</summary>
		<author><name>Sware</name></author>
	</entry>
</feed>