<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://linguifex.com/w/index.php?action=history&amp;feed=atom&amp;title=Module%3Alanguages%2FdoSubstitutions</id>
	<title>Module:languages/doSubstitutions - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://linguifex.com/w/index.php?action=history&amp;feed=atom&amp;title=Module%3Alanguages%2FdoSubstitutions"/>
	<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:languages/doSubstitutions&amp;action=history"/>
	<updated>2026-04-07T11:16:24Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.43.6</generator>
	<entry>
		<id>https://linguifex.com/w/index.php?title=Module:languages/doSubstitutions&amp;diff=375031&amp;oldid=prev</id>
		<title>Sware: Created page with &quot;local m_str_utils = require(&quot;Module:string utilities&quot;)  local codepoint = m_str_utils.codepoint local gsub = m_str_utils.gsub local safe_require = require(&quot;Module:utilities&quot;).safe_require local u = m_str_utils.char  local function doRemoveExceptions(text, sc, remove_exceptions) 	local substitutes, i = {}, 0 	 	local function insert_substitute(m) 		i = i + 1 		table.insert(substitutes, m) 		return u(0x80000 + i) 	end 	 	for _, exception in ipairs(remove_exceptions) do 		e...&quot;</title>
		<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:languages/doSubstitutions&amp;diff=375031&amp;oldid=prev"/>
		<updated>2024-08-07T11:01:06Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;local m_str_utils = require(&amp;quot;Module:string utilities&amp;quot;)  local codepoint = m_str_utils.codepoint local gsub = m_str_utils.gsub local safe_require = require(&amp;quot;Module:utilities&amp;quot;).safe_require local u = m_str_utils.char  local function doRemoveExceptions(text, sc, remove_exceptions) 	local substitutes, i = {}, 0 	 	local function insert_substitute(m) 		i = i + 1 		table.insert(substitutes, m) 		return u(0x80000 + i) 	end 	 	for _, exception in ipairs(remove_exceptions) do 		e...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;local m_str_utils = require(&amp;quot;Module:string utilities&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
local codepoint = m_str_utils.codepoint&lt;br /&gt;
local gsub = m_str_utils.gsub&lt;br /&gt;
local safe_require = require(&amp;quot;Module:utilities&amp;quot;).safe_require&lt;br /&gt;
local u = m_str_utils.char&lt;br /&gt;
&lt;br /&gt;
local function doRemoveExceptions(text, sc, remove_exceptions)&lt;br /&gt;
	local substitutes, i = {}, 0&lt;br /&gt;
	&lt;br /&gt;
	local function insert_substitute(m)&lt;br /&gt;
		i = i + 1&lt;br /&gt;
		table.insert(substitutes, m)&lt;br /&gt;
		return u(0x80000 + i)&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	for _, exception in ipairs(remove_exceptions) do&lt;br /&gt;
		exception = sc:toFixedNFD(exception)&lt;br /&gt;
		text = gsub(text, exception, insert_substitute)&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	return text, substitutes&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
local function undoRemoveExceptions(text, substitutes)&lt;br /&gt;
	return text:gsub(&amp;quot;\242[\128-\191]*&amp;quot;, function(m)&lt;br /&gt;
		return substitutes[codepoint(m) - 0x80000]&lt;br /&gt;
	end)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
local function doSubstitutions(text, self, sc, substitution_data, function_name, recursed)&lt;br /&gt;
	local fail, cats = nil, {}&lt;br /&gt;
	-- If there are language-specific substitutes given in the data module, use those.&lt;br /&gt;
	if type(substitution_data) == &amp;quot;table&amp;quot; then&lt;br /&gt;
		-- If a script is specified, run this function with the script-specific data before continuing.&lt;br /&gt;
		local sc_code = sc:getCode()&lt;br /&gt;
		if substitution_data[sc_code] then&lt;br /&gt;
			text, fail, cats = doSubstitutions(text, self, sc, substitution_data[sc_code], function_name, true)&lt;br /&gt;
		-- Hant, Hans and Hani are usually treated the same, so add a special case to avoid having to specify each one separately.&lt;br /&gt;
		elseif sc_code:match(&amp;quot;^Han&amp;quot;) and substitution_data.Hani then&lt;br /&gt;
			text, fail, cats = doSubstitutions(text, self, sc, substitution_data.Hani, function_name, true)&lt;br /&gt;
		-- Substitution data with key 1 in the outer table may be given as a fallback.&lt;br /&gt;
		elseif substitution_data[1] then&lt;br /&gt;
			text, fail, cats = doSubstitutions(text, self, sc, substitution_data[1], function_name, true)&lt;br /&gt;
		end&lt;br /&gt;
		-- Iterate over all strings in the &amp;quot;from&amp;quot; subtable, and gsub with the corresponding string in &amp;quot;to&amp;quot;. We work with the NFD decomposed forms, as this simplifies many substitutions.&lt;br /&gt;
		if substitution_data.from then&lt;br /&gt;
			for i, from in ipairs(substitution_data.from) do&lt;br /&gt;
				-- We normalize each loop, to ensure multi-stage substitutions work correctly.&lt;br /&gt;
				text = sc:toFixedNFD(text)&lt;br /&gt;
				-- Check whether specific magic characters are present, as they rely on UTF-8 compatibility. If not, just use string.gsub. In most cases, doing this is faster than using mw.ustring.gsub every time.&lt;br /&gt;
				text = gsub(text, sc:toFixedNFD(from), substitution_data.to[i] or &amp;quot;&amp;quot;)&lt;br /&gt;
			end&lt;br /&gt;
		end&lt;br /&gt;
		&lt;br /&gt;
		if substitution_data.remove_diacritics then&lt;br /&gt;
			text = sc:toFixedNFD(text)&lt;br /&gt;
			-- Convert exceptions to PUA.&lt;br /&gt;
			local substitutes&lt;br /&gt;
			if substitution_data.remove_exceptions then&lt;br /&gt;
				text, substitutes = doRemoveExceptions(text, sc, substitution_data.remove_exceptions)&lt;br /&gt;
			end&lt;br /&gt;
			-- Strip diacritics.&lt;br /&gt;
			text = gsub(text, &amp;quot;[&amp;quot; .. substitution_data.remove_diacritics .. &amp;quot;]&amp;quot;, &amp;quot;&amp;quot;)&lt;br /&gt;
			-- Convert exceptions back.&lt;br /&gt;
			if substitution_data.remove_exceptions then&lt;br /&gt;
				text = undoRemoveExceptions(text, substitutes)&lt;br /&gt;
			end&lt;br /&gt;
		end&lt;br /&gt;
	elseif type(substitution_data) == &amp;quot;string&amp;quot; then&lt;br /&gt;
		-- If there is a dedicated function module, use that.&lt;br /&gt;
		local module = safe_require(&amp;quot;Module:&amp;quot; .. substitution_data)&lt;br /&gt;
		if module then&lt;br /&gt;
			if function_name == &amp;quot;tr&amp;quot; then&lt;br /&gt;
				text, fail, cats = module[function_name](text, self:getCode(), sc:getCode())&lt;br /&gt;
			else&lt;br /&gt;
				text, fail, cats = module[function_name](sc:toFixedNFD(text), self:getCode(), sc:getCode())&lt;br /&gt;
			end&lt;br /&gt;
		else&lt;br /&gt;
			error(&amp;quot;Substitution data &amp;#039;&amp;quot; .. substitution_data .. &amp;quot;&amp;#039; does not match an existing module.&amp;quot;)&lt;br /&gt;
		end&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	-- Don&amp;#039;t normalize to NFC if this is the inner loop or if a module returned nil.&lt;br /&gt;
	if recursed or not text then&lt;br /&gt;
		return text, fail, cats&lt;br /&gt;
	else&lt;br /&gt;
		-- Fix any discouraged sequences created during the substitution process, and normalize into the final form.&lt;br /&gt;
		text = sc:fixDiscouragedSequences(text)&lt;br /&gt;
		return sc:toFixedNFC(text), fail, cats&lt;br /&gt;
	end&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- This avoids calling into globals with require when the main function recurses.&lt;br /&gt;
return function (text, self, sc, substitution_data, function_name)&lt;br /&gt;
	return doSubstitutions(text, self, sc, substitution_data, function_name)&lt;br /&gt;
end&lt;/div&gt;</summary>
		<author><name>Sware</name></author>
	</entry>
</feed>