Module:string/encode entities: Difference between revisions

From Linguifex
Jump to navigation Jump to search
Created page with "local m_str_utils = require("Module:string utilities") local codepoint = m_str_utils.codepoint local decode_entities = m_str_utils.decode_entities local find = string.find local format = string.format local gsub = string.gsub local match = string.match local pattern_escape = m_str_utils.pattern_escape local function encode_entity(ch) return "&#x" .. format("%X", codepoint(ch)) .. ";" end return function(text, charset, raw) if not raw then text = decode_entities(te..."
 
No edit summary
Line 1: Line 1:
local m_str_utils = require("Module:string utilities")
-- TO BE REPLACED BY encode_entities in [[Module:string utilities]]. This function decodes on input by default to prevent double-encoding, which the new function does not, so implementations need to take this into account when being converted.


local codepoint = m_str_utils.codepoint
local debug_track_module = "Module:debug/track"
local decode_entities = m_str_utils.decode_entities
local string_decode_entities_module = "Module:string/decodeEntities"
local find = string.find
local string_utilities_module = "Module:string utilities"
local format = string.format
local gsub = string.gsub
local match = string.match
local pattern_escape = m_str_utils.pattern_escape


local function encode_entity(ch)
local require = require
return "&#x" .. format("%X", codepoint(ch)) .. ";"
 
local function decode_entities(...)
decode_entities = require(string_decode_entities_module)
return decode_entities(...)
end
 
local function encode_entities(...)
encode_entities = require(string_utilities_module).encode_entities
return encode_entities(...)
end
 
local function track(...)
track = require(debug_track_module)
return track(...)
end
end


return function(text, charset, raw)
return function(str, charset, raw)
if not raw then
if not raw then
text = decode_entities(text)
local decoded = decode_entities(str)
end
if decoded ~= str then
if charset == "" then
track("string/encode entities/decoded first")
return text
end
elseif not charset then
str = decoded
charset = "\"&'<>\194\160"
elseif not match(charset, "[\128-\244]") then
return (gsub(text, "[" .. pattern_escape(charset) .. "]", encode_entity))
end
end
return (gsub(text, "[%z\1-\127\194-\244][\128-\191]*", function(ch)
return encode_entities(str, charset, nil, true)
return find(charset, ch, 1, true) and encode_entity(ch) or nil
end))
end
end

Revision as of 23:08, 21 May 2025



-- TO BE REPLACED BY encode_entities in [[Module:string utilities]]. This function decodes on input by default to prevent double-encoding, which the new function does not, so implementations need to take this into account when being converted.

-- Page names of the modules this file depends on. They are only handed to
-- `require` from inside the loader functions defined further down, so no
-- dependency is loaded at the time this file itself is loaded.
local debug_track_module = "Module:debug/track"
local string_decode_entities_module = "Module:string/decodeEntities"
local string_utilities_module = "Module:string utilities"

-- Keep `require` in a local so the loader functions reference it as an upvalue.
local require = require

-- Load-on-first-use wrapper for the module named by
-- `string_decode_entities_module`. The first call requires that module,
-- rebinds the local `decode_entities` to whatever it returns, and forwards
-- the arguments to it; later calls through this local go straight to the
-- loaded value.
local decode_entities
decode_entities = function(...)
	local loaded = require(string_decode_entities_module)
	decode_entities = loaded
	return loaded(...)
end

-- Load-on-first-use wrapper for the `encode_entities` field of the module
-- named by `string_utilities_module`. The first call requires that module,
-- rebinds the local `encode_entities` to the field, and forwards the
-- arguments to it; later calls through this local go straight to the loaded
-- value.
local encode_entities
encode_entities = function(...)
	local loaded = require(string_utilities_module).encode_entities
	encode_entities = loaded
	return loaded(...)
end

-- Load-on-first-use wrapper for the module named by `debug_track_module`.
-- The first call requires that module, rebinds the local `track` to whatever
-- it returns, and forwards the arguments to it; later calls through this
-- local go straight to the loaded value.
local track
track = function(...)
	local loaded = require(debug_track_module)
	track = loaded
	return loaded(...)
end

-- Module export: encode `str` by delegating to `encode_entities`.
--
-- Parameters:
--   str      the text to encode.
--   charset  passed through unchanged as the second argument of
--            `encode_entities`.
--   raw      when truthy, `str` is handed to `encode_entities` as-is; when
--            falsy, `str` is first run through `decode_entities`.
--
-- Returns whatever `encode_entities` returns.
return function(str, charset, raw)
	-- Raw mode skips the decode-first step entirely.
	if raw then
		return encode_entities(str, charset, nil, true)
	end

	local decoded = decode_entities(str)
	-- Record the cases where decoding actually altered the input.
	if decoded ~= str then
		track("string/encode entities/decoded first")
	end

	-- NOTE(review): the meaning of the trailing `nil, true` arguments is
	-- defined by the callee and is not visible from this file.
	return encode_entities(decoded, charset, nil, true)
end