-- Mô đun:egy-pron-Egyptological

local export = {}

local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")
local lang = require("Module:languages").getByCode("egy")

-- Lookup table: one transliteration character -> the IPA string used for it
-- in the Egyptological reading. Used as the replacement table of a gsub over
-- every character of the prepared word.
local phonetic_chars_map = {
	-- signs rendered as vowels
	["ꜣ"] = "ɑ", ["ꜥ"] = "ɑː",
	["j"] = "i", ["y"] = "iː",
	["u"] = "uː", ["e"] = "ɛ",

	-- signs rendered as consonants
	["w"] = "w",
	["b"] = "b", ["p"] = "p", ["f"] = "f",
	["m"] = "m", ["n"] = "n", ["r"] = "r",
	-- h and ḥ share one output value
	["h"] = "h", ["ḥ"] = "h",
	["ḫ"] = "x", ["ẖ"] = "ç",
	["z"] = "z", ["s"] = "s", ["š"] = "ʃ",
	-- q and k share one output value
	["q"] = "k", ["k"] = "k", ["g"] = "ɡ",
	["t"] = "t", ["ṯ"] = "t͡ʃ",
	["d"] = "d", ["ḏ"] = "d͡ʒ",
}

-- Lookup table: one transliteration character -> the letter(s) used for it
-- in the standard anglicization. Used as the replacement table of a gsub over
-- every character of the prepared word.
local ang_chars_map = {
	-- signs written with a vowel letter (ꜣ and ꜥ both become "a")
	["ꜣ"] = "a", ["ꜥ"] = "a",
	["j"] = "i", ["y"] = "y",
	["u"] = "u", ["e"] = "e",

	-- signs that keep a single plain letter
	["w"] = "w",
	["b"] = "b", ["p"] = "p", ["f"] = "f",
	["m"] = "m", ["n"] = "n", ["r"] = "r",
	["h"] = "h", ["ḥ"] = "h",
	["z"] = "z", ["s"] = "s",
	["q"] = "q", ["k"] = "k", ["g"] = "g",
	["t"] = "t", ["d"] = "d",

	-- signs spelled with a digraph
	["ḫ"] = "kh", ["ẖ"] = "kh",
	["š"] = "sh",
	["ṯ"] = "tj", ["ḏ"] = "dj",
}

-- Capturing character classes reused by the IPA epenthesis patterns.
local ipa_vowel = "([ꜣjyꜥue])"
local ipa_cons = "([bpfmnrhḥḫẖzsšqkgtṯdḏ])"
local ipa_cons_w = "([wbpfmnrhḥḫẖzsšqkgtṯdḏ])"

-- Ordered { pattern, replacement } pairs. Each one is applied to the whole
-- string with mw.ustring.gsub, in exactly this order; later rules rely on the
-- output of earlier ones.
local ipa_rules = {
	-- hyphens: dropped at either edge, turned into a space elsewhere
	{ "^%-", "" },
	{ "%-$", "" },
	{ "%-", " " },

	-- "_" and "." become temporary markers (" _ʔ" / " ʔ");
	-- a "." directly after a letter-initial "j" is simply removed
	{ "_", " _ʔ" },
	{ "%f[%a]j%.", "j" },
	{ "%.", " ʔ" },

	-- w/u: every w becomes u, then a u at the start of a letter run
	-- (or after "s _ʔ") is turned back into w
	{ "w", "u" },
	{ "%f[%a]u", "w" },
	{ "s _ʔu", "s _ʔw" },

	-- e-epenthesis
	{ "%f[%a]([mnr])%f[%A]", "e%1" },
	{ "ʔ" .. ipa_cons .. "%f[%A]", "ʔe%1" },
	{ "%f[%a]" .. ipa_cons_w .. "%f[%A]", "%1e" },
	-- "v" is a placeholder that keeps a vowel-flanked consonant pair from
	-- being split by the next two rules; it is stripped right after them
	-- (which also removes any literal "v" in the input)
	{ ipa_vowel .. ipa_cons .. ipa_cons .. ipa_vowel, "%1%2v%3%4" },
	{ ipa_cons_w .. ipa_cons, "%1e%2" },
	{ ipa_cons_w .. ipa_cons .. "%f[%A]", "%1e%2" },
	{ "v", "" },
	{ ipa_vowel .. ipa_cons .. "e" .. ipa_cons .. "e", "%1%2%3e" },
	{ ipa_cons_w .. "%1", "%1e%1" },

	-- resolve the temporary markers
	{ " _ʔ", "" },
	{ " ʔ", "ʔ" },

	-- irregularity
	{ "ꜥneḫ", "ꜥnḫ" },

	-- glottal stop (~ hiatus) between vowels
	{ "([ꜣꜥ])([ꜣꜥ])", "%1ʔ%2" },
	{ "([ꜣꜥ])([ꜣꜥ])%f[%A]", "%1ʔ%2" },
	{ "([jy])([jy])", "%1ʔ%2" },
	{ "([jy])([jy])%f[%A]", "%1ʔ%2" },
	{ "uu", "uʔu" },

	-- finally map every remaining character through the IPA table
	{ ".", phonetic_chars_map },
}

-- Converts one transliterated word to its Egyptological IPA reading.
-- @param word  transliteration string; it is lowercased before processing
-- @return      the converted string wrapped in slashes, e.g. "/.../"
function export.to_IPA(word)
	local text = mw.ustring.lower(word)

	for _, rule in ipairs(ipa_rules) do
		-- gsub also returns a match count; only the string is kept
		text = mw.ustring.gsub(text, rule[1], rule[2])
	end

	return "/" .. text .. "/"
end

-- Builds the formatted IPA output for a list of transliterated words.
-- @param words  sequence of transliteration strings
-- @return       whatever m_IPA.format_IPA_full returns for one item per word,
--               each item carrying the word's to_IPA result as `pron`
function export.pronunciation(words)
	local ipa_items = {}
	for _, translit in ipairs(words) do
		ipa_items[#ipa_items + 1] = { pron = export.to_IPA(translit) }
	end
	return m_IPA.format_IPA_full({ lang = lang, items = ipa_items })
end

-- Capturing character classes reused by the anglicization epenthesis patterns.
local ang_vowel = "([ꜣjyꜥue])"
local ang_cons = "([wbpfmnrhḥḫẖzsšqkgtṯdḏ])"

-- Ordered { pattern, replacement } pairs. Each one is applied to the whole
-- string with mw.ustring.gsub, in exactly this order; later rules rely on the
-- output of earlier ones.
local ang_rules = {
	-- hyphens are padded with spaces for now and restored near the end
	{ "%-", " - " },

	-- "." and "_" become temporary markers (" ʔ" / " _ʔ");
	-- a "." directly after a letter-initial "j" is simply removed
	{ "%f[%a]j%.", "j" },
	{ "%.", " ʔ" },
	{ "_", " _ʔ" },

	-- w/u: every w becomes u, then a u at the start of a letter run,
	-- after "s _ʔ", or before a final j is turned back into w
	{ "w", "u" },
	{ "%f[%a]u", "w" },
	{ "s _ʔu", "s _ʔw" },
	{ "uj%f[%A]", "wj" },

	-- e-epenthesis
	{ "%f[%a]([mnr])%f[%A]", "e%1" },
	{ "ʔ" .. ang_cons .. "%f[%A]", "ʔe%1" },
	{ "%f[%a]" .. ang_cons .. "%f[%A]", "%1e" },
	-- "v" is a placeholder that keeps a vowel-flanked consonant pair from
	-- being split by the next two rules; it is stripped right after them
	-- (which also removes any literal "v" in the input)
	{ ang_vowel .. ang_cons .. ang_cons .. ang_vowel, "%1%2v%3%4" },
	{ ang_cons .. ang_cons, "%1e%2" },
	{ ang_cons .. ang_cons .. "%f[%A]", "%1e%2" },
	{ "v", "" },
	{ ang_vowel .. ang_cons .. "e" .. ang_cons .. "e", "%1%2%3e" },
	{ ang_cons .. "%1", "%1e%1" },

	-- resolve the temporary markers
	{ " _ʔ", "" },
	{ " ʔ", "." },

	-- irregularities
	{ "ꜥneḫ", "ꜥnḫ" },
	{ "neḫet", "naḫt" },
	{ "jmen", "ꜣmen" },
	{ "jten", "ꜣten" },
	{ "sebek", "sobek" },
	{ "peteḥ", "ptaḥ" },

	-- undo the hyphen padding
	{ " %- ", "-" },

	-- finally map every remaining character through the anglicization table
	{ ".", ang_chars_map },
}

-- Converts one transliterated word to its standard anglicization.
-- @param word  transliteration string; it is lowercased before processing
-- @return      the anglicized string
function export.to_anglicization(word)
	local text = mw.ustring.lower(word)

	for _, rule in ipairs(ang_rules) do
		-- gsub also returns a match count; only the string is kept
		text = mw.ustring.gsub(text, rule[1], rule[2])
	end

	return text
end

-- Anglicizes each word in turn and joins the results with ", ".
-- @param words  sequence of transliteration strings (iterated with ipairs,
--               so iteration stops at the first nil entry)
-- @return       comma-separated string of anglicizations
function export.anglicization(words)
	local converted = {}
	for _, translit in ipairs(words) do
		converted[#converted + 1] = export.to_anglicization(translit)
	end
	return table.concat(converted, ', ')
end

-- Template entry point: renders the Egyptological pronunciation line and,
-- unless suppressed, a second line with the conventional anglicization.
--
-- Template parameters (read from the parent frame):
--   1, 2, ...  transliterations to pronounce; defaults to the page title
--   ang, ang2, ...  anglicization input for the matching positional
--              parameter; when absent, the positional value itself is used.
--              Passing ang=0 suppresses the anglicization line.
-- @param frame  the Scribunto frame object of the #invoke call
-- @return       wikitext string
function export.show(frame)
	local params = {
		[1] = { list = true, default = mw.title.getCurrentTitle().text },
		ang = { list = true, allow_holes = true },
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	-- if there is not a user-specified anglicization of a given pronunciation input,
	-- copy the pronunciation input
	for i, word in ipairs(args[1]) do
		if not args.ang[i] then
			args.ang[i] = word
		end
	end
	
	-- Only run the anglicization pass when its result will actually be shown;
	-- previously it was always computed and then thrown away for ang=0.
	local anglicization = ''
	if args.ang[1] ~= '0' then
		anglicization = '\n** Quy ước Anh hóa: <span class="use-with-mention">' .. export.anglicization(args.ang) .. '</span>'
	end
	
	return table.concat {
		'* ', m_a.format_qualifiers(lang, { "Ai Cập học" }), ' ', export.pronunciation(args[1]),
		anglicization
	}
end

return export