local export = {}

-- Character class matching a single combining mark that makeEntryName strips
-- from vowels. The code points, in the order they are passed to
-- mw.ustring.char below, are:
--   U+0302 COMBINING CIRCUMFLEX ACCENT
--   U+0301 COMBINING ACUTE ACCENT
--   U+0303 COMBINING TILDE
--   U+030F COMBINING DOUBLE GRAVE ACCENT
--   U+0300 COMBINING GRAVE ACCENT
--   U+030B COMBINING DOUBLE ACUTE ACCENT
local pitch_accent = string.format(
	"[%s]",
	mw.ustring.char(0x0302, 0x0301, 0x0303, 0x030F, 0x0300, 0x030B)
)
--- Remove pitch-accent marks from the vowels of `text`.
-- Only combining marks that directly follow one of the vowels
-- a, e, ɛ, i, o, ɵ, u, ʉ are examined. Within such a run of marks, those
-- matched by `pitch_accent` are deleted and every other mark in the
-- U+0300..U+036F range is kept.
-- @param text the string to process
-- @return the processed string, recomposed to NFC
function export.makeEntryName(text)
	local ustring = mw.ustring

	-- One vowel followed by a run of one or more combining marks
	-- (U+0300..U+036F).
	local accented_vowel = "[aeɛioɵuʉ][" .. ustring.char(0x300) .. "-" .. ustring.char(0x36F) .. "]+"

	-- Replacement callback: receives the whole vowel-plus-marks match and
	-- hands it back without the pitch-accent marks.
	local function strip_pitch_accent(cluster)
		-- Only the rewritten string is needed; the match count is dropped.
		local stripped = ustring.gsub(cluster, pitch_accent, "")
		return stripped
	end

	-- Work on the decomposed form so that diacritics which are part of a
	-- precomposed character become separate code points that can be matched.
	local decomposed = ustring.toNFD(text)
	local cleaned = ustring.gsub(decomposed, accented_vowel, strip_pitch_accent)

	-- Recompose, returning to the normalization MediaWiki uses natively.
	local recomposed = ustring.toNFC(cleaned)
	return recomposed
end
-- Hand the module table (carrying makeEntryName) back to whoever loads this module.
return export