local export = {}
local m_string_utils = require("Module:string utilities")
local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local gmatch = m_string_utils.gmatch
local gsplit = mw.text.gsplit
local lower = m_string_utils.lower
local upper = m_string_utils.upper
-- Lookup from a Sichuanese Pinyin initial to the IPA string emitted for it
-- when converting with the 'IPA' scheme. The empty-string key covers
-- syllables that have no initial.
local initialConv = {
	b = "p",
	d = "t",
	g = "k",
	p = "pʰ",
	t = "tʰ",
	k = "kʰ",
	z = "t͡s",
	j = "t͡ɕ",
	c = "t͡sʰ",
	q = "t͡ɕʰ",
	m = "m",
	n = "n",
	ny = "nʲ",
	ng = "ŋ",
	f = "f",
	s = "s",
	x = "ɕ",
	h = "x",
	w = "v",
	r = "z",
	[""] = "",
}
-- Lookup from a (normalised) Sichuanese Pinyin final to the IPA string
-- emitted for it when converting with the 'IPA' scheme.
-- The key 'ir' is an internal spelling produced by this module's own
-- normalisation step; it is not part of actual Sichuanese Pinyin.
local finalConv = {
	ir = "z̩",
	er = "ɚ",

	a = "a",
	o = "o",
	e = "ɛ",
	ai = "ai",
	ei = "ei",
	ao = "au",
	ou = "əu",
	an = "an",
	en = "ən",
	ang = "aŋ",
	ong = "oŋ",

	i = "i",
	ia = "ia",
	ie = "iɛ",
	iai = "iɛi",
	iao = "iau",
	iu = "iəu",
	ian = "iɛn",
	["in"] = "in",
	iang = "iaŋ",

	u = "u",
	ua = "ua",
	ue = "uɛ",
	uai = "uai",
	ui = "uei",
	uan = "uan",
	un = "uən",
	uang = "uaŋ",

	["ü"] = "y",
	["üo"] = "yo",
	["üe"] = "ye",
	["üan"] = "yan",
	["ün"] = "yn",
	iong = "yoŋ",
}
-- Lookup from a single character of a tone string to the superscript text
-- emitted in IPA output: tone digits become tone-letter digits, and the
-- hyphen between two tone digits becomes a superscript minus.
local toneConv = {
	["1"] = "⁵⁵",
	["2"] = "²¹",
	["3"] = "⁵³",
	["4"] = "²¹³",
	["-"] = "⁻",
}
-- Initials whose spelling differs in the 'SWZ' output scheme. Initials
-- without an entry here are emitted unchanged by the converter.
local initialConv_swz = {
	j = "g",
	q = "k",
	n = "l",
	ny = "n",
	h = "x",
	w = "",
	r = "rh",
}
-- Finals whose spelling differs in the 'SWZ' output scheme. Finals without
-- an entry here are emitted unchanged by the converter. 'ir' (the internal
-- spelling, not real pinyin) maps to the empty string.
local finalConv_swz = {
	ir = "",
	er = "r",
	ong = "ung",
	uang = "uong",
	["ü"] = "y",
	["üo"] = "iuo",
	["üe"] = "ye",
	["üan"] = "uan",
	["ün"] = "un",
	iong = "yng",
}
-- Normalise a lower-case (initial, final) pair from its orthographic pinyin
-- spelling into the keys expected by the conversion tables.
--
--   * after y/j/q/x a leading 'u' of the final is really 'ü' (ju /tɕy/)
--   * 'y' is only a spelling device: it is dropped and the final is given
--     its 'i'/'ü' onset back (yin /in/, yuan /yan/, ya /ia/, you -> iu)
--   * 'w' is likewise dropped and the final regains its 'u' onset
--     (wei -> ui, wen -> un, wai -> uai), except for plain 'wu', which keeps
--     the 'w' initial (wu /vu/)
--   * 'i' after z/c/s/r becomes the internal final 'ir'
--
-- Returns the normalised initial and final.
local function fix(initial, final)
	local onset, rime = initial, final

	-- ju /tɕy/
	if find(onset, '^[yjqx]$') and find(rime, '^u') then
		rime = gsub(rime, '^u', 'ü')
	end

	if onset == 'y' then
		onset = ''
		if rime == 'ou' then
			rime = 'iu'
		elseif not find(rime, '^[iü]') then
			-- yin /in/ and yuan /yan/ are left alone; ya /ia/ gains its 'i'
			rime = 'i' .. rime
		end
	elseif onset == 'w' then
		-- wu /vu/ is the only case that keeps both the initial and the final
		if rime ~= 'u' then
			-- /-uei/ is usually spelled <-ui>, but /uei/ is <wei>, not <wui>;
			-- similarly /uən/ is <wen>
			local respelled = { ei = 'ui', en = 'un' }
			onset = ''
			rime = respelled[rime] or ('u' .. rime)
		end
	elseif rime == 'i' and find(onset, '^[zcsr]$') then
		-- distinguish the two 'i's
		rime = 'ir'
	end

	return onset, rime
end
-- Validate one parsed, lower-cased syllable before it is normalised and
-- converted. Raises an error (via `error`) describing the first problem
-- found; returns nothing when the syllable is acceptable.
--
-- initial: the initial as written (may be "" or the spelling letter 'y')
-- final:   the final as written
-- tone:    the tone string as written (digits, optionally joined by '-')
local function warn(initial, final, tone)
	-- A syllable with no initial must be written with y-/w-, not a bare i-/u-.
	if initial == "" and find(final, "^[iu]") then
		error("Syllables in Sichuanese Pinyin do not begin with i-/u-. Add y-/w-.")
	end
	-- 'y' has no entry in initialConv because the normalisation step strips it.
	if not initialConv[initial] and initial ~= "y" then
		error("Invalid initial: " .. initial)
	end
	-- 'uo' has no entry in finalConv, but after y/j/q/x the normalisation
	-- step turns its leading 'u' into 'ü', giving the valid final 'üo'.
	if not finalConv[final] and final ~= "uo" then
		error("Invalid final: " .. final)
	end
	-- Reject the fifth tone anywhere in the tone string, so that sandhi
	-- notations such as "5-2" or "1-5" are caught as well as a bare "5".
	if find(tone, "5") then
		error("Chengdu does not have the fifth tone anymore. Use 2.")
	end
end
-- Return the erhua (r-suffixed) replacement for an already-converted IPA
-- final. The choice depends on how the IPA final begins and, for 'o'/'oŋ',
-- on whether the IPA initial begins with one of p/m/f/v.
local function erhua_ipa_final(initial, final)
	if find(final, '^y') then -- finals beginning with y
		return 'yɚ'
	elseif find(final, '^i') then -- finals beginning with i
		return 'iɚ'
	elseif find(final, '^u') then -- finals beginning with u
		return 'uɚ'
	elseif final == 'o' or final == 'oŋ' then
		if find(initial, '^[pmfv]') then
			return 'ɚ'
		end
		return 'uɚ'
	end
	return 'ɚ' -- all remaining finals
end

-- Convert Sichuanese Pinyin text to another representation.
--
-- text:   the pinyin string; alternative readings are separated by '/'.
--         Each syllable is letters followed by a tone string (digits and
--         '-'), optionally followed by non-letter characters. If `text` is a
--         table, its `args[1]` and `args[2]` are used as `text` and `scheme`
--         (presumably a Scribunto frame -- confirm against callers).
-- scheme: 'IPA' or 'SWZ'.
--
-- Returns:
--   * for 'IPA': each reading as '/syll syll .../', readings joined by ', '
--   * for 'SWZ': each reading respelled, readings joined by ' / '
--   * false if scheme is 'SWZ' and any syllable carries the erhua 'r'
--
-- Raises an error for an unparsable syllable, an invalid initial/final/tone
-- (see `warn`), a final that has no IPA equivalent, or an unknown scheme.
function export.convert(text, scheme)
	if type(text) == "table" then
		text, scheme = text.args[1], text.args[2]
	end
	local result = {}
	for word in gsplit(text, '/') do
		local converted = {}
		-- leading non-letter material, re-attached in SWZ output only
		local extra2 = match(word, '^[^A-Za-zü]*')
		for syllable in gmatch(word, '[A-Za-zü]+[%d%-]+[^A-Za-zü]*') do
			local initial, final, erhua, tone, extra = match(syllable, '^([BDGPTKZJCQMNFSXHVRWYbdgptkzjcqmnfsxhvrwy]?[yg]?)([AEOaiueoüng]+)(r?)([%d%-]+)([^A-Za-zü]*)$')
			-- The loop pattern above is looser than the parsing pattern, so a
			-- chunk can be found and still fail to parse; report that clearly
			-- instead of failing on a nil concatenation below.
			if not initial then
				error("Invalid syllable: " .. syllable)
			end
			local caps = false
			if find(initial .. final, '[A-Z]') then
				caps = true
				initial, final = lower(initial), lower(final)
			end
			warn(initial, final, tone)
			initial, final = fix(initial, final)
			-- 'e' + 'r' is the plain final 'er', not an erhua-suffixed 'e'
			if final == 'e' and erhua == 'r' then
				final, erhua = 'er', ''
			end
			if scheme == 'IPA' then
				local ipa_initial = initialConv[initial]
				local ipa_final = finalConv[final]
				-- Some spellings pass `warn` but normalise to something with
				-- no table entry (e.g. 'wo' becomes the final 'uo').
				if not ipa_initial then
					error("Invalid initial: " .. initial)
				end
				if not ipa_final then
					error("Invalid final: " .. final)
				end
				-- characters without a toneConv entry are left unchanged
				local ipa_tone = gsub(tone, '.', function(char) return toneConv[char] end)
				if erhua == 'r' then
					ipa_final = erhua_ipa_final(ipa_initial, ipa_final)
				end
				table.insert(converted, ipa_initial .. ipa_final .. ipa_tone)
			elseif scheme == 'SWZ' then
				initial = initialConv_swz[initial] or initial
				final = finalConv_swz[final] or final
				-- for a 'digit-digit' tone keep only the second digit
				tone = gsub(tone, '(%d)%-(%d)', '%2')
				-- XXX: what happens with erhua? (disabled output for now)
				-- cf the given example 貓(mer)
				if erhua == 'r' then return false end
				-- with tone 3 the finals 'a' and 'ai' are written 'aa', 'aai'
				if tone == '3' and (final == 'a' or final == 'ai') then
					final = 'a' .. final
				end
				local spelled = initial .. final
				if caps then spelled = gsub(spelled, '^.', upper) end
				-- '@' marks the start of each syllable for the joining rules below
				table.insert(converted, '@' .. spelled .. extra)
			else
				error('Convert to what representation?')
			end
		end
		if scheme == 'IPA' then
			local reading = '/' .. table.concat(converted, ' ') .. '/'
			table.insert(result, reading)
		elseif scheme == 'SWZ' then
			local reading = table.concat(converted, '')
			-- a syllable-initial u/i directly after a lower-case letter is
			-- replaced by w/j
			reading = gsub(reading, '([a-z])@(u)', '%1w')
			reading = gsub(reading, '([a-z])@(i)', '%1j')
			-- a syllable-initial y directly after n or g is replaced by j
			reading = gsub(reading, '([ng])@(y)', '%1j')
			-- any remaining syllable-initial 'un' is written 'wen'
			reading = gsub(reading, '@un', 'wen')
			-- drop the remaining syllable markers
			reading = gsub(reading, '@', '')
			table.insert(result, extra2 .. reading)
		end
	end
	if scheme == 'IPA' then
		return table.concat(result, ', ')
	else
		return table.concat(result, ' / ')
	end
end
return export