local concat = table.concat
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local insert = table.insert
local load_data = mw.loadData
local toNFC = mw.ustring.toNFC
local m_ja = require("Module:ja")
local kata_to_hira = m_ja.kata_to_hira
local normalize_kana = m_ja.normalize_kana
-- Language-independent romanization tables, loaded with mw.loadData.
local data_common = load_data("Module:Hrkt-translit/data")
-- The `rom` entry for っ. export.tr uses this value as an internal marker
-- (e.g. prefixed to a vowel after "n") and rewrites every occurrence of it
-- to "'" in its final output.
local c_apos = data_common.rom["っ"]
-- Table of functions exported by this module.
local export = {}
--- Builds a lookup function over the transliteration data.
-- The returned function takes a chain of keys (`d("rom", c)`,
-- `d("digraph", c, prev)`, ...) and returns the value found by descending
-- through the data tables, or nil when the path does not exist.
-- When `lang` is given and "Module:Hrkt-translit/data/<lang>" can be located
-- by the package loader, values from that module take precedence over the
-- common data; otherwise only the common data is consulted.
-- @param lang optional language code (string) or nil
-- @return function(...) performing the lookup
local function get_data(lang)
	-- Descend `node` through the given keys; gives nil as soon as a
	-- non-table value is reached while keys are still left to apply.
	local function walk(node, ...)
		local depth = select("#", ...)
		local step = 1
		while step <= depth do
			if type(node) ~= "table" then
				return nil
			end
			node = node[(select(step, ...))]
			step = step + 1
		end
		return node
	end

	-- Accessor that reads the common data only.
	local function common_only(...)
		return walk(data_common[...], select(2, ...))
	end

	if not lang then
		return common_only
	end

	local module_name = "Module:Hrkt-translit/data/" .. lang
	if not package.loaders[2](module_name) then
		return common_only
	end

	local data_lang = load_data(module_name)
	return function(...)
		-- Walk both trees in parallel; as soon as one of them stops being a
		-- table, finish the walk on the other one alone.
		local from_lang, from_common = data_lang[...], data_common[...]
		for depth = 2, select("#", ...) do
			if type(from_lang) ~= "table" then
				return walk(from_common, select(depth, ...))
			end
			local key = select(depth, ...)
			from_lang = from_lang[key]
			if type(from_common) ~= "table" then
				-- from_lang has already consumed `key`, so continue with the next one.
				return walk(from_lang, select(depth + 1, ...))
			end
			from_common = from_common[key]
		end
		if from_lang == nil then
			return from_common
		end
		return from_lang
	end
end
--- Transliterates kana text into a romanized string.
-- The text is processed one UTF-8 character at a time. Each character yields
-- a romanization piece (`result[i]`) and an optional special marker
-- (`result_sp[i]`); later characters may rewrite earlier pieces (digraphs,
-- voicing marks, long vowels, ...). A second pass then resolves gemination,
-- "ng" disambiguation, diacritics and capitalization.
-- @param text    string to transliterate
-- @param lang    optional language code; selects language-specific data via get_data
-- @param sc      not used by this function
-- @param options optional table; the fields read here are `hist`,
--                `keep_period`, `phonetic` and `no_diacritics`
-- @return the transliterated string
function export.tr(text, lang, sc, options)
	options = options or {}
	-- result[0] is an empty sentinel so that "no previous item" (index 0) can
	-- still be indexed and matched against safely.
	local result = {[0] = ""}
	local result_sp = {}
	local d = get_data(lang)

	-- Default predicates for getlast, defined once instead of on every loop iteration.
	local function default_bad(index)
		return result_sp[index] == "stop"
	end
	local function default_good(index)
		return result[index]:len() > 0 and result_sp[index] ~= "'"
	end

	-- Scans `result` backwards from `i_start` (default: the last item) down to
	-- index 1 and returns the first index accepted by `predicate_good`.
	-- Returns 0 when `predicate_bad` accepts an index first or when nothing
	-- qualifies. Items from a ">" item back to the preceding "<" item
	-- (both inclusive) are skipped without being tested.
	local function getlast(i_start, predicate_good, predicate_bad)
		predicate_good = predicate_good or default_good
		predicate_bad = predicate_bad or default_bad
		local in_xml = false
		for i = i_start or #result, 1, -1 do
			if in_xml then
				if result[i] == "<" then in_xml = false end
			elseif result[i] == ">" then
				in_xml = true
			else
				if predicate_bad(i) then break end
				if predicate_good(i) then return i end
			end
		end
		return 0
	end

	-- normalize long vowels and iteration marks
	text = normalize_kana(text)
	-- convert to NFC (FIXME: convert this module to use NFD, which will simplify things)
	text = toNFC(text)
	for c in gsub(text, "[ァ-ヶ𛄠𛄢𛅤-𛅦]", kata_to_hira):gsub("\227\130[\144-\146]゙", {
		-- convert ゐ゙, ゑ゙, を゙ to ヸ, ヹ, ヺ, to ensure voicing works correctly
		["ゐ゙"] = "ヸ", ["ゑ゙"] = "ヹ", ["を゙"] = "ヺ",
	}):gmatch(".[\128-\191]*") do
		-- rc: romanization of this character (the character itself if unmapped);
		-- rc_sp: special marker for this character, if any.
		local rc = options.hist and d("rom_hist", c) or d("rom", c) or c
		local rc_sp = d("rom_sp", c)
		local i_last = getlast()
		if options.keep_period and c == "." then rc = "."
		elseif c:match("%a") then rc_sp = "stop" end -- ASCII letters in the input are marked "stop"

		-- digraphs: this character combined with the previous piece replaces that piece
		local repl_digraph = d("digraph", c, result[i_last])
		if repl_digraph then
			result[i_last], rc = repl_digraph, ""
			result_sp[i_last], rc_sp = nil, nil
		end

		if not options.hist then -- は/へ handling
			-- The previous item carries a flag_hahe marker and is followed by
			-- "-", ".", a combining (semi)voicing mark, a letter or c_apos:
			-- replace its output with the marker value and clear the marker.
			if d("flag_hahe", result_sp[i_last]) and (find(c, "[-%.゙゚]") or rc:match("%a") or rc == c_apos) then
				result[i_last] = result_sp[i_last]
				result_sp[i_last] = nil
			end
			-- The current item carries a flag_hahe marker: use the marker value
			-- as its output right away in phonetic mode, when the nearest
			-- preceding item is marked "stop", or when the previous piece
			-- contains "-" or a letter or is c_apos.
			if d("flag_hahe", rc_sp) and (options.phonetic or result_sp[getlast(nil, function(i)
				return result[i]:len() > 0 and result_sp[i] ~= "'" or result_sp[i] == "stop"
			end, function() return false end)] == "stop" or result[i_last]:match"[-%a]" or result[i_last] == c_apos) then
				rc = rc_sp
				rc_sp = nil
			end
		end

		-- space and punctuations: a space goes after closing punctuation that
		-- is followed by a letter, and before opening punctuation that follows
		-- a letter. The closing class uses the closing quote ”, mirroring the
		-- opening quote “ in the class below.
		if rc:match"%a" and find(result[i_last], "^[,%.?!:)”]$") then
			result[i_last] = result[i_last] .. " "
		elseif find(rc, "^[(“]$") and result[i_last]:match("%a") then
			rc = " " .. rc
		end

		-- voicing: rewrite the leading consonant cluster of the previous piece
		if rc_sp == "voiced" then
			result[i_last] = result[i_last]:gsub("^[b-df-hj-np-tv-z]+", d("tr_voicing"))
		elseif rc_sp == "semivoiced" then
			result[i_last] = result[i_last]:gsub("^[b-df-hj-np-tv-z]+", d("tr_semivoicing"))
		end

		-- na vs n'a: mark the boundary after a bare "n" piece
		if result[i_last] == "n" and rc:match(options.hist and "^[aiueoyw]" or "^[aiueoy]") then
			rc = c_apos .. rc
		end

		-- vowel clusters or stop consonants: r_lastlast is the last ASCII letter
		-- of the previous piece together with any trailing non-letters.
		local r_lastlast = result[i_last]:match"^.*(%a%A*)$"
		if r_lastlast then
			if r_lastlast:match("[aiueo]") then
				if rc:match("^%-[yw]") or options.hist and (r_lastlast == "i" and rc:sub(1, 1) == "y" or r_lastlast == "u" and rc:sub(1, 1) == "w") then
					-- glide: drop the "-" prefix and the last byte of the previous piece
					if rc:sub(1, 1) == "-" then rc = rc:sub(2) end
					result[i_last] = result[i_last]:sub(1, -2)
					if rc:sub(1, 1) == "y" and d("flag_postalveolarconsonant", result[i_last]) then rc = rc:sub(2) end
				elseif rc:match"^%-[aiueo]$" then
					-- "-" followed by a single vowel
					rc = rc:sub(2)
					if r_lastlast == rc then
						result[i_last] = result[i_last] .. r_lastlast
						rc = ""
					elseif d("flag_specialconsonant", result[i_last]) then
						result[i_last] = result[i_last]:sub(1, -2)
					elseif r_lastlast == "i" then
						result[i_last] = result[i_last]:sub(1, -2) .. "y"
					elseif r_lastlast:match("[ou]") and rc ~= "u" then
						result[i_last] = result[i_last]:sub(1, -2) .. "w"
					else
						result[i_last] = result[i_last]:sub(1, -2)
					end
				elseif rc:match("^[aiueo]$") then
					-- plain vowel: merge into the previous piece when the pair
					-- has a tr_long entry and the piece does not already end
					-- in two vowels
					if not options.hist and not options.phonetic and d("tr_long", r_lastlast .. rc) and not result[i_last]:match("[aiueo][aiueo]$") then
						result[i_last] = result[i_last] .. rc
						rc = ""
					end
				end
			end
		end
		insert(result, rc)
		result_sp[#result] = rc_sp
	end

	if not options.hist then -- isolated は/へ
		-- If the last item carries a flag_hahe marker and getlast finds
		-- nothing before it, output the marker value instead.
		local i_last = getlast()
		if d("flag_hahe", result_sp[i_last]) and getlast(i_last - 1) == 0 then
			result[i_last] = result_sp[i_last]
		end
	end

	local num_cap = 0
	local has_gem = false
	for i, v in ipairs(result) do
		-- gemination: once a "gem" marker has been seen, the next piece that
		-- starts with a consonant cluster determines the doubled consonant
		if has_gem then
			local apos, consonant, remainder = v:match("^(" .. c_apos .. "*)([b-df-hj-np-tv-z]+)(.*)")
			if consonant then
				local c_gem = d("tr_gem", apos .. consonant) or consonant:sub(1, 1)
				v = consonant .. remainder
				local i_gem = getlast(i)
				while true do
					-- walk backwards over the run of "gem" items, filling each one in
					i_gem = getlast(i_gem - 1)
					if result_sp[i_gem] == "gem" then
						result[i_gem] = c_gem
					else
						-- end of the run: step forward again and put the
						-- stripped c_apos prefix on that item
						i_gem = getlast(i_gem + 1)
						result[i_gem] = apos .. result[i_gem]
						break
					end
				end
				has_gem = false
			end
		elseif result_sp[i] == "gem" then
			has_gem = true
		end
		-- anga vs a'nga
		if v:match("^ng") then
			local i_no_gem = getlast(i - 1, function(index)
				return result[index]:len() > 0 and result_sp[index] ~= "'" and result_sp[index] ~= "gem"
			end)
			if find(result[i_no_gem], "%a") then
				result[i_no_gem] = result[i_no_gem] .. c_apos
			end
		end
		-- diacritics (long vowels and others)
		if not options.no_diacritics then
			v = v:gsub("[aiueo][aiueo%A]*", d("tr_long"))
		end
		-- uppercase: every "cap" marker requests one more character to be
		-- uppercased; the request is consumed only by a character that
		-- actually changes when uppercased
		if result_sp[i] == "cap" then num_cap = num_cap + 1 end
		if num_cap > 0 then
			v = v:gsub(".[\128-\191]*", function(c)
				if num_cap <= 0 then return c end
				local uc = c:uupper()
				if c ~= uc then num_cap = num_cap - 1 end
				return uc
			end)
		end
		result[i] = v
	end
	-- result[0] is not part of the concatenation; the parentheses drop gsub's
	-- second return value (the substitution count).
	return (concat(result):gsub(c_apos, "'"))
end
return export