-- Page title: Mô đun:ja-kanji-readings (Module:ja-kanji-readings)

-- Table of functions exported by this module (returned at the end of the file).
local export = {}

-- Title of the page currently being rendered. `pagename` may be overridden by
-- export.show via its pagename parameter; `namespace` is compared against ""
-- there to decide whether the page is an entry.
local titleObj = mw.title.getCurrentTitle()
local pagename = titleObj.text
local namespace = titleObj.nsText

-- Script objects; used below to pick the script of a link and to build
-- character classes from their getCharacters() strings.
local get_script_by_code = require("Module:scripts").getByCode
local Jpan = get_script_by_code("Jpan")
-- local katakana_script = get_script_by_code("Kana")
local Hira = get_script_by_code("Hira")
-- Used for hira_to_kata when looking up on-readings in the jouyou data.
local module_ja = require("Module:ja")
-- Transliterates kana readings to romaji.
local kana_to_romaji = require("Module:Hrkt-translit").tr

-- Shorthands for the Scribunto string/text library functions.
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local split = mw.text.split

-- Only used by commented-out code.
-- local data = mw.loadData("Module:ja/data")

-- Separator used when joining the annotations attached to a reading link.
local CONCAT_SEP = ', '
	
-- Reading types, listed in the order in which they are displayed.
-- Fields of each entry:
--   text           label shown in the rendered list
--   text2          romanised name; with "ō" replaced by "o" and apostrophes
--                  removed it is the template parameter name, and it is also
--                  used in category names and as the default glossary anchor
--   entry          optional glossary anchor that overrides text2
--   classification "on", "kun" or "nanori"; becomes the "<classification>-yomi"
--                  CSS class of the rendered readings
--   unclassified   optional suffix appended to the label when none of the
--                  classified on-reading parameters is given
local labels = {
	{ text = "Go-on", text2 = "goon", classification = "on" },
	{ text = "Kan-on", text2 = "kan'on", classification = "on" },
	{ text = "Tō-on", text2 = "tōon", classification = "on" },
	{ text = "Sō-on", text2 = "sōon", classification = "on" },
	{ text = "Kan’yō-on", text2 = "kan'yōon", classification = "on" },
	{ entry = "on'yomi", text = "On", text2 = "on", classification = "on", unclassified = " (unclassified)" },
	{ entry = "kun'yomi", text = "Kun", text2 = "kun", classification = "kun" },
	{ text = "Nanori", text2 = "nanori", classification = "nanori" },
}

-- Records a tracking entry for this module; `code` is appended to the
-- "ja-kanji-readings/" prefix to form the tracking key.
local function track(code)
	local debug_module = require("Module:debug")
	debug_module.track("ja-kanji-readings/" .. code)
end

-- Normalises a link specification in place and renders it through
-- Module:links.
--   data.term  reading text; ".", "-" and spaces are removed
--   data.tr    transliteration; "." and "-" are removed, and "-" is used when
--              none is given (presumably to suppress it; confirm in Module:links)
--   data.pos   annotation text; an empty string is turned into nil
-- data.sc is set to Jpan when the term, ignoring ASCII bytes, contains a
-- character outside Hira:getCharacters(), and to Hira otherwise.
-- Returns whatever full_link returns for the "term" face.
local function plain_link(data)
	-- 「かな-し.い」→「かなしい」, 「も-しく は」→「もしくは」
	local term = data.term:gsub('[%.%- ]', '')
	data.term = term

	if data.tr then
		local translit = data.tr:gsub('[%.%-]', '')
		data.tr = translit
	else
		data.tr = '-'
	end

	-- Drop ASCII bytes first so that only non-ASCII characters are tested.
	local non_ascii = term:gsub('[%z\1-\127]', '')
	local has_non_hiragana = mw.ustring.match(non_ascii, '[^' .. Hira:getCharacters() .. ']')
	data.sc = has_non_hiragana and Jpan or Hira

	if data.pos == '' or not data.pos then
		data.pos = nil
	end

	local links_module = require("Module:links")
	return links_module.full_link(data, "term") --"term" makes italic
end

--[=[
		Renders the list of readings of a kanji entry.

		History: copied from [[Module:ja]] on 2017/6/14. Replaces the code in
		Template:ja-readings which accepted kanji readings, and displayed them
		in a consistent format. Substantial change in function was introduced
		in https://en.wiktionary.org/w/index.php?diff=46057625

		Input:
		  frame.args[1]   language code (defaults to 'ja')
		  parent frame    goon, kanon, toon, soon, kanyoon, on, kun, nanori:
		                  the readings of each type; pagename: overrides the
		                  page title (testing only)

		A readings parameter is handled in one of two ways:
		  * old format - it contains "[[" directly followed by kana; the text
		    is kept as written, with links made of Jpan characters re-rendered
		  * new format - a comma-separated list whose items look like
		    "modern<historical<ancient" (the last two parts are optional)

		Returns wikitext: one bullet line per reading type, then the
		categories (only when the namespace name is empty), then the
		TemplateStyles tag.

		Raises an error when pagename is used in an entry, when a list item is
		empty or consists only of "<", and when an item has more than three
		"<"-separated parts.
]=]
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		["goon"] = {},
		["kanon"] = {},
		["toon"] = {},
		["soon"] = {},
		["on"] = {},
		["kanyoon"] = {},
		["kun"] = {},
		["nanori"] = {},
		["pagename"] = {},
	})
	
	local lang_code = frame.args[1] or 'ja'
	local lang = require'Module:languages'.getByCode(lang_code)
	local lang_name = lang:getCanonicalName()
	
	if args.pagename then
		if namespace == "" then
			error("The pagename parameter should not be used in entries, as it is only for testing.")
		end
		pagename = args.pagename
	end

	local yomi_data = mw.loadData("Module:ja/data/jouyou-yomi").yomi

	-- this holds the finished product composed of wikilinks to be displayed
	-- in the Readings section under the Kanji section
	local links, categories = {}, {}

	local is_old_format = false
	
	-- We need a separate kanji sortkey module.
	local sortkey = (require("Module:Hani-sortkey").makeSortKey(pagename, lang_code, "Jpan"))

	-- Appends a reading category for this kanji to `categories`.
	--   reading  kana reading; ".", spaces and a trailing "-" are removed
	--   subtype  reading type (label.text2); when nil the generic
	--            "kanji read as" category is used instead
	--   period   optional qualifier placed before the subtype
	local function add_reading_category(reading, subtype, period)
		reading = reading:gsub("[%. ]", ""):gsub("%-$", "")
		if subtype then
			-- Only add the separating space when there is a period, so that
			-- the category name never contains a double space.
			return table.insert(categories, '[[Thể loại:Chữ kanji ' .. lang_name .. " có cách đọc " ..
				(period and period .. ' ' or '') .. subtype .. ' là ' .. reading ..
				'|' .. sortkey .. ']]')
		else
			return table.insert(categories, '[[Category:' .. lang_name .. ' kanji read as ' ..
				reading .. '|' .. sortkey .. ']]')
		end
	end
	
	-- Each of these is used both as a set (reading -> true) and as a list,
	-- and feeds the tracking checks after the main loop.
	local unclassified_on = {}
	local classified_on = {}
	local kun = {}
			
	-- Character class covering the range ぁ (U+3041) to ー (U+30FC).
	local kana = "[ぁ-ー]"
	
	for _, label in ipairs(labels) do
		-- Parameter name: text2 without the macron and the apostrophe,
		-- e.g. "kan'yōon" -> "kanyoon". The local keeps only gsub's first result.
		local param_name = label.text2:gsub('ō', 'o'):gsub('\'', '')
		local readings = args[param_name]
		if readings then
			local unclassified = ""

			-- "On" is marked as unclassified only when no classified
			-- on-reading parameter is present.
			if label.unclassified then
				if not (args.goon or args.kanon or args.toon or args.soon or args.kanyoon) then
					unclassified = label.unclassified
				end
			end
			
			if find(readings, '%[%[' .. kana) then
				-- Old format: hand-written wikitext with linked kana.
				is_old_format = true

				if label.classification == 'on' then
					for reading in gmatch(readings, kana .. '+') do
						add_reading_category(reading)
					end
				end

				-- Re-render plain [[...]] links that consist only of Jpan
				-- characters; leave every other link untouched.
				readings = readings:gsub("%[%[([^%]|]+)%]%]", function(entry)
					if find(entry, "^[" .. Jpan:getCharacters() .. "]+$") then
						return plain_link{
							lang = lang,
							term = entry,
						}
					else
						return "[[" .. entry .. "]]"
					end
				end)
			else
				-- New format: comma-separated list of readings.
				readings = split(readings, ',%s*')

				for i, reading in ipairs(readings) do
					local is_jouyou = false

					-- Annotations shown next to the modern, historical and
					-- ancient forms respectively.
					local pos, pos_hist, pos_oldest = { }, { '[[w:Historical kana orthography|historical]]' }, { 'ancient' }

					-- check for formatting indicating presence of historical kana spelling
					local reading_mod, reading_hist, reading_oldest, reading_surplus = reading:match'^(.-)%f[<%z]<?(.-)%f[<%z]<?(.-)%f[<%z]<?(.*)$'

					-- The frontier %f[<%z] cannot match at the very start of
					-- the string, so an empty item (stray or trailing comma)
					-- or an item made only of "<" yields no match at all.
					-- Report that explicitly rather than falling through to
					-- the unrelated "too many historical readings" error.
					if not reading_mod then
						error("The parameter " .. param_name .. " contains the reading \"" .. reading ..
							"\", which is empty or malformed. Check for stray commas or \"<\" separators.")
					end
					
					if reading_surplus ~= '' then
						error("The reading " .. reading .. " contains too many historical readings. The maximum is 3: modern, historical, ancient.")
					end
					
					if label.text2 == "on" then
						unclassified_on[reading_mod] = true
						table.insert(unclassified_on, reading_mod)
					elseif label.text2 == "kun" then
						kun[reading_mod] = true
						table.insert(kun, reading_mod)
					elseif label.classification == "on" then
						classified_on[reading_mod] = true
						table.insert(classified_on, reading_mod)
					end
					
					-- The sort keys 1, 2 and 3 of the attention category
					-- below identify which of the three checks fired.

					-- test if reading contains katakana
					if find(reading_mod .. reading_hist .. reading_oldest, '[ァ-ヺ]') then
						table.insert(categories, '[[Category:Requests for attention concerning ' .. lang_name .. '|1]]') -- sometimes legit, like 「頁（ページ）」
					end

					if reading_hist ~= '' or reading_oldest ~= '' then
						-- test if historical readings contain small kana (anachronistic)
						if find(reading_hist .. reading_oldest, '[ぁぃぅぇぉゃゅょ]') then
							table.insert(categories, '[[Category:Requests for attention concerning ' .. lang_name .. '|2]]') -- 
						end
						
						-- test if reading contains kun'yomi delimiter thing but historical readings don't
						if reading_mod:match'%-' then
							if reading_hist ~= '' and not reading_hist:match'%-' or reading_oldest ~= '' and not reading_oldest:match'%-' then
								table.insert(categories, '[[Category:Requests for attention concerning ' .. lang_name .. '|3]]')
							end
						end
					end

					-- check if there is data indicating that our kanji is a jouyou kanji
					if yomi_data[pagename] then
						-- The data is looked up with on-readings converted
						-- by hira_to_kata and with "." removed.
						local yomi_key = (label.classification == 'on' and module_ja.hira_to_kata(reading_mod) or reading_mod)
						yomi_key = yomi_key:gsub('%.', '') -- 「あたら-し.い」→「あたら-しい」
						local yomi_type = yomi_data[pagename][yomi_key]

						if yomi_type then
							is_jouyou = true

							-- Types 1 and 2 get the plain Jōyō marker; types 3
							-- and 4 get the marker for readings flagged as
							-- restricted or rare.
							if yomi_type == 1 or yomi_type == 2 then
								table.insert(pos, '[[w:Jōyō kanji|<abbr title="Cách đọc này được liệt kê trong bảng Jōyō kanji. Bấm vào bài viết về Jōyō kanji trên Wikipedia.">Jōyō</abbr>]]')
							elseif yomi_type == 3 or yomi_type == 4 then
								table.insert(pos, '[[w:Jōyō kanji|<abbr title="Cách đọc này được liệt kê trong bảng Jōyō kanji, nhưng được đánh dấu là dùng hạn chế hoặc hiếm. Bấm vào bài viết về Jōyō kanji trên Wikipedia.">Jōyō <sup>†</sup></abbr>]]')
							end
						end
					end
					
					local subtype = label.text2
					-- reading_mod is always a non-empty string at this point.
					add_reading_category(reading_mod, subtype)
					if reading_hist ~= '' then
						add_reading_category(reading_hist, subtype, 'historical')
					end
					if reading_oldest ~= '' then
						add_reading_category(reading_oldest, subtype, 'ancient')
					end
					
					-- process kun readings with okurigana, create kanji-okurigana links
					if reading:match'%-' then
						-- Everything up to the last "-" is replaced by the
						-- kanji itself to form the link target.
						table.insert(pos, 1, plain_link{
							lang = lang,
							term = reading_mod:gsub('^.+%-', pagename),
						})
						
						if reading_hist ~= '' then
							table.insert(pos_hist, 1, plain_link{
								lang = lang,
								term = reading_hist:gsub('^.+%-', pagename),
							})
						end
						
						if reading_oldest ~= '' then
							table.insert(pos_oldest, 1, plain_link{
								lang = lang,
								term = reading_oldest:gsub('^.+%-', pagename),
							})
						end
					elseif label.classification == 'kun' then
						table.insert(categories, '[[Category:' .. lang_name .. ' kanji with kun readings missing okurigana designation|' .. sortkey .. ']]')
					end

					-- Romanisations with the part before the "-" underlined.
					-- Note that the modern form is underlined after
					-- transliteration, the older forms before it.
					local rom = kana_to_romaji((reading_mod), lang_code):gsub('^(.+)(%-)', '<u>%1</u>')
					local rom_hist = kana_to_romaji((reading_hist:gsub('^(.+)(%-)', '<u>%1</u>')), lang_code, nil, {hist = true})
					local rom_oldest = kana_to_romaji((reading_oldest:gsub('^(.+)(%-)', '<u>%1</u>')), lang_code, nil, {hist = true})
					
					local mod_link = plain_link{
						lang = lang,
						term = reading_mod,
						tr = rom,
						pos = table.concat(pos, CONCAT_SEP),
					}
					if is_jouyou then
						mod_link = '<mark class="jouyou-reading">' .. mod_link .. '</mark>'
					end
					
					-- Replace the raw list item by its rendered form: the
					-- modern link followed by the older forms in superscript.
					readings[i] = mod_link .. (reading_hist ~= '' and '<sup>←' .. plain_link{
						lang = lang,
						term = reading_hist,
						tr = rom_hist,
						pos = table.concat(pos_hist, CONCAT_SEP),
					} .. '</sup>' or '') .. (reading_oldest ~= '' and '<sup>←' .. plain_link{
						lang = lang,
						term = reading_oldest,
						tr = rom_oldest,
						pos = table.concat(pos_oldest, CONCAT_SEP),
					} .. '</sup>' or '')
				end

				readings = table.concat(readings, '; ')
			end
			
			-- Add "on-yomi", "kun-yomi", or "nanori-yomi" class around list of
			-- readings to allow JavaScript to locate them.
			table.insert(links, "* '''[[Appendix:Japanese glossary#" .. (label.entry or label.text2) .. '|'.. label.text .. "]]'''" .. unclassified .. ': <span class="' .. label.classification .. '-yomi">' .. readings .. '</span>')
		end
	end
	
	for _, reading in ipairs(unclassified_on) do
		-- [[Special:WhatLinksHere/Template:tracking/ja-kanji-readings/duplicate reading]]
		if classified_on[reading] then
			track("duplicate reading")
		end
	end
	
	if not next(classified_on) and not next(unclassified_on) then
		if next(kun) then
			-- [[Special:WhatLinksHere/Template:tracking/ja-kanji-readings/kun only]]
			track("kun only")
		end
	elseif not next(kun) then
		-- [[Special:WhatLinksHere/Template:tracking/ja-kanji-readings/on only]]
		track("on only")
	end

	if is_old_format then
		table.insert(categories, '[[Category:Japanese kanji using old ja-readings format|' .. sortkey .. ']]')
	end
	
	-- Categories are emitted only when the namespace name is empty.
	return table.concat(links, '\n') .. (namespace == '' and table.concat(categories) or '') .. require("Module:TemplateStyles")("Template:ja-readings/style.css")
end

return export