-- Mô đun:zh-usex

local export = {}

local m_zh = require("Module:zh")
local m_languages = require("Module:languages")

local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local sub = mw.ustring.sub
local split = mw.text.split

-- Use this when the actual title needs to be known.
local actual_title = mw.title.getCurrentTitle()

-- Use this when testcases need to be able to override the title (for bolding,
-- for instance).
local title = actual_title
-- Page name that example segments are compared against for automatic bolding
-- and link suppression. The right-hand `PAGENAME` is the global of that name
-- (the local is not yet in scope there); when it is unset, the text of the
-- current title is used.
local PAGENAME = PAGENAME or title.text

-- Read-only data tables loaded from the data submodule.
local data = mw.loadData("Module:zh-usex/data")
-- Looked up by punctuation token; the value is used as that token's
-- transcription.
local punctuation = data.punctuation
-- Looked up by the `ref` argument; entry [1] is used as a default variety and
-- entry [2] as the displayed source text.
local ref_list = data.ref_list
-- Indexed as [norm_code][text]; a hit replaces the automatically generated
-- pronunciation of a word or a single character.
local pron_correction = data.pron_correction
-- Indexed as [norm_code][word]; consulted before pron_correction for whole
-- words.
local polysyllable_pron_correction = data.polysyllable_pron_correction

-- Closing tag for the language-tagged <span> wrapped around Chinese text.
local zh_format_end = "</span>"

--local Han_pattern = "[" .. require("Module:scripts").getByCode("Hani"):getCharacters() .. "]"
-- Character class of Han characters; used (with mw.ustring) to detect
-- characters left in a generated transcription.
local Han_pattern = "[一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮹟𰀀-𲎯]"
-- Byte-level pattern for one UTF-8 encoded character: a lead byte followed by
-- any continuation bytes. For use with the plain (non-ustring) string library.
local UTF8_char = '[%z\1-\127\194-\244][\128-\191]*'
local UTF8_char2 = '[%z\1-Z\\^-\127\194-\244][\128-\191]*' -- not "[" or "]"

-- Shared `combine` for the romanisations that join syllables with hyphens:
-- keeps only the part of the reading before the first "/" and prefixes it
-- with "-".
local function hyphenated_first_reading(reading)
	return "-" .. reading:gsub("/.+", "")
end

-- Per-variety settings for segmenting an example and generating its
-- transcription, keyed by normalised language code. Fields:
--   segment_c      characters that delimit links (used inside a character class)
--   separator_conv maps each separator found in the input to the text that
--                  replaces it in the transcription
--   link_ignore    characters that do not affect links (used inside a
--                  character class)
--   tr_cap         whether the transcription can be capitalised
--   combine        post-processes a single looked-up reading
local tr_data = {}

tr_data.cmn = {
	segment_c = " %-",
	separator_conv = {
		[""] = "",
		[" "] = " ",
		["-"] = "",
		["--"] = "-",
	},
	link_ignore = "\1.^",
	tr_cap = true,
	combine = function(reading)
		-- "\3" temporarily stands in for the apostrophe; it is inserted
		-- before readings that start with a, o, e or a byte in \195-\199.
		return reading:gsub("^%f[aoe\195-\199]", "\3")
	end,
}

tr_data.yue = {
	segment_c = " ",
	separator_conv = {
		[""] = "",
		[" "] = " ",
	},
	link_ignore = "\1",
	tr_cap = false,
	combine = function(reading)
		-- Drop everything from the first comma onwards.
		return reading:gsub(",.+", "")
	end,
}

tr_data["nan-hbl"] = {
	segment_c = " ~",
	separator_conv = {
		[""] = "",
		[" "] = " ",
		["~"] = "-",
	},
	link_ignore = "\1%%.^",
	tr_cap = true,
	combine = hyphenated_first_reading,
}

tr_data.hak = {
	segment_c = " ~",
	separator_conv = {
		[""] = "",
		[" "] = " ",
		["~"] = "-",
	},
	link_ignore = "\1.^",
	tr_cap = true,
	combine = hyphenated_first_reading,
}

-- Segmentation settings for varieties without an entry of their own; there is
-- deliberately no `combine` here.
tr_data.default = {
	segment_c = " ",
	separator_conv = {
		[""] = "",
		[" "] = " ",
	},
	link_ignore = "\1",
	tr_cap = false,
}

-- Generates the transcription of one segment's display text.
--   display   segment text; may contain <b></b> tags, "[x]" overrides,
--             "X{reading}" manual readings and "\1" (stands for a space
--             inside a link)
--   norm_code normalised language code; must be a key of tr_data whose entry
--             has a `combine` function
-- Returns the transcription, with "\1"/"\2" standing for <b>/</b>.
local function get_tr(display, norm_code)
	local settings = tr_data[norm_code]
	local manual_readings = {} -- readings given inline as X{reading}, in order
	local punct_readings = {}  -- transcriptions of recognised punctuation, in order
	-- pattern that matches words
	local word_pattern = "[^" .. settings.link_ignore .. " \2{}" .. "]+"

	-- Remember a manually supplied reading and leave "{" as its placeholder.
	local function store_manual(char, reading)
		if not char:find("^%w$") then
			reading = settings.combine(reading)
		end
		manual_readings[#manual_readings + 1] = reading
		return "{"
	end

	-- Remember a punctuation token's transcription and leave "}" as its
	-- placeholder; anything that is not known punctuation is kept as it is.
	local function store_punct(token)
		local reading = punctuation[token]
		if not reading then
			return token
		end
		punct_readings[#punct_readings + 1] = reading
		return "}"
	end

	-- Pronunciation of a single character; the character itself when none
	-- can be found.
	local function transcribe_char(ch)
		local corrected = pron_correction[norm_code][ch]
		if corrected then
			return corrected
		end
		local reading = m_zh.check_pron(ch, norm_code, 1, true)
		if reading then
			return (settings.combine(reading))
		end
		return ch
	end

	-- Pronunciation of a word: whole-word corrections first, then a
	-- whole-word lookup, then character by character.
	local function transcribe_word(word)
		local corrected = polysyllable_pron_correction[norm_code][word]
			or pron_correction[norm_code][word]
		if corrected then
			return corrected
		end
		-- check_pron only distinguishes "length is 1" from everything else
		local length = word:find("^" .. UTF8_char .. "$") and 1 or 0
		local reading = m_zh.check_pron(word, norm_code, length, true)
		if reading then
			return settings.combine(reading)
		end
		return word:gsub(UTF8_char, transcribe_char)
	end

	local text = display:gsub("\1", " ")
	text = text:gsub("%[" .. UTF8_char2 .. "%]", "")
	text = text:gsub("(" .. UTF8_char .. "){([^{}]*)}", store_manual)
	text = text:gsub("%f[^ ][^ ]+%f[ ]", store_punct)
	-- swap the bold tags for single bytes for the rest of the processing
	text = text:gsub("<b>", "\1")
	text = text:gsub("</b>", "\2")
	text = text:gsub(word_pattern, transcribe_word)

	if norm_code == "cmn" then
		text = text:gsub("%.%.", "-")
	end
	if norm_code ~= "yue" then
		text = text:gsub("%.", " ")
	end

	-- Put the stored readings back in place of their placeholders, manual
	-- readings first and punctuation second.
	local manual_index, punct_index = 0, 0
	text = text:gsub("{", function()
		manual_index = manual_index + 1
		return manual_readings[manual_index]
	end)
	text = text:gsub("}", function()
		punct_index = punct_index + 1
		return punct_readings[punct_index]
	end)

	if norm_code == "yue" then
		text = text:gsub("[a-z][1-9]%-?[1-9]?", "%0 ")
	end
	return text
end

-- Builds a wikilink to the Chinese section of `target`, shown as `display`.
-- An empty `target` means "link to the display text itself". Bold tags are
-- stripped from the link target but kept in the displayed text.
local function make_link(target, display)
	if target == "" then
		target = display
	end
	local page = target:gsub("</?b>", "")
	local link = "[[" .. page .. "#Tiếng Trung Quốc|" .. display .. "]]"
	-- For debugging purposes
	--if actual_title.nsText == "Module" then mw.log(display, target, "->", link) end
	return link
end

-- Applies `conv_fun` to `text` while honouring "X[Y]" overrides: the
-- character X before a bracketed character is dropped and Y is emitted
-- unconverted; each run of text in front of such an override goes through
-- `conv_fun`.
local function convert(conv_fun, text)
	local override_pattern = "([^%[%]]*)" .. UTF8_char2 .. "%[(" .. UTF8_char2 .. ")%]"
	local function convert_run(run, forced)
		return conv_fun(run) .. forced
	end
	-- The "A[A]" sentinel makes the trailing run match the pattern as well;
	-- the single "A" it leaves behind is cut off again afterwards.
	local converted = (text .. "A[A]"):gsub(override_pattern, convert_run)
	return converted:sub(1, -2)
end

-- Escapes every Lua pattern magic character in `text` so that it matches
-- itself literally when used as (part of) a pattern.
local function escape_pattern(text)
	return (text:gsub("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

-- Escapes "%" in `text` so that gsub inserts it literally when `text` is used
-- as (part of) a replacement string.
local function escape_replacement(text)
	return (text:gsub("%%", "%%%%"))
end

-- Entry point for the usage-example templates.
--
-- Invocation arguments (frame.args):
--   category  required; used in the category names and to pick wording
--   variety   optional default variety code
-- Template arguments (frame:getParent().args):
--   1         the example text (required)
--   2         translation
--   3         variety code (overrides frame.args.variety)
--   lit, tr, ref/r, inline, audio/a, collapsed, nocat, tr_nocap, simp
--   pagename  only accepted when the current page is in the Template
--             namespace; overrides the page name used for bolding
--
-- Returns the wikitext/HTML for the example, either as a <dl> block or as a
-- single inline line, depending on the length of the example and the
-- arguments given.
function export.show(frame)
	local params = {
		[1] = { required = true },	-- example
		[2] = {},					-- translation
		[3] = {},					-- variety
		lit = {},
		tr = {},
		ref = {}, r = { alias_of = "ref" },
		inline = {},
		audio = {}, a = { alias_of = "audio" },
		collapsed = { type = "boolean" },
		-- Allow specifying pagename in testcases on documentation page.
		pagename = actual_title.nsText == "Template" and {} or nil,
		nocat = { type = "boolean" },
		tr_nocap = { type = "boolean" },
		simp = { type = "boolean" }
	}
	
	local category = frame.args["category"] or error("Please specify the category.")
	
	local args, unrecognized_args = require("Module:parameters").process(frame:getParent().args, params, true)
	
	if args.pagename then
		-- Override the title used for bolding (the parameter only exists in
		-- the Template namespace, see `params` above). mw.title.new returns
		-- nil for an invalid title, so fail with a readable message.
		title = mw.title.new(args.pagename)
			or error("Invalid pagename: " .. args.pagename)
		PAGENAME = title.text
	end
	
	local example = args[1] or error("Example unspecified.")
	local translation = args[2]
	local literal = args["lit"]
	local reference = args["ref"]
	local manual_tr = args["tr"]
	local inline = args["inline"]
	local audio_file = args["audio"]
	local collapsed = args["collapsed"]
	local simp = args["simp"]
	-- Number of bytes >= \194 in the raw example, i.e. roughly the number of
	-- non-ASCII characters; decides between block and inline layout below.
	local original_length = example:gsub("[^\194-\244]+",""):len()
	local variety = args[3] or frame.args["variety"] or (ref_list[reference] and ref_list[reference][1] or false) or "cmn"
	local variety_data = data.varieties_by_code[variety] or data.varieties_by_old_code[variety] or error("Variety " .. variety .. " not recognized.")
	-- unpack() doesn't work here because the data was loaded using mw.loadData()
	local std_code, norm_code, desc, tr_desc = variety_data[2], variety_data[3], variety_data[4], variety_data[5]
	norm_code = norm_code or std_code
	variety = std_code
	
	local lang_obj_wikt = m_languages.getByCode(variety, 3, "allow etym")
	
	if next(unrecognized_args) then
		--[[Special:WhatLinksHere/Wiktionary:Tracking/zh-usex/unrecognized arg]]
		require("Module:debug").track_unrecognized_args(unrecognized_args, "zh-usex")
	end
	
	if reference then
		require("Module:debug").track("zh-usex/ref")
	end
	
	if example:find("[%(%)]") then
		require("Module:debug").track("zh-usex/parentheses")
	end
	
	if example:find("&#") then
		require("Module:debug").track("zh-usex/html")
	end
	
	-- future escape character?
	if example:find("`") then
		require("Module:debug").track("zh-usex/backtick")
	end
	if example:find("  ") then
		require("Module:debug").track("zh-usex/double-space")
	end
	
	if (norm_code == "nan-hbl" or norm_code:find("^hak")) and example:find("%-") then
		require("Module:debug").track("zh-usex/hyphen")
	end
	
	if example:find("%w%{") then
		require("Module:debug").track("zh-usex/rom-text")
	end
	
	if not translation or translation == '' then -- per standard [[Module:usex]]
		translation = '<small>(vui lòng thêm bản dịch tiếng Việt cho ' .. (category == "các trích dẫn" and "trích dẫn" or "vd sử dụng") .. ')</small> [[Thể loại:Từ ' .. lang_obj_wikt:getFullName() .. ' có ' .. (category == "ví dụ là nhóm các từ thường cùng xuất hiện" and  "ví dụ cách sử dụng" or category) .. ' cần dịch]]'
	end
	
	-- should we generate the other (simp/trad) form
	-- (in the end, only actually display if the converted text is different)
	local do_conv = true
	if norm_code == "vi" or norm_code == "ko" then
		do_conv = false
	end
	local conv_fun = m_zh.ts
	if simp then
		if category ~= "trích dẫn ngữ liệu" then error("parameter simp cannot be true in [[Template:zh-x]] or [[Template:zh-co]].") end
		if norm_code == "vi" or norm_code == "ko" or norm_code == "lzh" or variety == "yue-HK" or variety == "cmn-TW" or
				variety == "nan-hbl-TW" or variety == "lzh-cmn-TW" or variety == "hak-hai" or variety == "hak-dab" or
				variety == "hak-zha" then
			error(("Parameter simp= cannot be specified for variety '%s'"):format(variety))
		end
		conv_fun = m_zh.st
	end
	
	-- should we generate the transcription
	local generate_tr = false
	if tr_data[norm_code] then
		if manual_tr then
			require("Module:debug").track("zh-usex/manual-tr")
		else
			generate_tr = true
		end
	end
	
	local boldify = false
	-- automatically boldify pagetitle if nothing is in bold
	if not example:find("'''") and not punctuation[PAGENAME] then
		boldify = true
	end
	-- The page name is matched literally, so escape it before using it in a
	-- pattern or in a replacement string.
	local pagename_pattern = escape_pattern(PAGENAME)
	local pagename_repl = escape_replacement(PAGENAME)
	
	-- tidying up the example, making it ready for transcription
	example = gsub(example, "[？！，。、“”…；：‘’|（）「」『』—《》〈〉【】·　．～]", " %0 ")
	example = example:gsub("—  —", "——") -- double em-dash (to be converted to single em-dash later)
		:gsub("<br */?>"," <br> ") -- process linebreaks
		:gsub("^ *",""):gsub(" *$",""):gsub("  +"," ") -- process spaces
		:gsub("%[%[(.-)%]%]%f[^%]]",function(a) -- process [[]]
			return a:gsub(" ","\1")
		end)
		:gsub("'''([^']+)'''", "<b>%1</b>") -- normalise bold syntax
		:gsub("%^<b>","<b>^")
		:gsub("</b>(%["..UTF8_char2.."%])","%1</b>")
		:gsub("</b>({[^{}]*})","%1</b>")
	
	-- parsing: convert "-", "--", "---" to "-", "..", "--" respectively
	-- so that "-" is the character that delimits links
	-- further explanation will use the replacement result to refer to the commands
	if norm_code == "cmn" then
		example = example:gsub("%-+",{["--"]="..",["---"]="--"})
		if example:find("%-[^%-%s]+\\") then
			require("Module:debug").track("zh-usex/extra-pinyin")
		end
	end

	local regex_data = tr_data[norm_code] or tr_data.default
	local segment_c = regex_data.segment_c -- the characters that delimit links
	local separator_conv = regex_data.separator_conv -- the table for separator mapping
	local link_ignore = regex_data.link_ignore -- the characters that do not affect links
	local tr_cap = regex_data.tr_cap -- transliteration can be capitalised
	local segment_regex = "(["..segment_c.."]*)([^"..segment_c.."]+)" -- the regex that matches each segment and the separator before it
	
	local cache = {} -- store the result of each segment
	local trad_text = ""
	local simp_text = ""
	-- generate the transliteration
	-- but store the results in the cache
	-- and also build up trad_text and simp_text
	local tr_text = example:gsub(segment_regex, function(separator,seg)
		separator = separator_conv[separator] or error('Invalid separator: "'..separator..'"')
		-- The cache is keyed by the segment exactly as written, so that a
		-- segment marked with "@" (no link) and the same text without "@"
		-- do not share a result.
		local cache_key = seg
		local cached = cache[cache_key]
		if cached then
			trad_text = trad_text .. cached.trad
			simp_text = simp_text .. cached.simp
			return separator..cached.tr
		end
		
		if punctuation[seg] then
			cache[cache_key] = {
				trad = seg,
				simp = seg,
				tr = punctuation[seg]
			}
			trad_text = trad_text .. seg
			simp_text = simp_text .. seg
			return separator..punctuation[seg]
		end
		
		-- "@" anywhere in the segment suppresses the link
		local at_signs
		seg, at_signs = seg:gsub("@","")
		local generate_link = (at_signs == 0)
		
		local target, display = "", seg
		local pos = seg:find("\\",1,true)
		if generate_link and pos then
			-- move formatting from start of target to display
			-- e.g. <b>^甲\乙 --> 甲\<b>^乙
			local bold = ""
			local caret = ""
			local start = 1
			if seg:sub(1,3) == "<b>" then
				bold,start = "<b>",4
			end
			-- compare exactly one character: the caret directly after the
			-- optional <b>
			if tr_cap and seg:sub(start,start) == "^" then
				caret,start = "^",start+1
			end
			target, display = seg:sub(start,pos-1), bold..caret..seg:sub(pos+1,-1)
			if target:find("</?b>") then -- Check for bold tags in target.
				require("Module:debug").track("zh-usex/bold-target")
			end
		end
		
		target = target:gsub("\1","")
		local target_trad = target:gsub("%["..UTF8_char2.."%]","")
		local target_simp = do_conv and convert(conv_fun, target)
		
		local occurrences = 0
		if boldify then
			display, occurrences = display:gsub(pagename_pattern,"<b>"..pagename_repl.."</b>")
		end
		if occurrences > 0 then
			-- undo bolding inside "[x]" overrides, then move the tags to
			-- where the rest of the code expects them
			display = display:gsub("%[<b>"..pagename_pattern.."</b>%]","%["..pagename_repl.."%]")
				:gsub("%^<b>","<b>^")
				:gsub("</b>(%["..UTF8_char2.."%])","%1</b>")
				:gsub("</b>({[^{}]*})","%1</b>")
		end
		
		local display_derom = display:gsub("{[^{}]*}","")
			:gsub("["..link_ignore.."]+","")
		local display_trad = display_derom:gsub("%["..UTF8_char2.."%]","")
		local display_simp = do_conv and convert(conv_fun, display_derom) or ""
		local seg_tr = generate_tr and get_tr(display, norm_code) or ""
		
		-- never link to the page the example is shown on
		if display_trad:gsub("</?b>","") == PAGENAME or target_trad == PAGENAME then
			generate_link = false
			if boldify and occurrences == 0 then
				display_trad = "<b>" .. display_trad .. "</b>"
				display_simp = "<b>" .. display_simp .. "</b>"
				seg_tr = "<b>" .. seg_tr .. "</b>"
			end
		end
		
		local seg_trad = generate_link and make_link(target_trad, display_trad) or display_trad
		local seg_simp = generate_link and do_conv and make_link(target_simp, display_simp) or display_simp
		cache[cache_key] = {
			trad = seg_trad,
			simp = seg_simp,
			tr = seg_tr
		}
		trad_text = trad_text .. seg_trad
		simp_text = simp_text .. seg_simp
		return separator..seg_tr
	end)
	
	if trad_text == simp_text then
		do_conv = false
		simp_text = nil
	end
	
	if not trad_text:find("</?b>") then
		require("Module:debug").track("zh-usex/no-bold")
	end
	
	-- format generated tr
	-- at this point we have three temporary substitutions:
	-- <b>:\1, </b>:\2, ':\3
	if generate_tr then
		if norm_code == "cmn" then -- format apostrophe
			tr_text = tr_text
				:gsub("%f[^%z -]([\1\2^]*)\3", "%1")
				:gsub("\1\3","\3\1") -- <b>' → '<b>
				:gsub("%^\3","\3^")  -- ^'   → '^ (shouldn't occur); the caret must be escaped to be literal
		elseif norm_code == "nan-hbl" or norm_code == "hak" then -- format hyphens
			tr_text = tr_text
				:gsub("%^%-","-^")
				:gsub("\1%-","-\1") -- <b>-  → -<b>
				:gsub("%-\2","\2-") -- -</b> → </b>-
				:gsub("%f[^%z ]%-%f[^%z %-]","") -- "-chhek" at beginning -> "chhek"
				:gsub("%f[%z %-]%-%f[%z ]","") -- "shi-" at the end -> "shi"
				:gsub("%-+","-")
				:gsub("%-?%%%-?", "--")
		end
		tr_text = tr_text:gsub("[\1\2\3]",{["\1"]="<b>",["\2"]="</b>",["\3"]="&#39;"})
		
		if find(tr_text, Han_pattern) then
			require("Module:debug").track("zh-usex/character without transliteration")
		end
	end

	local tag_start = " <span style=\"color:darkgreen; font-size:x-small;\">&#91;" -- HTML entity since "[[[w:MSC|MSC]]" is interpreted poorly
	local tag_end = "&#93;</span>"
	
	local simp_link = "<i>[[w:Chữ Hán giản thể|giản.]]</i>"
	local trad_link = "<i>[[w:Chữ Hán phồn thể|phồn.]]</i>"
	if simp then
		simp_link, trad_link = trad_link, simp_link
	end
	
	-- insert a space between two adjacent links that both border on a Latin letter
	local auto_spaces
	trad_text, auto_spaces = trad_text:gsub("([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2")
	simp_text = do_conv and simp_text:gsub("([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2") or false
	local phonetic = manual_tr or (generate_tr and tr_text)

	if auto_spaces > 0 then
		require("Module:debug").track("zh-usex/auto-spaces")
	end
	
	-- overall transcription formatting
	if phonetic then
		phonetic = gsub(phonetic, " </b>", "</b> ")
		phonetic = gsub(phonetic, "  ", " ")
		if norm_code == "yue" or norm_code == "zhx-tai" or norm_code == "nan-tws" or norm_code == "nan-hnm" or
			norm_code == "zhx-sic" or norm_code == "cjy" or norm_code == "hsn" or norm_code == "gan" or
			variety == "hak-mei" then
			phonetic = gsub(phonetic, "([a-zê]+)([1-9%-]+)", "%1<sup>%2</sup>") -- superscript tones
		end
		phonetic = gsub(phonetic, " ([,%.?!;:’”)])", "%1") -- remove excess spaces from punctuation
		phonetic = gsub(phonetic, "([‘“(]) ", "%1")
		phonetic = phonetic:gsub(" <br> ", "<br>")
		if not manual_tr then
			if norm_code == "nan-hbl" then
				phonetic = gsub(phonetic, " +%-%-", "--")
			end
		end

		-- capitalisation
		if not manual_tr then
			if norm_code == "yue" or norm_code == "zhx-tai" or norm_code == "cjy" or norm_code == "hsn" or
				norm_code == "cmn-wuh" or norm_code == "nan-tws" or norm_code == "wxa" or norm_code == "wuu" or
				variety == "hak-mei" then
				args.tr_nocap = true
			end
			if not args.tr_nocap and match(example, "[。？！]") then
				phonetic = "^" .. gsub(phonetic, "([%.?!]) ", "%1 ^")
			end
			if not args.tr_nocap then
				phonetic = gsub(phonetic, "([%.%?%!][”’]) (.)", "%1 ^%2")
				phonetic = gsub(phonetic, "<br>(.)", "<br>^%1")
				phonetic = gsub(phonetic, ": ([“‘])(.)", ": %1^%2")
			end
			-- "^" marks the next character for upper-casing
			phonetic = gsub(phonetic, "%^<b>", "<b>^")
			phonetic = gsub(phonetic, "%^+.", mw.ustring.upper)
			phonetic = gsub(phonetic, "%^", "")
		end

		if norm_code == "wuu" then
			local wuu_pron = require("Module:wuu-pron")
			if phonetic:find(":") then
				phonetic = "''" .. wuu_pron.wugniu_format(phonetic:sub(4)) .. "''"
			else
				phonetic = "''" .. wuu_pron.wugniu_format(wuu_pron.wikt_to_wugniu(phonetic)) .. "''"
			end
		elseif norm_code == "cmn-wuh" or norm_code == "wxa" then
			phonetic = "<span class=\"IPA\">[" .. phonetic .. "]</span>"

		elseif norm_code == "cdo" then
			local cdo_pron = require("Module:cdo-pron")
			phonetic = "<i>" .. phonetic .. "</i>" ..
				(not match(phonetic, "-[^ ]+-[^ ]+-[^ ]+-")
					and " / <span class=\"IPA\"><small>[" .. cdo_pron.sentence(phonetic) .. "]</small></span>"
					or "")

		else
			phonetic = "<i>" .. phonetic .. "</i>"
		end
		phonetic = "<span lang=\"zh-Latn\" style=\"color:#404D52\">" .. phonetic .. "</span>"
	end
	
	local collapse_start, collapse_end, collapse_tag, collapse_border_div, collapse_border_div_end = '', '', '', '', ''
	local simplified_start = '<br>'
	if collapsed then
		collapse_start = '<span class="vsHide">'
		collapse_end = '</span>'
		collapse_tag = '<span class="vsToggleElement" style="color:darkgreen; font-size:x-small;padding-left:10px"></span>'
		collapse_border_div = '<div class="vsSwitcher" data-toggle-category="usage examples" style="border-left: 1px solid #930; border-left-width: 2px; padding-left: 0.8em;">'
		collapse_border_div_end = '</div>'
		simplified_start = '<hr>'
	end
	
	-- categories are only added in the main namespace
	local cat
	if actual_title.nsText == '' and (not args.nocat) then -- fixme: probably categorize only if text contains the actual word
		if reference then
			cat = "[[Thể loại:Định nghĩa mục từ " .. lang_obj_wikt:getFullName() .. " có trích dẫn ngữ liệu]]"
		else
			cat = "[[Thể loại:Định nghĩa mục từ " .. lang_obj_wikt:getFullName() .. " có " .. category .. "]]"
		end
	end
	
	local zh_format_start_simp = "<span lang=\"zh-Hans\" class=\"Hans\">"
	local zh_format_start_trad = "<span lang=\"zh-Hant\" class=\"Hant\">"
	if simp then zh_format_start_simp, zh_format_start_trad = zh_format_start_trad, zh_format_start_simp end
	
	-- indentation, font and identity tags
	if ((norm_code == "cmn" and original_length > 7)
			or (norm_code ~= "cmn" and original_length > 5)
			or reference
			or collapsed
			or (match(example, "[，。？！、：；　]") and norm_code == "wuu")
			or (norm_code == "cdo" and original_length > 3)
			or ((inline or "") ~= "")) then
		-- block layout: a definition list with one row per component
		local tr_tag, trad_tag, simp_tag, audio

		trad_text = zh_format_start_trad .. trad_text .. zh_format_end

		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end

		if phonetic then
			phonetic = "<dd>" .. collapse_start .. phonetic
			translation = "<dd>" .. translation .. "</dd>"
			tr_tag = tag_start .. tr_desc .. tag_end .. collapse_end .. "</dd>"
		else
			translation = "<dd>" .. translation .. "</dd>"
		end

		if audio_file then
			audio = "<dd>[[File:" .. audio_file .. "]]</dd>"
		end
		
		if do_conv then
			trad_tag = collapse_start .. tag_start .. desc .. ", " .. trad_link .. tag_end .. collapse_end .. collapse_tag
			simp_text = simplified_start .. collapse_start .. zh_format_start_simp .. simp_text .. zh_format_end
			simp_tag = tag_start .. desc .. ", " .. simp_link .. tag_end .. collapse_end
		elseif norm_code == "vi" or norm_code == "ko" then
			trad_tag = collapse_start .. tag_start .. desc ..", " .. trad_link .. tag_end .. collapse_end .. collapse_tag
		else
			trad_tag = collapse_start .. tag_start .. desc ..", " .. trad_link .. " và " .. simp_link .. tag_end .. collapse_end .. collapse_tag
		end

		if reference then
			reference = "<dd>" .. collapse_start .. "<small><i>Từ:</i> " ..
				(ref_list[reference] and ref_list[reference][2] or reference) .. "</small>" .. collapse_end .. "</dd>"
		end

		return collapse_border_div .. "<dl class=\"zhusex\">" .. trad_text .. trad_tag .. (simp_text or "") .. (simp_tag or "") .. (reference or "") ..
			(phonetic and phonetic .. tr_tag or "") .. (audio or "") .. translation .. "</dl>" .. (cat or "") .. collapse_border_div_end

	else
		-- inline layout: everything on one line, separated by dividers
		local ts_tag, tr_tag, audio

		trad_text = zh_format_start_trad .. trad_text .. zh_format_end
		local divider = "&nbsp; ―&nbsp; "

		if variety ~= "cmn" then
			ts_tag = tag_start .. desc .. tag_end
			tr_tag = tag_start .. tr_desc .. tag_end
		end

		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end

		if do_conv then
			simp_text = "<span lang=\"zh-Hani\" class=\"Hani\">／</span>" .. zh_format_start_simp .. simp_text .. zh_format_end
		end

		if audio_file then
			audio = " [[File:" .. audio_file .. "]]"
		end

		return trad_text .. (simp_text or "") .. (ts_tag or "") .. divider ..
			(phonetic and phonetic .. (tr_tag or "") .. (audio or "") .. divider or "") .. translation .. (literal and " (literally, “" .. literal .. "”)" or "") ..
			(cat or "")
	end
end

-- function export.migrate(text, translation, ref)
-- 	if type(text) == "table" then
-- 		if not text.args or not text.args[1] then
-- 			text = text:getParent()
-- 		end
-- 		if text.args[2] and text.args[2] ~= '' then
-- 			ref = text.args[1]
-- 			translation = text.args[3]
-- 			text = text.args[2]
-- 		else
-- 			text = text.args[1]
-- 		end
-- 	end
-- 	text = text:gsub('^[%*#: \n]+', ''):gsub('[ \n]+$', ''):gsub(' +', '　'):gsub('\n+', '<br>'):gsub('|', '\\'):gsub('\'\'\'%[%[', ' '):gsub('%]%]\'\'\'', ' '):gsub('%]%]%[%[', ' '):gsub('%]%]', ''):gsub('%[%[', '')
-- :gsub('\'\'\'', ''):gsub(',', '，'):gsub('!', '！'):gsub('%?', '？')
-- 	if translation then
-- 		if ref and ref ~= '' then
-- 			return '{{zh-x|' .. text .. '|' .. translation .. '|ref=' .. ref .. '}}'
-- 		else
-- 			return '{{zh-x|' .. text .. '|' .. translation .. '}}'
-- 		end
-- 	else
-- 		return text
-- 	end
-- end

return export