local export = {}

local m_IPA = require("Module:IPA")

local lang = require("Module:languages").getByCode("pga")

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local ulower = mw.ustring.lower

-- Combining diacritics used inside export.IPA as internal stress markers.
local AC = u(0x0301) -- combining acute accent (U+0301): converted to the primary stress mark
local CFLEX = u(0x0302) -- combining circumflex (U+0302): marks a word as unstressed (converted to nothing)

-- Vowel symbols as they appear after the orthography has been transcribed.
local vowel = "aɛɪɔʊ"
-- Pattern character class matching a single vowel.
local V = "[" .. vowel .. "]"
local accent = AC .. CFLEX
-- Pattern character class matching either internal accent marker.
local accent_c = "[" .. accent .. "]"
local ipa_stress = "ˈˌ"
-- Pattern character class matching an IPA primary or secondary stress mark.
local ipa_stress_c = "[" .. ipa_stress .. "]"
-- Characters that are neither vowels nor consonants: accents, stress marks,
-- the word-boundary marker '#', the space and the syllable dot.
local separator = accent .. ipa_stress .. "# ."
local C = "[^" .. vowel .. separator .. "]" -- consonant: anything that is not a vowel or a separator

-- Set of words that are to be treated as unstressed; currently empty.
local unstressed_words = require("Module:table").listToSet({ }) -- add unstressed words to this list

-- Wrapper around rsubn() that yields only the substituted string and drops
-- the substitution count.
local function rsub(term, foo, bar)
	-- the extra parentheses truncate the result list to its first value
	return (rsubn(term, foo, bar))
end

-- Wrapper around rsubn() whose second return value is a boolean telling
-- whether at least one substitution was performed.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end

-- Keep applying rsub() with the same pattern and replacement until the
-- string stops changing, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- Generate an IPA transcription of `text`.
--
-- Parameters:
--   text:     the spelling (or respelling) to transcribe; when nil, the title
--             of the current page is used. A doubled vowel (aa, ee, ii, oo, uu)
--             or a vowel followed by a combining acute marks the stressed
--             syllable; a word without any such mark is stressed on its first
--             syllable. A word ending in a hyphen (e.g. a prefix) or listed in
--             `unstressed_words` is left unstressed.
--   phonetic: when truthy, produce the phonetic transcription (syllable dots,
--             close [e i o u] in stressed syllables not followed by ɾ, open
--             [ɛ ɪ ɔ ʊ] elsewhere); otherwise produce the phonemic
--             transcription, which only uses <a e i o u>.
--
-- Returns the transcription as an NFC-normalized string, without the
-- enclosing slashes or brackets.
function export.IPA(text, phonetic)
	text = ulower(text or mw.title.getCurrentTitle().text)

	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[¡!¿?]%s*([^%s])", "%1 | %2")

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(s)
		s = rsub(s, "%s+", " ")
		s = rsub(s, "^ ", "")
		s = rsub(s, " $", "")
		return s
	end

	text = canon_spaces(text)

	-- Mark unstressed words (hyphen-final words without an explicit accent, and
	-- words listed in `unstressed_words`) by adding CFLEX after their last vowel.
	-- The text is still in its orthographic form at this point, so the plain
	-- letters <e i o u> must be recognized in addition to the IPA vowel symbols.
	local orthographic_V = "[aeiou" .. vowel .. "]"
	local words = rsplit(text, " ")
	for i, word in ipairs(words) do
		if rfind(word, "%-$") and not rfind(word, accent_c) or unstressed_words[word] then
			-- greedy .* makes the CFLEX land after the last vowel, not the first
			words[i] = rsub(word, "^(.*" .. orthographic_V .. ")", "%1" .. CFLEX)
		end
	end
	text = table.concat(words, " ")
	-- Convert hyphens to spaces
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation; the apostrophe is kept because it is
	-- transcribed as a glottal stop below
	text = rsub(text, "[¡!¿?]", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	-- doubled vowels mark the stressed syllable
	text = rsub(text, "aa", "a" .. AC)
	text = rsub(text, "ee", "e" .. AC)
	text = rsub(text, "ii", "i" .. AC)
	text = rsub(text, "oo", "o" .. AC)
	text = rsub(text, "uu", "u" .. AC)

	-- transcription; digraphs must be handled before the single letters
	text = rsub(text, "sh", "ɕ")
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "ny", "ɲ")
	-- all of <e i o u> start out as open vowels; the close variants are
	-- restored at the end, depending on stress and on a following /ɾ/
	text = rsub(text, "[jg'fvyreiou]",
			{ ["j"] = "ɟ", ["g"] = "ɡ", ["'"] = "ʔ", ["f"] = "ɸ", ["v"] = "β", ["y"] = "j", ["r"] = "ɾ", ["e"] = "ɛ", ["i"] = "ɪ", ["o"] = "ɔ", ["u"] = "ʊ"})

	--syllable division (rudimentary)
	-- V.V and V.CV
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. "?" .. V .. ")", "%1.%2")
	-- VC.CV
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	-- VC(C...).CCV
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	-- C.sC -> Cs.C
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")

	local accent_to_stress_mark = { [AC] = "ˈ", [CFLEX] = "" }

	-- Replace the accent markers in `syllables` (the syllables of `word`) with
	-- IPA stress marks placed at the start of the syllable; a word without any
	-- accent marker is stressed on its first syllable.
	local function accent_word(word, syllables)
		if rfind(word, accent_c) then
			for i = 1, #syllables do
				syllables[i] = rsub(syllables[i], "^(.*)(" .. accent_c .. ")(.*)$",
						function(pre, mark, post)
							-- the last marker of the syllable decides; any earlier
							-- marker (e.g. an acute from a doubled vowel in an
							-- unstressed word) is dropped so it cannot leak
							-- into the output
							return accent_to_stress_mark[mark] .. rsub(pre, accent_c, "") .. post
						end
				)
			end
		else
			syllables[1] = "ˈ" .. syllables[1]
		end
	end

	words = rsplit(text, " ")
	for j, word in ipairs(words) do
		-- a bare foot boundary is not a word: it has no syllables and takes no stress
		if not rfind(word, "^#*|#*$") then
			-- accentuation
			local syllables = rsplit(word, "%.")
			accent_word(word, syllables)

			--mark stressed syllables with an accent (to be used later), no need to mark stressed /a/
			if phonetic then
				for i = 1, #syllables do
					if rfind(syllables[i], "ˈ") then
						syllables[i] = rsub(syllables[i], "[ɛɪɔʊ]", {["ɛ"] = "é", ["ɪ"] = "í", ["ɔ"] = "ó", ["ʊ"] = "ú"})
					end
				end
			end

			-- Reconstruct the word; syllable dots are only kept in the phonetic transcription.
			words[j] = table.concat(syllables, phonetic and "." or "")
		end
	end

	text = table.concat(words, " ")

	-- suppress syllable mark before IPA stress indicator
	text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")

	--The vowels /e/, /i/, /o/, /u/ are open in unstressed syllables and stressed syllables when followed by /ɾ/. Only use <aeiou> in phonemic transcription.
	if phonetic then
		-- a stressed vowel directly followed by ɾ (possibly across a syllable dot) stays open
		local open_vowel = {["é"] = "ɛ", ["í"] = "ɪ", ["ó"] = "ɔ", ["ú"] = "ʊ"}
		text = rsub(text, "([éíóú])(%.?ɾ)", function(stressed, rest)
			return open_vowel[stressed] .. rest
		end)
		-- every other stressed vowel is close
		text = rsub(text, "[éíóú]", {["é"] = "e", ["í"] = "i", ["ó"] = "o", ["ú"] = "u"})
	else
		text = rsub(text, "[ɛɪɔʊ]", {["ɛ"] = "e", ["ɪ"] = "i", ["ɔ"] = "o", ["ʊ"] = "u"})
	end

	--mark /h/ as optional
	text = rsub(text, "h", "(h)")

	-- remove # symbols at word and text boundaries
	text = rsub(text, "#", "")

	return mw.ustring.toNFC(text)
end

-- Template entry point. Reads the parent frame's arguments: the optional
-- first positional argument is the text to transcribe (the current page title
-- is used when it is absent) and the optional `pre` argument is a label put
-- in front of the pronunciations. Returns a bulleted line with the phonemic
-- (/.../) and phonetic ([...]) transcriptions formatted by Module:IPA.
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {},
		["pre"] = {},
	})

	local text = args[1] or mw.title.getCurrentTitle().text

	-- phonemic transcription first, then the phonetic one
	local results = {
		{ pron = "/" .. export.IPA(text, false) .. "/" },
		{ pron = "[" .. export.IPA(text, true) .. "]" },
	}

	local pre = ""
	if args.pre then
		pre = args.pre .. " "
	end

	return "* " .. pre .. m_IPA.format_IPA_full(lang, results)
end

return export