local export = {}
local lang = require("Module:languages").getByCode("uk")
local m_links = require("Module:links")
local m_table = require("Module:table")
local m_string_utilities = require("Module:string utilities")
local m_uk_translit = require("Module:uk-translit")
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower
-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
local retval = rsubn(term, foo, bar)
return retval
end
local AC = u(0x0301) -- acute = ́
local GR = u(0x0300) -- acute = `
export.VAR1 = u(0xFFF0)
export.VAR2 = u(0xFFF1)
export.VAR3 = u(0xFFF2)
export.var_code_c = "[" .. export.VAR1 .. export.VAR2 .. export.VAR3 .. "]"
export.vowel = "аеиоуіїяєюАЕИОУІЇЯЄЮ"
export.vowel_c = "[" .. export.vowel .. "]"
export.non_vowel_c = "[^" .. export.vowel .. "]"
export.cons_except_hushing_or_ts = "бдфгґйклмнпрствхзь'БДФГҐЙКЛМНПРСТВХЗЬ"
export.cons_except_hushing_or_ts_c = "[" .. export.cons_except_hushing_or_ts .. "]"
export.hushing = "чшжщЧШЖЩ"
export.hushing_c = "[" .. export.hushing .. "]"
export.hushing_or_ts = export.hushing .. "цЦ"
export.hushing_or_ts_c = "[" .. export.hushing_or_ts .. "]"
export.cons = export.cons_except_hushing_or_ts .. export.hushing_or_ts
export.cons_c = "[" .. export.cons .. "]"
-- Cyrillic velar consonants
export.velar = "кгґхКГҐХ"
export.velar_c = "[" .. export.velar .. "]"
-- uppercase Cyrillic consonants
export.uppercase = "АЕИОУІЇЯЄЮБЦДФГҐЧЙКЛМНПРСТВШХЗЖЬЩ"
export.uppercase_c = "[" .. export.uppercase .. "]"
export.accents_c = "[" .. AC .. GR .. "]"
local first_palatalization = {
["к"] = "ч",
["г"] = "ж",
["ґ"] = "ж",
["х"] = "ш",
["ц"] = "ч",
}
local second_palatalization = {
["к"] = "ц",
["г"] = "з",
["ґ"] = "з",
["х"] = "с",
}
function export.translit_no_links(text)
return m_uk_translit.tr(m_links.remove_links(text))
end
local grave_decomposer = {
["ѐ"] = "е" .. GR,
["Ѐ"] = "Е" .. GR,
["ѝ"] = "и" .. GR,
["Ѝ"] = "И" .. GR,
}
-- decompose precomposed Cyrillic chars w/grave accent; not necessary for
-- acute accent as there aren't precomposed Cyrillic chars w/acute accent,
-- and undesirable for precomposed й, й, ї, Ї, etc.
function export.decompose_grave(text)
return rsub(text, "[ѐЀѝЍ]", grave_decomposer)
end
function export.needs_accents(text)
text = export.decompose_grave(text)
for _, word_with_hyphens in ipairs(rsplit(text, "%s+")) do
-- A word needs accents if it contains no accent and has more than one vowel
-- and doesn't begin or end with a hyphen (marking a prefix or suffix)
if not rfind(word_with_hyphens, "^%-") and not rfind(word_with_hyphens, "%-$") then
for _, word in ipairs(rsplit(word_with_hyphens, "%-")) do
if not rfind(word, export.accents_c) and not export.is_monosyllabic(word) then
return true
end
end
end
end
return false
end
function export.is_stressed(word)
return rfind(word, AC)
end
function export.is_multi_stressed(text)
for _, word in ipairs(rsplit(text, "[%s%-]+")) do
if ulen(rsub(word, "[^́]", "")) > 1 then
return true
end
end
return false
end
function export.remove_stress(word)
return rsub(word, AC, "")
end
function export.remove_variant_codes(word)
return rsub(word, export.var_code_c, "")
end
-- Handles the alternation between initial і/у and й/в.
function export.initial_alternation(word, previous)
if rfind(word, "^[іІ]") or rfind(word, "^[йЙ]" .. export.non_vowel_c) then
if rfind(previous, export.vowel_c .. AC .. "?$") then
return rsub(word, "^[іІ]", {["і"] = "й", ["І"] = "Й"})
else
return rsub(word, "^[йЙ]", {["й"] = "і", ["Й"] = "І"})
end
elseif rfind(word, "^[уУ]") or rfind(word, "^[вВ]" .. export.non_vowel_c) then
if rfind(previous, export.vowel_c .. AC .. "?$") then
return rsub(word, "^[уУ]", {["у"] = "в", ["У"] = "В"})
else
return rsub(word, "^[вВ]", {["в"] = "у", ["В"] = "У"})
end
end
return word
end
-- Check if word is monosyllabic (also includes words without vowels).
function export.is_monosyllabic(word)
local num_syl = ulen(rsub(word, export.non_vowel_c, ""))
return num_syl <= 1
end
-- If word is monosyllabic, add stress to the vowel.
function export.add_monosyllabic_stress(word)
if export.is_monosyllabic(word) and not rfind(word, "^%-") and
not rfind(word, "%-$") and not rfind(word, AC) then
word = rsub(word, "(" .. export.vowel_c .. ")", "%1" .. AC)
end
return word
end
-- If word is monosyllabic, remove stress from the vowel.
function export.remove_monosyllabic_stress(word)
if export.is_monosyllabic(word) and not rfind(word, "^%-") and
not rfind(word, "%-$") then
return export.remove_stress(word)
end
return word
end
-- Check if word is nonsyllabic.
function export.is_nonsyllabic(word)
local num_syl = ulen(rsub(word, export.non_vowel_c, ""))
return num_syl == 0
end
-- Check if word ends in a vowel.
function export.ends_in_vowel(stem)
return rfind(stem, export.vowel_c .. AC .. "?$")
end
-- If word is unstressed, add stress onto initial syllable.
function export.maybe_stress_initial_syllable(word)
if not rfind(word, AC) then
-- stress first syllable
word = rsub(word, "^(.-" .. export.vowel_c .. ")", "%1" .. AC)
end
return word
end
-- If word is unstressed, add stress onto final syllable.
function export.maybe_stress_final_syllable(word)
if not rfind(word, AC) then
-- stress last syllable
word = rsub(word, "(.*" .. export.vowel_c .. ")", "%1" .. AC)
end
return word
end
function export.iotate(stem)
stem = rsub(stem, "с[кт]$", "щ")
stem = rsub(stem, "з[дгґ]$", "ждж")
stem = rsub(stem, "к?т$", "ч")
stem = rsub(stem, "зк$", "жч")
stem = rsub(stem, "[кц]$", "ч")
stem = rsub(stem, "[сх]$", "ш")
stem = rsub(stem, "[гз]$", "ж")
stem = rsub(stem, "д$", "дж")
stem = rsub(stem, "([бвмпф])$", "%1л")
return stem
end
function export.apply_first_palatalization(word)
return rsub(word, "^(.*)([кгґхц])$",
function(prefix, lastchar) return prefix .. first_palatalization[lastchar] end
)
end
function export.apply_second_palatalization(word)
return rsub(word, "^(.*)([кгґх])$",
function(prefix, lastchar) return prefix .. second_palatalization[lastchar] end
)
end
function export.reduce(word)
local pre, letter, post = rmatch(word, "^(.*)([оОеЕєЄіІ])́?(" .. export.cons_c .. "+)$")
if not pre then
return nil
end
if letter == "о" or letter == "О" then
-- FIXME, what about when the accent is on the removed letter?
if post == "й" or post == "Й" then
-- FIXME, is this correct?
return nil
end
letter = ""
else
local is_upper = rfind(post, export.uppercase_c)
if letter == "є" or letter == "Є" then
-- англі́єц -> англі́йц-
letter = is_upper and "Й" or "й"
elseif post == "й" or post == "Й" then
-- солове́й -> солов'-
letter = "'"
post = ""
elseif (rfind(post, export.velar_c .. "$") and rfind(pre, export.cons_except_hushing_or_ts_c .. "$")) or
(rfind(post, "[^йЙ" .. export.velar .. "]$") and rfind(pre, "[лЛ]$")) then
-- FIXME, is this correct? This logic comes from ru-common.lua. The second clause that
-- adds ь after л is needed but I'm not sure about the first one.
letter = is_upper and "Ь" or "ь"
else
letter = ""
end
end
return pre .. letter .. post
end
function export.dereduce(stem, epenthetic_stress)
if epenthetic_stress then
stem = export.remove_stress(stem)
end
-- We don't require there to be two consonants at the end because of ону́ка (gen pl ону́ок).
local pre, letter, post = rmatch(stem, "^(.*)(.)(" .. export.cons_c .. ")$")
if not pre then
return nil
end
local is_upper = rfind(post, export.uppercase_c)
local epvowel
if rfind(letter, export.velar_c) or rfind(post, export.velar_c) or rfind(post, "[вВ]") then
epvowel = is_upper and "О" or "о"
elseif rfind(post, "['ьЬ]") then
-- сім'я́ -> gen pl сіме́й
-- ескадри́лья -> gen pl ескадри́лей
epvowel = rfind(letter, export.uppercase_c) and "Е" or "е"
post = ""
elseif rfind(letter, "[йЙ]") then
-- яйце́ -> gen pl я́єць
epvowel = is_upper and "Є" or "є"
letter = ""
else
if rfind(letter, "[ьЬ]") then
-- кільце́ -> gen pl кі́лець
letter = ""
end
epvowel = is_upper and "Е" or "е"
end
if epenthetic_stress then
epvowel = epvowel .. AC
end
return pre .. letter .. epvowel .. post
end
function export.apply_vowel_alternation(ialt, stem)
local modstem, origvowel
if ialt == "io" then
-- ріг, gen sg. ро́га; плід, gen sg. плода́/пло́ду; безкра́їсть gen sg. безкра́йості
modstem = rsub(stem, "([іІїЇ])(́?" .. export.cons_c .. "*)$",
function(vowel, post)
origvowel = vowel
if vowel == "і" then
return "о" .. post
elseif vowel == "І" then
return "О" .. post
elseif vowel == "ї" then
return "йо" .. post
else
return "Йо" .. post
end
end
)
if modstem == stem then
error("Indicator 'io' can't be applied because stem '" .. stem .. "' doesn't have an і as its last vowel")
end
elseif ialt == "ijo" then
-- ко́лір, gen sg. ко́льору; вертолі́т, gen sg. вертольо́та
modstem = rsub(stem, "і(́?" .. export.cons_c .. "*)$", "ьо%1")
if modstem == stem then
error("Indicator 'ijo' can't be applied because stem '" .. stem .. "' doesn't have an і as its last vowel")
end
origvowel = "і"
elseif ialt == "ie" then
modstem = rsub(stem, "([іїІЇ])(́?" .. export.cons_c .. "*)$",
function(vowel, post)
origvowel = vowel
if vowel == "і" then
-- ведмі́дь gen sg. ведме́дя
return "е" .. post
elseif vowel == "І" then
return "Е" .. post
elseif vowel == "ї" then
-- Ки́їв gen sg. Ки́єва
return "є" .. post
else
return "Є" .. post
end
end
)
if modstem == stem then
error("Indicator 'ie' can't be applied because stem '" .. stem .. "' doesn't have an і or ї as its last vowel")
end
elseif ialt == "i" then
modstem = rsub(stem, "ь?([оеОЕ])(́?" .. export.cons_c .. "*)$",
function(vowel, post)
origvowel = vowel
if vowel == "о" or vowel == "е" then
return "і" .. post
else
return "І" .. post
end
end
)
if modstem == stem then
error("Indicator 'i' can't be applied because stem '" .. stem .. "' doesn't have an о or е as its last vowel")
end
else
return stem, nil
end
return modstem, origvowel
end
-- Given a list of forms (each of which is a table of the form {form=FORM, footnotes=FOOTNOTES}),
-- concatenate into a SLOT=FORM,FORM,... string, replacing embedded | signs with <!>.
function export.concat_forms_in_slot(forms)
if forms then
local new_vals = {}
for _, v in ipairs(forms) do
table.insert(new_vals, rsub(v.form, "|", "<!>"))
end
return table.concat(new_vals, ",")
else
return nil
end
end
function export.combine_stem_ending(stem, ending)
if stem == "?" then
return "?"
elseif export.is_stressed(ending) then
return export.remove_stress(stem) .. ending
else
return stem .. ending
end
end
function export.generate_form(form, footnotes)
if type(footnotes) == "string" then
footnotes = {footnotes}
end
if footnotes then
return {form = form, footnotes = footnotes}
else
return form
end
end
function export.u_v_alternation_msg(frame)
local params = {
[1] = {}
}
local parargs = frame:getParent().args
local args = require("Module:parameters").process(parargs, params)
local alternant = args[1] or mw.title.getCurrentTitle().text
local ualt, valt, ufirst
if rfind(alternant, "^[вВ]") then
valt = alternant
ualt = rsub(export.add_monosyllabic_stress(valt), "^([вВ])", {["в"] = "у", ["В"] = "У"})
ufirst = false
else
ualt = alternant
valt = export.remove_monosyllabic_stress(rsub(ualt, "^([уУ])", {["у"] = "в", ["У"] = "В"}))
ufirst = true
end
ualt = m_links.full_link({lang = lang, term = ualt}, "term") .. " (used after consonants or at the beginning of a clause)"
valt = m_links.full_link({lang = lang, term = valt}, "term") .. " (used after vowels)"
local first, second
if ufirst then
first, second = ualt, valt
else
first, second = valt, ualt
end
return "The forms " .. first .. " and " .. second .. " differ in pronunciation but are considered variants of the same word."
end
function export.i_j_alternation_msg(frame)
local params = {
[1] = {}
}
local parargs = frame:getParent().args
local args = require("Module:parameters").process(parargs, params)
local alternant = args[1] or mw.title.getCurrentTitle().text
local ualt, valt, ufirst
if rfind(alternant, "^[йЙ]") then
valt = alternant
ualt = rsub(export.add_monosyllabic_stress(valt), "^([йЙ])", {["й"] = "і", ["Й"] = "І"})
ufirst = false
else
ualt = alternant
valt = export.remove_monosyllabic_stress(rsub(ualt, "^([іІ])", {["і"] = "й", ["І"] = "Й"}))
ufirst = true
end
ualt = m_links.full_link({lang = lang, term = ualt}, "term") .. " (used after consonants or at the beginning of a clause)"
valt = m_links.full_link({lang = lang, term = valt}, "term") .. " (used after vowels)"
local first, second
if ufirst then
first, second = ualt, valt
else
first, second = valt, ualt
end
return "The forms " .. first .. " and " .. second .. " differ in pronunciation but are considered variants of the same word."
end
return export