-- Transliteration for Nepali
local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- Character-level conversion table. export.tr feeds every character
-- (optionally followed by a nukta) through this table in its final
-- conversion pass; characters without an entry are left unchanged.
local conv = {
	-- consonants
	["क"] = "k",
	["ख"] = "kʰ",
	["ग"] = "ɡ",
	["घ"] = "ɡʱ",
	["ङ"] = "ŋ",
	["च"] = "ʦ",
	["छ"] = "ʦʰ",
	["ज"] = "ʣ",
	["झ"] = "ʣʱ",
	["ञ"] = "n",
	["ट"] = "ʈ",
	["ठ"] = "ʈʰ",
	["ड"] = "ɖ",
	["ढ"] = "ɖʱ",
	["ण"] = "ɳ",
	["त"] = "t̪",
	["थ"] = "t̪ʰ",
	["द"] = "d̪",
	["ध"] = "d̪ʱ",
	["न"] = "n",
	["प"] = "p",
	["फ"] = "pʰ",
	["ब"] = "b",
	["भ"] = "bʱ",
	["म"] = "m",
	["य"] = "j",
	["र"] = "r",
	["ल"] = "l",
	["व"] = "w",
	["श"] = "s",
	["ष"] = "ʂ",
	["स"] = "s",
	["ह"] = "ɦ",
	-- consonants carrying a nukta, and other extended letters
	["क़"] = "q",
	["ख़"] = "x",
	["ग़"] = "ɣ",
	["ऴ"] = "ɭ",
	["ळ"] = "ɭ",
	["ज़"] = "z",
	["श़"] = "ʒ",
	["झ़"] = "ʒ",
	["ड़"] = "ɽ",
	["ढ़"] = "ɽʱ",
	["फ़"] = "f",
	["थ़"] = "θ",
	["द़"] = "ð",
	["ऩ"] = "n̪",
	["ऱ"] = "ɹ",
	["ॽ"] = "ʔ",
	["व़"] = "v",
	["ॹ"] = "ʒ",
	-- vowel diacritics (dependent vowel signs)
	["ि"] = "i",
	["ु"] = "u",
	["े"] = "e",
	["ो"] = "o",
	["ा"] = "ä",
	["ी"] = "i",
	["ू"] = "u",
	["ृ"] = "ri",
	["ॄ"] = "ri",
	["ॢ"] = "liɾi",
	["ॣ"] = "liɾi",
	["ै"] = "ʌi̯",
	["ौ"] = "ʌu̯",
	["ॉ"] = "ɔ",
	["ॅ"] = "æ",
	-- independent vowel letters
	["अ"] = "ʌ",
	["इ"] = "i",
	["उ"] = "u",
	["ए"] = "e",
	["ओ"] = "o",
	["आ"] = "ä",
	["ई"] = "i",
	["ऊ"] = "u",
	["ऋ"] = "ri",
	["ॠ"] = "ri",
	["ऌ"] = "liɾi",
	["ॡ"] = "liɾi",
	["ऐ"] = "ʌi̯",
	["औ"] = "ʌu̯",
	["ऑ"] = "ɔ",
	["ॲ"] = "æ",
	["ऍ"] = "æ",
	-- chandrabindu
	["ँ"] = "̃",
	-- anusvara
	["ं"] = "ṃ",
	-- visarga
	["ः"] = "ː",
	-- virama
	["्"] = "",
	-- om
	["ॐ"] = "oːm",
	-- zero-width non-joiner (U+200C). Written as UTF-8 byte escapes because
	-- the literal character is invisible and indistinguishable from an empty
	-- string key in most editors.
	["\226\128\140"] = " ͜ ",
	-- zero-width joiner (U+200D), escaped for the same reason
	["\226\128\141"] = "ʌ",
	-- diphthong marker
	["ॱ"] = "̯",
	-- numerals
	["०"] = "0",
	["१"] = "1",
	["२"] = "2",
	["३"] = "3",
	["४"] = "4",
	["५"] = "5",
	["६"] = "6",
	["७"] = "7",
	["८"] = "8",
	["९"] = "9",
	-- punctuation
	["।"] = ".", -- danda
	["॥"] = ".", -- double danda
	["+"] = "", -- compound separator
	-- abbreviation sign
	["॰"] = "."
}
-- Maps a consonant to the nasal that an adjacent anusvara is rewritten to.
-- export.tr looks this table up with the character captured next to "ं" in
-- the reversed word; characters with no entry fall back to a nasalisation mark.
-- Entries are grouped by the nasal they produce.
local nasal_assim = {
	-- -> ङ
	["क"] = "ङ", ["ख"] = "ङ", ["ग"] = "ङ", ["घ"] = "ङ",
	["श"] = "ङ", ["ह"] = "ङ",
	-- -> ञ
	["च"] = "ञ", ["छ"] = "ञ", ["ज"] = "ञ", ["झ"] = "ञ",
	-- -> ण
	["ट"] = "ण", ["ठ"] = "ण", ["ड"] = "ण", ["ढ"] = "ण",
	-- -> न
	["त"] = "न", ["थ"] = "न", ["द"] = "न", ["ध"] = "न",
	["न"] = "न", ["ष"] = "न", ["स"] = "न",
	-- -> म
	["प"] = "म", ["फ"] = "म", ["ब"] = "म", ["भ"] = "म",
	["म"] = "म", ["य"] = "म", ["र"] = "म", ["व"] = "म",
	-- -> chandrabindu
	["ल"] = "ँ"
}
-- Set of consonant + virama + consonant sequences that are exempt from the
-- conjunct branch of the word-final ʌ rule in export.tr. The keys are
-- compared against characters taken from the *reversed* word, so each key
-- reads final consonant first.
local perm_cl = {}
for _, cluster in ipairs({ "ज्न", "ज्ञ", "ट्र", "ड्र", "ट्स", "ड्स", "स्ड" }) do
	perm_cl[cluster] = true
end
-- Character inventories used to assemble the patterns below and inside
-- export.tr. They are spliced into [...] character classes, so repeated
-- characters within a string are harmless.
local all_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह"
-- Consonants tested by the word-final ʌ rule in export.tr.
local special_cons = "कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह"
-- NOTE: despite the names, `vowel` holds a Latin "a" plus the dependent vowel
-- signs, while `vowel_sign` holds the independent vowel letters.
local vowel = "aिुृेोाीूैौॉॅॆॊॄॢॣ"
local vowel_sign = "अइउएओआईऊऋॠॡऌऐऔऑऍ"
local nor_cons, sp_cons = "कखगघङचछजझञटठडढतथदधपफबभशषसयरलवणनमयरलवनम", "कलम"
local koka_sign = "ोीाैे"

-- Both patterns are applied to the reversed word in export.tr; the
-- replacement there re-emits every capture and so drops the literal ʌ.
local syncope_pattern, koka_pattern
do
	-- consonant, optionally preceded (in reversed order) by a nukta
	local cons = "(़?[" .. all_cons .. "])"
	-- vowel, optionally preceded (in reversed order) by anusvara/chandrabindu
	local syll_vowel = "([ंँ]?[" .. vowel .. vowel_sign .. "])"

	-- C V C V C ʌ C V C V  (nine captures)
	syncope_pattern = cons .. syll_vowel .. cons .. syll_vowel .. cons .. "ʌ" ..
		cons .. syll_vowel .. cons .. syll_vowel

	-- vowel sign from koka_sign, one of sp_cons, ʌ, any nor_cons except य, vowel
	koka_pattern = "([" .. koka_sign .. "])(़?[" .. sp_cons .. "])ʌ(़?[" ..
		gsub(nor_cons, "य", "") .. "])" .. syll_vowel
end
-- Returns `text` with its Unicode characters (as counted by mw.ustring) in
-- reverse order. Combining marks are treated as separate characters and are
-- therefore reversed along with everything else.
local function rev_string(text)
	local usub = mw.ustring.sub
	local total = mw.ustring.len(text)
	local reversed = {}
	for offset = 0, total - 1 do
		local pos = total - offset
		reversed[offset + 1] = usub(text, pos, pos)
	end
	return table.concat(reversed)
end
--- Converts Nepali text in Devanagari to an IPA-like transcription.
--
-- Processing order:
--   1. every consonant not followed by a vowel sign or virama gets an
--      explicit inherent vowel ʌ;
--   2. each run of Devanagari characters is reversed, has its final ʌ,
--      medial ʌ (syncope_pattern / koka_pattern) and anusvara resolved, is
--      spliced back into the text, and a set of word-ending rules is applied;
--   3. every remaining character is mapped through `conv`;
--   4. a long sequence of ordered rewrite rules adjusts the result.
-- The rules are order-dependent; do not reorder them.
--
-- @param text string to convert
-- @param lang unused by this function
-- @param sc unused by this function
-- @return the converted string, NFC-normalised
function export.tr(text, lang, sc)
	-- Step 1: make the inherent vowel explicit.
	text = gsub(
		text,
		"([" .. all_cons .. "]़?)([" .. vowel .. "्]?)",
		function(c, d)
			return c .. (d == "" and "ʌ" or d)
		end
	)

	-- Step 2: per-word processing. gmatch iterates over the text as it was
	-- when the loop started; `text` itself is rewritten inside the loop.
	for word in mw.ustring.gmatch(text, "[ऀ-ॿʌ]+") do
		local orig_word = word
		-- The word is handled reversed so that "^" anchors at the word end.
		word = rev_string(word)
		-- Word-final ʌ: kept only when one of the conditions below holds,
		-- otherwise dropped.
		word = gsub(
			word,
			"^ʌ(़?)([" .. all_cons .. "])(.)(.?)",
			function(opt, first, second, third)
				return (((match(first, "[" .. special_cons .. "]") and match(second, "ं") or
					match(first, "[" .. special_cons .. "]") and match(second, "्") and
						not perm_cl[first .. second .. third]) or
					match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔʌ]") or
					match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔʌ]")) and
					"ʌ" or
					"") ..
					opt .. first .. second .. third
			end
		)
		-- Medial ʌ deletion; repeated until the pattern no longer matches.
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, "%1%2%3%4%5%6%7%8%9")
		end
		while match(word, koka_pattern) do
			word = gsub(word, koka_pattern, "%1%2%3%4")
		end
		-- Anusvara: `succ` is the character that follows it in normal reading
		-- order, `prev` the one that precedes it (the word is reversed here).
		word = gsub(
			word,
			"(.?)ं(.)",
			function(succ, prev)
				return succ ..
					(succ .. prev == "ʌ" and "्म" or
						(succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "̃")) ..
					prev
			end
		)
		-- orig_word consists only of characters matched by "[ऀ-ॿʌ]+", so it
		-- contains no pattern magic characters and can be used as a pattern.
		text = gsub(text, orig_word, rev_string(word))

		-- Word endings that receive a trailing ʌ. Each ending is handled at
		-- the end of the text ("$") and before a space when at least three
		-- characters precede it ("(...)").
		text = gsub(text, "ईन$", "inʌ")
		text = gsub(text, "(...)ईन ", "%1inʌ ")
		text = gsub(text, "इन$", "inʌ")
		text = gsub(text, "(...)इन ", "%1inʌ ")
		text = gsub(text, "ैन$", "ʌi̯nʌ")
		-- Fixed: the pattern previously contained the vowel sign four times.
		text = gsub(text, "(...)ैन ", "%1ʌi̯nʌ ")
		text = gsub(text, "उन$", "unʌ")
		text = gsub(text, "(...)उन ", "%1unʌ ")
		text = gsub(text, "ुन$", "unʌ")
		text = gsub(text, "(...)ुन ", "%1unʌ ")
		text = gsub(text, "िन$", "inʌ")
		text = gsub(text, "(...)िन ", "%1inʌ ")
		-- Fixed: previously produced "ç" here, unlike every other छ rule.
		text = gsub(text, "िछ$", "iʦʰʌ")
		text = gsub(text, "(...)िछ ", "%1iʦʰʌ ")
		text = gsub(text, "उछ$", "uʦʰʌ")
		text = gsub(text, "(...)उछ ", "%1uʦʰʌ ")
		text = gsub(text, "इछ$", "iʦʰʌ")
		text = gsub(text, "(...)इछ ", "%1iʦʰʌ ")
		text = gsub(text, "एछ$", "eʦʰʌ")
		text = gsub(text, "ेछ$", "eʦʰʌ")
		text = gsub(text, "(...)ेछ ", "%1eʦʰʌ ")
		text = gsub(text, "(...)ेन ", "%1enʌ ")
		text = gsub(text, "ेन$", "enʌ")
		text = gsub(text, "(...)एन ", "%1enʌ ")
		text = gsub(text, "एर$", "eɾʌ")
		text = gsub(text, "(...)एर ", "%1eɾʌ ")
		text = gsub(text, "ेर$", "eɾʌ")
		text = gsub(text, "(...)ेर ", "%1eɾʌ ")
		text = gsub(text, "एन$", "enʌ")
		text = gsub(text, "उँछ$", "ũʦʰʌ")
		text = gsub(text, "(...)उँछ ", "%1ũʦʰʌ ")
		text = gsub(text, "ज्ञ", "ɡj")
	end

	-- Step 3: character-by-character conversion (a nukta stays attached to
	-- the preceding character for the lookup).
	text = gsub(text, ".़?", conv)

	-- Step 4: ordered post-processing rules.
	-- nasalised diphthongs
	text = gsub(text, "ʌ([iu])̯̃", "ʌ̃%1̯̃")
	text = gsub(text, "([ʌ])̃([iu])̯", "%1̃%2̯")
	text = gsub(text, "[<>]", "")
	text = gsub(text, "ॱ", "")
	-- further endings that receive a trailing ʌ
	text = gsub(text, "dʌʦʰ$", "dʌʦʰʌ")
	text = gsub(text, "(...)dʌʦʰ ", "%1dʌʦʰʌ ")
	text = gsub(text, "ʌi̯n$", "ʌi̯nʌ")
	text = gsub(text, "(...)ʌi̯n ", "%1ʌi̯nʌ ")
	text = gsub(text, "nʌʦʰ$", "nʌʦʰʌ")
	text = gsub(text, "(...)nʌʦʰ ", "%1nʌʦʰʌ ")
	-- w -> b in specific contexts (and back to w after a consonant)
	text = gsub(text, "wʌi̯", "bʌi̯")
	text = gsub(text, "w$", "b")
	text = gsub(text, "(...)w ", "%1b ")
	text = gsub(text, "([rʌäiueo])([r])w", "%1rb")
	text = gsub(text, "w([iewuojr])", "b%1")
	text = gsub(text, "([w])ʌ([krɾjtcʦʣçʐṅñysśdpɦhn])([tnrṇṣcśkghjɦsçʐueoʌayd])", "bʌ%2%3")
	text = gsub(
		text,
		"([śsnlcçʦʣʐjzkʰʱɦhpɡtdgb])([w])([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])([cspdtçʐnɡgkʦʣbɾrjyṇṣśṇɾṅñṃ])",
		"%1w%3%4"
	)
	text = gsub(text, "([w])ä([cgjṇtdçʐʦʣmyshɦśṣn])", "bä%2")
	text = gsub(text, "([w])ä([rɾ])([tdābuṇɦṣh])", "bä%2%3")
	text = gsub(text, "([w])ä([l])([m])", "bä%2%3")
	text = gsub(text, "([w])ʌ([sśṣṅñṃyjʦʣpdtnçʐc])", "bʌ%2")
	text = gsub(text, "([ʌäiueoŏĕ])([nl])([td]̪)", "%1%2̪%3") -- dental assimilation
	text = gsub(text, "([ʌäiueoŏĕ])n([ʈɖ])", "%1ɳ%2") -- retroflex assimilation
	text = gsub(text, "([l])([ʈɖ])", "ɭ%2")
	-- r -> ɾ between vowels
	text = gsub(text, "([ʌʌ̃äaāiuūəãā̃ī̃ĩũū̃ẽõeeo̯o ̤])r([ʌʌ̃äaāiīuūəãā̃ī̃ĩũū̃ẽõeeyo̯o])", "%1ɾ%2")
	text = gsub(text, "([śsnlcjzʐçʦʣkhptdgb])([vw])([aāäiīuūoeĩ])([cspdtngkbrjyṇṣśṇɾṅñṃ])", "%1w%3%4")
	text = gsub(text, "([n])([ʌ])ʣ([ʱ]?)([ʌäiueo])", "%1%2ʣ%3%4")
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([k])([ʰ])", "%1kʰ")
	--text = gsub(text, '([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([d]͡)(z)([ʱ]?)', '%1(d)z')
	-- post-vocalic changes to aspirated / breathy stops
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])b(ʱ?)([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤jː])", "%1b%3")
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])pʰ", "%1ɸ")
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõjɔ̃e̤ː])d̪ʱ", "%1d̪")
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃je̤ː])ɡ(ʱ?)", "%1ɡ")
	--text = gsub(text, 't͡st͡s(ʰ?)', 't̚t͡s%1')
	--text = gsub(text, 'd͡zd͡z(ʱ?)', 'd̚d͡z%1')
	-- ɦ between vowels
	text = gsub(text, "([ spdtzʱʰɦgkbçʦʣʐrjyɖʈṇṣśṇɾṅñṃ̪])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤])ɦ%2", "%1%2̤ː")
	text = gsub(text, "([ʌä])ɦä", "ä̤ː")
	text = gsub(text, "([ʌʌ̃])ɦä̃", "ä̤̃ː")
	text = gsub(text, "äɦ([äʌ])", "ä̤ː")
	text = gsub(text, "([ʌäeoɔæɛʌ̃ä̃ẽõɔ̃e̤])ɦ([iuĩũ])", "%1%2")
	text = gsub(text, "([iĩ])ɦ([ũu])", "%1%2")
	text = gsub(text, "([uũ])ɦ([iĩ])", "%1%2")
	text = gsub(text, "([uũ])ɦ([äʌ])", "%1%2")
	text = gsub(text, "([ʌʌ̃iĩ])ɦ([eẽoõ])", "%1%2̤")
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̯e̤ː])ɖ(ʱ?)(j?)", "%1ɽ%3")
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])ɦr([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])", "%1ɾ%2")
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])ɦ([n])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])", "%1̤ː%2%3")
	text = gsub(text, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])ɦ([ml])", "%1%2%2")
	-- These two rules only fire after a literal "#" in the text.
	text = gsub(text, "(#)([spdtzʱʰɦgkbrṇṣśʂʈɖçʐʦʣnṇʌäiuoɔæɛʌ̃ä̃ĩũõɔ̃e̤ːɾṅñ]?)jʌ", "%2e")
	text = gsub(text, "(#)([spdtzʱʰɦgkbrṇṣśʂʈɖçʦʣʐnʌäiuoɔæɛʌ̃ä̃ĩũõɔ̃e̤ːṇṅɾñ]?)wʌ", "%2o")
	text = gsub(text, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])kʂ([ʌe])", "%1k̚t͡sʰe") -- kṣ ligature
	text = gsub(text, "()kʂ", "t͡sʰ") -- kṣ initial
	-- Post-vocalic consonant + wʌ / jʌ: double the consonant and turn the
	-- glide + ʌ into o / e.
	-- Fixed: the aspiration capture was "(ʰʱ?)", which required a literal ʰ
	-- and so skipped unaspirated consonants; it now matches the jʌ rule.
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([spdtzɦgɡkbrṇṣśʃʂʈɖʦʣçʐnṇɾṅñ])(̪?)([ʰʱ]?)wʌ", "%1%2%2%3%4o")
	text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([spdtzɦgɡkbrṇṣśʂʃʈɖçʐʦʣnṇɾṅñ])(̪?)([ʰʱ]?)jʌ", "%1%2%2%3%4e")
	text = gsub(text, "ʂ", "s")
	text = gsub(text, "ɦri", "ri")
	text = gsub(text, "kʌn$", "kʌnʌ")
	text = gsub(text, "(...)kʌn ", "%1kʌnʌ ")
	text = gsub(text, "nʌʌ$", "nʌ")
	text = gsub(text, "ä̤ː̃", "ä̤̃ː")
	text = gsub(text, "nɡj", "ŋɡj")
	return mw.ustring.toNFC(text)
end
return export