-- Shorthand for mw.ustring.char; used below to turn a numeric code point into
-- a string that can serve as a table key.
local u = mw.ustring.char
-- The table this file returns; each section below attaches one field to it.
local data = {}
-- Code point ranges, stored as one flat list of (start, end) pairs: odd slots
-- hold a range's start and the following even slot holds its end. A single
-- code point is written as a pair whose two members are equal (see 0x3007).
-- Nested {start, end} subtables are deliberately avoided to save memory.
-- NOTE(review): the values look like the assigned spans of the CJK ideograph
-- blocks; confirm against the Unicode version this module targets.
do
	local ranges = {
		0x3007, 0x3007,
		0x3400, 0x4DBF,
		0x4E00, 0x9FFF,
		0xF900, 0xFA6D,
		0xFA70, 0xFAD9,
		0x20000, 0x2A6DF,
		0x2A700, 0x2B739,
		0x2B740, 0x2B81D,
		0x2B820, 0x2CEA1,
		0x2CEB0, 0x2EBE0,
		0x2EBF0, 0x2EE5D,
		0x2F800, 0x2FA1D,
		0x30000, 0x3134A,
		0x31350, 0x323AF,
	}
	-- Store the number of list elements (two per range) in field "n".
	ranges.n = #ranges
	data.ranges = ranges
end
-- Characters not included in Unicode, which must be described using IDS.
-- Keys are ideographic description sequences; each value is a CJK character
-- followed by a two-digit number (presumably the radical and the residual
-- stroke count used for sorting -- confirm against the consumer of this table).
data.unsupported = {
["⿰丿丨"] = "丿01",
["⿱𠆢𬼽"] = "人03",
["⿱⿻丅⿱冖⿰丶丶双"] = "冖08",
["⿱⿻丅⿱冖⿰丶丶令"] = "冖09",
["⿰十⿺专丶"] = "十05",
["⿰土肅"] = "土13",
["⿰⿸声耳殳"] = "士14",
["⿻㇒夫"] = "大02",
["⿰女人"] = "女02",
["⿱女子"] = "女03",
["⿱𡩧⿺進⿰貝招"] = "宀37",
["⿰扌幸"] = "手08",
["⿰扌𦍒"] = "手09",
-- NOTE(review): every other value is a single character + number pair; this
-- one holds two ("手10" and "車06"). Verify whether "車06" alone was intended.
["⿰车匡"] = "手10車06",
["⿱犬一"] = "犬01",
["⿰男也"] = "田05",
["⿰纟恋"] = "糹10",
["⿱䒑合"] = "艸06",
-- NOTE(review): this key contains the Latin letters "SIR", unlike every other
-- key here; verify that the sequence has not been corrupted.
["⿰⿳⿰SIR木阝"] = "邑11",
["⿳⻗人𰆊"] = "雨04",
-- The next two keys share one value.
["⿱成龙"] = "龍06",
["⿱成龍"] = "龍06",
}
-- Ideographic description characters (IDCs), each paired with its arity: the
-- number of operands (characters or nested sequences) that must follow it.
--
-- The five operators added in Unicode 15.1 (U+2FFC..U+2FFF and U+31EF) are
-- written as decimal UTF-8 byte escapes rather than literal characters, so the
-- keys cannot be silently dropped by editors or tooling that do not know these
-- code points (an empty-string key would make all five entries collapse into
-- one and leave the operators unrecognised).
data.ids = {
	["⿰"] = 2, -- left-to-right
	["⿱"] = 2, -- above-to-below
	["⿲"] = 3, -- left-to-middle and right
	["⿳"] = 3, -- above-to-middle and below
	["⿴"] = 2, -- full surround
	["⿵"] = 2, -- surround from above
	["⿶"] = 2, -- surround from below
	["⿷"] = 2, -- surround from left
	["⿸"] = 2, -- surround from upper left
	["⿹"] = 2, -- surround from upper right
	["⿺"] = 2, -- surround from lower left
	["⿻"] = 2, -- overlaid
	["\226\191\188"] = 2, -- U+2FFC: surround from right
	["\226\191\189"] = 2, -- U+2FFD: surround from lower right
	["\226\191\190"] = 1, -- U+2FFE: horizontal reflection
	["\226\191\191"] = 1, -- U+2FFF: rotation
	-- Subtraction takes two operands (the base and the part removed from it);
	-- Unicode classifies it as a binary operator, not a unary one.
	["\227\135\175"] = 2 -- U+31EF: subtraction
}
-- Replacement table: each key is a single enclosed or squared symbol and each
-- value is the plain character string it is replaced with. Values may be longer
-- than one character (e.g. the era names and "株式会社" below). More entries
-- are added to this table by the helper calls that follow the literal.
data.preconvert = {
-- Enclosed CJK Letters and Months
["㈠"] = "一",
["㈡"] = "二",
["㈢"] = "三",
["㈣"] = "四",
["㈤"] = "五",
["㈥"] = "六",
["㈦"] = "七",
["㈧"] = "八",
["㈨"] = "九",
["㈩"] = "十",
["㈪"] = "月",
["㈫"] = "火",
["㈬"] = "水",
["㈭"] = "木",
["㈮"] = "金",
["㈯"] = "土",
["㈰"] = "日",
["㈱"] = "株",
["㈲"] = "有",
["㈳"] = "社",
["㈴"] = "名",
["㈵"] = "特",
["㈶"] = "財",
["㈷"] = "祝",
["㈸"] = "労",
["㈹"] = "代",
["㈺"] = "呼",
["㈻"] = "学",
["㈼"] = "監",
["㈽"] = "企",
["㈾"] = "資",
["㈿"] = "協",
["㉀"] = "祭",
["㉁"] = "休",
["㉂"] = "自",
["㉃"] = "至",
["㉄"] = "問",
["㉅"] = "幼",
["㉆"] = "文",
["㉇"] = "箏",
["㊀"] = "一",
["㊁"] = "二",
["㊂"] = "三",
["㊃"] = "四",
["㊄"] = "五",
["㊅"] = "六",
["㊆"] = "七",
["㊇"] = "八",
["㊈"] = "九",
["㊉"] = "十",
["㊊"] = "月",
["㊋"] = "火",
["㊌"] = "水",
["㊍"] = "木",
["㊎"] = "金",
["㊏"] = "土",
["㊐"] = "日",
["㊑"] = "株",
["㊒"] = "有",
["㊓"] = "社",
["㊔"] = "名",
["㊕"] = "特",
["㊖"] = "財",
["㊗"] = "祝",
["㊘"] = "労",
["㊙"] = "秘",
["㊚"] = "男",
["㊛"] = "女",
["㊜"] = "適",
["㊝"] = "優",
["㊞"] = "印",
["㊟"] = "注",
["㊠"] = "項",
["㊡"] = "休",
["㊢"] = "写",
["㊣"] = "正",
["㊤"] = "上",
["㊥"] = "中",
["㊦"] = "下",
["㊧"] = "左",
["㊨"] = "右",
["㊩"] = "医",
["㊪"] = "宗",
["㊫"] = "学",
["㊬"] = "監",
["㊭"] = "企",
["㊮"] = "資",
["㊯"] = "協",
["㊰"] = "夜",
["㋿"] = "令和",
-- CJK Compatibility
["㍻"] = "平成",
["㍼"] = "昭和",
["㍽"] = "大正",
["㍾"] = "明治",
["㍿"] = "株式会社",
-- Enclosed Ideographic Supplement
["🈐"] = "手",
["🈑"] = "字",
["🈒"] = "双",
["🈔"] = "二",
["🈕"] = "多",
["🈖"] = "解",
["🈗"] = "天",
["🈘"] = "交",
["🈙"] = "映",
["🈚"] = "無",
["🈛"] = "料",
["🈜"] = "前",
["🈝"] = "後",
["🈞"] = "再",
["🈟"] = "新",
["🈠"] = "初",
["🈡"] = "終",
["🈢"] = "生",
["🈣"] = "販",
["🈤"] = "声",
["🈥"] = "吹",
["🈦"] = "演",
["🈧"] = "投",
["🈨"] = "捕",
["🈩"] = "一",
["🈪"] = "三",
["🈫"] = "遊",
["🈬"] = "左",
["🈭"] = "中",
["🈮"] = "右",
["🈯"] = "指",
["🈰"] = "走",
["🈱"] = "打",
["🈲"] = "禁",
["🈳"] = "空",
["🈴"] = "合",
["🈵"] = "満",
["🈶"] = "有",
["🈷"] = "月",
["🈸"] = "申",
["🈹"] = "割",
["🈺"] = "営",
["🈻"] = "配",
["🉀"] = "本",
["🉁"] = "三",
["🉂"] = "二",
["🉃"] = "安",
["🉄"] = "点",
["🉅"] = "打",
["🉆"] = "盗",
["🉇"] = "勝",
["🉈"] = "敗",
["🉐"] = "得",
["🉑"] = "可",
["🉠"] = "福",
["🉡"] = "祿",
["🉢"] = "壽",
["🉣"] = "喜",
["🉤"] = "囍",
["🉥"] = "財",
}
-- Register a run of consecutive code points in data.preconvert.
-- The code point `from` maps to the number `offset` followed by `char`, the
-- next code point to `offset + 1` followed by `char`, and so on up to and
-- including `to`.
--   from, to: first and last code point of the run (numbers)
--   offset:   number assigned to the first code point
--   char:     string appended after each number
local function add_sequences(from, to, offset, char)
	local preconvert = data.preconvert
	-- Constant distance between a code point and the number it stands for.
	local shift = offset - from
	for codepoint = from, to do
		preconvert[u(codepoint)] = (codepoint + shift) .. char
	end
end
-- Numbered symbols: each run of consecutive code points converts to a running
-- number followed by a fixed character.
for _, run in ipairs{
	{0x32C0, 0x32CB, 1, "月"}, -- "1月" through "12月"
	{0x3358, 0x3370, 0, "点"}, -- "0点" through "24点"
	{0x33E0, 0x33FE, 1, "日"}, -- "1日" through "31日"
} do
	add_sequences(run[1], run[2], run[3], run[4])
end
-- The 214 radicals as ordinary characters, in numerical order, so that
-- data.radicals[n] is radical number n. add_radicals (below) uses these
-- indices to map radical-block symbols onto the characters listed here.
data.radicals = {
"一", "丨", "丶", "丿", "乙", "亅", "二", "亠", "人", "儿", "入", "八", "冂", "冖", "冫", "几", "凵", "刀", "力", "勹", "匕", "匚", "匸", "十", "卜", "卩", "厂", "厶", "又", "口", "囗", "土", "士", "夂", "夊", "夕", "大", "女", "子", "宀", "寸", "小", "尢", "尸", "屮", "山", "巛", "工", "己", "巾", "干", "幺", "广", "廴", "廾", "弋", "弓", "彐", "彡", "彳", "心", "戈", "戶", "手", "支", "攴", "文", "斗", "斤", "方", "无", "日", "曰", "月", "木", "欠", "止", "歹", "殳", "毋", "比", "毛", "氏", "气", "水", "火", "爪", "父", "爻", "爿", "片", "牙", "牛", "犬", "玄", "玉", "瓜", "瓦", "甘", "生", "用", "田", "疋", "疒", "癶", "白", "皮", "皿", "目", "矛", "矢", "石", "示", "禸", "禾", "穴", "立", "竹", "米", "糸", "缶", "网", "羊", "羽", "老", "而", "耒", "耳", "聿", "肉", "臣", "自", "至", "臼", "舌", "舛", "舟", "艮", "色", "艸", "虍", "虫", "血", "行", "衣", "襾", "見", "角", "言", "谷", "豆", "豕", "豸", "貝", "赤", "走", "足", "身", "車", "辛", "辰", "辵", "邑", "酉", "釆", "里", "金", "長", "門", "阜", "隶", "隹", "雨", "靑", "非", "面", "革", "韋", "韭", "音", "頁", "風", "飛", "食", "首", "香", "馬", "骨", "高", "髟", "鬥", "鬯", "鬲", "鬼", "魚", "鳥", "鹵", "鹿", "麥", "麻", "黃", "黍", "黑", "黹", "黽", "鼎", "鼓", "鼠", "鼻", "齊", "齒", "龍", "龜", "龠"
}
-- Register radical symbols in data.preconvert.
--   radicals: table mapping a symbol (string) to an index into data.radicals
-- Each symbol is mapped to the character stored at that index; an index with
-- no entry in data.radicals yields nil, i.e. no mapping is stored.
local function add_radicals(radicals)
	local preconvert, base_forms = data.preconvert, data.radicals
	for symbol, number in pairs(radicals) do
		preconvert[symbol] = base_forms[number]
	end
end
-- Kangxi radicals
-- The symbols run consecutively from U+2F00 (radical 1) to U+2FD5 (radical
-- 214), so radical number N is the code point 0x2F00 + N - 1. The mapping is
-- generated from that rule instead of listing all 214 pairs by hand.
do
	local kangxi = {}
	for number = 1, 214 do
		kangxi[u(0x2F00 + number - 1)] = number
	end
	add_radicals(kangxi)
end
-- CJK Radicals Supplement
-- Variant and simplified radical forms, each mapped to the number of the
-- radical it belongs to (an index into data.radicals). Several variants may
-- share one number. All net forms (⺱ ⺲ ⺳ ⺴ ⺵) belong to radical 122 (网);
-- ⺲ only looks like the eye variant ⺫, which is radical 109.
add_radicals{
	["⺀"] = 3, ["⺁"] = 27, ["⺂"] = 5, ["⺃"] = 5, ["⺄"] = 5,
	["⺅"] = 9, ["⺆"] = 13, ["⺇"] = 16, ["⺈"] = 18, ["⺉"] = 18,
	["⺊"] = 25, ["⺋"] = 26, ["⺌"] = 42, ["⺍"] = 42, ["⺎"] = 43,
	["⺏"] = 43, ["⺐"] = 43, ["⺑"] = 43, ["⺒"] = 49, ["⺓"] = 52,
	["⺔"] = 58, ["⺕"] = 58, ["⺖"] = 61, ["⺗"] = 61, ["⺘"] = 64,
	["⺙"] = 66, ["⺛"] = 71, ["⺜"] = 72, ["⺝"] = 74, ["⺞"] = 78,
	["⺟"] = 80, ["⺠"] = 83, ["⺡"] = 85, ["⺢"] = 85, ["⺣"] = 86,
	["⺤"] = 87, ["⺥"] = 87, ["⺦"] = 90, ["⺧"] = 93, ["⺨"] = 94,
	["⺩"] = 96, ["⺪"] = 103, ["⺫"] = 109, ["⺬"] = 113, ["⺭"] = 113,
	["⺮"] = 118, ["⺯"] = 120, ["⺰"] = 120, ["⺱"] = 122, ["⺲"] = 122,
	["⺳"] = 122, ["⺴"] = 122, ["⺵"] = 122, ["⺶"] = 123, ["⺷"] = 123,
	["⺸"] = 123, ["⺹"] = 125, ["⺺"] = 129, ["⺻"] = 129, ["⺼"] = 130,
	["⺽"] = 134, ["⺾"] = 140, ["⺿"] = 140, ["⻀"] = 140, ["⻁"] = 141,
	["⻂"] = 145, ["⻃"] = 146, ["⻄"] = 146, ["⻅"] = 147, ["⻆"] = 148,
	["⻇"] = 148, ["⻈"] = 149, ["⻉"] = 154, ["⻊"] = 157, ["⻋"] = 159,
	["⻌"] = 162, ["⻍"] = 162, ["⻎"] = 162, ["⻏"] = 163, ["⻐"] = 167,
	["⻑"] = 168, ["⻒"] = 168, ["⻓"] = 168, ["⻔"] = 169, ["⻕"] = 170,
	["⻖"] = 170, ["⻗"] = 173, ["⻘"] = 174, ["⻙"] = 178, ["⻚"] = 181,
	["⻛"] = 182, ["⻜"] = 183, ["⻝"] = 184, ["⻞"] = 184, ["⻟"] = 184,
	["⻠"] = 184, ["⻡"] = 185, ["⻢"] = 187, ["⻣"] = 188, ["⻤"] = 194,
	["⻥"] = 195, ["⻦"] = 196, ["⻧"] = 197, ["⻨"] = 199, ["⻩"] = 201,
	["⻪"] = 205, ["⻫"] = 210, ["⻬"] = 210, ["⻭"] = 211, ["⻮"] = 211,
	["⻯"] = 212, ["⻰"] = 212, ["⻱"] = 213, ["⻲"] = 213, ["⻳"] = 213
}
-- Hand the assembled table back to whoever loads this file.
return data