-- Module:Hani-sortkey/data/core
--
-- From Wiktionary, the free dictionary
-- Jump to navigation | Jump to search


local u = mw.ustring.char

local data = {}

-- Codepoint ranges, kept as one flat list of (start, end) pairs: each odd
-- index holds the first codepoint of a range and the even index right after
-- it holds the last one. A one-codepoint range simply repeats the value.
-- Note: deliberately flat (no per-range subtables) to save memory.
do
	local ranges = {
		0x3007, 0x3007,
		0x3400, 0x4DBF,
		0x4E00, 0x9FFF,
		0xF900, 0xFA6D,
		0xFA70, 0xFAD9,
		0x20000, 0x2A6DF,
		0x2A700, 0x2B739,
		0x2B740, 0x2B81D,
		0x2B820, 0x2CEA1,
		0x2CEB0, 0x2EBE0,
		0x2EBF0, 0x2EE5D,
		0x2F800, 0x2FA1D,
		0x30000, 0x3134A,
		0x31350, 0x323AF
	}
	-- Store the number of list elements (two per range) alongside the data.
	ranges.n = #ranges
	data.ranges = ranges
end

-- Characters not included in Unicode, which must be described using IDS.
-- Keys are complete IDS sequences; each value is a single sort key made of
-- one radical character followed by a two-digit number (presumably the
-- residual stroke count -- confirm against the consuming module).
data.unsupported = {
	["⿰丿丨"] = "丿01",
	["⿱𠆢𬼽"] = "人03",
	["⿱北共"] = "八09",
	["⿱⿻丅⿱冖⿰丶丶双"] = "冖08",
	["⿱⿻丅⿱冖⿰丶丶令"] = "冖09",
	["⿰十⿺专丶"] = "十05",
	["⿰土肅"] = "土13",
	["⿰⿸声耳殳"] = "士14",
	["⿻㇒夫"] = "大02",
	["⿰女人"] = "女02",
	["⿱女子"] = "女03",
	["⿱𡩧⿺進⿰貝招"] = "宀37",
	["⿰扌幸"] = "手08",
	["⿰扌𦍒"] = "手09",
	-- Was "手10車06": a stray "手10" prefix had been fused onto this value,
	-- which would have filed the character under 手 instead of 車.
	["⿰车匡"] = "車06",
	["⿱犬一"] = "犬01",
	["⿰男也"] = "田05",
	["⿰纟恋"] = "糹10",
	["⿱䒑合"] = "艸06",
	["⿰⿳⿰SIR木阝"] = "邑11",
	["⿳⻗人𰆊"] = "雨04",
	["⿱成龙"] = "龍06",
	["⿱成龍"] = "龍06",
}

-- IDS characters paired to the number of characters which must follow them.
-- The operators are listed grouped by that number and then flattened into
-- the lookup table data.ids[operator] = count.
do
	local operators_by_arity = {
		[1] = {
			"⿾",	-- horizontal reflection
			"⿿",	-- rotation
			"〾",	-- variant but not equivalent
		},
		[2] = {
			"⿰",	-- left-to-right
			"⿱",	-- above-to-below
			"⿴",	-- full surround
			"⿵",	-- surround from above
			"⿶",	-- surround from below
			"⿷",	-- surround from left
			"⿸",	-- surround from upper left
			"⿹",	-- surround from upper right
			"⿺",	-- surround from lower left
			"⿻",	-- overlaid
			"⿼",	-- surround from right
			"⿽",	-- surround from lower right
			"㇯",	-- subtraction
		},
		[3] = {
			"⿲",	-- left-to-middle and right
			"⿳",	-- above-to-middle and below
		},
	}

	local ids = {}
	for arity, operators in pairs(operators_by_arity) do
		for _, operator in ipairs(operators) do
			ids[operator] = arity
		end
	end
	data.ids = ids
end

-- Replacement table: each key is a single enclosed or compatibility
-- character and each value is the plain string it stands for. Values may be
-- longer than one character (e.g. "㋿" maps to "令和").
-- Further entries are added to this table after the literal by
-- add_sequences and add_radicals.
-- NOTE(review): judging by the name, the replacement is presumably applied
-- before sort keys are computed -- confirm against the consuming module.
data.preconvert = {
	-- Enclosed CJK Letters and Months
	["㈠"] = "一",
	["㈡"] = "二",
	["㈢"] = "三",
	["㈣"] = "四",
	["㈤"] = "五",
	["㈥"] = "六",
	["㈦"] = "七",
	["㈧"] = "八",
	["㈨"] = "九",
	["㈩"] = "十",
	["㈪"] = "月",
	["㈫"] = "火",
	["㈬"] = "水",
	["㈭"] = "木",
	["㈮"] = "金",
	["㈯"] = "土",
	["㈰"] = "日",
	["㈱"] = "株",
	["㈲"] = "有",
	["㈳"] = "社",
	["㈴"] = "名",
	["㈵"] = "特",
	["㈶"] = "財",
	["㈷"] = "祝",
	["㈸"] = "労",
	["㈹"] = "代",
	["㈺"] = "呼",
	["㈻"] = "学",
	["㈼"] = "監",
	["㈽"] = "企",
	["㈾"] = "資",
	["㈿"] = "協",
	["㉀"] = "祭",
	["㉁"] = "休",
	["㉂"] = "自",
	["㉃"] = "至",
	["㉄"] = "問",
	["㉅"] = "幼",
	["㉆"] = "文",
	["㉇"] = "箏",
	["㊀"] = "一",
	["㊁"] = "二",
	["㊂"] = "三",
	["㊃"] = "四",
	["㊄"] = "五",
	["㊅"] = "六",
	["㊆"] = "七",
	["㊇"] = "八",
	["㊈"] = "九",
	["㊉"] = "十",
	["㊊"] = "月",
	["㊋"] = "火",
	["㊌"] = "水",
	["㊍"] = "木",
	["㊎"] = "金",
	["㊏"] = "土",
	["㊐"] = "日",
	["㊑"] = "株",
	["㊒"] = "有",
	["㊓"] = "社",
	["㊔"] = "名",
	["㊕"] = "特",
	["㊖"] = "財",
	["㊗"] = "祝",
	["㊘"] = "労",
	["㊙"] = "秘",
	["㊚"] = "男",
	["㊛"] = "女",
	["㊜"] = "適",
	["㊝"] = "優",
	["㊞"] = "印",
	["㊟"] = "注",
	["㊠"] = "項",
	["㊡"] = "休",
	["㊢"] = "写",
	["㊣"] = "正",
	["㊤"] = "上",
	["㊥"] = "中",
	["㊦"] = "下",
	["㊧"] = "左",
	["㊨"] = "右",
	["㊩"] = "医",
	["㊪"] = "宗",
	["㊫"] = "学",
	["㊬"] = "監",
	["㊭"] = "企",
	["㊮"] = "資",
	["㊯"] = "協",
	["㊰"] = "夜",
	["㋿"] = "令和",
	-- CJK Compatibility
	["㍻"] = "平成",
	["㍼"] = "昭和",
	["㍽"] = "大正",
	["㍾"] = "明治",
	["㍿"] = "株式会社",
	-- Enclosed Ideographic Supplement
	["🈐"] = "手",
	["🈑"] = "字",
	["🈒"] = "双",
	["🈔"] = "二",
	["🈕"] = "多",
	["🈖"] = "解",
	["🈗"] = "天",
	["🈘"] = "交",
	["🈙"] = "映",
	["🈚"] = "無",
	["🈛"] = "料",
	["🈜"] = "前",
	["🈝"] = "後",
	["🈞"] = "再",
	["🈟"] = "新",
	["🈠"] = "初",
	["🈡"] = "終",
	["🈢"] = "生",
	["🈣"] = "販",
	["🈤"] = "声",
	["🈥"] = "吹",
	["🈦"] = "演",
	["🈧"] = "投",
	["🈨"] = "捕",
	["🈩"] = "一",
	["🈪"] = "三",
	["🈫"] = "遊",
	["🈬"] = "左",
	["🈭"] = "中",
	["🈮"] = "右",
	["🈯"] = "指",
	["🈰"] = "走",
	["🈱"] = "打",
	["🈲"] = "禁",
	["🈳"] = "空",
	["🈴"] = "合",
	["🈵"] = "満",
	["🈶"] = "有",
	["🈷"] = "月",
	["🈸"] = "申",
	["🈹"] = "割",
	["🈺"] = "営",
	["🈻"] = "配",
	["🉀"] = "本",
	["🉁"] = "三",
	["🉂"] = "二",
	["🉃"] = "安",
	["🉄"] = "点",
	["🉅"] = "打",
	["🉆"] = "盗",
	["🉇"] = "勝",
	["🉈"] = "敗",
	["🉐"] = "得",
	["🉑"] = "可",
	["🉠"] = "福",
	["🉡"] = "祿",
	["🉢"] = "壽",
	["🉣"] = "喜",
	["🉤"] = "囍",
	["🉥"] = "財",
}

--- Register a run of consecutive codepoints as "<number><char>" strings.
-- The codepoint `from` maps to `offset .. char`, the next one to
-- `(offset + 1) .. char`, and so on up to and including `to`.
-- @param from   first codepoint of the run
-- @param to     last codepoint of the run (inclusive)
-- @param offset number assigned to the first codepoint
-- @param char   suffix appended after the number
local function add_sequences(from, to, offset, char)
	local preconvert = data.preconvert
	-- Constant difference between a codepoint and the number it stands for.
	local shift = offset - from
	for codepoint = from, to do
		preconvert[u(codepoint)] = (codepoint + shift) .. char
	end
end

-- Numbered sequences, one row per run: first codepoint, last codepoint,
-- number assigned to the first codepoint, suffix character.
for _, run in ipairs{
	{ 0x32C0, 0x32CB, 1, "月" },	-- "1月" .. "12月"
	{ 0x3358, 0x3370, 0, "点" },	-- "0点" .. "24点"
	{ 0x33E0, 0x33FE, 1, "日" },	-- "1日" .. "31日"
} do
	add_sequences(run[1], run[2], run[3], run[4])
end

-- Radical characters as a 1-based sequence: data.radicals[n] is the character
-- for radical number n. add_radicals looks entries up by this index, so the
-- order of the list is significant (the calls in this file use indices 1-214).
data.radicals = {
	"一", "丨", "丶", "丿", "乙", "亅", "二", "亠", "人", "儿", "入", "八", "冂", "冖", "冫", "几", "凵", "刀", "力", "勹", "匕", "匚", "匸", "十", "卜", "卩", "厂", "厶", "又", "口", "囗", "土", "士", "夂", "夊", "夕", "大", "女", "子", "宀", "寸", "小", "尢", "尸", "屮", "山", "巛", "工", "己", "巾", "干", "幺", "广", "廴", "廾", "弋", "弓", "彐", "彡", "彳", "心", "戈", "戶", "手", "支", "攴", "文", "斗", "斤", "方", "无", "日", "曰", "月", "木", "欠", "止", "歹", "殳", "毋", "比", "毛", "氏", "气", "水", "火", "爪", "父", "爻", "爿", "片", "牙", "牛", "犬", "玄", "玉", "瓜", "瓦", "甘", "生", "用", "田", "疋", "疒", "癶", "白", "皮", "皿", "目", "矛", "矢", "石", "示", "禸", "禾", "穴", "立", "竹", "米", "糸", "缶", "网", "羊", "羽", "老", "而", "耒", "耳", "聿", "肉", "臣", "自", "至", "臼", "舌", "舛", "舟", "艮", "色", "艸", "虍", "虫", "血", "行", "衣", "襾", "見", "角", "言", "谷", "豆", "豕", "豸", "貝", "赤", "走", "足", "身", "車", "辛", "辰", "辵", "邑", "酉", "釆", "里", "金", "長", "門", "阜", "隶", "隹", "雨", "靑", "非", "面", "革", "韋", "韭", "音", "頁", "風", "飛", "食", "首", "香", "馬", "骨", "高", "髟", "鬥", "鬯", "鬲", "鬼", "魚", "鳥", "鹵", "鹿", "麥", "麻", "黃", "黍", "黑", "黹", "黽", "鼎", "鼓", "鼠", "鼻", "齊", "齒", "龍", "龜", "龠"
}

--- Register radical-form characters in data.preconvert.
-- @param radicals table mapping a character to a radical number; each
--   character is converted to the data.radicals entry at that index.
local function add_radicals(radicals)
	local preconvert, by_number = data.preconvert, data.radicals
	for variant, number in pairs(radicals) do
		preconvert[variant] = by_number[number]
	end
end

-- Kangxi radicals
-- The codepoints U+2F00 through U+2FD5 correspond one-to-one, in order, to
-- radical numbers 1 through 214, so the mapping is generated rather than
-- written out: U+2F00 -> data.radicals[1], ..., U+2FD5 -> data.radicals[214].
for codepoint = 0x2F00, 0x2FD5 do
	data.preconvert[u(codepoint)] = data.radicals[codepoint - 0x2F00 + 1]
end

-- CJK Radicals Supplement
-- Each variant radical form is mapped to the number of the radical it
-- belongs to (an index into data.radicals); several forms may share a number.
-- "⺲" is one of the net forms and therefore maps to 122 like its neighbours
-- "⺱", "⺳", "⺴" and "⺵"; it was previously listed as 109, which is the
-- number used for the eye form "⺫".
add_radicals{
	["⺀"] = 3, ["⺁"] = 27, ["⺂"] = 5, ["⺃"] = 5, ["⺄"] = 5,
	["⺅"] = 9, ["⺆"] = 13, ["⺇"] = 16, ["⺈"] = 18, ["⺉"] = 18,
	["⺊"] = 25, ["⺋"] = 26, ["⺌"] = 42, ["⺍"] = 42, ["⺎"] = 43,
	["⺏"] = 43, ["⺐"] = 43, ["⺑"] = 43, ["⺒"] = 49, ["⺓"] = 52,
	["⺔"] = 58, ["⺕"] = 58, ["⺖"] = 61, ["⺗"] = 61, ["⺘"] = 64,
	["⺙"] = 66, ["⺛"] = 71, ["⺜"] = 72, ["⺝"] = 74, ["⺞"] = 78,
	["⺟"] = 80, ["⺠"] = 83, ["⺡"] = 85, ["⺢"] = 85, ["⺣"] = 86,
	["⺤"] = 87, ["⺥"] = 87, ["⺦"] = 90, ["⺧"] = 93, ["⺨"] = 94,
	["⺩"] = 96, ["⺪"] = 103, ["⺫"] = 109, ["⺬"] = 113, ["⺭"] = 113,
	["⺮"] = 118, ["⺯"] = 120, ["⺰"] = 120, ["⺱"] = 122, ["⺲"] = 122,
	["⺳"] = 122, ["⺴"] = 122, ["⺵"] = 122, ["⺶"] = 123, ["⺷"] = 123,
	["⺸"] = 123, ["⺹"] = 125, ["⺺"] = 129, ["⺻"] = 129, ["⺼"] = 130,
	["⺽"] = 134, ["⺾"] = 140, ["⺿"] = 140, ["⻀"] = 140, ["⻁"] = 141,
	["⻂"] = 145, ["⻃"] = 146, ["⻄"] = 146, ["⻅"] = 147, ["⻆"] = 148,
	["⻇"] = 148, ["⻈"] = 149, ["⻉"] = 154, ["⻊"] = 157, ["⻋"] = 159,
	["⻌"] = 162, ["⻍"] = 162, ["⻎"] = 162, ["⻏"] = 163, ["⻐"] = 167,
	["⻑"] = 168, ["⻒"] = 168, ["⻓"] = 168, ["⻔"] = 169, ["⻕"] = 170,
	["⻖"] = 170, ["⻗"] = 173, ["⻘"] = 174, ["⻙"] = 178, ["⻚"] = 181,
	["⻛"] = 182, ["⻜"] = 183, ["⻝"] = 184, ["⻞"] = 184, ["⻟"] = 184,
	["⻠"] = 184, ["⻡"] = 185, ["⻢"] = 187, ["⻣"] = 188, ["⻤"] = 194,
	["⻥"] = 195, ["⻦"] = 196, ["⻧"] = 197, ["⻨"] = 199, ["⻩"] = 201,
	["⻪"] = 205, ["⻫"] = 210, ["⻬"] = 210, ["⻭"] = 211, ["⻮"] = 211,
	["⻯"] = 212, ["⻰"] = 212, ["⻱"] = 213, ["⻲"] = 213, ["⻳"] = 213
}

return data