Module:Mymr-sortkey
Jump to navigation
Jump to search
- The following documentation is located at Module:Mymr-sortkey/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This is a UCA-like sort-key module that remaps characters into the PUA in order to reorder them. It focuses on the Myanmar (Burmese) script, which is used by many languages in inconsistent ways.
- UCA = Unicode Collation Algorithm. Data from https://unicode.org/reports/tr10/#Allkeys, filtered (grep) for "MYANMAR".
- PUA = Private Use Area -- a Unicode block.
-- Table that collects the functions this module exposes.
local export = {}
-- Local aliases for functions of Scribunto's mw.ustring library, which
-- operate on whole characters rather than on bytes.
local gsub = mw.ustring.gsub
local u = mw.ustring.char
-- NOTE(review): `find` and `sub` are not referenced in the visible part of
-- this module; confirm whether they are still needed.
local find = mw.ustring.find
local sub = mw.ustring.sub
-- Maps individual Myanmar-script characters to Private Use Area characters
-- in the range U+E000..U+E119. Replacing each character of a string with
-- its mapped value yields a key whose plain code-point order is the
-- intended collation order. Characters absent from this table have no
-- entry and are therefore left alone by a table-driven gsub.
-- Code points are assigned in runs with unused gaps between them; the
-- section notes below are a reviewer's reading of the groups, not an
-- authoritative classification.
local glyphTable = {
-- U+E000..: appears to be the anusvara, visarga and dot-below signs.
["ံ"] = u(0xE000), ["း"] = u(0xE001), ["့"] = u(0xE002),
-- U+E010..: appears to be punctuation and symbols.
["၊"] = u(0xE010), ["။"] = u(0xE011), ["၌"] = u(0xE012), ["၍"] = u(0xE013),
["၎"] = u(0xE014), ["၏"] = u(0xE015), ["႞"] = u(0xE016), ["႟"] = u(0xE017),
["꩷"] = u(0xE018), ["꩸"] = u(0xE019), ["꩹"] = u(0xE01A), ["ꧦ"] = u(0xE01B), ["ꩰ"] = u(0xE01C),
-- U+E020..: digits; each row interleaves what look like three digit sets,
-- so equal digit values from different sets sort next to each other.
["၀"] = u(0xE020), ["႐"] = u(0xE021), ["꧰"] = u(0xE022), ["၁"] = u(0xE023), ["႑"] = u(0xE024), ["꧱"] = u(0xE025),
["၂"] = u(0xE026), ["႒"] = u(0xE027), ["꧲"] = u(0xE028), ["၃"] = u(0xE029), ["႓"] = u(0xE02A), ["꧳"] = u(0xE02B),
["၄"] = u(0xE02C), ["႔"] = u(0xE02D), ["꧴"] = u(0xE02E), ["၅"] = u(0xE02F), ["႕"] = u(0xE030), ["꧵"] = u(0xE031),
["၆"] = u(0xE032), ["႖"] = u(0xE033), ["꧶"] = u(0xE034), ["၇"] = u(0xE035), ["႗"] = u(0xE036), ["꧷"] = u(0xE037),
["၈"] = u(0xE038), ["႘"] = u(0xE039), ["꧸"] = u(0xE03A), ["၉"] = u(0xE03B), ["႙"] = u(0xE03C), ["꧹"] = u(0xE03D),
-- U+E040..U+E0C2: appears to be consonant letters, with language-specific
-- variants placed after a base letter and the medial signs placed next
-- to the letters they seem to derive from.
["က"] = u(0xE040), ["ၵ"] = u(0xE041), ["ခ"] = u(0xE042), ["ၶ"] = u(0xE043),
["ဂ"] = u(0xE044), ["ၷ"] = u(0xE045), ["ꩠ"] = u(0xE046), ["ꧩ"] = u(0xE047),
["ဃ"] = u(0xE048), ["ꧠ"] = u(0xE049), ["ꧪ"] = u(0xE04A), ["င"] = u(0xE04B), ["ၚ"] = u(0xE04C),
["စ"] = u(0xE050), ["ၸ"] = u(0xE051), ["ꩡ"] = u(0xE052),
["ဆ"] = u(0xE053), ["ꧡ"] = u(0xE054), ["ꩢ"] = u(0xE055), ["ꩾ"] = u(0xE056),
["ဇ"] = u(0xE057), ["ꩣ"] = u(0xE058), ["ꧫ"] = u(0xE059), ["ၹ"] = u(0xE05A), ["ꩲ"] = u(0xE05B),
["ဈ"] = u(0xE05C), ["ၛ"] = u(0xE05D), ["ꧢ"] = u(0xE05E), ["ꩤ"] = u(0xE05F), ["ꧬ"] = u(0xE060),
["ၡ"] = u(0xE061), ["ꩿ"] = u(0xE062),
["ဉ"] = u(0xE063), ["ၺ"] = u(0xE064), ["ꩥ"] = u(0xE065), ["ꧧ"] = u(0xE066), ["ည"] = u(0xE067),
["ဋ"] = u(0xE070), ["ꩦ"] = u(0xE071), ["ဌ"] = u(0xE072), ["ꩧ"] = u(0xE073),
["ဍ"] = u(0xE074), ["ꩨ"] = u(0xE075), ["ꧭ"] = u(0xE076), ["ဎ"] = u(0xE077), ["ꩩ"] = u(0xE078), ["ꧮ"] = u(0xE079),
["ဏ"] = u(0xE07A), ["ၮ"] = u(0xE07B), ["ꧣ"] = u(0xE07C), ["ꧯ"] = u(0xE07D),
["တ"] = u(0xE080), ["ထ"] = u(0xE081), ["ဒ"] = u(0xE082), ["ၻ"] = u(0xE083), ["ꧻ"] = u(0xE084),
["ဓ"] = u(0xE085), ["ꩪ"] = u(0xE086), ["ꧼ"] = u(0xE087),
["န"] = u(0xE088), ["ၼ"] = u(0xE089), ["ꩫ"] = u(0xE08A), ["ၞ"] = u(0xE08B),
["ပ"] = u(0xE090), ["ဖ"] = u(0xE091), ["ၽ"] = u(0xE092),
["ၾ"] = u(0xE093), ["ꩯ"] = u(0xE094), ["ႎ"] = u(0xE095), ["ꧨ"] = u(0xE096),
["ဗ"] = u(0xE097), ["ၿ"] = u(0xE098), ["ꧽ"] = u(0xE099),
["ဘ"] = u(0xE09A), ["ꧤ"] = u(0xE09B), ["ꧾ"] = u(0xE09C), ["မ"] = u(0xE09D), ["ၟ"] = u(0xE09E),
["ယ"] = u(0xE0A0), ["ျ"] = u(0xE0A1), ["ရ"] = u(0xE0A2), ["ꩳ"] = u(0xE0A3), ["ꩺ"] = u(0xE0A4), ["ြ"] = u(0xE0A5),
["လ"] = u(0xE0A6), ["ၠ"] = u(0xE0A7), ["ဝ"] = u(0xE0A8), ["ွ"] = u(0xE0A9), ["ႂ"] = u(0xE0AA),
["ႀ"] = u(0xE0B0), ["ၐ"] = u(0xE0B1), ["ၑ"] = u(0xE0B2), ["ၥ"] = u(0xE0B3), ["သ"] = u(0xE0B4), ["ꩬ"] = u(0xE0B5),
["ဟ"] = u(0xE0B6), ["ႁ"] = u(0xE0B7), ["ꩭ"] = u(0xE0B8), ["ှ"] = u(0xE0B9), ["ꩮ"] = u(0xE0BA), ["ꩱ"] = u(0xE0BB),
["ဠ"] = u(0xE0BC), ["ꧺ"] = u(0xE0BD), ["ၜ"] = u(0xE0BE), ["ၝ"] = u(0xE0BF),
["ၯ"] = u(0xE0C0), ["ၰ"] = u(0xE0C1), ["ၦ"] = u(0xE0C2),
-- U+E0D0..: appears to be the independent vowel letters.
["အ"] = u(0xE0D0), ["ဢ"] = u(0xE0D1), ["ဣ"] = u(0xE0D2), ["ဤ"] = u(0xE0D3), ["ဥ"] = u(0xE0D4), ["ဦ"] = u(0xE0D5),
["ၒ"] = u(0xE0D6), ["ၓ"] = u(0xE0D7), ["ၔ"] = u(0xE0D8), ["ၕ"] = u(0xE0D9),
["ဧ"] = u(0xE0DA), ["ဨ"] = u(0xE0DB), ["ဩ"] = u(0xE0DC), ["ဪ"] = u(0xE0DD),
-- U+E0E0..: appears to be the dependent vowel signs.
["ာ"] = u(0xE0E0), ["ါ"] = u(0xE0E1), ["ႃ"] = u(0xE0E2), ["ၲ"] = u(0xE0E3), ["ႜ"] = u(0xE0E4),
["ိ"] = u(0xE0E5), ["ၱ"] = u(0xE0E6), ["ီ"] = u(0xE0E7), ["ဳ"] = u(0xE0E8),
["ု"] = u(0xE0E9), ["ၳ"] = u(0xE0EA), ["ၴ"] = u(0xE0EB), ["ူ"] = u(0xE0EC),
["ၖ"] = u(0xE0F0), ["ၗ"] = u(0xE0F1), ["ၘ"] = u(0xE0F2), ["ၙ"] = u(0xE0F3),
["ေ"] = u(0xE0F4), ["ႄ"] = u(0xE0F5), ["ဵ"] = u(0xE0F6), ["ႅ"] = u(0xE0F7),
["ဲ"] = u(0xE0F8), ["ႝ"] = u(0xE0F9), ["ႆ"] = u(0xE0FA),
["ဴ"] = u(0xE0FB), ["ၢ"] = u(0xE0FC), ["ၧ"] = u(0xE0FD), ["ၨ"] = u(0xE0FE),
-- U+E100..: appears to be the virama, asat, tone marks and other trailing
-- signs, which sort after everything else.
["ꧥ"] = u(0xE100), ["္"] = u(0xE101), ["်"] = u(0xE102), ["ၣ"] = u(0xE103), ["ၤ"] = u(0xE104),
["ၩ"] = u(0xE105), ["ၪ"] = u(0xE106), ["ၫ"] = u(0xE107), ["ၬ"] = u(0xE108), ["ၭ"] = u(0xE109),
["ႇ"] = u(0xE10A), ["ႋ"] = u(0xE10B), ["ႈ"] = u(0xE10C), ["ႌ"] = u(0xE10D), ["ႍ"] = u(0xE10E),
["ႉ"] = u(0xE10F), ["ႊ"] = u(0xE110), ["ႏ"] = u(0xE111), ["ႚ"] = u(0xE112), ["ႛ"] = u(0xE113),
["ꩻ"] = u(0xE114), ["ꩼ"] = u(0xE115), ["ꩽ"] = u(0xE116), ["ꩴ"] = u(0xE117), ["ꩵ"] = u(0xE118), ["ꩶ"] = u(0xE119),
}
--- Build a sort key for a term written in the Myanmar script.
-- Every character that has an entry in `glyphTable` is replaced by its
-- Private Use Area counterpart; all other characters are kept unchanged.
-- @param text  The term to convert. May be nil.
-- @param lang  Language code. "shn", "tjl" and "kht" get an extra
--              substitution (see below); any other value, including nil,
--              does not.
-- @param sc    Script code. When given and not "Mymr", the text is only
--              uppercased and no Myanmar remapping is applied.
-- @return The sort key string, or nil when `text` is nil (or false).
function export.makeSortKey(text, lang, sc)
	-- Guard against missing text before anything else. Previously this check
	-- came after the non-Mymr branch, so a nil text combined with a foreign
	-- script code reached mw.ustring.upper(nil) and raised an error instead
	-- of returning nil.
	if not text then
		return nil
	end

	-- Text in another script is not remapped, only case-folded.
	if sc and sc ~= "Mymr" then
		return mw.ustring.upper(text)
	end

	-- Normalise the two-character spelling to the single letter so that both
	-- spellings produce the same key.
	text = gsub(text, "ဥ".."ီ", "ဦ")
	-- Expand the single character into the three-character sequence so it is
	-- weighted through the ordinary table entries of its parts.
	text = gsub(text, "ဿ", "သ္သ")

	if lang == "shn" or lang == "tjl" or lang == "kht" then
		-- For these languages the character is given the weight U+E0E3 (the
		-- one glyphTable assigns to "ၲ") instead of its default table entry.
		text = gsub(text, "ၢ", u(0xE0E3)) -- vowel aa with final
	end

	-- Table-driven replacement: characters without an entry stay as they are.
	text = gsub(text, ".", glyphTable)
	return text
end
--- Render the positional arguments of `frame` as a bulleted wikitext list,
-- ordered by sort key, with each term followed by its key in <code> tags.
-- @param frame  Frame object whose `args` holds the terms to sort.
-- @return A single string: one "\n* term (<code>key</code>)" entry per term.
function export.showSorting(frame)
	-- Copy the positional arguments into a plain array that can be sorted.
	local entries = {}
	for _, arg in ipairs(frame.args) do
		entries[#entries + 1] = arg
	end

	-- Wrapped with Module:fun's memoize; presumably this caches the key per
	-- term so repeated comparisons do not recompute it (verify in Module:fun).
	local keyOf = require("Module:fun").memoize(export.makeSortKey)

	table.sort(entries, function(left, right)
		return keyOf(left) < keyOf(right)
	end)

	-- Build the output lines in sorted order.
	local lines = {}
	for idx = 1, #entries do
		local entry = entries[idx]
		lines[idx] = "\n* " .. entry .. " (<code>" .. keyOf(entry) .. "</code>)"
	end
	return table.concat(lines)
end
return export