local p = {}

-- Cache frequently used globals as upvalues for faster access.
local type = type
local ipairs = ipairs
local concat = table.concat
local u_find = mw.ustring.find

-- Ruby template names this module recognises. The order matters: when two
-- names occur equally often, the one listed first wins.
local ruby_templates = { 'Photrans', 'Photrans2', 'Photransa', 'Ruby' }

---Upper-case the first byte of `s` and leave the rest untouched.
---@param s string
---@return string
local function ucfirst(s)
    return s:sub(1, 1):upper()..s:sub(2)
end

-- A ustring character class (`[...]`) matching a single kanji-like character.
local kanji_like_pattern
do
    local kanji_like_unicode_ranges = {
        { 0x2E80, 0x2EFF },   -- CJK Radicals Supplement
        { 0x3005, 0x3007 },   -- "々", "〆", "〇"
        { 0x31C0, 0x31EF },   -- CJK Strokes
        { 0x3400, 0x4DBF },   -- CJK Unified Ideographs Extension A
        { 0x4E00, 0x9FFF },   -- CJK Unified Ideographs
        { 0xF900, 0xFAFF },   -- CJK Compatibility Ideographs
        { 0x20000, 0x2A6DF }, -- CJK Unified Ideographs Extension B
        { 0x2A700, 0x2EE5F }, -- CJK Unified Ideographs Extensions C-I
        { 0x2F800, 0x2FA1F }, -- CJK Compatibility Ideographs Supplement
        { 0x30000, 0x323AF }, -- CJK Unified Ideographs Extensions G-H
    }
    local char = mw.ustring.char
    local parts = {}
    for i, range in ipairs(kanji_like_unicode_ranges) do
        if type(range) == 'table' then
            -- A { first, last } pair becomes a `first-last` class range.
            parts[i] = char(range[1])..'-'..char(range[2])
        else
            -- A bare code point becomes a single class member.
            parts[i] = char(range)
        end
    end
    kanji_like_pattern = '['..concat(parts)..']'
end

-- Matches `{{name|<kanji-like chars>|<text>}}` and captures, in order:
--   1. the template name, without surrounding whitespace;
--   2. the first argument, one or more kanji-like characters;
--   3. the second argument, free of `{`, `}`, `|`, `=` and newlines.
local template_pattern = '{{%s*(%S[^{}|\n]-)%s*|('..kanji_like_pattern..'+)|([^{}|=\n]+)}}'

---Return the ruby template name that occurs most often in `code`.
---
---Only calls matching `template_pattern` are counted, and names are compared
---after upper-casing their first byte. If none of `ruby_templates` occurs,
---'Ruby' is returned. If several names share the highest non-zero count, the
---one listed first in `ruby_templates` is returned.
---@param code string
---@return string
local function get_most_frequent_ruby_template_name(code)
    local count = {}
    for _, name in ipairs(ruby_templates) do
        count[name] = 0
    end

    -- gmatch yields all three captures; only the template name is needed.
    for template_name in mw.ustring.gmatch(code, template_pattern) do
        local capitalized = ucfirst(template_name)
        if count[capitalized] then
            count[capitalized] = count[capitalized] + 1
        end
    end

    -- Start from the documented default so that an input without any ruby
    -- template yields 'Ruby' rather than the first list entry. The strict `>`
    -- keeps the earliest name on ties between non-zero counts.
    local best_name, best_count = 'Ruby', 0
    for _, name in ipairs(ruby_templates) do
        if count[name] > best_count then
            best_name, best_count = name, count[name]
        end
    end
    return best_name
end
---Convert the ruby-template calls in `code` into a single `{{振假名}}` call.
---
---Each call to the selected template that matches `template_pattern` is
---replaced by `arg1(arg2)`. When the call is directly preceded by a kanji-like
---character, that character is kept and followed by `|` before `arg1`. Calls
---to other templates are left untouched. One leading and one trailing newline
---of the converted text are dropped before it is wrapped.
---@param code string
---@param template_name? string template to convert; when omitted it is chosen by `get_most_frequent_ruby_template_name(code)`
---@return string
function p.transform_code(code, template_name)
    -- An explicitly requested name is always written back as `|template=`.
    local template_name_for_writing = template_name
    if not template_name then
        template_name = get_most_frequent_ruby_template_name(code)
    end
    local ucfirst_template_name = ucfirst(template_name)
    -- An auto-detected name is written back only when it is not 'Ruby'.
    if not template_name_for_writing and ucfirst_template_name ~= 'Ruby' then
        template_name_for_writing = template_name
    end

    local parts = {
        '{{振假名',
        template_name_for_writing and '|template='..template_name_for_writing or '',
        '\n|',
    }

    -- Assigning to a single local keeps only the string result of gsub and
    -- discards the substitution count.
    local converted = mw.ustring.gsub(
        code,
        '('..kanji_like_pattern..'?)'..template_pattern,
        function (non_rb_kanji, name, arg1, arg2)
            if ucfirst(name) ~= ucfirst_template_name then
                -- Returning nothing makes gsub keep the original match.
                return
            end
            if non_rb_kanji == '' then
                return arg1..'('..arg2..')'
            end
            return non_rb_kanji..'|'..arg1..'('..arg2..')'
        end
    )
    parts[#parts+1] = converted:match('^\n?(.-)\n?$')
    parts[#parts+1] = '\n}}'
    return concat(parts)
end

---`#invoke` entry point; raises an error unless the module is being subst'ed.
---
---Reads the code to convert from the first positional argument and the
---optional template name from the `template` argument. An empty `template`
---is treated the same as an omitted one.
---@param frame table frame object supplying `args`
---@return string
function p.transform(frame)
    assert(mw.isSubsting(), '必须subst此模块')
    local args = frame.args
    -- A missing first argument is treated as empty input instead of letting
    -- mw.text.trim fail with an opaque type error on nil.
    local code = mw.text.trim(args[1] or '')
    local template_name = args.template
    if template_name == '' then
        template_name = nil
    end
    return p.transform_code(code, template_name)
end

return p