此模块用于把多行wikitext压缩至一行。
此模块最初是为了{{Hid}}编写的。由于MediaWiki的wikitext解析器存在问题,导致把多行wikitext放在列表(*、#)或缩进(:、;)上时会出现错误的结果。
一个著名的例子就是{{Hide}}不能与列表和缩进联用(参见滥用过滤器30):
代码 | 效果 | ||
---|---|---|---|
* {{Hide}} * 文本 文本 |
|
由于{{Hide}}展开后是多行wikitext,与列表或缩进连用会导致后续内容全部缩进。而此模块能够预先把多行wikitext压缩至一行,从而避免该问题。
代码 | 效果 | ||
---|---|---|---|
* {{#invoke:Neutralizer|main| {{Hide}} }} * 文本 文本 |
文本 |
此模块同样适用于面临相同困扰的其他模板,例如{{VersionHistory}}、{{Clade}}等。
此模块用Lua部分重写了MediaWiki内置的wikitext解析器,因此能够解析表格、列表以及段落。
传入的wikitext可以用<nowiki>包围,这时此模块将先去掉外面的<nowiki>再进行解析。
然而,此模块尚未经过相对充分的测试,其解析结果可能会与预期存在一定的差别。
-- Neutralizer: renders multi-line wikitext (tables, lists, paragraphs) to
-- HTML and removes every newline, so the result can be placed on a single
-- list or indent line.
local string = string
local table = table
local ipairs = ipairs

--- Split `str` on the literal delimiter `delim`.
-- The delimiter is matched as plain text (no pattern magic), which keeps the
-- offset arithmetic below correct for any delimiter.
-- @param delim literal separator, e.g. '\n', '||' or '|'
-- @param str   string to split
-- @param limit optional maximum number of pieces; the last piece keeps the
--              remainder of the string unsplit
-- @return array of pieces (always at least one element)
local function explode(delim, str, limit)
    local result = {}
    local offset = 1
    local count = 1
    local delim_len = string.len(delim)
    while limit == nil or count < limit do
        local pos = string.find(str, delim, offset, true)
        if pos == nil then
            break
        end
        table.insert(result, string.sub(str, offset, pos - 1))
        offset = pos + delim_len
        count = count + 1
    end
    table.insert(result, string.sub(str, offset))
    return result
end

--- Strip leading and trailing spaces (only the space character).
local function trim(str)
    return string.match(str, '^ *(.-) *$')
end

--- Strip trailing spaces (only the space character).
local function rtrim(str)
    return string.match(str, '^(.-) *$')
end

--- Length of the leading run of `str` made only of characters in `charlist`.
-- `charlist` is spliced into a pattern character set, so callers must pass
-- characters that are safe inside `[...]`.
local function strspn(str, charlist)
    return string.len(string.match(str, '^[' .. charlist .. ']+') or '')
end

--- Find the earliest occurrence of any literal token at or after `init`.
-- Stands in for regex alternation, which Lua patterns do not have.
-- @return token, position  or  nil when none of the tokens occurs
local function find_first(str, tokens, init)
    local best_token, best_pos = nil, nil
    for _, token in ipairs(tokens) do
        local pos = string.find(str, token, init, true)
        if pos ~= nil and (best_pos == nil or pos < best_pos) then
            best_token, best_pos = token, pos
        end
    end
    return best_token, best_pos
end

-- converted from Parser.php with a few adjustments
--- Convert wikitext table markup ({| ... |}) into HTML table tags.
-- Lines outside of any table are passed through unchanged.
-- @param text wikitext, lines separated by '\n'
-- @return text with table markup replaced by HTML
local function parseTables(text)
    local lines = explode('\n', text)
    local out = {}
    -- Parallel stacks, one entry per currently open (nested) table.
    local td_history = {}       -- is a cell currently open?
    local last_tag_history = {} -- tag name of the last opened cell
    local tr_history = {}       -- is a row currently open?
    local tr_attributes = {}    -- attributes for the next <tr>
    local has_opened_tr = {}    -- did this table ever open a row?
    local indent_level = 0

    for _, rawLine in ipairs(lines) do
        local outLine = rawLine
        local line = trim(rawLine)
        if line == '' then
            -- Blank lines are kept as they are.
            table.insert(out, outLine .. '\n')
        else
            local first_character = string.sub(line, 1, 1)
            local first_two = string.sub(line, 1, 2)
            local indent, table_open, table_attrs =
                string.match(line, '^(:*)%s*({|)(.*)$')

            if table_open ~= nil then
                -- Start of a table, optionally indented with colons.
                indent_level = string.len(indent)
                outLine = string.rep('<dl><dd>', indent_level)
                    .. '<table ' .. table_attrs .. '>'
                table.insert(td_history, false)
                table.insert(last_tag_history, '')
                table.insert(tr_history, false)
                table.insert(tr_attributes, '')
                table.insert(has_opened_tr, false)
            elseif #td_history == 0 then
                -- Not inside a table: leave the line untouched.
            elseif first_two == '|}' then
                -- End of the innermost table.
                line = '</table>' .. string.sub(line, 3)
                local last_tag = table.remove(last_tag_history)
                if not table.remove(has_opened_tr) then
                    line = '<tr><td></td></tr>' .. line
                end
                if table.remove(tr_history) then
                    line = '</tr>' .. line
                end
                if table.remove(td_history) then
                    line = '</' .. last_tag .. '>' .. line
                end
                table.remove(tr_attributes)
                if indent_level > 0 then
                    outLine = rtrim(line) .. string.rep('</dd></dl>', indent_level)
                else
                    outLine = line
                end
            elseif first_two == '|-' then
                -- New row: remember its attributes until the first cell opens it.
                local attributes = string.gsub(line, '^|%-+', '')
                table.remove(tr_attributes)
                table.insert(tr_attributes, attributes)

                line = ''
                local last_tag = table.remove(last_tag_history)
                table.remove(has_opened_tr)
                table.insert(has_opened_tr, true)
                if table.remove(tr_history) then
                    line = '</tr>'
                end
                if table.remove(td_history) then
                    line = '</' .. last_tag .. '>' .. line
                end
                outLine = line
                table.insert(tr_history, false)
                table.insert(td_history, false)
                table.insert(last_tag_history, '')
            elseif first_character == '|' or first_character == '!' or first_two == '|+' then
                -- Data cells, header cells or a caption.
                if first_two == '|+' then
                    first_character = '+'
                    line = string.sub(line, 3)
                else
                    line = string.sub(line, 2)
                end
                if first_character == '!' then
                    -- '!!' separates header cells just like '||' separates data cells.
                    line = string.gsub(line, '!!', '||')
                end

                local cells = explode('||', line)
                outLine = ''
                for _, cell in ipairs(cells) do
                    local previous = ''
                    if first_character ~= '+' then
                        local tr_after = table.remove(tr_attributes)
                        if not table.remove(tr_history) then
                            previous = '<tr ' .. tr_after .. '>'
                        end
                        table.insert(tr_history, true)
                        table.insert(tr_attributes, '')
                        table.remove(has_opened_tr)
                        table.insert(has_opened_tr, true)
                    end

                    local last_tag = table.remove(last_tag_history)
                    if table.remove(td_history) then
                        previous = '</' .. last_tag .. '>\n' .. previous
                    end

                    if first_character == '|' then
                        last_tag = 'td'
                    elseif first_character == '!' then
                        last_tag = 'th'
                    elseif first_character == '+' then
                        last_tag = 'caption'
                    else
                        last_tag = ''
                    end
                    table.insert(last_tag_history, last_tag)

                    local cell_data = explode('|', cell, 2)
                    -- A '|' that follows '[[' or '-{' belongs to a link or to
                    -- language-converter markup, not to cell attributes.
                    -- Plain finds: the cell text must never be used as a pattern.
                    if string.find(cell_data[1], '[[', 1, true)
                        or string.find(cell_data[1], '-{', 1, true) then
                        cell = previous .. '<' .. last_tag .. '>' .. trim(cell)
                    elseif #cell_data == 1 then
                        cell = previous .. '<' .. last_tag .. '>' .. trim(cell_data[1])
                    else
                        local attributes = cell_data[1]
                        cell = previous .. '<' .. last_tag .. ' ' .. attributes .. '>'
                            .. trim(cell_data[2])
                    end

                    outLine = outLine .. cell
                    table.insert(td_history, true)
                end
            end
            table.insert(out, outLine .. '\n')
        end
    end

    -- Close any tables that were left open at the end of the input.
    while #td_history > 0 do
        if table.remove(td_history) then
            table.insert(out, '</td>')
        end
        if table.remove(tr_history) then
            table.insert(out, '</tr>')
        end
        if not table.remove(has_opened_tr) then
            table.insert(out, '<tr><td></td></tr>')
        end
        table.insert(out, '</table>')
    end

    if out[#out] == '\n' then
        table.remove(out)
    end
    return table.concat(out)
end

-- converted from BlockLevelPass.php with a few adjustments
-- Module-level parser state shared by the block-level helpers below.
local DTopen = false      -- is a <dt> currently open?
local lastParagraph = ''  -- name of the open paragraph tag, '' when none

local COLON_STATE = {
    TEXT = 0,
    TAG = 1,
    TAGSTART = 2,
    CLOSETAG = 3,
    TAGSLASH = 4,
    COMMENT = 5,
    COMMENTDASH = 6,
    COMMENTDASHDASH = 7,
    LC = 8,
}

local COLON_TOKENS = { ':', '<', '-{' }
local LC_TOKENS = { '-{', '}-' }

--- True while a paragraph tag is open.
local function hasOpenParagraph()
    return lastParagraph ~= ''
end

--- Close the open paragraph, if any, and return the closing markup.
-- @param atTheEnd when true, no newline is appended after the closing tag
local function closeParagraph(atTheEnd)
    local result = ''
    if hasOpenParagraph() then
        result = '</' .. lastParagraph .. '>'
        if not atTheEnd then
            result = result .. '\n'
        end
    end
    lastParagraph = ''
    return result
end

--- Length of the common prefix of two strings.
local function getCommon(st1, st2)
    local shorter = math.min(string.len(st1), string.len(st2))
    local count = 0
    for i = 1, shorter do
        if string.sub(st1, i, i) ~= string.sub(st2, i, i) then
            break
        end
        count = count + 1
    end
    return count
end

--- Markup that opens a new list for the given prefix character.
-- Closes any open paragraph first.
local function openList(char)
    local result = closeParagraph()
    if char == '*' then
        result = result .. '<ul><li>'
    elseif char == '#' then
        result = result .. '<ol><li>'
    elseif char == ':' then
        result = result .. '<dl><dd>'
    elseif char == ';' then
        result = result .. '<dl><dt>'
        DTopen = true
    end
    return result
end

--- Markup that ends the current list item and starts the next one.
local function nextItem(char)
    if char == '*' or char == '#' then
        return '</li>\n<li>'
    elseif char == ':' or char == ';' then
        local close = '</dd>\n'
        if DTopen then
            close = '</dt>\n'
        end
        if char == ';' then
            DTopen = true
            return close .. '<dt>'
        else
            DTopen = false
            return close .. '<dd>'
        end
    end
    return ''
end

--- Markup that closes a list for the given prefix character.
-- Callers pass prefixes in which ';' has already been replaced by ':'.
local function closeList(char)
    local text = ''
    if char == '*' then
        text = '</li></ul>'
    elseif char == '#' then
        text = '</li></ol>'
    elseif char == ':' then
        if DTopen then
            DTopen = false
            text = '</dt></dl>'
        else
            text = '</dd></dl>'
        end
    end
    return text
end

--- Find the first ':' in `str` that is not inside an HTML tag, an HTML
-- comment, or -{ }- language-converter markup.
-- @param str          text following a ';' list prefix
-- @param before_after table filled on success: [1] = text before the colon,
--                     [2] = text after it (the colon itself is dropped)
-- @return 1-based position of the colon, or false when there is none
local function findColonNoLinks(str, before_after)
    local token, pos = find_first(str, COLON_TOKENS, 1)
    if token == nil then
        -- Nothing to find.
        return false
    end
    if token == ':' then
        -- Easy case: the colon comes before any tag or converter markup.
        before_after[1] = string.sub(str, 1, pos - 1)
        before_after[2] = string.sub(str, pos + 1)
        return pos
    end

    -- State machine that walks the string while skipping over tags.
    local state = COLON_STATE.TEXT
    local ltLevel = 0
    local lcLevel = 0
    local len = string.len(str)
    local i = pos
    while i <= len do
        local c = string.sub(str, i, i)
        if state == COLON_STATE.TEXT then
            if c == '<' then
                -- Either a <start> tag or an </end> tag.
                state = COLON_STATE.TAGSTART
            elseif c == ':' then
                if ltLevel == 0 then
                    before_after[1] = string.sub(str, 1, i - 1)
                    before_after[2] = string.sub(str, i + 1)
                    return i
                end
                -- Colon nested inside an element: keep looking.
            else
                -- Skip ahead to the next interesting token.
                token, pos = find_first(str, COLON_TOKENS, i)
                if token == nil then
                    return false
                end
                if token == '-{' then
                    state = COLON_STATE.LC
                    lcLevel = lcLevel + 1
                    i = pos + 1
                else
                    -- The increment at the end of the loop lands on `pos`.
                    i = pos - 1
                end
            end
        elseif state == COLON_STATE.LC then
            -- Inside -{ ... }- markup.
            token, pos = find_first(str, LC_TOKENS, i + 1)
            if token == nil then
                -- The markup is never closed: give up.
                break
            end
            i = pos + 1
            if token == '-{' then
                lcLevel = lcLevel + 1
            else
                lcLevel = lcLevel - 1
                if lcLevel == 0 then
                    state = COLON_STATE.TEXT
                end
            end
        elseif state == COLON_STATE.TAG then
            if c == '>' then
                ltLevel = ltLevel + 1
                state = COLON_STATE.TEXT
            elseif c == '/' then
                -- The slash may be followed by '>'.
                state = COLON_STATE.TAGSLASH
            end
        elseif state == COLON_STATE.TAGSTART then
            if c == '/' then
                state = COLON_STATE.CLOSETAG
            elseif c == '!' then
                state = COLON_STATE.COMMENT
            elseif c == '>' then
                state = COLON_STATE.TEXT
            else
                state = COLON_STATE.TAG
            end
        elseif state == COLON_STATE.CLOSETAG then
            if c == '>' then
                if ltLevel > 0 then
                    ltLevel = ltLevel - 1
                end
                state = COLON_STATE.TEXT
            end
        elseif state == COLON_STATE.TAGSLASH then
            if c == '>' then
                -- Self-closed tag such as <br/>.
                state = COLON_STATE.TEXT
            else
                -- The slash was part of an attribute.
                state = COLON_STATE.TAG
            end
        elseif state == COLON_STATE.COMMENT then
            if c == '-' then
                state = COLON_STATE.COMMENTDASH
            end
        elseif state == COLON_STATE.COMMENTDASH then
            if c == '-' then
                state = COLON_STATE.COMMENTDASHDASH
            else
                state = COLON_STATE.COMMENT
            end
        elseif state == COLON_STATE.COMMENTDASHDASH then
            if c == '>' then
                state = COLON_STATE.TEXT
            else
                state = COLON_STATE.COMMENT
            end
        end
        i = i + 1
    end
    return false
end

--- Handle "; term : definition" on a single line.
-- When `t` contains a usable colon, the term and the <dt> -> <dd> transition
-- are appended to `output` and the definition text is returned; otherwise
-- `t` is returned unchanged.
local function splitDefinitionTerm(output, t)
    local parts = { '', '' }
    if findColonNoLinks(t, parts) ~= false then
        table.insert(output, trim(parts[1]) .. nextItem(':'))
        return parts[2]
    end
    return t
end

--- True when `t` contains `lead .. name` followed by a non-word boundary for
-- any of the given tag names. `lead` is a pattern fragment such as '<',
-- '</' or '</?'.
local function hasAnyTag(t, lead, names)
    for _, name in ipairs(names) do
        if string.match(t, lead .. name .. '%f[%W]') then
            return true
        end
    end
    return false
end

local BLOCK_ELEMS = { 'table', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'pre', 'p', 'ul', 'ol', 'dl' }
local ANTI_BLOCK_ELEMS = { 'td', 'th' }
local ALWAYS_OPEN_ELEMS = { 'tr', 'caption', 'dt', 'dd', 'li' }
local ALWAYS_CLOSE_ELEMS = { 'center', 'blockquote', 'div', 'hr', 'mw:', 'aside', 'figure' }

--- Convert list prefixes (* # : ;) and paragraphs into HTML.
-- @param text wikitext (already passed through parseTables)
-- @return HTML; newlines are still present and are removed by the caller
local function parseBlockLevel(text)
    local textLines = explode('\n', text)
    local lastPrefix = ''
    local output = {}
    -- Start from a clean state even if an earlier run was interrupted.
    DTopen = false
    lastParagraph = ''
    local inBlockElem = false
    local prefixLength = 0
    local pendingPTag = false
    local inBlockquote = false
    local prefix2 = ''

    for _, inputLine in ipairs(textLines) do
        local lastPrefixLength = string.len(lastPrefix)
        prefixLength = strspn(inputLine, '*#:;')
        local prefix = string.sub(inputLine, 1, prefixLength)
        -- ';' and ':' both belong to definition lists, so they are treated
        -- as equal when deciding whether lists must be opened or closed.
        prefix2 = string.gsub(prefix, ';', ':')
        local t = string.sub(inputLine, prefixLength + 1)

        if prefixLength ~= 0 and lastPrefix == prefix2 then
            -- Same nesting as the previous line: just start the next item.
            table.insert(output, nextItem(string.sub(prefix, -1, -1)))
            pendingPTag = false
            if string.sub(prefix, -1, -1) == ';' then
                t = splitDefinitionTerm(output, t)
            end
        elseif prefixLength ~= 0 or lastPrefixLength ~= 0 then
            -- Lists have to be opened, closed, or both.
            local commonPrefixLength = getCommon(prefix, lastPrefix)
            pendingPTag = false

            -- Close every level that is not shared with this line.
            while commonPrefixLength < lastPrefixLength do
                table.insert(output,
                    closeList(string.sub(lastPrefix, lastPrefixLength, lastPrefixLength)))
                lastPrefixLength = lastPrefixLength - 1
            end

            -- Continue the current level if appropriate.
            if prefixLength <= commonPrefixLength and commonPrefixLength > 0 then
                table.insert(output,
                    nextItem(string.sub(prefix, commonPrefixLength, commonPrefixLength)))
            end

            -- Close an open <dt> when a <dd> (':') starts on this line.
            if DTopen and commonPrefixLength > 0
                and string.sub(prefix, commonPrefixLength, commonPrefixLength) == ':' then
                table.insert(output, nextItem(':'))
            end

            if lastPrefix ~= '' and prefixLength > commonPrefixLength then
                table.insert(output, '\n')
            end
            -- Open the levels that are new on this line.
            while prefixLength > commonPrefixLength do
                local char = string.sub(prefix, commonPrefixLength + 1, commonPrefixLength + 1)
                table.insert(output, openList(char))
                if char == ';' then
                    t = splitDefinitionTerm(output, t)
                end
                commonPrefixLength = commonPrefixLength + 1
            end
            -- Leaving list mode entirely: separate the list from what follows.
            if prefixLength == 0 and lastPrefix ~= '' then
                table.insert(output, '\n')
            end
            lastPrefix = prefix2
        end

        if prefixLength == 0 then
            -- No list prefix: paragraph mode.
            local openMatch = hasAnyTag(t, '<', BLOCK_ELEMS)
                or hasAnyTag(t, '</', ANTI_BLOCK_ELEMS)
                or hasAnyTag(t, '</?', ALWAYS_OPEN_ELEMS)
            local closeMatch = hasAnyTag(t, '</', BLOCK_ELEMS)
                or hasAnyTag(t, '<', ANTI_BLOCK_ELEMS)
                or hasAnyTag(t, '</?', ALWAYS_CLOSE_ELEMS)

            if openMatch or closeMatch then
                pendingPTag = false
                -- Emit the closing tag of any open paragraph before the block element.
                table.insert(output, closeParagraph())
                -- Track whether the line ends inside a <blockquote>.
                local bqOffset = 1
                while true do
                    local bqStart, bqEnd, slash =
                        string.find(t, '<(/?)blockquote[%s>]', bqOffset)
                    if bqStart == nil then
                        break
                    end
                    inBlockquote = (slash == '')
                    -- Resume after the whole match so the loop always advances.
                    bqOffset = bqEnd + 1
                end
                inBlockElem = not closeMatch
            elseif not inBlockElem then
                -- NOTE(review): the two-character slice below can only equal a
                -- single space when t is exactly ' ', which the trim() test
                -- excludes, so this branch is never taken and indented lines go
                -- through paragraph handling. Left unchanged on purpose because
                -- enabling it would alter the output of every indented line.
                if trim(t) ~= '' and string.sub(t, 1, 2) == ' ' and not inBlockquote then
                    t = string.sub(t, 2)
                elseif string.match(t, '^<style%f[%W][^>]*>.-</style>$')
                    or string.match(t, '<link%f[%W][^>]*>%s*') then
                    -- <style>/<link> lines neither open nor close paragraphs,
                    -- but they do clear a pending paragraph tag.
                    if pendingPTag ~= '' and pendingPTag ~= false then
                        table.insert(output, closeParagraph())
                        pendingPTag = false
                    end
                elseif trim(t) == '' then
                    if pendingPTag ~= '' and pendingPTag ~= false then
                        table.insert(output, pendingPTag .. '<br />')
                        pendingPTag = false
                        lastParagraph = 'p'
                    elseif lastParagraph ~= 'p' then
                        table.insert(output, closeParagraph())
                        pendingPTag = '<p>'
                    else
                        pendingPTag = '</p><p>'
                    end
                elseif pendingPTag ~= '' and pendingPTag ~= false then
                    table.insert(output, pendingPTag)
                    pendingPTag = false
                    lastParagraph = 'p'
                elseif lastParagraph ~= 'p' then
                    table.insert(output, closeParagraph() .. '<p>')
                    lastParagraph = 'p'
                end
            end
        end

        if pendingPTag == false then
            if prefixLength == 0 then
                table.insert(output, t)
                if hasOpenParagraph() then
                    table.insert(output, '\n')
                end
            else
                -- Whitespace around list item text is dropped.
                table.insert(output, trim(t))
            end
        end
    end

    -- Close the lists that are still open after the last line.
    while prefixLength > 0 do
        table.insert(output, closeList(string.sub(prefix2, prefixLength, prefixLength)))
        prefixLength = prefixLength - 1
        if prefixLength ~= 0 and hasOpenParagraph() then
            table.insert(output, '\n')
        end
    end
    table.insert(output, closeParagraph(true))
    return table.concat(output)
end

-- the Module
local p = {}
local getArgs = require('Module:Arguments').getArgs

--- Entry point for {{#invoke:Neutralizer|main|...}}.
-- Takes the wikitext from the first frame argument (optionally wrapped in
-- <nowiki>), expands it, converts tables, lists and paragraphs to HTML and
-- returns the result with every newline removed.
-- @param frame frame object passed in by #invoke
-- @return single-line string
function p.main(frame)
    local args = getArgs(frame, { removeBlanks = false, frameOnly = true })
    local rawText = args[1] or ''
    rawText = mw.text.unstripNoWiki(rawText)
    rawText = mw.text.decode(rawText)
    rawText = frame:preprocess(rawText)
    local processedText = parseTables(rawText)
    processedText = parseBlockLevel(processedText)
    -- Single assignment drops gsub's second result (the replacement count).
    processedText = string.gsub(processedText, '\n', '')
    return processedText
end

return p