--- Split `str` on the literal delimiter `delim` (PHP-style explode).
-- The delimiter is matched as plain text, never as a Lua pattern.
-- @param delim non-empty literal delimiter
-- @param str string to split
-- @param limit optional maximum number of pieces; the last piece keeps the
--        unsplit remainder
-- @return array of pieces (always at least one element)
function explode(delim, str, limit)
    if delim == '' then
        -- An empty delimiter would match at every offset without advancing.
        error('explode: empty delimiter', 2)
    end
    local result = {}
    local delim_len = string.len(delim)
    local offset = 1
    local count = 1
    while limit == nil or count < limit do
        local start = string.find(str, delim, offset, true)
        if start == nil then
            break
        end
        result[#result + 1] = string.sub(str, offset, start - 1)
        offset = start + delim_len
        count = count + 1
    end
    result[#result + 1] = string.sub(str, offset)
    return result
end

--- Strip leading and trailing space characters (only ' ', not tabs/newlines).
function trim(str)
    return string.match(str, '^ *(.-) *$')
end

--- Strip trailing space characters (only ' ').
function rtrim(str)
    return string.match(str, '^(.-) *$')
end

--- Length of the initial run of `str` made only of characters in `charlist`.
-- @return 0 when `str` does not start with any of the characters
function strspn(str, charlist)
    if charlist == '' then
        return 0
    end
    -- Escape every non-alphanumeric so characters such as ']' '^' '-' '%'
    -- are taken literally inside the character class.
    local class = string.gsub(charlist, '%W', '%%%0')
    local run = string.match(str, '^[' .. class .. ']+')
    if run == nil then
        return 0
    end
    return string.len(run)
end

--- Pattern search starting at `offset`, reporting through the `match` table.
-- match[0] receives the first value returned by string.match (the first
-- capture when the pattern has captures, otherwise the whole match) and
-- match[1] receives the start index returned by string.find.
-- @return true when the pattern matched, false otherwise
function match_with_offset(pattern, str, match, offset)
    offset = offset or 1
    match[0] = string.match(str, pattern, offset)
    match[1] = string.find(str, pattern, offset)
    return match[0] ~= nil
end

-- converted from Parser.php with a few adjustments
--- Convert wikitext table markup ({| ... |}) in `text` to HTML table tags.
-- Lines outside of any table are passed through untouched.
-- @param text wikitext, lines separated by '\n'
-- @return the converted text without a trailing newline
function parseTables(text)
    local lines = explode('\n', text)
    local buf = {}
    -- One entry per currently open (possibly nested) table:
    local td_history = {}        -- is a cell currently open?
    local last_tag_history = {}  -- tag name of the open cell (td/th/caption)
    local tr_history = {}        -- is a row currently open?
    local tr_attributes = {}     -- attributes pending for the next <tr>
    local has_opened_tr = {}     -- did this table ever open a row?
    local indent_level = 0

    for _, outLine in ipairs(lines) do
        local line = trim(outLine)
        if line ~= '' then
            local first_character = string.sub(line, 1, 1)
            local first_two = string.sub(line, 1, 2)
            local colons, table_open, table_attrs =
                string.match(line, '^(:*)%s*({|)(.*)$')

            if table_open ~= nil then
                -- Start of a table; leading colons become <dl><dd> indents.
                indent_level = string.len(colons or '')
                local attributes = table_attrs or ''
                -- unstripBoth & fixTagAttributes
                outLine = string.rep('<dl><dd>', indent_level)
                    .. '<table ' .. attributes .. '>'
                table.insert(td_history, false)
                table.insert(last_tag_history, '')
                table.insert(tr_history, false)
                table.insert(tr_attributes, '')
                table.insert(has_opened_tr, false)
            elseif #td_history == 0 then
                -- Not inside a table: leave outLine as it is.
            elseif first_two == '|}' then
                -- End of the innermost table.
                line = '</table>' .. string.sub(line, 3)
                local last_tag = table.remove(last_tag_history)
                if not table.remove(has_opened_tr) then
                    line = '<tr><td></td></tr>' .. line
                end
                if table.remove(tr_history) then
                    line = '</tr>' .. line
                end
                if table.remove(td_history) then
                    line = '</' .. last_tag .. '>' .. line
                end
                table.remove(tr_attributes)
                if indent_level > 0 then
                    outLine = rtrim(line)
                        .. string.rep('</dd></dl>', indent_level)
                else
                    outLine = line
                end
            elseif first_two == '|-' then
                -- Row separator; what follows the dashes is the row's
                -- attribute string, stored until the row is really opened.
                line = string.gsub(line, '^|%-+', '')
                local attributes = line
                -- unstripBoth & fixTagAttributes
                table.remove(tr_attributes)
                table.insert(tr_attributes, attributes)
                line = ''
                local last_tag = table.remove(last_tag_history)
                table.remove(has_opened_tr)
                table.insert(has_opened_tr, true)
                if table.remove(tr_history) then
                    line = '</tr>'
                end
                if table.remove(td_history) then
                    line = '</' .. last_tag .. '>' .. line
                end
                outLine = line
                table.insert(tr_history, false)
                table.insert(td_history, false)
                table.insert(last_tag_history, '')
            elseif first_character == '|' or first_character == '!'
                or first_two == '|+' then
                -- Data cells, header cells or a caption.
                if first_two == '|+' then
                    first_character = '+'
                    line = string.sub(line, 3)
                else
                    line = string.sub(line, 2)
                end
                if first_character == '!' then
                    -- replaceMarkup
                    line = string.gsub(line, '!!', '||')
                end
                local cells = explode('||', line)
                outLine = ''
                for _, cell in ipairs(cells) do
                    local previous = ''
                    if first_character ~= '+' then
                        local tr_after = table.remove(tr_attributes)
                        if not table.remove(tr_history) then
                            previous = '<tr ' .. tr_after .. '>'
                        end
                        table.insert(tr_history, true)
                        table.insert(tr_attributes, '')
                        table.remove(has_opened_tr)
                        table.insert(has_opened_tr, true)
                    end
                    local last_tag = table.remove(last_tag_history)
                    if table.remove(td_history) then
                        previous = '</' .. last_tag .. '>\n' .. previous
                    end
                    if first_character == '|' then
                        last_tag = 'td'
                    elseif first_character == '!' then
                        last_tag = 'th'
                    elseif first_character == '+' then
                        last_tag = 'caption'
                    else
                        last_tag = ''
                    end
                    table.insert(last_tag_history, last_tag)

                    -- "attributes|content": split on the first pipe only.
                    local cell_data = explode('|', cell, 2)
                    -- A pipe that belongs to a [[link]] or -{ }- markup is
                    -- not an attribute separator. Plain-text search: the
                    -- cell text must never be interpreted as a pattern.
                    if string.find(cell_data[1], '[[', 1, true)
                        or string.find(cell_data[1], '-{', 1, true) then
                        cell = previous .. '<' .. last_tag .. '>' .. trim(cell)
                    elseif #cell_data == 1 then
                        cell = previous .. '<' .. last_tag .. '>'
                            .. trim(cell_data[1])
                    else
                        local attributes = cell_data[1]
                        -- unstripBoth & fixTagAttributes
                        cell = previous .. '<' .. last_tag .. ' '
                            .. attributes .. '>' .. trim(cell_data[2])
                    end
                    outLine = outLine .. cell
                    table.insert(td_history, true)
                end
            end
        end
        buf[#buf + 1] = outLine .. '\n'
    end

    -- Close whatever is still open at the end of the text.
    while #td_history > 0 do
        if table.remove(td_history) then
            buf[#buf + 1] = '</td>'
        end
        if table.remove(tr_history) then
            buf[#buf + 1] = '</tr>'
        end
        if not table.remove(has_opened_tr) then
            buf[#buf + 1] = '<tr><td></td></tr>'
        end
        buf[#buf + 1] = '</table>'
    end

    local out = table.concat(buf)
    if string.sub(out, -1) == '\n' then
        out = string.sub(out, 1, -2)
    end
    -- NOTE(review): tables are emitted as '<table ' .. attrs .. '>' with
    -- newlines between source lines, so this comparison looks like it can
    -- never succeed; kept unchanged until the intended output is confirmed.
    if out == '<table><tr><td></td></tr></table>' then
        out = ''
    end
    return out
end

-- converted from BlockLevelPass.php with a few adjustments
-- Shared state of the block-level pass, read and written by the helpers.
DTopen = false       -- true while a <dt> is open in the current <dl>
lastParagraph = ''   -- tag name of the open paragraph, '' when none
COLON_STATE = {
    ['TEXT']=0, ['TAG']=1, ['TAGSTART']=2, ['CLOSETAG']=3, ['TAGSLASH']=4,
    ['COMMENT']=5, ['COMMENTDASH']=6, ['COMMENTDASHDASH']=7, ['LC']=8
}

--- True when a paragraph opened by the block-level pass is still open.
function hasOpenParagraph()
    return lastParagraph ~= ''
end

--- Close the open paragraph, if any, and return the closing markup.
-- @param atTheEnd when true no newline is appended after the closing tag
function closeParagraph(atTheEnd)
    atTheEnd = atTheEnd or false
    local result = ''
    if hasOpenParagraph() then
        result = '</' .. lastParagraph .. '>'
        if not atTheEnd then
            result = result .. '\n'
        end
    end
    lastParagraph = ''
    return result
end

--- Length of the common prefix of two strings.
function getCommon(st1, st2)
    local shorter = math.min(string.len(st1), string.len(st2))
    local count = 0
    for i = 1, shorter do
        if string.sub(st1, i, i) ~= string.sub(st2, i, i) then
            break
        end
        count = count + 1
    end
    return count
end

--- Markup opening a list (and its first item) for a list prefix character.
-- Closes any open paragraph first; ';' marks a <dt> as open.
function openList(char)
    local result = closeParagraph()
    if char == '*' then
        result = result .. '<ul><li>'
    elseif char == '#' then
        result = result .. '<ol><li>'
    elseif char == ':' then
        result = result .. '<dl><dd>'
    elseif char == ';' then
        result = result .. '<dl><dt>'
        DTopen = true
    end
    return result
end

--- Markup closing the current list item and opening the next one.
function nextItem(char)
    if char == '*' or char == '#' then
        return '</li>\n<li>'
    elseif char == ':' or char == ';' then
        local close = '</dd>\n'
        if DTopen then
            close = '</dt>\n'
        end
        if char == ';' then
            DTopen = true
            return close .. '<dt>'
        else
            DTopen = false
            return close .. '<dd>'
        end
    end
    return ''
end

--- Markup closing a list for a list prefix character.
function closeList(char)
    local text = ''
    if char == '*' then
        text = '</li></ul>'
    elseif char == '#' then
        text = '</li></ol>'
    elseif char == ':' then
        if DTopen then
            DTopen = false
            text = '</dt></dl>'
        else
            text = '</dd></dl>'
        end
    end
    return text
end

-- Element names driving paragraph handling in parseBlockLevel.
local BLOCK_ELEMS = {
    'table', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'pre', 'p', 'ul', 'ol', 'dl'
}
local ANTI_BLOCK_ELEMS = {'td', 'th'}
local ALWAYS_SUPPRESS_ELEMS = {'tr', 'caption', 'dt', 'dd', 'li'}
local NEVER_SUPPRESS_ELEMS = {
    'center', 'blockquote', 'div', 'hr', 'mw:', 'aside', 'figure'
}

-- True when `t` contains `lead` followed by one of `names` and then a
-- non-alphanumeric character (or the end of the string).
local function any_tag_match(t, lead, names)
    for _, name in ipairs(names) do
        if string.match(t, lead .. name .. '%f[%W]') then
            return true
        end
    end
    return false
end

--- Convert list prefixes (* # : ;) and paragraphs in `text` to HTML.
-- Resets and uses the shared DTopen / lastParagraph state.
-- @param text wikitext, lines separated by '\n'
-- @return the converted text
function parseBlockLevel(text)
    local textLines = explode('\n', text)
    local lineCount = #textLines
    local out = {}
    local function emit(s)
        out[#out + 1] = s
    end

    local lastPrefix = ''
    -- Reset the state shared with openList/nextItem/closeList. A local
    -- variable here would hide the value those helpers actually update.
    DTopen = false
    lastParagraph = ''
    local inBlockElem = false
    local prefixLength = 0
    local pendingPTag = false
    local inBlockquote = false
    local prefix2 = ''

    for lineNo, inputLine in ipairs(textLines) do
        local notLastLine = lineNo < lineCount
        local lastPrefixLength = string.len(lastPrefix)
        prefixLength = strspn(inputLine, '*#:;')
        local prefix = string.sub(inputLine, 1, prefixLength)
        -- ';' and ':' both belong to definition lists, so they are
        -- equivalent when deciding whether lists must be opened or closed.
        prefix2 = string.gsub(prefix, ';', ':')
        local t = string.sub(inputLine, prefixLength + 1)

        if prefixLength ~= 0 and lastPrefix == prefix2 then
            -- Same nesting as the previous line: just start a new item.
            emit(nextItem(string.sub(prefix, -1, -1)))
            pendingPTag = false
            if string.sub(prefix, -1, -1) == ';' then
                -- "; term : definition" on a single line.
                local term_t2 = {'', ''}
                if findColonNoLinks(t, term_t2) ~= false then
                    t = term_t2[2]
                    emit(trim(term_t2[1]) .. nextItem(':'))
                end
            end
        elseif prefixLength ~= 0 or lastPrefixLength ~= 0 then
            -- Lists have to be opened, closed, or both.
            local commonPrefixLength = getCommon(prefix, lastPrefix)
            pendingPTag = false

            -- Close every level that is not shared with the previous line.
            while commonPrefixLength < lastPrefixLength do
                emit(closeList(
                    string.sub(lastPrefix, lastPrefixLength, lastPrefixLength)))
                lastPrefixLength = lastPrefixLength - 1
            end

            -- Continue the current level if appropriate.
            if prefixLength <= commonPrefixLength and commonPrefixLength > 0 then
                emit(nextItem(
                    string.sub(prefix, commonPrefixLength, commonPrefixLength)))
            end

            -- Close an open <dt> when a <dd> (':') starts on this line.
            if DTopen and commonPrefixLength > 0
                and string.sub(prefix, commonPrefixLength, commonPrefixLength) == ':' then
                emit(nextItem(':'))
            end

            -- Open the new levels.
            if lastPrefix ~= '' and prefixLength > commonPrefixLength then
                emit('\n')
            end
            while prefixLength > commonPrefixLength do
                local char = string.sub(
                    prefix, commonPrefixLength + 1, commonPrefixLength + 1)
                emit(openList(char))
                if char == ';' then
                    local term_t2 = {'', ''}
                    if findColonNoLinks(t, term_t2) ~= false then
                        t = term_t2[2]
                        emit(trim(term_t2[1]) .. nextItem(':'))
                    end
                end
                commonPrefixLength = commonPrefixLength + 1
            end
            if prefixLength == 0 and lastPrefix ~= '' then
                emit('\n')
            end
            lastPrefix = prefix2
        end

        if prefixLength == 0 then
            -- No list prefix: paragraph mode.
            local openMatch = any_tag_match(t, '<', BLOCK_ELEMS)
                or any_tag_match(t, '</', ANTI_BLOCK_ELEMS)
                or any_tag_match(t, '</?', ALWAYS_SUPPRESS_ELEMS)
            local closeMatch = any_tag_match(t, '</', BLOCK_ELEMS)
                or any_tag_match(t, '<', ANTI_BLOCK_ELEMS)
                or any_tag_match(t, '</?', NEVER_SUPPRESS_ELEMS)

            if openMatch or closeMatch then
                pendingPTag = false
                -- The closing tag has to reach the output, otherwise the
                -- paragraph stays open in front of the block element.
                emit(closeParagraph())
                local bqOffset = 1
                while true do
                    local s, e, slash =
                        string.find(t, '<(/?)blockquote[%s>]', bqOffset)
                    if s == nil then
                        break
                    end
                    -- An empty capture means an opening tag.
                    inBlockquote = (slash == '')
                    -- Continue after the whole match so the scan advances.
                    bqOffset = e + 1
                end
                inBlockElem = not closeMatch
            elseif not inBlockElem then
                -- "and -- trim(" triggers WAF
                -- NOTE(review): a two-character substring equals ' ' only
                -- when t is exactly ' ', which the trim test excludes, so
                -- this branch looks unreachable. Kept as is; confirm what
                -- should happen to lines starting with a space.
                if trim(t) ~= '' and string.sub(t, 1, 2) == ' '
                    and not inBlockquote then
                    t = string.sub(t, 2)
                elseif string.match(t, '^<style%f[%W][^>]*>.-</style>$')
                    or string.match(t, '<link%f[%W][^>]*>%s*') then
                    if pendingPTag ~= '' and pendingPTag ~= false then
                        emit(closeParagraph())
                        pendingPTag = false
                    end
                else
                    if trim(t) == '' then
                        if pendingPTag ~= '' and pendingPTag ~= false then
                            emit(pendingPTag .. '<br />')
                            pendingPTag = false
                            lastParagraph = 'p'
                        elseif lastParagraph ~= 'p' then
                            emit(closeParagraph())
                            pendingPTag = '<p>'
                        else
                            pendingPTag = '</p><p>'
                        end
                    elseif pendingPTag ~= '' and pendingPTag ~= false then
                        emit(pendingPTag)
                        pendingPTag = false
                        lastParagraph = 'p'
                    elseif lastParagraph ~= 'p' then
                        emit(closeParagraph() .. '<p>')
                        lastParagraph = 'p'
                    end
                end
            end
        end

        if pendingPTag == false then
            if prefixLength == 0 then
                emit(t)
                -- Newline when a paragraph is open or more lines follow.
                if notLastLine or hasOpenParagraph() then
                    emit('\n')
                end
            else
                -- Trim whitespace in list items.
                emit(trim(t))
            end
        end
    end

    -- Close the lists left open by the last line.
    while prefixLength > 0 do
        emit(closeList(string.sub(prefix2, prefixLength, prefixLength)))
        prefixLength = prefixLength - 1
        if prefixLength == 0 and hasOpenParagraph() then
            emit('\n')
        end
    end
    emit(closeParagraph(true))
    return table.concat(out)
end

-- Plain-text search for the earliest occurrence of any of `needles` in
-- `str` at or after `init`. Returns its position and the needle found, or
-- nil when none of them occurs.
local function find_first_plain(str, needles, init)
    local best_pos, best_needle
    for _, needle in ipairs(needles) do
        local pos = string.find(str, needle, init, true)
        if pos ~= nil and (best_pos == nil or pos < best_pos) then
            best_pos, best_needle = pos, needle
        end
    end
    return best_pos, best_needle
end

local COLON_TEXT_TOKENS = {':', '<', '-{'}
local COLON_LC_TOKENS = {'-{', '}-'}

--- Find the first ':' of `str` that is not inside a tag, an HTML comment
-- or -{ ... }- markup, and split the string around it.
-- @param str text following a ';' list prefix
-- @param before_after table; on success [1] receives the text before the
--        colon and [2] the text after it
-- @return the 1-based position of the colon, or false when there is none
function findColonNoLinks(str, before_after)
    local pos, token = find_first_plain(str, COLON_TEXT_TOKENS, 1)
    if pos == nil then
        return false
    end
    if token == ':' then
        -- Easy case: the colon comes before any markup.
        before_after[1] = string.sub(str, 1, pos - 1)
        before_after[2] = string.sub(str, pos + 1)
        return pos
    end

    local S = COLON_STATE
    local state = S.TEXT
    local ltLevel = 0  -- depth of open tags
    local lcLevel = 0  -- depth of open -{ ... }- markup
    local len = string.len(str)
    -- A while loop is required: the scan jumps ahead by assigning `i`,
    -- which has no effect on the counter of a numeric for loop.
    local i = pos
    while i <= len do
        local c = string.sub(str, i, i)
        if state == S.TEXT then
            if c == '<' then
                state = S.TAGSTART
            elseif c == ':' then
                if ltLevel == 0 then
                    before_after[1] = string.sub(str, 1, i - 1)
                    before_after[2] = string.sub(str, i + 1)
                    return i
                end
                -- Inside a tag pair: keep looking.
            else
                local nextPos, nextToken =
                    find_first_plain(str, COLON_TEXT_TOKENS, i)
                if nextPos == nil then
                    return false
                end
                if nextToken == '-{' then
                    state = S.LC
                    lcLevel = lcLevel + 1
                    i = nextPos + 1
                else
                    -- The increment below lands on the token itself.
                    i = nextPos - 1
                end
            end
        elseif state == S.LC then
            local nextPos, nextToken =
                find_first_plain(str, COLON_LC_TOKENS, i + 1)
            if nextPos == nil then
                -- Unterminated -{ markup: give up.
                break
            end
            i = nextPos + 1
            if nextToken == '-{' then
                lcLevel = lcLevel + 1
            else
                lcLevel = lcLevel - 1
                if lcLevel == 0 then
                    state = S.TEXT
                end
            end
        elseif state == S.TAG then
            if c == '>' then
                ltLevel = ltLevel + 1
                state = S.TEXT
            elseif c == '/' then
                state = S.TAGSLASH
            end
        elseif state == S.TAGSTART then
            if c == '/' then
                state = S.CLOSETAG
            elseif c == '!' then
                state = S.COMMENT
            elseif c == '>' then
                state = S.TEXT
            else
                state = S.TAG
            end
        elseif state == S.CLOSETAG then
            if c == '>' then
                if ltLevel > 0 then
                    ltLevel = ltLevel - 1
                end
                state = S.TEXT
            end
        elseif state == S.TAGSLASH then
            if c == '>' then
                -- Self-closed tag: <tag/>
                state = S.TEXT
            else
                -- The slash was part of an attribute.
                state = S.TAG
            end
        elseif state == S.COMMENT then
            if c == '-' then
                state = S.COMMENTDASH
            end
        elseif state == S.COMMENTDASH then
            if c == '-' then
                state = S.COMMENTDASHDASH
            else
                state = S.COMMENT
            end
        elseif state == S.COMMENTDASHDASH then
            if c == '>' then
                state = S.TEXT
            else
                state = S.COMMENT
            end
        end
        i = i + 1
    end
    return false
end

-- the Module
local p = {}

--- Entry point: render the first frame argument as table and block-level
-- HTML. The argument is un-nowiki'd, entity-decoded and preprocessed before
-- being converted, and all newlines are removed from the result.
-- @param frame frame object passed in by the caller
-- @return the generated markup
function p.main(frame)
    -- Loaded on first use so the parsing helpers above can be loaded
    -- without 'Module:Arguments' being resolvable.
    local getArgs = require('Module:Arguments').getArgs
    local args = getArgs(frame, { removeBlanks = false, frameOnly = true })
    local rawText = args[1] or ''
    rawText = mw.text.unstripNoWiki(rawText)
    rawText = mw.text.decode(rawText)
    rawText = frame:preprocess(rawText)

    local processedText = parseTables(rawText)
    processedText = parseBlockLevel(processedText)
    processedText = string.gsub(processedText, '\n', '')
    -- Drop a bare <th> wrapper at either end of the result.
    if string.sub(processedText, 1, 4) == '<th>' then
        processedText = string.sub(processedText, 5)
    end
    if string.sub(processedText, -5, -1) == '</th>' then
        processedText = string.sub(processedText, 1, -6)
    end
    return processedText
end

return p