'''
Wikicreole style parser. Mostly compliant.
Written 2010-12-19.
danomagnum.com
'''
__version__ = 1.0

import re
import math  # noqa: F401 - no longer used by the parser; left in place

# (wiki marker, html tag) pairs for the inline styles, in processing order.
_INLINE_STYLES = (('**', 'b'), ('//', 'i'), ('__', 'u'))
# Block level containers tracked by the parser state.
_BLOCK_TAGS = ('ol', 'ul', 'table')
# Deepest heading tag emitted; longer runs of '=' are clamped to it.
_MAX_HEADING = 5

_RE_IMAGE_ALT = re.compile(r'\{\{([^|}]+?)\|(.+?)\}\}')
_IMAGE_ALT_HTML = r'<img src="\1" alt="\2" />'
_RE_IMAGE = re.compile(r'\{\{([^|}]+?)\}\}')
_IMAGE_HTML = r'<img src="\1" />'
_RE_LINK_TEXT = re.compile(r'\[\[([^|\]]+?)\|(.+?)\]\]')
_LINK_TEXT_HTML = r'<a href="\1">\2</a>'
_RE_LINK = re.compile(r'\[\[([^|\]]+?)\]\]')
_LINK_HTML = r'<a href="\1">\1</a>'
_RE_NOWIKI = re.compile(r'\{\{\{(.+?)\}\}\}')
# Placeholder left behind while an inline {{{...}}} span is set aside.
_RE_STASHED = re.compile(r'\x00(\d+)\x00')
# A cell separator is a pipe that is not inside [[...]] or {{...}}.
_RE_CELL_SPLIT = re.compile(r'\|(?![^\[\]]*\]\])(?![^{}]*\}\})')
# Stands in for '://' so the italic marker '//' cannot match inside a URL.
_URL_GUARD = '\x01'


def _escape_attr(value):
    '''Escape text so it can sit inside a quoted html attribute value.'''
    return (value.replace('&', '&amp;')
                 .replace('<', '&lt;')
                 .replace('>', '&gt;')
                 .replace('"', '&quot;')
                 .replace("'", '&#39;'))


def _leading(line, marker):
    '''Count how many marker characters line starts with.'''
    return len(line) - len(line.lstrip(marker))


def _set_blocks(state, out, ol=0, ul=0, table=False):
    '''Emit the tags that move the open lists/table to the requested state.

    Closing tags are written before opening tags so that switching from
    one kind of block to another never interleaves them.
    '''
    wanted = {'ol': ol, 'ul': ul, 'table': int(table)}
    for tag in _BLOCK_TAGS:
        if state[tag] > wanted[tag]:
            out.append(('</' + tag + '>') * (state[tag] - wanted[tag]))
    for tag in _BLOCK_TAGS:
        if state[tag] < wanted[tag]:
            out.append(('<' + tag + '>') * (wanted[tag] - state[tag]))
    state.update(wanted)


def _close_styles(styles, out):
    '''Close every inline style that is still open.'''
    for _marker, tag in _INLINE_STYLES:
        if styles[tag]:
            out.append('</' + tag + '>')
            styles[tag] = False


def _table_row(line):
    '''Turn a |cell|=header| line into a <tr> element.'''
    cells = _RE_CELL_SPLIT.split(line)[1:]
    # The trailing pipe is optional; drop the empty cell it produces.
    if cells and cells[-1].strip() == '':
        cells.pop()
    row = ['<tr>']
    for cell in cells:
        if cell.startswith('='):
            row.append('<th>' + cell[1:] + '</th>')
        else:
            row.append('<td>' + cell + '</td>')
    row.append('</tr>')
    return ''.join(row)


def _inline(line, styles):
    '''Apply the inline markup (links, images, breaks, styles) to line.

    styles maps each style tag to whether it is currently open; it is
    updated in place so a style may span several lines.
    '''
    stash = []

    def keep(match):
        stash.append(match.group(1))
        return '\x00%d\x00' % (len(stash) - 1)

    # Set {{{...}}} spans aside first so nothing below rewrites their content
    # (the image pattern would otherwise match the inner '{{...}}').
    line = _RE_NOWIKI.sub(keep, line)
    line = line.replace('\\\\', '<br />')
    line = _RE_LINK_TEXT.sub(_LINK_TEXT_HTML, line)
    line = _RE_LINK.sub(_LINK_HTML, line)
    line = _RE_IMAGE_ALT.sub(_IMAGE_ALT_HTML, line)
    line = _RE_IMAGE.sub(_IMAGE_HTML, line)

    # Protect every scheme (http, https, ftp, ...) from the italic marker.
    line = line.replace('://', _URL_GUARD)
    for marker, tag in _INLINE_STYLES:
        pieces = line.split(marker)
        rebuilt = [pieces[0]]
        for piece in pieces[1:]:
            rebuilt.append(('</' if styles[tag] else '<') + tag + '>')
            styles[tag] = not styles[tag]
            rebuilt.append(piece)
        line = ''.join(rebuilt)
    line = line.replace(_URL_GUARD, '://')

    return _RE_STASHED.sub(
        lambda m: '<pre>' + stash[int(m.group(1))] + '</pre>', line)


def tocmaker(contents):
    '''Creates a toc from a list of ids

    Each id becomes a link to the matching '#id' anchor followed by a
    line break.  An empty list gives an empty string.
    '''
    return ''.join(
        "<a href='#" + _escape_attr(header) + "'>" + header + "</a><br />"
        for header in contents)


def parse(text, toc=False):
    r'''Parses text and returns html according to (almost) wikicreole

    //italic//            -> <i>italic</i>
    **bold**              -> <b>bold</b>
    __underline__         -> <u>underline</u>
    [[link]]              -> <a href="link">link</a>
    [[link|linktext]]     -> <a href="link">linktext</a>
    {{image}}             -> <img src="image" />
    {{image|imagetext}}   -> <img src="image" alt="imagetext" />
    {{{text}}}            -> <pre>text</pre>, text is left untouched
    *list, **list2        -> <ul> (nested one level per extra *)
    #numbered, ##numbered -> <ol> (nested one level per extra #)
    =Heading ... =====    -> <h1> ... <h5>, with the text as the id
    |cell|=header|        -> table row with <td> / <th> cells
    \\                    -> <br />
    ----                  -> <hr />
    %comment              -> dropped
    (blank line)          -> closes open styles, lists and tables

    A line starting with {{{ (optionally followed by a format name) opens
    a <pre> block that runs until a line starting with }}}.  The format
    name, when present, becomes the class of the <pre>.

    A line starting with ** is only a list item when a * list is already
    open; otherwise it is read as bold text.

    text -- the wiki source.
    toc  -- when true, the output of tocmaker() for the headings found is
            placed in front of the html.

    Returns the html as a string.  Blocks and styles left open at the end
    of the text are closed.
    '''
    out = []
    contents = []
    in_pre = False
    state = dict.fromkeys(_BLOCK_TAGS, 0)
    styles = dict((tag, False) for _marker, tag in _INLINE_STYLES)

    for raw in text.split('\n'):
        if in_pre:
            if raw.startswith('}}}'):
                out.append('</pre>')
                in_pre = False
            else:
                out.append(raw + '\n')
            continue

        line = raw.lstrip()

        if line == '':
            _close_styles(styles, out)
            _set_blocks(state, out)
            out.append('<br /><br />')
            continue

        if line.startswith('%'):
            # comments start with %, so just ignore it
            continue

        if line.startswith('----'):
            _set_blocks(state, out)
            out.append('<hr />')
            continue

        # '{{{x}}}' on one line is an inline span, not the start of a block.
        if line.startswith('{{{') and '}}}' not in line:
            _set_blocks(state, out)
            # if you start a line with {{{format, the pre gets its class
            # set to that format
            fmt_class = line[3:].strip()
            if fmt_class:
                out.append("<pre class='" + _escape_attr(fmt_class) + "'>")
            else:
                out.append('<pre>')
            in_pre = True
            continue

        heading = _leading(line, '=')
        ordered = _leading(line, '#')
        unordered = _leading(line, '*')
        if unordered >= 2 and not state['ul']:
            # Outside of a list a leading '**' is bold, not a nested item.
            unordered = 0

        if heading:
            _set_blocks(state, out)
            tag = 'h' + str(min(heading, _MAX_HEADING))
            title = line.strip('=').strip()
            out.append('<' + tag + " id='" + _escape_attr(title) + "'>")
            contents.append(title)
            line = title + '</' + tag + '>'
        elif ordered:
            _set_blocks(state, out, ol=ordered)
            line = '<li>' + line[ordered:].strip() + '</li>'
        elif unordered:
            _set_blocks(state, out, ul=unordered)
            line = '<li>' + line[unordered:].strip() + '</li>'
        elif line.startswith('|'):
            _set_blocks(state, out, table=True)
            line = _table_row(line)
        else:
            _set_blocks(state, out)

        out.append(_inline(line, styles) + '\n')

    # Do not leave anything open at the end of the text.
    if in_pre:
        out.append('</pre>')
    _close_styles(styles, out)
    _set_blocks(state, out)

    html = ''.join(out)
    if toc:
        html = tocmaker(contents) + html
    return html


if __name__ == '__main__':
    print(parse("[[test]]"))