1.1 --- a/parser.py Sun Apr 22 20:48:55 2012 +0200
1.2 +++ b/parser.py Sun Apr 22 21:06:35 2012 +0200
1.3 @@ -70,23 +70,34 @@
1.4 else:
1.5 return None, s
1.6
1.7 -# List extraction.
1.8 +# Heading, table and list extraction.
1.9
1.10 list_regexp_str = r"^(?P<listtype>[*#-])[*#-]*.*\n((?P=listtype).*(?:\n|$))*"
1.11 -list_regexp = re.compile(list_regexp_str, re.MULTILINE)
1.12 +table_regexp_str = r"^((?P<celltype>[|]{1,2})(.+?(?P=celltype))+(\n|$))+"
1.13 +blocktext_regexp_str = r"^(?P<type>h\d|bq)\.\s+(?P<text>.*)$"
1.14
1.15 -def get_lists(s):
1.16 +blockelement_regexp = re.compile(
1.17 + "(" + list_regexp_str + ")"
1.18 + "|"
1.19 + "(" + table_regexp_str + ")"
1.20 + "|"
1.21 + "(" + blocktext_regexp_str + ")",
1.22 + re.MULTILINE
1.23 + )
1.24 +
1.25 +def get_block_elements(s):
1.26
1.27 """
1.28 - Extract lists from the given string 's'.
1.29 + Extract headings, tables and lists from the given string 's'.
1.30 """
1.31
1.32 last = 0
1.33 blocks = []
1.34 - for match in list_regexp.finditer(s):
1.35 + for match in blockelement_regexp.finditer(s):
1.36 start, end = match.span()
1.37 + matchtype = match.group("listtype") and "list" or match.group("celltype") and "table" or match.group("type")
1.38 blocks.append((None, s[last:start]))
1.39 - blocks.append(("list", s[start:end]))
1.40 + blocks.append((matchtype, match.group("text") or s[start:end]))
1.41 last = end
1.42 blocks.append((None, s[last:]))
1.43 return blocks
1.44 @@ -107,9 +118,6 @@
1.45
1.46 # Block inspection.
1.47
1.48 -blocktext_regexp_str = r"^(?P<type>h\d|bq)\.\s+(?P<text>.*)$"
1.49 -blocktext_regexp = re.compile(blocktext_regexp_str, re.MULTILINE)
1.50 -
1.51 def get_blocks(s):
1.52
1.53 """
1.54 @@ -119,9 +127,9 @@
1.55
1.56 blocks = []
1.57
1.58 - for blocktype, blocktext in get_lists(s):
1.59 + for blocktype, blocktext in get_block_elements(s):
1.60
1.61 - # Collect list blocks.
1.62 + # Collect heading, list and table blocks.
1.63
1.64 if blocktype is not None:
1.65 blocks.append((blocktype, blocktext))
1.66 @@ -130,32 +138,71 @@
1.67
1.68 else:
1.69 for block in get_basic_blocks(blocktext):
1.70 - last = 0
1.71 - for match in blocktext_regexp.finditer(block):
1.72 - start, end = match.span()
1.73 -
1.74 - # Add preceding non-block text.
1.75 -
1.76 - preceding = block[last:start]
1.77 - if preceding.strip():
1.78 - blocks.append((None, preceding))
1.79 -
1.80 - # Add the subblock.
1.81 -
1.82 - blocks.append((match.group("type"), match.group("text")))
1.83 - last = end
1.84 -
1.85 - # Add trailing non-block text.
1.86 -
1.87 - trailing = block[last:]
1.88 - if trailing.strip():
1.89 - blocks.append((None, trailing))
1.90 + blocks.append((None, block))
1.91
1.92 return blocks
1.93
1.94 -listitem_regexp_str = r"^([*#-])+\s*(.*)$"
1.95 +# List item inspection.
1.96 +
1.97 +listitem_regexp_str = r"^(?P<marker>[*#-])+\s*(?P<text>.*)$"
1.98 listitem_regexp = re.compile(listitem_regexp_str, re.MULTILINE)
1.99
1.100 +def get_list_items(text):
1.101 +
1.102 + "Return a list of (marker, text) tuples for the given list 'text'."
1.103 +
1.104 + items = []
1.105 +
1.106 + for match in listitem_regexp.finditer(text):
1.107 + items.append((match.group("marker"), match.group("text")))
1.108 +
1.109 + return items
1.110 +
1.111 +# Table row inspection.
1.112 +
1.113 +link_regexp_str = r"[[](?P<linktext>.*?)]"
1.114 +image_regexp_str = r"!(?P<imagetext>.*?)!"
1.115 +cellsep_regexp_str = r"(?P<celltype>[|]{1,2})"
1.116 +content_regexp = re.compile(
1.117 + "(" + link_regexp_str + ")"
1.118 + "|"
1.119 + "(" + image_regexp_str + ")"
1.120 + "|"
1.121 + "(" + cellsep_regexp_str + ")"
1.122 + )
1.123 +
1.124 +def get_table_rows(text):
1.125 +
1.126 + "Return a list of (cellsep, columns) tuples for the given table 'text'."
1.127 +
1.128 + rows = []
1.129 +
1.130 + for line in text.split("\n"):
1.131 + cellsep = None
1.132 + columns = [""]
1.133 + last = 0
1.134 + for match in content_regexp.finditer(line):
1.135 + start, end = match.span()
1.136 + columns[-1] += line[last:start]
1.137 +
1.138 + if match.group("celltype"):
1.139 + if cellsep is None:
1.140 + cellsep = match.group("celltype")
1.141 + columns.append("")
1.142 + else:
1.143 + columns[-1] += line[start:end]
1.144 +
1.145 + last = end
1.146 +
1.147 + columns[-1] += line[last:]
1.148 +
1.149 + if cellsep:
1.150 + rows.append((cellsep, columns[1:-1]))
1.151 +
1.152 + return rows
1.153 +
1.154 +# General parsing and translation.
1.155 +
1.156 blocktypes = {
1.157 "h1" : "= %s =",
1.158 "h2" : "== %s ==",
1.159 @@ -166,6 +213,40 @@
1.160 "bq" : "{{{%s}}}",
1.161 }
1.162
1.163 +markers = {
1.164 + "*" : "*",
1.165 + "#" : "1.",
1.166 + "-" : "*",
1.167 + }
1.168 +
1.169 +def translate_marker(marker):
1.170 +
1.171 + "Translate the given 'marker' to a suitable Moin representation."
1.172 +
1.173 + return " " * len(marker) + markers[marker[-1]]
1.174 +
1.175 +cellseps = {
1.176 + "|" : "||",
1.177 + "||" : "||",
1.178 + }
1.179 +
1.180 +cellextra = {
1.181 + "|" : "",
1.182 + "||" : "'''",
1.183 + }
1.184 +
1.185 +def translate_cellsep(cellsep):
1.186 +
1.187 + "Translate the given 'cellsep' to a suitable Moin representation."
1.188 +
1.189 + return cellseps[cellsep]
1.190 +
1.191 +def translate_cell(cellsep, text):
1.192 +
1.193 + "Using 'cellsep', translate the cell 'text'."
1.194 +
1.195 + return cellextra[cellsep] + text + cellextra[cellsep]
1.196 +
1.197 def parse(s, out):
1.198
1.199 "Parse the content in the string 's', writing a translation to 'out'."
1.200 @@ -176,16 +257,39 @@
1.201
1.202 if type is None:
1.203 for blocktype, blocktext in get_blocks(text):
1.204 +
1.205 + # Translate headings and blockquotes.
1.206 +
1.207 if blocktypes.has_key(blocktype):
1.208 print >>out, blocktypes[blocktype] % blocktext
1.209 +
1.210 + # Translate list items.
1.211 +
1.212 + elif blocktype == "list":
1.213 + for listmarker, listitem in get_list_items(blocktext):
1.214 + print >>out, "%s %s" % (translate_marker(listmarker), listitem)
1.215 +
1.216 + # Translate table items.
1.217 +
1.218 + elif blocktype == "table":
1.219 + for cellsep, columns in get_table_rows(blocktext):
1.220 + moinsep = translate_cellsep(cellsep)
1.221 + print >>out, moinsep + moinsep.join([translate_cell(cellsep, column) for column in columns]) + moinsep
1.222 +
1.223 + # Handle anonymous blocks.
1.224 +
1.225 else:
1.226 - print >>out, blocktext
1.227 + print >>out, blocktext.rstrip()
1.228 +
1.229 + print >>out
1.230
1.231 # Handle sections.
1.232
1.233 else:
1.234 - print >>out, "Region type:", type
1.235 - print >>out, text
1.236 + print >>out, "{{{",
1.237 + print >>out, text,
1.238 + print >>out, "}}}"
1.239 + print >>out
1.240
1.241 if __name__ == "__main__":
1.242 import sys