1.1 --- a/moinformat/parsing.py Fri May 05 22:38:31 2017 +0200
1.2 +++ b/moinformat/parsing.py Fri May 12 00:51:20 2017 +0200
1.3 @@ -40,20 +40,14 @@
1.4 patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)
1.5 return patterns
1.6
1.7 -def combine_patterns(patterns, syntax):
1.8 +def get_subset(d, keys):
1.9
1.10 - "Combine 'patterns' with those defined by the given 'syntax' mapping."
1.11 -
1.12 - return combine_dicts([patterns, get_patterns(syntax)])
1.13 + "Return a subset of 'd' having the given 'keys'."
1.14
1.15 -def combine_dicts(dicts):
1.16 -
1.17 - "Combine the given 'dicts'."
1.18 -
1.19 - combined = {}
1.20 - for d in dicts:
1.21 - combined.update(d)
1.22 - return combined
1.23 + subset = {}
1.24 + for key in keys:
1.25 + subset[key] = d[key]
1.26 + return subset
1.27
1.28
1.29
1.30 @@ -63,9 +57,8 @@
1.31
1.32 "A stream of tokens taken from a string."
1.33
1.34 - def __init__(self, s, patterns, pos=0):
1.35 + def __init__(self, s, pos=0):
1.36 self.s = s
1.37 - self.patterns = patterns
1.38 self.pos = pos
1.39 self.match = None
1.40 self.matching = None
1.41 @@ -76,12 +69,12 @@
1.42
1.43 self.pos -= min(length, self.pos)
1.44
1.45 - def read_until(self, pattern_names, remaining=True):
1.46 + def read_until(self, patterns, remaining=True):
1.47
1.48 """
1.49 - Find the first match for the given 'pattern_names'. Return the text
1.50 - preceding any match, the remaining text if no match was found, or None
1.51 - if no match was found and 'remaining' is given as a false value.
1.52 + Find the first match for the given 'patterns'. Return the text preceding
1.53 + any match, the remaining text if no match was found, or None if no match
1.54 + was found and 'remaining' is given as a false value.
1.55 """
1.56
1.57 first = None
1.58 @@ -89,8 +82,8 @@
1.59
1.60 # Find the first matching pattern.
1.61
1.62 - for pattern_name in pattern_names:
1.63 - match = self.patterns[pattern_name].search(self.s, self.pos)
1.64 + for pattern_name, pattern in patterns.items():
1.65 + match = pattern.search(self.s, self.pos)
1.66 if match:
1.67 start, end = match.span()
1.68 if self.matching is None or start < first:
1.69 @@ -143,6 +136,8 @@
1.70
1.71 "Common parsing methods."
1.72
1.73 + region_pattern_names = None
1.74 +
1.75 def __init__(self, formats=None):
1.76
1.77 """
1.78 @@ -151,26 +146,66 @@
1.79 """
1.80
1.81 self.formats = formats
1.82 - self.replaced_items = None
1.83 +
1.84 + def get_parser(self, format_type):
1.85 +
1.86 + """
1.87 + Return a parser for 'format_type' or None if no suitable parser is found.
1.88 + """
1.89 +
1.90 + if not self.formats:
1.91 + return None
1.92 +
1.93 + cls = self.formats.get(format_type)
1.94 + if cls:
1.95 + return cls(self.formats)
1.96 + else:
1.97 + return None
1.98 +
1.99 + def get_patterns(self, pattern_names):
1.100 +
1.101 + "Return a mapping of the given 'pattern_names' to patterns."
1.102 +
1.103 + return get_subset(self.patterns, pattern_names)
1.104
1.105 def get_items(self, s, pos=0):
1.106
1.107 "Return a sequence of token items for 's' and 'pos'."
1.108
1.109 - raise NotImplementedError
1.110 + return TokenStream(s, pos)
1.111 +
1.112 + def set_region(self, items, region):
1.113 +
1.114 + "Set the 'items' used to populate the given 'region'."
1.115
1.116 - def replace_items(self, items):
1.117 + self.items = items
1.118 + self.region = region
1.119 +
1.120 + def read_until(self, pattern_names, remaining=True):
1.121
1.122 - "Replace the given 'items' with a sequence employing the same state."
1.123 + """
1.124 + Read the next portion of input, matching using 'pattern_names'. Return
1.125 + the text preceding any match, the remaining text if no match was found,
1.126 + or None if no match was found and 'remaining' is given as a false value.
1.127 + """
1.128
1.129 - self.replaced_items = items
1.130 - return self.get_items(items.s, items.pos)
1.131 + return self.items.read_until(self.get_patterns(pattern_names), remaining)
1.132 +
1.133 + def read_match(self, group=1):
1.134 +
1.135 + """
1.136 + Return the group of the matching pattern with the given 'group' number.
1.137 + """
1.138
1.139 - def update_items(self, items):
1.140 + return self.items.read_match(group)
1.141 +
1.142 + def read_matching(self):
1.143
1.144 - "Update the state of the replaced items with that of 'items'."
1.145 + "Return the name of the matching pattern."
1.146
1.147 - self.replaced_items.pos = items.pos
1.148 + return self.items.matching
1.149 +
1.150 + # Parser methods invoked from other objects.
1.151
1.152 def parse(self, s):
1.153
1.154 @@ -178,92 +213,104 @@
1.155 Parse page text 's'. Pages consist of regions delimited by markers.
1.156 """
1.157
1.158 - return self.parse_region(self.get_items(s))
1.159 + self.items = self.get_items(s)
1.160 + self.region = self.parse_region()
1.161 + return self.region
1.162 +
1.163 + def parse_region_content(self, items, region):
1.164 +
1.165 + "Parse the data provided by 'items' to populate a 'region'."
1.166 +
1.167 + self.set_region(items, region)
1.168
1.169 - def parse_region(self, items, level=0, indent=0):
1.170 + # Define a block to hold text and start parsing.
1.171 +
1.172 + new_block(region)
1.173 +
1.174 + if self.region_pattern_names:
1.175 + self.parse_region_details(region, self.region_pattern_names)
1.176 +
1.177 + # Top-level parser handler methods.
1.178 +
1.179 + def parse_region(self, level=0, indent=0):
1.180
1.181 """
1.182 - Parse the data provided by 'items' to populate a region with the given
1.183 - 'level' at the given 'indent'.
1.184 + Parse the data to populate a region with the given 'level' at the given
1.185 + 'indent'.
1.186 """
1.187
1.188 region = Region([], level, indent)
1.189
1.190 # Parse section headers, then parse according to region type.
1.191
1.192 - self.parse_region_header(items, region)
1.193 - self.parse_region_type(items, region)
1.194 + self.parse_region_header(region)
1.195 + self.parse_region_type(region)
1.196
1.197 return region
1.198
1.199 - def parse_region_type(self, items, region):
1.200 + def parse_region_type(self, region):
1.201
1.202 """
1.203 - Given data provided by 'items', use configured parsers to parse the
1.204 - 'region' based on its type.
1.205 + Use configured parsers to parse 'region' based on its type.
1.206 """
1.207
1.208 # Find an appropriate parser given the type.
1.209
1.210 - if self.formats.has_key(region.type):
1.211 - self.formats[region.type].parse_region_content(items, region)
1.212 + parser = self.get_parser(region.type)
1.213 +
1.214 + if parser:
1.215 + parser.parse_region_content(self.items, region)
1.216
1.217 # Otherwise, treat the section as opaque.
1.218
1.219 else:
1.220 - self.parse_region_opaque(items, region)
1.221 + self.parse_region_opaque(region)
1.222
1.223 - def parse_region_header(self, items, region):
1.224 + def parse_region_header(self, region):
1.225
1.226 """
1.227 - Parse the region header from the 'items', setting it for the given 'region'.
1.228 + Parse the region header, setting it on the 'region' object.
1.229 """
1.230
1.231 - if items.read_until(["header"], False) == "": # None means no header
1.232 - region.type = items.read_match()
1.233 + if self.read_until(["header"], False) == "": # None means no header
1.234 + region.type = self.read_match()
1.235
1.236 - def parse_region_opaque(self, items, region):
1.237 + def parse_region_opaque(self, region):
1.238
1.239 - "Parse the data provided by 'items' to populate an opaque 'region'."
1.240 + "Parse the data to populate an opaque 'region'."
1.241
1.242 region.transparent = False
1.243 - self.parse_region_details(items, region, ["regionend"])
1.244 -
1.245 - def parse_region_content(self, items, region):
1.246 -
1.247 - "Parse the data provided by 'items' to populate the given 'region'."
1.248 -
1.249 - pass
1.250 + self.parse_region_details(region, ["regionend"])
1.251
1.252 # Parsing utilities.
1.253
1.254 - def parse_region_details(self, items, region, pattern_names):
1.255 + def parse_region_details(self, region, pattern_names):
1.256
1.257 - "Parse 'items' within 'region' searching using 'pattern_names'."
1.258 + "Search 'region' using the 'pattern_names'."
1.259
1.260 try:
1.261 while True:
1.262
1.263 # Obtain text before any marker or the end of the input.
1.264
1.265 - preceding = items.read_until(pattern_names)
1.266 + preceding = self.read_until(pattern_names)
1.267 if preceding:
1.268 region.append_inline(Text(preceding))
1.269
1.270 # End of input.
1.271
1.272 - if not items.matching:
1.273 + if not self.read_matching():
1.274 break
1.275
1.276 # Obtain any feature.
1.277
1.278 - feature = items.read_match()
1.279 - handler = self.handlers.get(items.matching)
1.280 + feature = self.read_match()
1.281 + handler = self.handlers.get(self.read_matching())
1.282
1.283 # Handle each feature or add text to the region.
1.284
1.285 if handler:
1.286 - handler(self, items, region)
1.287 + handler(self, region)
1.288 else:
1.289 region.append_inline(Text(feature))
1.290
1.291 @@ -272,26 +319,10 @@
1.292
1.293 region.normalise()
1.294
1.295 - def end_region(self, items, region):
1.296 + def end_region(self, region):
1.297
1.298 "End the parsing of 'region', breaking out of the parsing loop."
1.299
1.300 raise StopIteration
1.301
1.302 -
1.303 -# Format mapping initialisation.
1.304 -
1.305 -def init_formats(formats):
1.306 -
1.307 - """
1.308 - Convert the given 'formats' mapping from a name-to-class mapping to a
1.309 - name-to-instance mapping with each parser instance employing the format
1.310 - mapping itself. Return the converted mapping.
1.311 - """
1.312 -
1.313 - d = {}
1.314 - for name, cls in formats.items():
1.315 - d[name] = cls(d)
1.316 - return d
1.317 -
1.318 # vim: tabstop=4 expandtab shiftwidth=4