# HG changeset patch
# User Paul Boddie
# Date 1387578344 -3600
# Node ID 8ccb413b7cf73eddee78c2b360f24e2fdba2d336
# Parent  6b6b9a5ed8bc165ced228a96f28bf249cd1099f0
Added a shell-like string tokeniser.

diff -r 6b6b9a5ed8bc -r 8ccb413b7cf7 README.txt
--- a/README.txt	Mon Nov 11 13:42:55 2013 +0100
+++ b/README.txt	Fri Dec 20 23:25:44 2013 +0100
@@ -69,6 +69,8 @@
   * Moved ItemStore and related functionality into ItemSupport.
   * Added support for subpage-based item stores.
   * Added groupHasMember from ApproveChanges.
+  * Added the TokenSupport module to try and have a reliable shell-like
+    tokeniser.
 
 New in MoinSupport 0.4.1 (Changes since MoinSupport 0.4)
 --------------------------------------------------------
diff -r 6b6b9a5ed8bc -r 8ccb413b7cf7 TokenSupport.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TokenSupport.py	Fri Dec 20 23:25:44 2013 +0100
@@ -0,0 +1,109 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - TokenSupport library
+
+    @copyright: 2013 by Paul Boddie
+    @license: GNU GPL (v2 or later), see COPYING.txt for details.
+"""
+
+import re
+
+identifier_expr = re.compile(
+    """(?P<non_literal>[^'" ]+)"""
+    "|"
+    "(?P<spaces> +)"
+    "|"
+    "(?P<literal1>'[^']*')"
+    "|"
+    '(?P<literal2>"[^"]*")'
+    )
+
+def getIdentifiers(s, doubling=False):
+
+    """
+    Return 's' containing space-separated quoted identifiers, parsed into
+    regions that hold the individual identifiers. The optional 'doubling'
+    argument can be used to support convenient quote doubling to reproduce
+    single quote characters.
+
+    Quoting of identifiers can be done using the single-quote and double-quote
+    characters in order to include spaces within identifiers. For example:
+
+      'contains space'
+        -> contains space (a single identifier)
+
+    Where one kind of quote (or apostrophe) is to be included in an identifier,
+    the other quoting character can be used to delimit the identifier. For
+    example:
+
+      "Python's syntax"
+        -> Python's syntax (a single identifier)
+
+    Where the 'doubling' argument is set to a true value, a quote character can
+    be doubled to include it in an identifier. For example:
+
+      Python''s syntax
+        -> Python's syntax (a single identifier)
+
+    Where a mixture of quotes is required in a single identifier, adjacent
+    quoted regions can be used. For example:
+
+      "Python's "'"intuitive" syntax'
+        -> "Python's " (region #1)
+         + '"intuitive" syntax' (region #2)
+        -> Python's "intuitive" syntax (a single identifier)
+
+    Where unquoted regions are adjacent to quoted regions, the regions are
+    combined. For example:
+
+      "Python's "intuitive" syntax"
+        -> "Python's " (region #1)
+         + intuitive (region #2)
+         + " syntax" (region #3)
+        -> Python's intuitive syntax (a single identifier)
+    """
+
+    regions = []
+    in_literal = False
+
+    for match in identifier_expr.finditer(s):
+        non_literal, spaces, literal1, literal2 = match.groups()
+
+        identifier = None
+
+        # Spaces prevent continuation of identifier regions.
+
+        if spaces:
+            in_literal = False
+
+        # Unquoted regions contribute to the current identifier.
+
+        if non_literal and non_literal.strip():
+            identifier = non_literal.strip()
+
+        # Quoted regions also contribute to the current identifier.
+
+        for s in (literal1, literal2):
+            if s is not None:
+
+                # Either strip the quoting or for empty regions, adopt the
+                # quote character.
+
+                if not doubling or len(s) > 2:
+                    identifier = s[1:-1]
+                elif doubling:
+                    identifier = s[0]
+
+        # Either continue or add an identifier, and indicate possible
+        # continuation.
+
+        if identifier:
+            if in_literal:
+                regions[-1] += identifier
+            else:
+                regions.append(identifier)
+            in_literal = True
+
+    return regions
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 6b6b9a5ed8bc -r 8ccb413b7cf7 setup.py
--- a/setup.py	Mon Nov 11 13:42:55 2013 +0100
+++ b/setup.py	Fri Dec 20 23:25:44 2013 +0100
@@ -11,5 +11,6 @@
     version = "0.5",
     py_modules = ["ContentTypeSupport", "DateSupport", "GeneralSupport",
                   "ItemSupport", "LocationSupport", "MoinDateSupport",
-                  "MoinRemoteSupport", "MoinSupport", "ViewSupport"]
+                  "MoinRemoteSupport", "MoinSupport", "TokenSupport",
+                  "ViewSupport"]
     )
diff -r 6b6b9a5ed8bc -r 8ccb413b7cf7 tests/test_tokens.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_tokens.py	Fri Dec 20 23:25:44 2013 +0100
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+from TokenSupport import getIdentifiers
+
+tests = [
+    (1, False, """'contains space'""", ["contains space"]),
+    (2, False, """contains space""", ["contains", "space"]),
+    (1, False, '''"Python's syntax"''', ["Python's syntax"]),
+    (2, False, """Python''s syntax""", ["Pythons", "syntax"]),
+    (2, True, """Python''s syntax""", ["Python's", "syntax"]),
+    (1, False, '''"Python's "'"intuitive" syntax' ''', ['''Python's "intuitive" syntax''']),
+    (1, False, '''"Python's "intuitive" syntax" ''', ['''Python's intuitive syntax''']),
+    ]
+
+for n, doubling, s, e in tests:
+    l = getIdentifiers(s, doubling)
+    print l == e, l, "==", e, len(l) == n, len(l), "==", n, "<-", doubling, s
+
+# vim: tabstop=4 expandtab shiftwidth=4