# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1387578344 -3600
# Node ID 8ccb413b7cf73eddee78c2b360f24e2fdba2d336
# Parent  6b6b9a5ed8bc165ced228a96f28bf249cd1099f0
Added a shell-like string tokeniser.

diff -r 6b6b9a5ed8bc -r 8ccb413b7cf7 README.txt
--- a/README.txt	Mon Nov 11 13:42:55 2013 +0100
+++ b/README.txt	Fri Dec 20 23:25:44 2013 +0100
@@ -69,6 +69,8 @@
   * Moved ItemStore and related functionality into ItemSupport.
   * Added support for subpage-based item stores.
   * Added groupHasMember from ApproveChanges.
+  * Added the TokenSupport module, which aims to provide a reliable
+    shell-like tokeniser.
 
 New in MoinSupport 0.4.1 (Changes since MoinSupport 0.4)
 --------------------------------------------------------
diff -r 6b6b9a5ed8bc -r 8ccb413b7cf7 TokenSupport.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TokenSupport.py	Fri Dec 20 23:25:44 2013 +0100
@@ -0,0 +1,109 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - TokenSupport library
+
+    @copyright: 2013 by Paul Boddie <paul@boddie.org.uk>
+    @license: GNU GPL (v2 or later), see COPYING.txt for details.
+"""
+
+import re
+
+identifier_expr = re.compile("|".join((
+    # Unquoted text: a run containing neither quote characters nor spaces.
+    """(?P<non_literal>[^'" ]+)""",
+    # Separator: one or more space characters.
+    "(?P<spaces> +)",
+    # Quoted regions: single-quoted first, then double-quoted.
+    "(?P<literal1>'[^']*')",
+    '(?P<literal2>"[^"]*")',
+    )))
+
+def getIdentifiers(s, doubling=False):
+
+    """
+    Split the string 's' into identifiers in the manner of a shell, returning
+    them as a list of strings in order of appearance. Only space characters
+    separate identifiers.
+
+    Single or double quotes may delimit a region so that it can contain
+    spaces, with the delimiting quotes being removed from the result:
+
+       'contains space'
+    -> contains space                   (one identifier)
+
+    A region delimited by one kind of quote may contain the other kind:
+
+       "Python's syntax"
+    -> Python's syntax                  (one identifier)
+
+    If 'doubling' is true, an empty quoted region (two adjacent quotes of
+    the same kind) is replaced by a single quote character of that kind:
+
+       Python''s syntax
+    -> Python's                         (first identifier)
+     + syntax                           (second identifier)
+
+    If 'doubling' is false, such an empty region contributes nothing, making
+    the above input yield Pythons and syntax instead.
+
+    Regions that follow each other without intervening spaces are joined,
+    permitting different kinds of quoting within one identifier:
+
+       "Python's "'"intuitive" syntax'
+    -> "Python's "                      (region #1)
+     + '"intuitive" syntax'             (region #2)
+    -> Python's "intuitive" syntax      (one identifier)
+
+    Unquoted regions are joined to adjacent quoted regions likewise:
+
+       "Python's "intuitive" syntax"
+    -> "Python's "                      (region #1)
+     + intuitive                        (region #2)
+     + " syntax"                        (region #3)
+    -> Python's intuitive syntax        (one identifier)
+    """
+
+    identifiers = []
+    joinable = False
+
+    for found in identifier_expr.finditer(s):
+        kind, text = found.lastgroup, found.group()
+
+        # A run of spaces ends whichever identifier was being built.
+
+        if kind == "spaces":
+            joinable = False
+            continue
+
+        # Work out what this region contributes to an identifier.
+
+        if kind == "non_literal":
+            piece = text.strip()
+        elif doubling and len(text) <= 2:
+
+            # An empty quoted region stands for its own quote character.
+
+            piece = text[0]
+        else:
+
+            # Any other quoted region contributes its unquoted content.
+
+            piece = text[1:-1]
+
+        # Empty contributions neither start nor extend an identifier.
+
+        if not piece:
+            continue
+
+        # Extend the identifier in progress or start a new one, and permit
+        # the next region to continue it.
+
+        if joinable:
+            identifiers[-1] += piece
+        else:
+            identifiers.append(piece)
+        joinable = True
+
+    return identifiers
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 6b6b9a5ed8bc -r 8ccb413b7cf7 setup.py
--- a/setup.py	Mon Nov 11 13:42:55 2013 +0100
+++ b/setup.py	Fri Dec 20 23:25:44 2013 +0100
@@ -11,5 +11,6 @@
     version      = "0.5",
     py_modules   = ["ContentTypeSupport", "DateSupport", "GeneralSupport",
                     "ItemSupport", "LocationSupport", "MoinDateSupport",
-                    "MoinRemoteSupport", "MoinSupport", "ViewSupport"]
+                    "MoinRemoteSupport", "MoinSupport", "TokenSupport",
+                    "ViewSupport"]
     )
diff -r 6b6b9a5ed8bc -r 8ccb413b7cf7 tests/test_tokens.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_tokens.py	Fri Dec 20 23:25:44 2013 +0100
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+from TokenSupport import getIdentifiers
+
+tests = [ # (identifier count, doubling, input, expected identifiers)
+    (1, False, """'contains space'""", ["contains space"]),
+    (2, False, """contains space""", ["contains", "space"]),
+    (1, False, '''"Python's syntax"''', ["Python's syntax"]),
+    (2, False, """Python''s syntax""", ["Pythons", "syntax"]),
+    (2, True, """Python''s syntax""", ["Python's", "syntax"]),
+    (1, False, '''"Python's "'"intuitive" syntax' ''', ['''Python's "intuitive" syntax''']),
+    (1, False, '''"Python's "intuitive" syntax" ''', ['''Python's intuitive syntax''']),
+    ]
+for count, doubling, text, expected in tests:
+    found = getIdentifiers(text, doubling)
+    print found == expected, found, "==", expected, \
+        len(found) == count, len(found), "==", count, "<-", doubling, text
+
+# vim: tabstop=4 expandtab shiftwidth=4