1.1 --- a/actions/AddLinkToPage.py Sun Aug 15 18:31:26 2010 +0200
1.2 +++ b/actions/AddLinkToPage.py Mon Oct 04 01:03:07 2010 +0200
1.3 @@ -20,14 +20,58 @@
1.4
1.5 # Page parsing.
1.6
1.7 -macro_pattern = re.compile(ur'^(?P<leading>.*?)<<AddLinkToPage\((?P<identifier>[^\s,)]+).*?\)>>(?P<trailing>.*)$',
1.8 +macro_pattern = re.compile(
1.9 + ur'^(?P<leading>.*?)' # leading text on the line
1.10 + ur'<<AddLinkToPage\(' # macro prologue
1.11 + ur'(?P<identifier>[^\s,)]+).*?' # identifier (first argument), then any remaining arguments (not captured)
1.12 + ur'\)>>' # macro epilogue
1.13 + ur'(?P<trailing>.*)$', # trailing text on the line
1.14 re.MULTILINE | re.UNICODE) # MULTILINE: ^ and $ anchor at each line, so the match is per line
1.15
1.16 # Link visiting and parsing.
1.17
1.18 -title_pattern = re.compile(ur'<(?P<tag>title|h\d)(\s.*?)?>(?P<title>.*?)</(?P=tag)>', re.MULTILINE | re.UNICODE | re.DOTALL)
1.19 -paragraph_pattern = re.compile(ur'<p(\s.*?)?>(?P<text>.*?)(?=<p(\s.*?)?>|</p>)', re.MULTILINE | re.UNICODE | re.DOTALL)
1.20 -tag_pattern = re.compile(ur'<.*?>', re.MULTILINE | re.UNICODE | re.DOTALL)
1.21 +def attr_pattern(agroup, attrname, qgroup, vgroup): # return a regex fragment matching: attrname = 'value' or attrname = "value"
1.22 + return (
1.23 + ur'''(?P<%s>%s)''' # attribute name, captured as group agroup
1.24 + ur'''\s*=\s*''' # = with optional whitespace on either side
1.25 + ur'''(?P<%s>['"])''' # opening quote, captured as group qgroup
1.26 + ur'''(?P<%s>.*?)''' # value (non-greedy), captured as group vgroup
1.27 + ur'''(?P=%s)''' # closing quote: backreference, must equal the opening quote
1.28 + % (agroup, attrname, qgroup, vgroup, qgroup) # attrname is substituted as regex text without escaping
1.29 + )
1.30 +
1.31 +meta_pattern = re.compile(
1.32 + ur'<meta' # start of a meta tag
1.33 + ur'([^>]*?' # any text inside the tag before an attribute of interest
1.34 + ur'('
1.35 + ur'(' + attr_pattern("nattr", "name", "nquote", "name") + ')' # name=... ; value captured as group "name"
1.36 + ur'|(' + attr_pattern("cattr", "content", "cquote", "content") + ')' # content=... ; value captured as group "content"
1.37 + ur')'
1.38 + ur')*' # repeated, so name and content may appear in either order
1.39 + ur'[^>]*?>', # remainder of the tag up to the closing >
1.40 + re.MULTILINE | re.DOTALL) # DOTALL: the attribute value's .*? may span newlines
1.41 +
1.42 +title_pattern = re.compile(
1.43 + ur'<(?P<tag>title|h\d)(\s.*?)?>' # opening <title> or <hN> tag, with optional attributes
1.44 + ur'(?P<title>.*?)' # enclosed content (non-greedy), captured as group "title"
1.45 + ur'</(?P=tag)>', # closing tag with the same name as the opening tag
1.46 + re.MULTILINE | re.DOTALL) # DOTALL: content may span newlines
1.47 +
1.48 +paragraph_pattern = re.compile(
1.49 + ur'<p(\s.*?)?>' # opening <p> tag, with optional attributes
1.50 + ur'(?P<text>.*?)' # paragraph content (non-greedy), captured as group "text"
1.51 + ur'(?=<p(\s.*?)?>|</p>)', # lookahead: stop before the next <p ...> or </p> without consuming it
1.52 + re.MULTILINE | re.DOTALL) # DOTALL: content may span newlines
1.53 +
1.54 +tag_pattern = re.compile(
1.55 + ur'<.*?>', # any markup tag: shortest run from < to the next >
1.56 + re.MULTILINE | re.DOTALL) # DOTALL: a tag may span newlines
1.57 +
1.58 +def get_text(s): # return s as unicode: byte strings are decoded as UTF-8, falling back to ISO-8859-1
1.59 + if isinstance(s, unicode): return s # already decoded; unicode(u"...", encoding) would raise TypeError
1.60 + try: return unicode(s, "utf-8")
1.61 + except UnicodeError:
1.62 + return unicode(s, "iso-8859-1") # fallback for byte strings that are not valid UTF-8
1.63
1.64 def get_link_info(link):
1.65
1.66 @@ -42,6 +86,25 @@
1.67
1.68 try:
1.69 s = f.read()
1.70 +
1.71 + # Look for metadata.
1.72 +
1.73 + title = None
1.74 + intro = None
1.75 +
1.76 + for meta_match in meta_pattern.finditer(s):
1.77 + name = meta_match.group("name")
1.78 + content = meta_match.group("content")
1.79 + if name == "title":
1.80 + title = content
1.81 + elif name == "description":
1.82 + intro = content
1.83 +
1.84 + if title and intro:
1.85 + return get_text(title), get_text(intro)
1.86 +
1.87 + # Look for titles/headings and accompanying text.
1.88 +
1.89 first_title = ""
1.90
1.91 for title_match in title_pattern.finditer(s):
1.92 @@ -54,11 +117,11 @@
1.93 for intro_match in paragraph_pattern.finditer(s[end:]):
1.94 intro = get_flattened_content(intro_match.group("text")).strip()
1.95 if intro:
1.96 - return title, intro
1.97 + return get_text(title), get_text(intro)
1.98 finally:
1.99 f.close()
1.100
1.101 - return first_title, ""
1.102 + return get_text(first_title), u""
1.103
1.104 def get_flattened_content(s):
1.105
1.106 @@ -71,7 +134,7 @@
1.107 l.append(s[last:start])
1.108 last = end
1.109 l.append(s[last:])
1.110 - return "".join(l).replace("\n", " ")
1.111 + return get_text("".join(l).replace("\n", " "))
1.112
1.113 # Action class and supporting functions.
1.114
1.115 @@ -116,7 +179,7 @@
1.116 "page_url" : wikiutil.quoteWikinameURL(page.page_name)
1.117 }
1.118
1.119 - html = '''
1.120 + html = u'''
1.121 <form class="macro" method="POST" action="%(script_name)s/%(page_url)s">
1.122 <input type="hidden" name="identifier" value="%(identifier)s" />
1.123 <input type="hidden" name="doit" value="1" />
1.124 @@ -201,7 +264,7 @@
1.125 # NOTE: Should support different formatting options.
1.126
1.127 link_details = "%s[[%s%s]]%s" % (
1.128 - introduction and (get_verbatim('"%s" ' % introduction)) or "",
1.129 + introduction and ('"%s" ' % get_verbatim(introduction)) or "",
1.130 link,
1.131 title and ('|%s' % title) or "",
1.132 description and (" - ''%s''" % description) or ""
2.1 --- a/macros/AddLinkToPage.py Sun Aug 15 18:31:26 2010 +0200
2.2 +++ b/macros/AddLinkToPage.py Mon Oct 04 01:03:07 2010 +0200
2.3 @@ -37,7 +37,10 @@
2.4
2.5 # The macro's identifier should always appear first.
2.6
2.7 - identifier = parsed_args[0]
2.8 + try:
2.9 + identifier = parsed_args[0]
2.10 + except IndexError, exc:
2.11 + return macro.format_error(exc)
2.12
2.13 # Look for keywords determining the action of the macro.
2.14