# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1383315696 -3600
# Node ID 04eb43f3cd34d0dfaa3512a210492ac5723cb408
# Parent  d785b3e054f85118d6d5897c7317736eec3c3410
Added wkhtmltopdf support; improved documentation of different tool options.

diff -r d785b3e054f8 -r 04eb43f3cd34 README.txt
--- a/README.txt	Fri Nov 01 01:45:00 2013 +0100
+++ b/README.txt	Fri Nov 01 15:21:36 2013 +0100
@@ -9,17 +9,33 @@
 
 The following configuration settings are present in the ExportPDF.py module:
 
-XSLT_PROCESSOR              Path to the xsltproc program
-FO_PROCESSOR                Path to the fop program
-DOCBOOK_STYLESHEET_BASE     Path to the directory containing DocBook resources
+PDF_EXPORT_MODE             Indicates the tool to use
+                            ("docbook", "wkhtmltopdf", "htmldoc")
+
+Depending on the above setting, the following settings may apply:
+
+For "docbook":
 
-Another setting that should not need modifying is the following:
+  XSLT_PROCESSOR            Path to the xsltproc program
+  FO_PROCESSOR              Path to the fop program
+  DOCBOOK_STYLESHEET_BASE   Path to the directory containing DocBook resources
 
-DOCBOOK_TO_FO_STYLESHEET    Relative path to the docbook.xsl stylesheet file
+  Another setting that should not need modifying is the following:
+
+  DOCBOOK_TO_FO_STYLESHEET  Relative path to the docbook.xsl stylesheet file
                             from the DOCBOOK_STYLESHEET_BASE path (putting
                             them together should reference the file with an
                             absolute path)
 
+For "wkhtmltopdf":
+
+  XVFB_WRAPPER              Path to the xvfb-run program
+  WKHTMLTOPDF_PROCESSOR     Path to the wkhtmltopdf program
+
+For "htmldoc":
+
+  HTMLDOC_PROCESSOR         Path to the htmldoc program
+
 Once configured, copy the ExportPDF.py module into your wiki's actions
 directory.
 
@@ -32,8 +48,34 @@
 Basic Usage
 -----------
 
-Select the ExportPDF action from the actions menu; a PDF document should be
-offered for download.
+Select the ExportPDF action from the actions menu. After you choose a
+paper/page size, a PDF document should be offered for download.
+
+Choosing a Processor
+--------------------
+
+The mode used in the action, indicated using the PDF_EXPORT_MODE setting,
+determines which processor or processing toolchain will be used to generate
+PDF documents. Different processors have different advantages and
+disadvantages and these are summarised below.
+
+The "docbook" mode relies on Apache FOP, which is a Java-based solution. This
+obviously demands a functioning Java runtime environment, and the process of
+setting up such an environment can be a chore. Moreover, the speed of the
+resulting solution is not necessarily impressive, although the output is
+better than that produced by the other processors.
+
+The "wkhtmltopdf" mode relies on a virtual X server and a WebKit-based tool,
+and the installation of such packages is likely to be much more convenient if
+they are available in your operating system distribution. The output suffers
+both from being generated from wiki page HTML and from problems with the
+wkhtmltopdf tool itself, such as clumsy pagination.
+
+The "htmldoc" mode relies only on a single program, but this program does not
+support UTF-8 content and also suffers from having to generate PDF output from
+wiki page HTML.
+
+In summary, the "docbook" mode is strongly recommended over the other modes.
 
 Recommended Software
 --------------------
@@ -50,7 +92,8 @@
 Dependencies
 ------------
 
-The ExportPDF action has the following basic dependencies:
+The ExportPDF action has the following basic dependencies when used in
+"docbook" mode:
 
 Packages                    Release Information
 --------                    -------------------
@@ -72,7 +115,29 @@
                             Source: http://www.oracle.com/technetwork/java/index.html
 
 The Java dependency is unfortunate and would ideally be avoided by using
-something other than Apache FOP.
+something other than Apache FOP to convert XSL-FO content to PDF.
+
+When used in the "wkhtmltopdf" mode, the following dependencies apply:
+
+Packages                    Release Information
+--------                    -------------------
+
+wkhtmltopdf                 Tested with 0.9.9
+                            Debian package: wkhtmltopdf
+                            Source: https://code.google.com/p/wkhtmltopdf/
+
+xvfb                        Tested with 1.12.4
+                            Debian package: xvfb
+                            Source: http://www.x.org/
+
+When used in the "htmldoc" mode, the following dependencies apply:
+
+Packages                    Release Information
+--------                    -------------------
+
+htmldoc                     Tested with 1.8.27
+                            Debian package: htmldoc
+                            Source: http://www.htmldoc.org/
 
 Contact, Copyright and Licence Information
 ------------------------------------------
diff -r d785b3e054f8 -r 04eb43f3cd34 actions/ExportPDF.py
--- a/actions/ExportPDF.py	Fri Nov 01 01:45:00 2013 +0100
+++ b/actions/ExportPDF.py	Fri Nov 01 15:21:36 2013 +0100
@@ -21,6 +21,7 @@
 # Choose one value for the export mode.
 
 PDF_EXPORT_MODE             = "docbook"
+#PDF_EXPORT_MODE             = "wkhtmltopdf"
 #PDF_EXPORT_MODE             = "htmldoc"
 
 # Settings for "docbook" mode.
@@ -33,6 +34,11 @@
 
 DOCBOOK_TO_FO_STYLESHEET    = "docbook-xsl/fo/docbook.xsl"
 
+# Settings for "wkhtmltopdf" mode.
+
+XVFB_WRAPPER                = "/usr/bin/xvfb-run"
+WKHTMLTOPDF_PROCESSOR       = "/usr/bin/wkhtmltopdf"
+
 # Settings for "htmldoc" mode.
 
 HTMLDOC_PROCESSOR           = "/usr/bin/htmldoc"
@@ -54,6 +60,12 @@
     "2A0" : "Double A0"
     }
 
+wkhtmltopdf_paper_sizes = [
+    "A4", "Letter"
+    ]
+
+wkhtmltopdf_paper_size_labels = {}
+
 # NOTE: From the htmldoc man page.
 
 htmldoc_paper_sizes = [
@@ -67,6 +79,18 @@
     "universal" : "US universal"
     }
 
+paper_sizes = {
+    "docbook" : docbook_paper_sizes,
+    "wkhtmltopdf" : wkhtmltopdf_paper_sizes,
+    "htmldoc" : htmldoc_paper_sizes
+    }
+
+paper_size_labels = {
+    "docbook" : docbook_paper_size_labels,
+    "wkhtmltopdf" : wkhtmltopdf_paper_size_labels,
+    "htmldoc" : htmldoc_paper_size_labels
+    }
+
 class ExportPDF(ActionBase, ActionSupport):
 
     "Export the current page as PDF."
@@ -74,16 +98,10 @@
     mode = PDF_EXPORT_MODE
 
     def _get_paper_sizes(self):
-        if self.mode == "docbook":
-            return docbook_paper_sizes
-        else:
-            return htmldoc_paper_sizes
+        return paper_sizes.get(self.mode)
 
     def _get_paper_size_labels(self):
-        if self.mode == "docbook":
-            return docbook_paper_size_labels
-        else:
-            return htmldoc_paper_size_labels
+        return paper_size_labels.get(self.mode)
 
     def get_form_html(self, buttons_html):
 
@@ -96,9 +114,9 @@
         paper_size = form.get("paper-size", ["A4"])[0]
 
         paper_size_options = []
-        paper_size_labels = self._get_paper_size_labels()
+        paper_size_labels = self._get_paper_size_labels() or {}
 
-        for size in self._get_paper_sizes():
+        for size in self._get_paper_sizes() or []:
             paper_size_options.append('<option value="%s" %s>%s</option>' % (
                 escattr(size), self._get_selected(size, paper_size),
                 escape(_(paper_size_labels.get(size) or size))
@@ -132,23 +150,25 @@
 
         paper_size = form.get("paper-size", [""])[0]
 
-        if not paper_size in self._get_paper_sizes():
+        if not paper_size in (self._get_paper_sizes() or []):
             return 0, _("A paper size must be chosen.")
 
         if self.mode == "docbook":
             return self._export_using_docbook(paper_size)
+        elif self.mode == "wkhtmltopdf":
+            return self._export_using_wkhtmltopdf(paper_size)
         elif self.mode == "htmldoc":
             return self._export_using_htmldoc(paper_size)
         else:
             return 0, _("The action must be configured to use a particular PDF generation tool.")
 
-    def _export_using_htmldoc(self, paper_size):
+    def _get_page_as_html(self):
+
+        "Get the page in HTML format."
 
         request = self.request
         page = self.page
 
-        # Get the page in HTML format.
-
         fmt = getFormatterClass(request, "text_html")(request)
         fmt.setPage(page)
 
@@ -168,41 +188,15 @@
 </html>
 """)
 
-        # Send the HTML to the htmldoc processor.
-
-        os.environ["HTMLDOC_NOCGI"] = "1"
-
-        p = subprocess.Popen([
-            HTMLDOC_PROCESSOR,
-            "-t", "pdf", "--quiet", "--webpage",
-            "--size", paper_size,
-            "-"
-            ],
-            shell=False,
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE)
+        return u"".join(page_as_html)
 
-        writer = codecs.getwriter("utf-8")(p.stdin)
-        writer.write(u"".join(page_as_html))
-
-        out, err = p.communicate()
-
-        retcode = p.wait()
+    def _get_page_as_docbook(self):
 
-        if retcode != 0:
-            return 0, err
-
-        self._write_pdf(out)
-        return 1, None
-
-    def _export_using_docbook(self, paper_size):
+        "Get the page in DocBook format."
 
         request = self.request
         page = self.page
 
-        # Get the page in DocBook format.
-
         fmt = getFormatterClass(request, "text_docbook")(request)
         fmt.setPage(page)
 
@@ -217,7 +211,75 @@
         append(fmt.endContent())
         append(fmt.endDocument())
 
-        # Send the DocBook XML to the XSLT processor.
+        return "".join(page_as_docbook)
+
+    def _write_pdf_for_html(self, p, page_as_html):
+
+        """
+        Write to the process 'p', the HTML for the page, reading the PDF output
+        from the process and writing it to the browser.
+        """
+
+        writer = codecs.getwriter("utf-8")(p.stdin)
+        writer.write(page_as_html)
+
+        out, err = p.communicate()
+
+        retcode = p.wait()
+
+        if retcode != 0:
+            return 0, err
+
+        self._write_pdf(out)
+        return 1, None
+
+    def _export_using_wkhtmltopdf(self, paper_size):
+
+        """
+        Send the page HTML to the processor, indicating the given 'paper_size'.
+        """
+
+        p = subprocess.Popen([
+            XVFB_WRAPPER, "--",
+            WKHTMLTOPDF_PROCESSOR,
+            "--page-size", paper_size,
+            "-",
+            "-"
+            ],
+            shell=False,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE)
+
+        return self._write_pdf_for_html(p, self._get_page_as_html())
+
+    def _export_using_htmldoc(self, paper_size):
+
+        """
+        Send the page HTML to the processor, indicating the given 'paper_size'.
+        """
+
+        os.environ["HTMLDOC_NOCGI"] = "1"
+
+        p = subprocess.Popen([
+            HTMLDOC_PROCESSOR,
+            "-t", "pdf", "--quiet", "--webpage",
+            "--size", paper_size,
+            "-"
+            ],
+            shell=False,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE)
+
+        return self._write_pdf_for_html(p, self._get_page_as_html())
+
+    def _export_using_docbook(self, paper_size):
+
+        """
+        Send the page DocBook XML to the processor, indicating the given
+        'paper_size'.
+        """
 
         p1 = subprocess.Popen([
             XSLT_PROCESSOR,
@@ -231,7 +293,7 @@
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
 
-        p1.stdin.write("".join(page_as_docbook))
+        p1.stdin.write(self._get_page_as_docbook())
         p1.stdin.close()
 
         # Pipe the XML-FO output to the FO processor.