# HG changeset patch # User Paul Boddie # Date 1383315696 -3600 # Node ID 04eb43f3cd34d0dfaa3512a210492ac5723cb408 # Parent d785b3e054f85118d6d5897c7317736eec3c3410 Added wkhtmltopdf support; improved documentation of different tool options. diff -r d785b3e054f8 -r 04eb43f3cd34 README.txt --- a/README.txt Fri Nov 01 01:45:00 2013 +0100 +++ b/README.txt Fri Nov 01 15:21:36 2013 +0100 @@ -9,17 +9,33 @@ The following configuration settings are present in the ExportPDF.py module: -XSLT_PROCESSOR Path to the xsltproc program -FO_PROCESSOR Path to the fop program -DOCBOOK_STYLESHEET_BASE Path to the directory containing DocBook resources +PDF_EXPORT_MODE Indicates the tool to use + ("docbook", "wkhtmltopdf", "htmldoc") + +Depending on the above setting, the following settings may apply: + +For "docbook": -Another setting that should not need modifying is the following: + XSLT_PROCESSOR Path to the xsltproc program + FO_PROCESSOR Path to the fop program + DOCBOOK_STYLESHEET_BASE Path to the directory containing DocBook resources -DOCBOOK_TO_FO_STYLESHEET Relative path to the docbook.xsl stylesheet file + Another setting that should not need modifying is the following: + + DOCBOOK_TO_FO_STYLESHEET Relative path to the docbook.xsl stylesheet file from the DOCBOOK_STYLESHEET_BASE path (putting them together should reference the file with an absolute path) +For "wkhtmltopdf": + + XVFB_WRAPPER Path to the xvfb-run program + WKHTMLTOPDF_PROCESSOR Path to the wkhtmltopdf program + +For "htmldoc": + + HTMLDOC_PROCESSOR Path to the htmldoc program + Once configured, copy the ExportPDF.py module into your wiki's actions directory. @@ -32,8 +48,34 @@ Basic Usage ----------- -Select the ExportPDF action from the actions menu; a PDF document should be -offered for download. +Select the ExportPDF action from the actions menu. After choosing a paper/page +size, a PDF document should be offered for download. 
+ +Choosing a Processor +-------------------- + +The mode used in the action, indicated using the PDF_EXPORT_MODE setting, +determines which processor or processing toolchain will be used to generate +PDF documents. Different processors have different advantages and +disadvantages and these are summarised below. + +The "docbook" mode relies on Apache FOP, which is a Java-based solution. This +obviously demands a functioning Java runtime environment, and the process of +setting up such an environment can be a chore. Moreover, the speed of the +resulting solution is not necessarily impressive, although the output is +better than that of the other processors. + +The "wkhtmltopdf" mode relies on a virtual X server and a WebKit-based tool, +and the installation of such packages is likely to be much more convenient if +they are available in your operating system distribution. The output suffers +from being generated from wiki page HTML and from problems with the wkhtmltopdf +tool itself, such as clumsy pagination. + +The "htmldoc" mode relies only on a single program, but this program does not +support UTF-8 content and also suffers from having to generate PDF output from +wiki page HTML. + +In summary, the "docbook" mode is by far the recommended solution. Recommended Software -------------------- @@ -50,7 +92,8 @@ Dependencies ------------ -The ExportPDF action has the following basic dependencies: +The ExportPDF action has the following basic dependencies when used in +"docbook" mode: Packages Release Information -------- ------------------- @@ -72,7 +115,29 @@ Source: http://www.oracle.com/technetwork/java/index.html The Java dependency is unfortunate and would ideally be avoided by using -something other than Apache FOP. +something other than Apache FOP to convert XSL-FO content to PDF. 
+ +When used in the "wkhtmltopdf" mode, the following dependencies apply: + +Packages Release Information +-------- ------------------- + +wkhtmltopdf Tested with 0.9.9 + Debian package: wkhtmltopdf + Source: https://code.google.com/p/wkhtmltopdf/ + +xvfb Tested with 1.12.4 + Debian package: xvfb + Source: http://www.x.org/ + +When used in the "htmldoc" mode, the following dependencies apply: + +Packages Release Information +-------- ------------------- + +htmldoc Tested with 1.8.27 + Debian package: htmldoc + Source: http://www.htmldoc.org/ Contact, Copyright and Licence Information ------------------------------------------ diff -r d785b3e054f8 -r 04eb43f3cd34 actions/ExportPDF.py --- a/actions/ExportPDF.py Fri Nov 01 01:45:00 2013 +0100 +++ b/actions/ExportPDF.py Fri Nov 01 15:21:36 2013 +0100 @@ -21,6 +21,7 @@ # Choose one value for the export mode. PDF_EXPORT_MODE = "docbook" +#PDF_EXPORT_MODE = "wkhtmltopdf" #PDF_EXPORT_MODE = "htmldoc" # Settings for "docbook" mode. @@ -33,6 +34,11 @@ DOCBOOK_TO_FO_STYLESHEET = "docbook-xsl/fo/docbook.xsl" +# Settings for "wkhtmltopdf" mode. + +XVFB_WRAPPER = "/usr/bin/xvfb-run" +WKHTMLTOPDF_PROCESSOR = "/usr/bin/wkhtmltopdf" + # Settings for "htmldoc" mode. HTMLDOC_PROCESSOR = "/usr/bin/htmldoc" @@ -54,6 +60,12 @@ "2A0" : "Double A0" } +wkhtmltopdf_paper_sizes = [ + "A4", "Letter" + ] + +wkhtmltopdf_paper_size_labels = {} + # NOTE: From the htmldoc man page. htmldoc_paper_sizes = [ @@ -67,6 +79,18 @@ "universal" : "US universal" } +paper_sizes = { + "docbook" : docbook_paper_sizes, + "wkhtmltopdf" : wkhtmltopdf_paper_sizes, + "htmldoc" : htmldoc_paper_sizes + } + +paper_size_labels = { + "docbook" : docbook_paper_size_labels, + "wkhtmltopdf" : wkhtmltopdf_paper_size_labels, + "htmldoc" : htmldoc_paper_size_labels + } + class ExportPDF(ActionBase, ActionSupport): "Export the current page as PDF." 
@@ -74,16 +98,10 @@ mode = PDF_EXPORT_MODE def _get_paper_sizes(self): - if self.mode == "docbook": - return docbook_paper_sizes - else: - return htmldoc_paper_sizes + return paper_sizes.get(self.mode) def _get_paper_size_labels(self): - if self.mode == "docbook": - return docbook_paper_size_labels - else: - return htmldoc_paper_size_labels + return paper_size_labels.get(self.mode) def get_form_html(self, buttons_html): @@ -96,9 +114,9 @@ paper_size = form.get("paper-size", ["A4"])[0] paper_size_options = [] - paper_size_labels = self._get_paper_size_labels() + paper_size_labels = self._get_paper_size_labels() or {} - for size in self._get_paper_sizes(): + for size in self._get_paper_sizes() or []: paper_size_options.append('' % ( escattr(size), self._get_selected(size, paper_size), escape(_(paper_size_labels.get(size) or size)) @@ -132,23 +150,25 @@ paper_size = form.get("paper-size", [""])[0] - if not paper_size in self._get_paper_sizes(): + if not paper_size in self._get_paper_sizes() or []: return 0, _("A paper size must be chosen.") if self.mode == "docbook": return self._export_using_docbook(paper_size) + elif self.mode == "wkhtmltopdf": + return self._export_using_wkhtmltopdf(paper_size) elif self.mode == "htmldoc": return self._export_using_htmldoc(paper_size) else: return 0, _("The action must be configured to use a particular PDF generation tool.") - def _export_using_htmldoc(self, paper_size): + def _get_page_as_html(self): + + "Get the page in HTML format." request = self.request page = self.page - # Get the page in HTML format. - fmt = getFormatterClass(request, "text_html")(request) fmt.setPage(page) @@ -168,41 +188,15 @@ """) - # Send the HTML to the htmldoc processor. 
- - os.environ["HTMLDOC_NOCGI"] = "1" - - p = subprocess.Popen([ - HTMLDOC_PROCESSOR, - "-t", "pdf", "--quiet", "--webpage", - "--size", paper_size, - "-" - ], - shell=False, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + return u"".join(page_as_html) - writer = codecs.getwriter("utf-8")(p.stdin) - writer.write(u"".join(page_as_html)) - - out, err = p.communicate() - - retcode = p.wait() + def _get_page_as_docbook(self): - if retcode != 0: - return 0, err - - self._write_pdf(out) - return 1, None - - def _export_using_docbook(self, paper_size): + "Get the page in DocBook format." request = self.request page = self.page - # Get the page in DocBook format. - fmt = getFormatterClass(request, "text_docbook")(request) fmt.setPage(page) @@ -217,7 +211,75 @@ append(fmt.endContent()) append(fmt.endDocument()) - # Send the DocBook XML to the XSLT processor. + return "".join(page_as_docbook) + + def _write_pdf_for_html(self, p, page_as_html): + + """ + Write to the process 'p', the HTML for the page, reading the PDF output + from the process and writing it to the browser. + """ + + writer = codecs.getwriter("utf-8")(p.stdin) + writer.write(page_as_html) + + out, err = p.communicate() + + retcode = p.wait() + + if retcode != 0: + return 0, err + + self._write_pdf(out) + return 1, None + + def _export_using_wkhtmltopdf(self, paper_size): + + """ + Send the page HTML to the processor, indicating the given 'paper_size'. + """ + + p = subprocess.Popen([ + XVFB_WRAPPER, "--", + WKHTMLTOPDF_PROCESSOR, + "--page-size", paper_size, + "-", + "-" + ], + shell=False, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + return self._write_pdf_for_html(p, self._get_page_as_html()) + + def _export_using_htmldoc(self, paper_size): + + """ + Send the page HTML to the processor, indicating the given 'paper_size'. 
+ """ + + os.environ["HTMLDOC_NOCGI"] = "1" + + p = subprocess.Popen([ + HTMLDOC_PROCESSOR, + "-t", "pdf", "--quiet", "--webpage", + "--size", paper_size, + "-" + ], + shell=False, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + return self._write_pdf_for_html(p, self._get_page_as_html()) + + def _export_using_docbook(self, paper_size): + + """ + Send the page DocBook XML to the processor, indicating the given + 'paper_size'. + """ p1 = subprocess.Popen([ XSLT_PROCESSOR, @@ -231,7 +293,7 @@ stdout=subprocess.PIPE, stderr=subprocess.PIPE) - p1.stdin.write("".join(page_as_docbook)) + p1.stdin.write(self._get_page_as_docbook()) p1.stdin.close() # Pipe the XML-FO output to the FO processor.