1.1 --- a/README.txt Fri Nov 01 01:45:00 2013 +0100
1.2 +++ b/README.txt Fri Nov 01 15:21:36 2013 +0100
1.3 @@ -9,17 +9,33 @@
1.4
1.5 The following configuration settings are present in the ExportPDF.py module:
1.6
1.7 -XSLT_PROCESSOR Path to the xsltproc program
1.8 -FO_PROCESSOR Path to the fop program
1.9 -DOCBOOK_STYLESHEET_BASE Path to the directory containing DocBook resources
1.10 +PDF_EXPORT_MODE Indicates the tool to use
1.11 + ("docbook", "wkhtmltopdf", "htmldoc")
1.12 +
1.13 +Depending on the above setting, the following settings may apply:
1.14 +
1.15 +For "docbook":
1.16
1.17 -Another setting that should not need modifying is the following:
1.18 + XSLT_PROCESSOR Path to the xsltproc program
1.19 + FO_PROCESSOR Path to the fop program
1.20 + DOCBOOK_STYLESHEET_BASE Path to the directory containing DocBook resources
1.21
1.22 -DOCBOOK_TO_FO_STYLESHEET Relative path to the docbook.xsl stylesheet file
1.23 + Another setting that should not need modifying is the following:
1.24 +
1.25 + DOCBOOK_TO_FO_STYLESHEET Relative path to the docbook.xsl stylesheet file
1.26 from the DOCBOOK_STYLESHEET_BASE path (putting
1.27 them together should reference the file with an
1.28 absolute path)
1.29
1.30 +For "wkhtmltopdf":
1.31 +
1.32 + XVFB_WRAPPER Path to the xvfb-run program
1.33 + WKHTMLTOPDF_PROCESSOR Path to the wkhtmltopdf program
1.34 +
1.35 +For "htmldoc":
1.36 +
1.37 + HTMLDOC_PROCESSOR Path to the htmldoc program
1.38 +
1.39 Once configured, copy the ExportPDF.py module into your wiki's actions
1.40 directory.
1.41
1.42 @@ -32,8 +48,34 @@
1.43 Basic Usage
1.44 -----------
1.45
1.46 -Select the ExportPDF action from the actions menu; a PDF document should be
1.47 -offered for download.
1.48 +Select the ExportPDF action from the actions menu. After choosing a paper/page
1.49 +size, a PDF document should be offered for download.
1.50 +
1.51 +Choosing a Processor
1.52 +--------------------
1.53 +
1.54 +The mode used in the action, indicated using the PDF_EXPORT_MODE setting,
1.55 +determines which processor or processing toolchain will be used to generate
1.56 +PDF documents. Different processors have different advantages and
1.57 +disadvantages and these are summarised below.
1.58 +
1.59 +The "docbook" mode relies on Apache FOP which is a Java-based solution. This
1.60 +obviously demands a functioning Java runtime environment, and the process of
1.61 +setting up such an environment can be a chore. Moreover, the speed of the
1.62 +resulting solution is not necessarily impressive, although the output is
1.63 +better than that of the other processors.
1.64 +
1.65 +The "wkhtmltopdf" mode relies on a virtual X server and a WebKit-based tool,
1.66 +and the installation of such packages is likely to be much more convenient if
1.67 +they are available in your operating system distribution. The output suffers
1.68 +from being generated from wiki page HTML and from problems with the
1.69 +wkhtmltopdf tool itself, such as clumsy pagination.
1.70 +
1.71 +The "htmldoc" mode relies only on a single program, but this program does not
1.72 +support UTF-8 content and also suffers from having to generate PDF output from
1.73 +wiki page HTML.
1.74 +
1.75 +In summary, the "docbook" mode is by far the recommended solution.
1.76
1.77 Recommended Software
1.78 --------------------
1.79 @@ -50,7 +92,8 @@
1.80 Dependencies
1.81 ------------
1.82
1.83 -The ExportPDF action has the following basic dependencies:
1.84 +The ExportPDF action has the following basic dependencies when used in
1.85 +"docbook" mode:
1.86
1.87 Packages Release Information
1.88 -------- -------------------
1.89 @@ -72,7 +115,29 @@
1.90 Source: http://www.oracle.com/technetwork/java/index.html
1.91
1.92 The Java dependency is unfortunate and would ideally be avoided by using
1.93 -something other than Apache FOP.
1.94 +something other than Apache FOP to convert XSL-FO content to PDF.
1.95 +
1.96 +When used in the "wkhtmltopdf" mode, the following dependencies apply:
1.97 +
1.98 +Packages Release Information
1.99 +-------- -------------------
1.100 +
1.101 +wkhtmltopdf Tested with 0.9.9
1.102 + Debian package: wkhtmltopdf
1.103 + Source: https://code.google.com/p/wkhtmltopdf/
1.104 +
1.105 +xvfb Tested with 1.12.4
1.106 + Debian package: xvfb
1.107 + Source: http://www.x.org/
1.108 +
1.109 +When used in the "htmldoc" mode, the following dependencies apply:
1.110 +
1.111 +Packages Release Information
1.112 +-------- -------------------
1.113 +
1.114 +htmldoc Tested with 1.8.27
1.115 + Debian package: htmldoc
1.116 + Source: http://www.htmldoc.org/
1.117
1.118 Contact, Copyright and Licence Information
1.119 ------------------------------------------
2.1 --- a/actions/ExportPDF.py Fri Nov 01 01:45:00 2013 +0100
2.2 +++ b/actions/ExportPDF.py Fri Nov 01 15:21:36 2013 +0100
2.3 @@ -21,6 +21,7 @@
2.4 # Choose one value for the export mode.
2.5
2.6 PDF_EXPORT_MODE = "docbook"
2.7 +#PDF_EXPORT_MODE = "wkhtmltopdf"
2.8 #PDF_EXPORT_MODE = "htmldoc"
2.9
2.10 # Settings for "docbook" mode.
2.11 @@ -33,6 +34,11 @@
2.12
2.13 DOCBOOK_TO_FO_STYLESHEET = "docbook-xsl/fo/docbook.xsl"
2.14
2.15 +# Settings for "wkhtmltopdf" mode.
2.16 +
2.17 +XVFB_WRAPPER = "/usr/bin/xvfb-run"
2.18 +WKHTMLTOPDF_PROCESSOR = "/usr/bin/wkhtmltopdf"
2.19 +
2.20 # Settings for "htmldoc" mode.
2.21
2.22 HTMLDOC_PROCESSOR = "/usr/bin/htmldoc"
2.23 @@ -54,6 +60,12 @@
2.24 "2A0" : "Double A0"
2.25 }
2.26
2.27 +wkhtmltopdf_paper_sizes = [
2.28 + "A4", "Letter"
2.29 + ]
2.30 +
2.31 +wkhtmltopdf_paper_size_labels = {}
2.32 +
2.33 # NOTE: From the htmldoc man page.
2.34
2.35 htmldoc_paper_sizes = [
2.36 @@ -67,6 +79,18 @@
2.37 "universal" : "US universal"
2.38 }
2.39
2.40 +paper_sizes = {
2.41 + "docbook" : docbook_paper_sizes,
2.42 + "wkhtmltopdf" : wkhtmltopdf_paper_sizes,
2.43 + "htmldoc" : htmldoc_paper_sizes
2.44 + }
2.45 +
2.46 +paper_size_labels = {
2.47 + "docbook" : docbook_paper_size_labels,
2.48 + "wkhtmltopdf" : wkhtmltopdf_paper_size_labels,
2.49 + "htmldoc" : htmldoc_paper_size_labels
2.50 + }
2.51 +
2.52 class ExportPDF(ActionBase, ActionSupport):
2.53
2.54 "Export the current page as PDF."
2.55 @@ -74,16 +98,10 @@
2.56 mode = PDF_EXPORT_MODE
2.57
2.58 def _get_paper_sizes(self):
2.59 - if self.mode == "docbook":
2.60 - return docbook_paper_sizes
2.61 - else:
2.62 - return htmldoc_paper_sizes
2.63 + return paper_sizes.get(self.mode)
2.64
2.65 def _get_paper_size_labels(self):
2.66 - if self.mode == "docbook":
2.67 - return docbook_paper_size_labels
2.68 - else:
2.69 - return htmldoc_paper_size_labels
2.70 + return paper_size_labels.get(self.mode)
2.71
2.72 def get_form_html(self, buttons_html):
2.73
2.74 @@ -96,9 +114,9 @@
2.75 paper_size = form.get("paper-size", ["A4"])[0]
2.76
2.77 paper_size_options = []
2.78 - paper_size_labels = self._get_paper_size_labels()
2.79 + paper_size_labels = self._get_paper_size_labels() or {}
2.80
2.81 - for size in self._get_paper_sizes():
2.82 + for size in self._get_paper_sizes() or []:
2.83 paper_size_options.append('<option value="%s" %s>%s</option>' % (
2.84 escattr(size), self._get_selected(size, paper_size),
2.85 escape(_(paper_size_labels.get(size) or size))
2.86 @@ -132,23 +150,25 @@
2.87
2.88 paper_size = form.get("paper-size", [""])[0]
2.89
2.90 - if not paper_size in self._get_paper_sizes():
2.91 + if not paper_size in self._get_paper_sizes() or []:
2.92 return 0, _("A paper size must be chosen.")
2.93
2.94 if self.mode == "docbook":
2.95 return self._export_using_docbook(paper_size)
2.96 + elif self.mode == "wkhtmltopdf":
2.97 + return self._export_using_wkhtmltopdf(paper_size)
2.98 elif self.mode == "htmldoc":
2.99 return self._export_using_htmldoc(paper_size)
2.100 else:
2.101 return 0, _("The action must be configured to use a particular PDF generation tool.")
2.102
2.103 - def _export_using_htmldoc(self, paper_size):
2.104 + def _get_page_as_html(self):
2.105 +
2.106 + "Get the page in HTML format."
2.107
2.108 request = self.request
2.109 page = self.page
2.110
2.111 - # Get the page in HTML format.
2.112 -
2.113 fmt = getFormatterClass(request, "text_html")(request)
2.114 fmt.setPage(page)
2.115
2.116 @@ -168,41 +188,15 @@
2.117 </html>
2.118 """)
2.119
2.120 - # Send the HTML to the htmldoc processor.
2.121 -
2.122 - os.environ["HTMLDOC_NOCGI"] = "1"
2.123 -
2.124 - p = subprocess.Popen([
2.125 - HTMLDOC_PROCESSOR,
2.126 - "-t", "pdf", "--quiet", "--webpage",
2.127 - "--size", paper_size,
2.128 - "-"
2.129 - ],
2.130 - shell=False,
2.131 - stdin=subprocess.PIPE,
2.132 - stdout=subprocess.PIPE,
2.133 - stderr=subprocess.PIPE)
2.134 + return u"".join(page_as_html)
2.135
2.136 - writer = codecs.getwriter("utf-8")(p.stdin)
2.137 - writer.write(u"".join(page_as_html))
2.138 -
2.139 - out, err = p.communicate()
2.140 -
2.141 - retcode = p.wait()
2.142 + def _get_page_as_docbook(self):
2.143
2.144 - if retcode != 0:
2.145 - return 0, err
2.146 -
2.147 - self._write_pdf(out)
2.148 - return 1, None
2.149 -
2.150 - def _export_using_docbook(self, paper_size):
2.151 + "Get the page in DocBook format."
2.152
2.153 request = self.request
2.154 page = self.page
2.155
2.156 - # Get the page in DocBook format.
2.157 -
2.158 fmt = getFormatterClass(request, "text_docbook")(request)
2.159 fmt.setPage(page)
2.160
2.161 @@ -217,7 +211,75 @@
2.162 append(fmt.endContent())
2.163 append(fmt.endDocument())
2.164
2.165 - # Send the DocBook XML to the XSLT processor.
2.166 + return "".join(page_as_docbook)
2.167 +
2.168 + def _write_pdf_for_html(self, p, page_as_html):
2.169 +
2.170 + """
2.171 + Write to the process 'p', the HTML for the page, reading the PDF output
2.172 + from the process and writing it to the browser.
2.173 + """
2.174 +
2.175 + writer = codecs.getwriter("utf-8")(p.stdin)
2.176 + writer.write(page_as_html)
2.177 +
2.178 + out, err = p.communicate()
2.179 +
2.180 + retcode = p.wait()
2.181 +
2.182 + if retcode != 0:
2.183 + return 0, err
2.184 +
2.185 + self._write_pdf(out)
2.186 + return 1, None
2.187 +
2.188 + def _export_using_wkhtmltopdf(self, paper_size):
2.189 +
2.190 + """
2.191 + Send the page HTML to the processor, indicating the given 'paper_size'.
2.192 + """
2.193 +
2.194 + p = subprocess.Popen([
2.195 + XVFB_WRAPPER, "--",
2.196 + WKHTMLTOPDF_PROCESSOR,
2.197 + "--page-size", paper_size,
2.198 + "-",
2.199 + "-"
2.200 + ],
2.201 + shell=False,
2.202 + stdin=subprocess.PIPE,
2.203 + stdout=subprocess.PIPE,
2.204 + stderr=subprocess.PIPE)
2.205 +
2.206 + return self._write_pdf_for_html(p, self._get_page_as_html())
2.207 +
2.208 + def _export_using_htmldoc(self, paper_size):
2.209 +
2.210 + """
2.211 + Send the page HTML to the processor, indicating the given 'paper_size'.
2.212 + """
2.213 +
2.214 + os.environ["HTMLDOC_NOCGI"] = "1"
2.215 +
2.216 + p = subprocess.Popen([
2.217 + HTMLDOC_PROCESSOR,
2.218 + "-t", "pdf", "--quiet", "--webpage",
2.219 + "--size", paper_size,
2.220 + "-"
2.221 + ],
2.222 + shell=False,
2.223 + stdin=subprocess.PIPE,
2.224 + stdout=subprocess.PIPE,
2.225 + stderr=subprocess.PIPE)
2.226 +
2.227 + return self._write_pdf_for_html(p, self._get_page_as_html())
2.228 +
2.229 + def _export_using_docbook(self, paper_size):
2.230 +
2.231 + """
2.232 + Send the page DocBook XML to the processor, indicating the given
2.233 + 'paper_size'.
2.234 + """
2.235
2.236 p1 = subprocess.Popen([
2.237 XSLT_PROCESSOR,
2.238 @@ -231,7 +293,7 @@
2.239 stdout=subprocess.PIPE,
2.240 stderr=subprocess.PIPE)
2.241
2.242 - p1.stdin.write("".join(page_as_docbook))
2.243 + p1.stdin.write(self._get_page_as_docbook())
2.244 p1.stdin.close()
2.245
2.246 # Pipe the XML-FO output to the FO processor.