Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDF generation #505

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pantheon-bundle/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@
com.redhat.pantheon.use
</Sling-Model-Packages>
<Import-Package>
javax.security.auth.x500,
javax.security.cert,
!jnr.a64asm,!jnr.x86asm,!org.apache.bsf,!org.apache.bsf.util,
!org.apache.tools.ant,!org.joda.convert,!org.objectweb.asm,!sun.misc,
!javax.ejb,
Expand Down Expand Up @@ -276,6 +278,11 @@
<artifactId>asciidoctorj</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.asciidoctor</groupId>
<artifactId>asciidoctorj-pdf</artifactId>
<version>1.5.4</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Locale;
Expand Down Expand Up @@ -134,6 +141,47 @@ public String getDocumentHtml(@Nonnull Document document,
return html;
}

/**
 * Returns the PDF rendition of a document, reusing the cached copy stored on the document version when possible.
 * <p>
 * The PDF is (re)built through {@link #buildDocumentPdf} when regeneration is forced, when the requested
 * version does not exist, or when no usable cached PDF content is stored on it.
 *
 * @param document The document for which the PDF is requested
 * @param locale The locale to look up
 * @param variantName The name of the variant to look up
 * @param draft True to use the draft version of the variant, false to use the released one
 * @param context Asciidoc attributes handed to the generation process when the PDF has to be built
 * @param forceRegen True to rebuild the PDF even if a cached copy is available
 * @return An {@link InputStream} with the PDF contents
 * @throws IOException If there is a problem generating the PDF file
 */
public InputStream getDocumentPdf(@Nonnull Document document,
        @Nonnull Locale locale,
        @Nonnull String variantName,
        boolean draft,
        Map<String, Object> context,
        boolean forceRegen) throws IOException {
    Child<? extends DocumentVariant> traversal = document.locale(locale)
            .toChild(DocumentLocale::variants)
            .toChild(variants -> variants.variant(variantName));

    Optional<? extends DocumentVersion> moduleVersion;
    if (draft) {
        moduleVersion =
                traversal.toChild(DocumentVariant::draft)
                        .asOptional();
    } else {
        moduleVersion =
                traversal.toChild(DocumentVariant::released)
                        .asOptional();
    }

    // TODO To keep things simple, regeneration will not happen automatically when the source of the module
    // has changed. This can be added later
    if (!forceRegen && moduleVersion.isPresent()) {
        // Walk the cached file step by step: any missing piece (file node, content node or binary data)
        // means the cache is unusable and the PDF must be rebuilt, rather than failing with an NPE.
        FileResource cachedFile = moduleVersion.get().cachedPdf().get();
        if (cachedFile != null) {
            FileResource.JcrContent cachedContent = cachedFile.jcrContent().get();
            if (cachedContent != null) {
                InputStream cachedPdf = cachedContent.jcrData().toFieldType(InputStream.class).get();
                if (cachedPdf != null) {
                    return cachedPdf;
                }
            }
        }
    }

    // Forced regeneration, missing version, or no usable cached content: generate and save the PDF
    return buildDocumentPdf(document, locale, variantName, draft, context, true);
}

/**
* Builds a context Map that is initially populated from request parameters which are prefixed with "ctx_".
*
Expand Down Expand Up @@ -343,6 +391,206 @@ private String buildDocument(@Nonnull Document base, @Nonnull Locale locale, @No
}
}

/**
 * Builds a document PDF. This means generating the pdf file for the document at one of its revisions,
 * storing it on the corresponding document version and committing the change.
 *
 * @param base The base document which is being generated.
 *             It is also handed to the include processor as the base for resolving included resources.
 * @param locale The locale to build
 * @param variantName The variant name to generate. If unknown, provide {@link ModuleVariant#DEFAULT_VARIANT_NAME}.
 * @param isDraft True if aiming to generate the draft version of the document. False, to generate the released version.
 * @param context Any asciidoc attributes necessary to inject into the generation process. May be {@code null},
 *                in which case no extra attributes are injected.
 * @param regenMetadata If true, metadata will be extracted from the content and repopulated into the JCR document.
 * @return An {@link InputStream} capable of producing the PDF contents.
 * @throws IOException If there is a problem generating the PDF file
 * @throws RuntimeException If the source content cannot be found, or the generated PDF cannot be persisted
 */
public InputStream buildDocumentPdf(@Nonnull Document base, @Nonnull Locale locale, @Nonnull String variantName, boolean isDraft,
        Map<String, Object> context, final boolean regenMetadata) throws IOException {

    Optional<HashableFileResource> sourceFile =
            Child.from(base)
                    .toChild(m -> m.locale(locale))
                    .toChild(DocumentLocale::source)
                    .toChild(sourceContent -> isDraft ? sourceContent.draft() : sourceContent.released())
                    .asOptional();

    if (!sourceFile.isPresent()) {
        throw new RuntimeException("Cannot find source content for module: " + base.getPath() + ", locale: " + locale
                + ",variant: " + variantName + ", draft: " + isDraft);
    }

    // Use a service-level resource resolver to build the module or assemblies as it will require write access to the resources
    try (ResourceResolver serviceResourceResolver = serviceResourceResolverProvider.getServiceResourceResolver()) {

        Class cls = base.getResourceType().equals(PantheonConstants.RESOURCE_TYPE_ASSEMBLY) ? Assembly.class : Module.class;
        Document serviceDocument = (Document) SlingModels.getModel(serviceResourceResolver, base.getPath(), cls);

        DocumentVariant documentVariant = serviceDocument.locale(locale).getOrCreate()
                .variants().getOrCreate()
                .variant(variantName).getOrCreate();

        DocumentVersion documentVersion;
        if (isDraft) {
            documentVersion = documentVariant.draft().getOrCreate();
        } else {
            documentVersion = documentVariant.released().getOrCreate();
        }

        // process product and version.
        Optional<ProductVersion> productVersion =
                documentVersion.metadata()
                        .toReference(DocumentMetadata::productVersion)
                        .asOptional();

        String productName = null;
        if (productVersion.isPresent()) {
            productName = productVersion.get().getProduct().name().get();
        }

        SimpleDateFormat dateFormat = new SimpleDateFormat("dd MMMMM yyyy");

        String entitiesPath = base.getWorkspace().entities().get().getPath();
        Optional<String> attributesFilePath =
                base.getWorkspace().moduleVariantDefinitions()
                        .toChild(vdf -> vdf.variant(variantName))
                        .toField(ModuleVariantDefinition::attributesFilePath)
                        .asOptional();

        // build the attributes (default + those coming from the provided context)
        AttributesBuilder atts = AttributesBuilder.attributes()
                // show the title in the generated document
                .attribute("showtitle")
                // expose pantheonproduct to the generation. Base the value from metadata.
                .attribute("pantheonproduct", productName)
                // expose pantheonversion to the generation. Base the value from metadata.
                .attribute("pantheonversion", productVersion.isPresent() ? productVersion.get().name().get() : "")
                // Shows custom rendering attribute to Haml
                .attribute("pantheonenv", System.getenv("PANTHEON_ENV") != null ? System.getenv("PANTHEON_ENV") : "dev")
                // Provide doctype for haml use
                .attribute("pantheondoctype", Assembly.class.equals(cls) ? "assembly" : "module")
                // we want to avoid the footer in the generated document
                .noFooter(true)
                // link the css instead of embedding it
                .linkCss(true)
                // only needed for PDF
                .allowUriRead(true)
                // only needed for PDF
                // TODO If a url prefix is given here, asciidoctor pdf is able to resolve images
                //  from this base url. So, giving it a hardcoded pantheon base url would allow this
                //  pantheon instance to serve images to itself while generating PDFs
                //.imagesDir("")
                // stylesheet reference
                .styleSheetName("/static/rhdocs.css");

        if (attributesFilePath.isPresent()) {
            // provide attribute file as argument to ASCIIDOCTOR for building doc.
            if (PathUtils.isAbsolute(attributesFilePath.get())) {
                // remove the starting slash
                attributesFilePath = attributesFilePath.map(p -> p.substring(1));
            }
            atts.attribute("attsFile", PathUtils.concat(entitiesPath, attributesFilePath.get()));
        }

        Calendar updatedDate = documentVersion.metadata().get().datePublished().get();
        if (updatedDate != null) {
            // expose pantheonupdateddate to the generation. Base the value from metadata.
            atts.attribute("pantheonupdateddate", dateFormat.format(updatedDate.getTime()));

            // This is for docs that were published before we changed the date logic, and therefore do not have
            // "first published" metadata.
            atts.attribute("pantheonpublisheddate", dateFormat.format(updatedDate.getTime()));
        }

        Calendar publishedDate = documentVersion.metadata().get().dateFirstPublished().get();
        if (publishedDate != null) {
            // expose pantheonpublisheddate to the generation. Base the value from metadata.
            atts.attribute("pantheonpublisheddate", dateFormat.format(publishedDate.getTime()));
        }

        // Add the context as attributes to the generation process.
        // The parameter is not declared @Nonnull, so tolerate a missing context.
        if (context != null) {
            context.forEach((name, value) -> atts.attribute(name, value));
        }

        // generate pdf
        // The PDF is rendered into a temporary file. Everything after its creation runs inside a
        // try/finally so the file is removed even when option building or the pool borrow fails.
        File outputFile = File.createTempFile("pantheon-pdf-", ".pdf");
        InputStream pdfStream;
        long start;
        try {
            OptionsBuilder ob = OptionsBuilder.options()
                    // we're generating a pdf
                    .backend("pdf")
                    // the output is written to the temporary file
                    .toFile(outputFile)
                    // allow for some extra flexibility
                    .safe(SafeMode.UNSAFE) // This probably needs to change
                    .inPlace(false)
                    // Generate the document header and footer
                    .headerFooter(true)
                    // use the provided attributes
                    .attributes(atts);
            globalConfig.getTemplateDirectory().ifPresent(ob::templateDir);

            start = System.currentTimeMillis();
            Asciidoctor asciidoctor = asciidoctorPool.borrowObject();
            try {
                TableOfContents tableOfContents = new TableOfContents();
                PantheonXrefProcessor xrefProcessor = new PantheonXrefProcessor(documentVariant, tableOfContents);
                // extensions needed to generate the document
                asciidoctor.javaExtensionRegistry().includeProcessor(
                        new SlingResourceIncludeProcessor(base, tableOfContents, xrefProcessor));

                asciidoctor.javaExtensionRegistry().inlineMacro(MACRO_INCLUDE,
                        new PantheonLeveloffsetProcessor(tableOfContents));

                asciidoctor.javaExtensionRegistry().inlineMacro(PantheonXrefProcessor.MACRO_PREFIX,
                        xrefProcessor);

                asciidoctor.javaExtensionRegistry().inlineMacro(PantheonXrefTargetProcessor.MACRO_PREFIX,
                        new PantheonXrefTargetProcessor());

                // asciidoctor.javaExtensionRegistry().postprocessor(
                //         new HtmlModulePostprocessor(base));

                // add specific extensions for metadata regeneration
                if (regenMetadata) {
                    asciidoctor.javaExtensionRegistry().treeprocessor(
                            new MetadataExtractorTreeProcessor(documentVersion.metadata().getOrCreate()));
                }

                StringBuilder content = new StringBuilder();
                if (attributesFilePath.isPresent() && !isNullOrEmpty(attributesFilePath.get())) {
                    // prepend an include of the attributes file so its attributes apply to the document
                    content.append("include::")
                            .append("{attsFile}")
                            .append("[]")
                            .append(System.lineSeparator());
                }
                String rawContent = sourceFile.get()
                        .jcrContent().get()
                        .jcrData().get();
                content.append(xrefProcessor.preprocess(rawContent));

                asciidoctor.convert(content.toString(), ob);
                // if (documentVersion instanceof AssemblyVersion) {
                //     ((AssemblyVersion) documentVersion).consumeTableOfContents(tableOfContents);
                // }

                // NOTE(review): the stream returned here is read back from the content stored through the
                // service resource resolver, which is closed when this method returns - confirm the stream
                // remains readable for callers after that point.
                pdfStream = cachePdfContent(documentVersion, outputFile);

                // ack_status
                // TODO: re-evaluate where ack_status node should be created
                documentVersion.ackStatus().getOrCreate();
            } finally {
                asciidoctorPool.returnObject(asciidoctor);
            }
        } finally {
            if (!outputFile.delete()) {
                log.warn("Could not delete temporary PDF file {}", outputFile.getAbsolutePath());
            }
        }
        log.info("Rendering finished in {} ms.", System.currentTimeMillis() - start);
        serviceResourceResolver.commit();

        return pdfStream;
    } catch (PersistenceException pex) {
        throw new RuntimeException(pex);
    }
}

/**
* Stores (cache) the generated html content into the provided module for later retrieval. This method assumes
* that the generated html is a result of the transformation of the Module's asciidoc content; but it will not
Expand All @@ -357,4 +605,25 @@ private void cacheContent(final DocumentVersion version, final String html) {
cachedHtmlFile.jcrData().set(html);
cachedHtmlFile.mimeType().set("text/html");
}

/**
 * Caches a generated PDF on the given document version so it can be served later without regenerating it.
 * The file is assumed to be the rendition of that version's asciidoc content; this is not verified here.
 * <p>
 * The file's bytes are written to the {@code cached_pdf} file content (created if missing) together with
 * the {@code application/pdf} mime type.
 *
 * @param version The specific document version on which to cache the pdf
 * @param pdf The pdf file that was generated
 * @return An {@link InputStream} with the contents of the cached PDF, read back from where it was stored.
 * @throws RuntimeException wrapping the {@link IOException} if the pdf file cannot be opened or closed
 */
private InputStream cachePdfContent(final DocumentVersion version, final File pdf) {
    final FileResource.JcrContent pdfContent =
            version.cachedPdf().getOrCreate()
                    .jcrContent().getOrCreate();

    try (FileInputStream pdfSource = new FileInputStream(pdf)) {
        // store the binary first, then tag it with its mime type
        pdfContent.jcrData().toFieldType(InputStream.class).set(pdfSource);
        pdfContent.mimeType().set("application/pdf");

        // hand back a stream over the stored data rather than over the local file
        final InputStream storedPdf = pdfContent.jcrData().toFieldType(InputStream.class).get();
        return storedPdf;
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ public interface DocumentVersion extends WorkspaceChild {
@Named("cached_html")
Child<FileResource> cachedHtml();

@Named("cached_pdf")
Child<FileResource> cachedPdf();

Child<? extends DocumentMetadata> metadata();

@Named("ack_status")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package com.redhat.pantheon.servlet.module;

import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;
import com.redhat.pantheon.asciidoctor.AsciidoctorService;
import com.redhat.pantheon.conf.GlobalConfig;
import com.redhat.pantheon.model.document.Document;
import com.redhat.pantheon.model.document.DocumentVariant;
import com.redhat.pantheon.model.module.Module;
import org.apache.sling.api.SlingHttpServletRequest;
import org.apache.sling.api.SlingHttpServletResponse;
import org.apache.sling.api.servlets.SlingSafeMethodsServlet;
import org.apache.sling.servlets.annotations.SlingServletResourceTypes;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;

import javax.servlet.Servlet;
import javax.servlet.ServletException;
import java.io.IOException;
import java.io.InputStream;

/**
 * Rendering servlet for PDFs.
 * <p>
 * Handles {@code GET} requests with the {@code pdf} extension on {@code pantheon/module} and
 * {@code pantheon/assembly} resources. Each request builds the draft PDF of the default variant in the
 * default locale through {@link AsciidoctorService#buildDocumentPdf} and streams it to the client.
 */
@Component(
        service = Servlet.class,
        property = {
                Constants.SERVICE_DESCRIPTION + "=Servlet which transforms asciidoc content into pdf",
                Constants.SERVICE_VENDOR + "=Red Hat Content Tooling team"
        })
@SlingServletResourceTypes(
        resourceTypes = {"pantheon/module", "pantheon/assembly"},
        methods = "GET",
        extensions = "pdf")
public class PdfRenderer extends SlingSafeMethodsServlet {

    @Reference
    AsciidoctorService asciidoctorService;

    /**
     * Generates the PDF for the requested document and writes it to the response.
     *
     * @param request The request whose resource is the document to render
     * @param response The response that receives the PDF bytes
     * @throws ServletException If the requested resource cannot be adapted to a {@link Document}
     * @throws IOException If the PDF cannot be generated or written to the response
     */
    @Override
    protected void doGet(SlingHttpServletRequest request, SlingHttpServletResponse response) throws ServletException, IOException {

        Document document = request.getResource().adaptTo(Document.class);
        if (document == null) {
            // adaptTo returns null when no adapter applies; fail with a diagnosable error instead of
            // letting a NullPointerException surface from inside the service.
            throw new ServletException("Resource " + request.getResource().getPath()
                    + " could not be adapted to a Document");
        }

        // try-with-resources ensures the PDF stream is released once it has been copied (or on failure)
        try (InputStream pdfFile =
                     asciidoctorService.buildDocumentPdf(document, GlobalConfig.DEFAULT_MODULE_LOCALE,
                             DocumentVariant.DEFAULT_VARIANT_NAME, true, Maps.newHashMap(), true)) {

            response.setStatus(SlingHttpServletResponse.SC_OK);
            response.setContentType("application/pdf");
            ByteStreams.copy(pdfFile, response.getOutputStream());
        }
    }
}
Loading