Skip to content

Commit 8ba48fa

Browse files
committed
#69: add option to cache EAD HTML transformations to disk
1 parent 3b738dd commit 8ba48fa

File tree

6 files changed

+186
-4
lines changed

6 files changed

+186
-4
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ In `charlie`, we have three logical containers:
117117

118118
Do note that the code tries not to make any assumptions about the numbering of boxes or folders. Folders could either be numbered sequentially across boxes (in which case specifying a range of folders could make sense when specifying a range of boxes) or specific to a box. Additionally, pluralization of types is largely ignored.
119119

120+
### Q. What does the EAD caching feature do?
121+
122+
A. When enabled, EAD HTML caching performs the XSLT transform of the EAD to HTML and then caches the resulting HTML in a temporary file for later reuse. This file is written into Drupal's temporary directory, and its last modification time is used to determine whether the cache entry has expired. If Islandora Solr Search is enabled, Drupal's cron will pre-cache any EADs.
123+
120124
## Maintainers/Sponsors
121125
Current maintainers:
122126

includes/admin.form.inc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ function islandora_manuscript_admin_settings_form(array $form, array &$form_stat
5151
'#default_value' => variable_get('islandora_manuscript_metadata_display', FALSE),
5252
),
5353
),
54+
'islandora_manuscript_ead_caching' => array(
55+
'#type' => 'textfield',
56+
'#title' => t('Cache HTML rendering of EAD'),
57+
'#description' => t("The length of time to cache the HTML rendering of EAD, e.g. \"+90 minutes\"."),
58+
'#default_value' => variable_get('islandora_manuscript_ead_caching', ''),
59+
'#element_validate' => array('islandora_manuscript_validate_strtotime'),
60+
'#size' => 30,
61+
),
5462
);
5563

5664
// Solr field containing the parent book PID.
@@ -96,6 +104,20 @@ function islandora_manuscript_admin_settings_form(array $form, array &$form_stat
96104
return system_settings_form($form);
97105
}
98106

107+
/**
 * Element validator: checks that a value is a usable strtotime() lifetime.
 *
 * An empty value is always accepted. Any other value must be parseable by
 * strtotime() and must resolve to a moment after the current time, so that
 * strings such as "-1 day" or "now" (which could never yield a valid cache
 * entry) are rejected along with unparseable input.
 *
 * @param array $element
 *   The form element to check; its '#value' and '#title' keys are read.
 * @param array $form_state
 *   The Drupal form state. Not read by this validator.
 */
function islandora_manuscript_validate_strtotime($element, &$form_state) {
  // Nothing to validate when the field was left empty.
  if (empty($element['#value'])) {
    return;
  }

  // Resolve the value against a single captured "now" so that the comparison
  // cannot be skewed by the clock ticking between two time() calls.
  $now = time();
  $resolved = strtotime($element['#value'], $now);

  if ($resolved === FALSE || $resolved <= $now) {
    // '#title' is already translated where the form is built, so it is passed
    // through as-is rather than being fed to t() a second time.
    form_error($element, t('The "!name" option must contain a valid value. You may either leave the text field empty or enter a string like "+30 minutes", "+4 hours", or "+1 week 2 days 4 hours 2 seconds".', array('!name' => $element['#title'])));
  }
}
120+
99121
/**
100122
* Check if the required resources are enabled.
101123
*

includes/ead_html.inc

Lines changed: 79 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ function islandora_manuscript_preprocess_ead_display_variables(&$variables) {
5252
* This function populates:
5353
* - processor: The XSLTProcessor instance which was used.
5454
* - markup_doc: A DOMDocument containing the markup to output, after
55-
* this function has run.
55+
* this function has run, iff not cached.
5656
*/
5757
function islandora_manuscript_process_ead_display_variables(&$variables) {
5858
$variables['processor'] = $proc = new XSLTProcessor();
@@ -61,8 +61,19 @@ function islandora_manuscript_process_ead_display_variables(&$variables) {
6161
$proc->setParameter($namespace_uri, $parameters);
6262
}
6363
$proc->registerPhpFunctions($variables['xslt_functions']);
64-
$variables['markup_doc'] = $proc->transformToDoc($variables['doc']);
65-
$variables['rendered_ead_html'] = $variables['markup_doc']->saveXML($variables['markup_doc']->documentElement);
64+
$variables['rendered_ead_html'] = islandora_manuscript_getcache_eadhtml($variables['object']->id);
65+
if (!$variables['rendered_ead_html']) {
66+
// TODO: handle this failure more elegantly
67+
// Pending that, write a default template out to the cache so that when we die unexpectedly in $proc->transformToDoc($variables['doc'])
68+
// we don't pick up and retry this same object again immediately
69+
$failureContent = theme('islandora_manuscript_ead_display_failure', $variables);
70+
islandora_manuscript_setcache_eadhtml($variables['object']->id, $failureContent);
71+
$variables['markup_doc'] = $proc->transformToDoc($variables['doc']);
72+
$variables['rendered_ead_html'] = $variables['markup_doc']->saveXML($variables['markup_doc']->documentElement);
73+
islandora_manuscript_setcache_eadhtml($variables['object']->id, $variables['rendered_ead_html']);
74+
} else {
75+
$variables['markup_doc'] = null;
76+
}
6677
}
6778

6879

@@ -287,3 +298,68 @@ function islandora_manuscript_build_flat_subfile_query(array $containers) {
287298
// Merge down to single array.
288299
return empty($parts) ? array() : call_user_func_array('array_merge', $parts);
289300
}
301+
302+
/**
 * Build the directory path under which cached EAD HTML files are stored.
 *
 * @return string
 *   The value of file_directory_temp() followed by the
 *   "islandora_manuscript_eadhtml" sub-directory, including a trailing
 *   directory separator.
 */
function islandora_manuscript_cache_eadhtml_path() {
  // The empty last segment produces the trailing separator.
  $segments = array(file_directory_temp(), 'islandora_manuscript_eadhtml', '');
  return implode(DIRECTORY_SEPARATOR, $segments);
}
309+
310+
/**
 * Give the file name suffix shared by all cached EAD HTML files.
 *
 * @return string
 *   The suffix, including its leading dot.
 */
function islandora_manuscript_cache_eadhtml_suffix() {
  $extension = 'cache';
  return '.' . $extension;
}
317+
318+
319+
/**
 * Get a cached EAD to HTML rendering, indexed by object id.
 *
 * The cache entry is a file named after the MD5 hash of the object id. Its
 * age is judged from the file's modification time: the configured lifetime
 * (a strtotime() string such as "+90 minutes") is applied to that time and
 * the entry is served only while the result lies in the future.
 *
 * @param string $objectid
 *   The object's PID.
 *
 * @return string|bool
 *   The cached HTML, or FALSE when caching is disabled, no cache file exists,
 *   the entry has expired, or the file could not be read.
 */
function islandora_manuscript_getcache_eadhtml($objectid) {
  $lifetime = variable_get('islandora_manuscript_ead_caching', FALSE);
  if (!$lifetime) {
    return FALSE;
  }

  $cachefile = islandora_manuscript_cache_eadhtml_path() . md5($objectid) . islandora_manuscript_cache_eadhtml_suffix();
  if (!file_exists($cachefile)) {
    return FALSE;
  }

  $updated = filemtime($cachefile);
  if ($updated === FALSE) {
    return FALSE;
  }

  // Anchor the lifetime to the moment the file was written. An unparseable
  // lifetime makes strtotime() return FALSE, which is treated as expired.
  $expires = strtotime($lifetime, $updated);
  if ($expires === FALSE || $expires <= time()) {
    return FALSE;
  }

  // file_get_contents() itself yields FALSE on failure, which is exactly the
  // "nothing cached" value callers expect.
  return file_get_contents($cachefile);
}
343+
344+
/**
 * Cache a rendered EAD to HTML transform, indexed by object id.
 *
 * We are caching into the temp directory instead of using Drupal's native
 * cache function because the HTML will probably exceed size limits for the
 * native cache configuration.
 *
 * @param string $objectid
 *   The object's PID.
 * @param string $html
 *   The HTML result of the XSLT transform.
 *
 * @return bool
 *   TRUE when the cache file was written; FALSE when caching is disabled, the
 *   cache directory could not be created, or the write failed.
 */
function islandora_manuscript_setcache_eadhtml($objectid, $html) {
  if (!variable_get('islandora_manuscript_ead_caching', FALSE)) {
    return FALSE;
  }

  $cachepath = islandora_manuscript_cache_eadhtml_path();
  // mkdir() may fail simply because a concurrent request created the
  // directory first, hence the second is_dir() check before giving up.
  if (!is_dir($cachepath) && !mkdir($cachepath, 0777, TRUE) && !is_dir($cachepath)) {
    return FALSE;
  }

  $cachefile = $cachepath . md5($objectid) . islandora_manuscript_cache_eadhtml_suffix();
  // Compare against FALSE explicitly: a successful write of zero bytes
  // returns 0, which a boolean cast would misreport as a failure.
  return file_put_contents($cachefile, $html) !== FALSE;
}
365+

islandora_manuscript.module

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,14 @@ function islandora_manuscript_theme() {
205205
'file' => 'theme/theme.inc',
206206
'template' => 'theme/islandora-manuscript-ead-display',
207207
),
208+
'islandora_manuscript_ead_display_failure' => array(
209+
'variables' => array(
210+
'object' => NULL,
211+
),
212+
'file' => 'theme/theme.inc',
213+
'template' => 'theme/islandora-manuscript-ead-display-failure',
214+
),
215+
208216
);
209217
}
210218

@@ -644,3 +652,64 @@ EOQ;
644652
),
645653
);
646654
}
655+
656+
/**
 * Implements hook_cron().
 *
 * When EAD HTML caching is enabled this:
 * - deletes cache files whose lifetime, counted from their modification
 *   time, has elapsed;
 * - if the islandora_solr module is enabled, asks Solr for every
 *   islandora:findingAidCModel object with an EAD datastream and renders (and
 *   thereby caches) each one that has no valid cache entry.
 */
function islandora_manuscript_cron() {
  $lifetime = variable_get('islandora_manuscript_ead_caching', FALSE);
  if (!$lifetime) {
    return;
  }
  module_load_include('inc', 'islandora_manuscript', 'includes/ead_html');

  // Purge expired cache files. glob() returns FALSE on error, so only
  // iterate when it produced a non-empty list.
  $files = glob(islandora_manuscript_cache_eadhtml_path() . '*' . islandora_manuscript_cache_eadhtml_suffix());
  if ($files) {
    foreach ($files as $file) {
      $updated = filemtime($file);
      $expires = ($updated === FALSE) ? FALSE : strtotime($lifetime, $updated);
      if ($expires === FALSE || $expires < time()) {
        unlink($file);
      }
    }
  }

  // Pre-caching relies on Solr to enumerate the finding aids.
  if (!module_exists('islandora_solr')) {
    return;
  }
  module_load_include('inc', 'islandora_solr', 'includes/utilities');

  // Find any findingAidCModel objects with an EAD datastream.
  $qp = new IslandoraSolrQueryProcessor();
  $qp->buildQuery('fedora_datastreams_ms:EAD');
  $qp->solrParams['fq'][] = format_string('!field:("info:fedora/!pid" OR "!pid")', array(
    '!field' => islandora_solr_lesser_escape(variable_get('islandora_solr_content_model_field', 'RELS_EXT_hasModel_uri_ms')),
    '!pid' => 'islandora:findingAidCModel',
  ));
  $qp->solrParams['fl'] = implode(',', array(
    'PID',
  ));
  $qp->executeQuery();

  // Queue every object that has no valid cache file, walking through all
  // result pages.
  $toCache = array();
  $seen = 0;
  do {
    $response = isset($qp->islandoraSolrResult['response']) ? $qp->islandoraSolrResult['response'] : array();
    $numFound = isset($response['numFound']) ? (int) $response['numFound'] : 0;
    $objects = empty($response['objects']) ? array() : $response['objects'];

    foreach ($objects as $result) {
      $seen++;
      if (!islandora_manuscript_getcache_eadhtml($result['PID'])) {
        $toCache[$result['PID']] = $result['PID'];
      }
    }

    // Continue until every reported hit has been seen. An empty page ends
    // the walk even if numFound claims more, so the loop cannot spin forever.
    $more = !empty($objects) && $seen < $numFound;
    if ($more) {
      $qp->solrStart = $seen;
      $qp->solrLimit = $numFound - $seen;
      $qp->executeQuery();
    }
  } while ($more);
  unset($qp);

  if (!$toCache) {
    return;
  }

  // The default theme variables are the same for every object, so look them
  // up once rather than on each iteration.
  $themevars = islandora_manuscript_theme();
  $defaults = $themevars['islandora_manuscript_ead_display']['variables'];

  // Cache each queued object.
  foreach ($toCache as $pid) {
    $object = islandora_object_load($pid);
    if (!$object) {
      // The object could not be loaded; skip it rather than handing an
      // unusable value to the preprocess functions.
      continue;
    }
    $vars = $defaults;
    $vars['object'] = $object;
    islandora_manuscript_preprocess_ead_display_variables($vars);
    islandora_manuscript_process_ead_display_variables($vars);
    islandora_manuscript_setcache_eadhtml($pid, $vars['rendered_ead_html']);
    unset($vars);
  }
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?php
2+
/**
3+
* This file will be temporarily used as the cache of EAD to HTML transformation
4+
* and will be overwritten when the transform succeeds. The presence of this content
5+
* in the cache directory means that the transform failed with a PHP Fatal error,
6+
* such as memory limits, time limits, etc.
7+
*
8+
* If you want to render an error message to the user, do so here.
9+
*/
10+
?>
11+
<!-- out of memory error: <?php echo $object->id; ?> -->

theme/islandora-manuscript-ead-display.tpl.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* - $xslt_doc: A DOMDocument containing the parsed XSLT to run.
1515
* - $processor: The XSLTProcessor instance which was used.
1616
* - $markup_doc: A DOMDocument containing the markup to output, after
17-
* this function has run.
17+
* this function has run. If the cache was used, this will be null!
1818
* - $rendered_ead_html: The rendered HTML from the $markup_doc transform
1919
*/
2020
?>

0 commit comments

Comments
 (0)