Skip to content

Commit 3b738dd

Browse files
committed
#78: Move EAD HTML generation to a dedicated include
1 parent c0fee29 commit 3b738dd

File tree

3 files changed

+299
-266
lines changed

3 files changed

+299
-266
lines changed

includes/ead_html.inc

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
<?php
2+
3+
/**
4+
* @file
5+
* EAD HTML transformation functions
6+
*/
7+
8+
// Namespace URI of the attribute used to tag EAD container elements; it is
// written by islandora_manuscript_tag_containers() and read back by
// islandora_manuscript_lookup_tag().
define('ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI', 'http://islandora.ca/manuscript-container');
9+
10+
/**
 * Prepares variables for islandora_manuscript_ead_display templates.
 *
 * Parses the EAD datastream and loads the EAD-to-HTML stylesheet, so that the
 * process phase has both documents available for the transformation.
 *
 * @param array $variables
 *   An associative array containing:
 *   - object: An AbstractObject containing an "EAD" datastream.
 *   - xslt_functions: An array of functions to allow the XSLT to run, as
 *     accepted by XSLTProcessor::registerPhpFunctions().
 *   - xslt_parameters: An associative array mapping namespace URIs to
 *     associative arrays of parameters proper.
 *   This function populates:
 *   - xslt_parameters: The entry for the empty namespace gains a
 *     "container_string" parameter, unless one was already supplied.
 *   - doc: A DOMDocument containing the parsed EAD datastream.
 *   - xslt_doc: A DOMDocument containing the parsed XSLT to run.
 */
function islandora_manuscript_preprocess_ead_display_variables(&$variables) {
  // Array union keeps the left-hand keys, so parameters which were already
  // supplied take precedence over the defaults.
  $supplied = array();
  if (isset($variables['xslt_parameters'][''])) {
    $supplied = $variables['xslt_parameters'][''];
  }
  $defaults = array(
    "container_string" => t('Containers'),
  );
  $variables['xslt_parameters'][''] = $supplied + $defaults;

  $ead = new DOMDocument();
  $variables['doc'] = $ead;
  $ead->loadXML($variables['object']['EAD']->content);

  // XXX: Need to tag containers in order to work-around a PHP bug. See
  // islandora_manuscript_lookup_tag() for more details on the bug.
  // This _could_ be wrapped in version checks, so we only tag when necessary.
  islandora_manuscript_tag_containers($ead);

  $stylesheet = new DOMDocument();
  $variables['xslt_doc'] = $stylesheet;
  $module_path = drupal_get_path('module', 'islandora_manuscript');
  $stylesheet->load($module_path . '/transforms/ead_to_html.xslt');
}
39+
40+
/**
 * Process variables for islandora_manuscript_ead_display templates.
 *
 * Runs the prepared XSLT against the prepared EAD document and serializes the
 * result for output.
 *
 * @param array $variables
 *   An associative array containing:
 *   - object: An AbstractObject containing an "EAD" datastream.
 *   - xslt_functions: An array of functions to allow the XSLT to run, as
 *     accepted by XSLTProcessor::registerPhpFunctions().
 *   - xslt_parameters: An associative array mapping namespace URIs to
 *     associative arrays of parameters proper.
 *   - doc: A DOMDocument containing the parsed EAD datastream.
 *   - xslt_doc: A DOMDocument containing the parsed XSLT to run.
 *   This function populates:
 *   - processor: The XSLTProcessor instance which was used.
 *   - markup_doc: A DOMDocument containing the markup to output, after
 *     this function has run; FALSE if the transformation failed.
 *   - rendered_ead_html: The serialized document element of markup_doc, or
 *     an empty string if the transformation failed.
 */
function islandora_manuscript_process_ead_display_variables(&$variables) {
  $variables['processor'] = $proc = new XSLTProcessor();
  $proc->importStylesheet($variables['xslt_doc']);
  foreach ($variables['xslt_parameters'] as $namespace_uri => $parameters) {
    $proc->setParameter($namespace_uri, $parameters);
  }
  $proc->registerPhpFunctions($variables['xslt_functions']);
  $variables['markup_doc'] = $proc->transformToDoc($variables['doc']);

  // XSLTProcessor::transformToDoc() yields FALSE when the transformation
  // fails; guard against calling saveXML() on a non-object, which would be a
  // fatal error, and render nothing instead.
  if ($variables['markup_doc'] instanceof DOMDocument) {
    $variables['rendered_ead_html'] = $variables['markup_doc']->saveXML($variables['markup_doc']->documentElement);
  }
  else {
    $variables['rendered_ead_html'] = '';
  }
}
67+
68+
69+
/**
 * Tag containers with a unique ID.
 *
 * Part of a work around for a PHP bug in which nodesets passed out of XSLTs
 * are copied.
 *
 * @param DOMDocument $doc
 *   A DOMDocument containing a parsed EAD document. Every "container" element
 *   in the EAD namespace receives a namespaced "id" attribute whose value
 *   ends in the element's position in document order, making it unique within
 *   the document.
 */
function islandora_manuscript_tag_containers(DOMDocument $doc) {
  $xpath = new DOMXPath($doc);
  $xpath->registerNamespace('ead', 'urn:isbn:1-931666-22-9');

  $containers = $xpath->query('//ead:container');
  for ($position = 0; $position < $containers->length; $position++) {
    $tag = 'islandora-manuscript-container-tag:' . $position;
    $containers->item($position)->setAttributeNS(ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI, 'container-tag:id', $tag);
  }
}
86+
87+
/**
 * Callback used in XSLT to build a query URL.
 *
 * @param DOMElement[] $container_array
 *   An array containing a single DOMElement (this is how XSLTProcessor
 *   provides it) representing a "container" inside of an EAD document.
 * @param DOMElement[] $all
 *   An array containing all container elements in the given EAD document.
 *
 * @return string|null
 *   A string containing a URL to Solr search results for the given container,
 *   or NULL when no Islandora object could be obtained from the menu system.
 */
function islandora_manuscript_build_parented_query_url(array $container_array, array $all) {
  $object = menu_get_object('islandora_object', 2);
  if (!$object) {
    return NULL;
  }

  $path = 'islandora/object/' . $object->id . '/manuscript/manuscripts';
  // XSLTProcessor hands the element over wrapped in an array.
  $container = $container_array[0];
  $options = array(
    'query' => array(
      'f' => islandora_manuscript_build_subfile_query($container, $all),
    ),
  );
  return url($path, $options);
}
111+
112+
/**
 * Build a query to filter to the given container.
 *
 * @param DOMElement $container
 *   A DOMElement representing the container. Its "type" attribute selects the
 *   Solr field and its trimmed text content is the value to match.
 *
 * @return string[]
 *   An array of strings representing Lucene queries... Likely to be used as
 *   filter queries. Empty when the container has no text content or when its
 *   type does not map to a Solr field.
 */
function islandora_manuscript_build_partial_query(DOMElement $container) {
  $subqueries = array();

  $field = islandora_manuscript_findingaid_get_solr_field($container->getAttribute('type'));
  if ($field === FALSE || $field === '') {
    // No Solr field is mapped for this type of container. Emitting a clause
    // anyway would produce ':"value"', which has no field name to filter on.
    return $subqueries;
  }

  $value = trim($container->nodeValue);
  if ($value != '') {
    $subqueries[] = format_string('!field:"!value"', array(
      '!field' => $field,
      '!value' => $value,
    ));
  }

  return $subqueries;
}
136+
137+
/**
 * Build a query to select all items in a given part of a file.
 *
 * The queries of any container referenced through the "parent" attribute are
 * placed before this container's own query; a clause for the owning
 * component's "id" is appended last when that component has one.
 *
 * @param DOMElement $container
 *   An EAD container element for which to build a (part of a) query.
 * @param DOMElement[] $all
 *   An array of all container elements in the EAD doc... 'Cause the "parent"
 *   attribute can reference any container element.
 *
 * @return string[]
 *   An array of Lucene-syntax Solr queries.
 */
function islandora_manuscript_build_subfile_query(DOMElement $container, array $all = array()) {
  $queries = islandora_manuscript_build_partial_query($container);

  if ($container->hasAttribute('parent')) {
    $parent_id = $container->getAttribute('parent');
    foreach ($all as $candidate) {
      if ($candidate->getAttribute('id') != $parent_id) {
        continue;
      }
      // Ancestors' queries go in front of what has been gathered so far.
      $ancestor_queries = islandora_manuscript_build_subfile_query($candidate, $all);
      $queries = array_merge($ancestor_queries, $queries);
    }
  }

  $component = islandora_manuscript_get_container_component($container);
  if (!$component || !$component->hasAttribute('id')) {
    return $queries;
  }

  $component_field = variable_get('islandora_manuscript_component_identifier_solr_field', 'dereffed_ead_component_id_ms');
  $queries[] = format_string('!field:"!value"', array(
    '!field' => $component_field,
    '!value' => $component->getAttribute('id'),
  ));

  return $queries;
}
170+
171+
/**
 * Get the component to which the given container belongs.
 *
 * The component is taken to be the container's grandparent element. If the
 * given element has no parent (as happens with the copies described in
 * islandora_manuscript_lookup_tag()), the concrete element is looked up first.
 *
 * @param DOMElement $container
 *   A container element.
 *
 * @return DOMElement|bool
 *   The parent component if we could find it; otherwise, FALSE. FALSE is also
 *   returned when the container sits too close to the document root to have
 *   a grandparent element.
 */
function islandora_manuscript_get_container_component(DOMElement $container) {
  $concrete_container = isset($container->parentNode) ?
    $container :
    islandora_manuscript_lookup_tag($container);

  if (!$concrete_container || !$concrete_container->parentNode) {
    return FALSE;
  }

  // The grandparent may be missing or may be the DOMDocument itself; callers
  // expect an element (they call hasAttribute() on it), so only hand back a
  // DOMElement.
  $component = $concrete_container->parentNode->parentNode;
  return ($component instanceof DOMElement) ?
    $component :
    FALSE;
}
189+
190+
/**
 * Use our "tag" ID to look up the concrete container.
 *
 * Certain versions of PHP provide element copies lacking references to parent
 * elements. To work around this, we may have "tagged" each container with an
 * attribute, which we can use to get back to the "real" element from which it
 * was copied.
 *
 * @param DOMElement $container
 *   A container element to lookup.
 *
 * @return DOMElement|bool
 *   The container if we could find it; otherwise, FALSE.
 *
 * @see https://github.com/php/php-src/commit/6408a1a59e6d371cd488687e28e18815ea97984e#diff-258cc1cabc37df15d7f0ed40924f64efR283
 */
function islandora_manuscript_lookup_tag(DOMElement $container) {
  $tag = $container->getAttributeNS(ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI, 'id');

  // Search the owning document for the container carrying the same tag.
  $xpath = new DOMXPath($container->ownerDocument);
  $xpath->registerNamespace('ead', 'urn:isbn:1-931666-22-9');
  $xpath->registerNamespace('container-tag', ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI);
  $matches = $xpath->query("//ead:container[@container-tag:id='$tag']");

  if ($matches->length > 0) {
    return $matches->item(0);
  }
  return FALSE;
}
217+
218+
/**
 * Get the field for the given "type" of container.
 *
 * @param string $raw_type
 *   The raw type attribute value from the XML. A number of different formats
 *   have been seen in the wild, with boxes, for example:
 *   - Box
 *   - Boxes
 *   - box
 *   - boxes
 *   As a naive implementation, we lowercase and then compare at the beginning
 *   of the string for one of our recognized types, currently, just:
 *   - box
 *   - folder
 *
 * @return string|bool
 *   A string containing the name of a Solr field with which a query might be
 *   built, or FALSE if we do not have a mapping (in which case the raw type
 *   is also logged through watchdog()).
 */
function islandora_manuscript_findingaid_get_solr_field($raw_type) {
  // Recognized type prefix => (variable name, fallback field name), in the
  // order in which the prefixes are tested.
  $mappings = array(
    'box' => array(
      'islandora_manuscript_box_identifier_solr_field',
      'mods_relatedItem_host_part_detail_box_number_ms',
    ),
    'folder' => array(
      'islandora_manuscript_folder_identifier_solr_field',
      'mods_relatedItem_host_part_detail_folder_number_ms',
    ),
  );

  $normalized = strtolower($raw_type);
  foreach ($mappings as $prefix => $setting) {
    if (strncmp($normalized, $prefix, strlen($prefix)) === 0) {
      return variable_get($setting[0], $setting[1]);
    }
  }

  watchdog('islandora_manuscript', 'Unrecognized type @type.', array('@type' => $raw_type));
  return FALSE;
}
250+
251+
/**
 * Callback used in XSLT to build a query URL.
 *
 * @param DOMElement[] $containers
 *   An array of DOMElements, as provided by XSLTProcessor, each representing
 *   a "container" inside of an EAD document.
 *
 * @return string|null
 *   A string containing a URL to Solr search results for the given
 *   containers, or NULL when there is no Islandora object in the menu system
 *   or no query could be built from the containers.
 */
function islandora_manuscript_build_flat_query_url(array $containers) {
  $object = menu_get_object('islandora_object', 2);
  $filters = islandora_manuscript_build_flat_subfile_query($containers);
  if (!$object || empty($filters)) {
    return NULL;
  }

  $path = 'islandora/object/' . $object->id . '/manuscript/manuscripts';
  $options = array(
    'query' => array(
      'f' => $filters,
    ),
  );
  return url($path, $options);
}
273+
274+
/**
 * Helper function to wrap the map and merge.
 *
 * Builds the partial query for every container and flattens the per-container
 * results into a single list.
 *
 * @param DOMElement[] $containers
 *   An array of containers at the same component level in the EAD.
 *
 * @return string[]
 *   An array of strings representing Lucene queries.
 */
function islandora_manuscript_build_flat_subfile_query(array $containers) {
  // One array of Lucene queries per container, keyed as the input was.
  $per_container = array();
  foreach ($containers as $key => $container) {
    $per_container[$key] = islandora_manuscript_build_partial_query($container);
  }

  if (empty($per_container)) {
    // array_merge() needs at least one argument on older versions of PHP.
    return array();
  }

  // Merge down to single array.
  return call_user_func_array('array_merge', $per_container);
}

theme/islandora-manuscript-ead-display.tpl.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,17 @@
77
* - $attributes: Provided by template_process().
88
* - $object: An AbstractObject containing an "EAD" datastream.
99
* - $xslt_functions: An array of functions to allow the XSLT to run, as
10-
* $accepted by XSLTProcessor::registerPhpFunctions().
10+
* accepted by XSLTProcessor::registerPhpFunctions().
1111
* - $xslt_parameters: An associative array mapping namespace URIs to
12-
* $associative arrays of parameters proper.
12+
* associative arrays of parameters proper.
1313
* - $doc: A DOMDocument containing the parsed EAD datastream.
1414
* - $xslt_doc: A DOMDocument containing the parsed XSLT to run.
1515
* - $processor: The XSLTProcessor instance which was used.
1616
* - $markup_doc: A DOMDocument containing the markup to output, after
1717
* this function has run.
18+
* - $rendered_ead_html: The rendered HTML from the $markup_doc transform
1819
*/
1920
?>
2021
<div <?php echo $attributes; ?> <?php echo drupal_attributes(array('class' => $classes)); ?>>
21-
<?php echo $markup_doc->saveXML($markup_doc->documentElement); ?>
22+
<?php echo $rendered_ead_html; ?>
2223
</div>

0 commit comments

Comments
 (0)