|
| 1 | +<?php |
| 2 | + |
| 3 | +/** |
| 4 | + * @file |
| 5 | + * EAD HTML transformation functions |
| 6 | + */ |
| 7 | + |
/**
 * Namespace URI of the attribute used to tag EAD container elements.
 *
 * islandora_manuscript_tag_containers() writes an "id" attribute in this
 * namespace onto each container, and islandora_manuscript_lookup_tag() reads
 * it back to find the original element.
 */
define('ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI', 'http://islandora.ca/manuscript-container');
| 9 | + |
/**
 * Preprocess handler for islandora_manuscript_ead_display templates.
 *
 * Parses the EAD datastream and loads the module's EAD-to-HTML stylesheet so
 * that the transformation can be run afterwards.
 *
 * @param array $variables
 *   An associative array, taken by reference, containing:
 *   - object: An AbstractObject containing an "EAD" datastream.
 *   - xslt_functions: An array of functions to allow the XSLT to run, as
 *     accepted by XSLTProcessor::registerPhpFunctions().
 *   - xslt_parameters: An associative array mapping namespace URIs to
 *     associative arrays of parameters proper.
 *   This function populates:
 *   - xslt_parameters: The entry for the empty namespace gains a
 *     "container_string" parameter, unless one was already supplied.
 *   - doc: A DOMDocument containing the parsed (and tagged) EAD datastream.
 *   - xslt_doc: A DOMDocument containing the parsed XSLT to run.
 */
function islandora_manuscript_preprocess_ead_display_variables(&$variables) {
  // Array union keeps the left-hand keys, so anything the caller already
  // supplied for the empty namespace wins over our default.
  $supplied = isset($variables['xslt_parameters']['']) ?
    $variables['xslt_parameters'][''] :
    array();
  $defaults = array(
    "container_string" => t('Containers'),
  );
  $variables['xslt_parameters'][''] = $supplied + $defaults;

  $ead_doc = new DOMDocument();
  $variables['doc'] = $ead_doc;
  $ead_doc->loadXML($variables['object']['EAD']->content);

  // XXX: Containers are tagged to work around a PHP bug; see
  // islandora_manuscript_lookup_tag() for the details. The tagging could be
  // made conditional on the PHP version so it only happens when required.
  islandora_manuscript_tag_containers($ead_doc);

  $stylesheet_path = drupal_get_path('module', 'islandora_manuscript') . '/transforms/ead_to_html.xslt';
  $stylesheet = new DOMDocument();
  $variables['xslt_doc'] = $stylesheet;
  $stylesheet->load($stylesheet_path);
}
| 39 | + |
/**
 * Process variables for islandora_manuscript_ead_display templates.
 *
 * Runs the prepared stylesheet against the prepared EAD document and stores
 * both the resulting document and its serialized markup.
 *
 * @param array $variables
 *   An associative array containing:
 *   - object: An AbstractObject containing an "EAD" datastream.
 *   - xslt_functions: An array of functions to allow the XSLT to run, as
 *     accepted by XSLTProcessor::registerPhpFunctions().
 *   - xslt_parameters: An associative array mapping namespace URIs to
 *     associative arrays of parameters proper.
 *   - doc: A DOMDocument containing the parsed EAD datastream.
 *   - xslt_doc: A DOMDocument containing the parsed XSLT to run.
 *   This function populates:
 *   - processor: The XSLTProcessor instance which was used.
 *   - markup_doc: A DOMDocument containing the markup to output, after
 *     this function has run; FALSE if the transformation failed.
 *   - rendered_ead_html: A string containing the serialized root element of
 *     markup_doc; an empty string if the transformation failed.
 */
function islandora_manuscript_process_ead_display_variables(&$variables) {
  $variables['processor'] = $proc = new XSLTProcessor();
  $proc->importStylesheet($variables['xslt_doc']);
  foreach ($variables['xslt_parameters'] as $namespace_uri => $parameters) {
    $proc->setParameter($namespace_uri, $parameters);
  }
  $proc->registerPhpFunctions($variables['xslt_functions']);

  $markup_doc = $proc->transformToDoc($variables['doc']);
  $variables['markup_doc'] = $markup_doc;

  // XSLTProcessor::transformToDoc() returns FALSE when the transformation
  // fails; serializing unconditionally would call a method on a boolean.
  $variables['rendered_ead_html'] = ($markup_doc instanceof DOMDocument) ?
    $markup_doc->saveXML($markup_doc->documentElement) :
    '';
}
| 67 | + |
| 68 | + |
/**
 * Stamp every EAD container element with a document-unique tag attribute.
 *
 * This is one half of a workaround for a PHP bug in which nodesets handed
 * out of an XSLT are copies; the tag lets the original element be found
 * again later.
 *
 * @param DOMDocument $doc
 *   A DOMDocument containing a parsed EAD document. It is modified in place:
 *   each container gains an "id" attribute in the
 *   ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI namespace.
 */
function islandora_manuscript_tag_containers(DOMDocument $doc) {
  $ead_xpath = new DOMXPath($doc);
  $ead_xpath->registerNamespace('ead', 'urn:isbn:1-931666-22-9');

  // The tag value is derived from the position of the container in document
  // order, which makes it unique within this document.
  $containers = $ead_xpath->query('//ead:container');
  $total = $containers->length;
  for ($position = 0; $position < $total; $position++) {
    $tag_value = "islandora-manuscript-container-tag:$position";
    $containers->item($position)->setAttributeNS(ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI, 'container-tag:id', $tag_value);
  }
}
| 86 | + |
/**
 * XSLT callback: build a search URL for a container and its parents.
 *
 * @param DOMElement[] $container_array
 *   An array containing a single DOMElement (this is how XSLTProcessor
 *   provides it) representing a "container" inside of an EAD document.
 * @param DOMElement[] $all
 *   An array containing all container elements in the given EAD document.
 *
 * @return string|null
 *   A string containing a URL to Solr search results for the given
 *   container, or NULL when no Islandora object is found at position 2 of
 *   the current menu path.
 */
function islandora_manuscript_build_parented_query_url(array $container_array, array $all) {
  $object = menu_get_object('islandora_object', 2);
  if (!$object) {
    return NULL;
  }

  $path = "islandora/object/{$object->id}/manuscript/manuscripts";
  list($container) = $container_array;
  $filters = islandora_manuscript_build_subfile_query($container, $all);
  $options = array(
    'query' => array(
      'f' => $filters,
    ),
  );
  return url($path, $options);
}
| 111 | + |
/**
 * Build a query to filter to the given container.
 *
 * The Solr field is chosen from the container's "type" attribute and the
 * value is the container's trimmed text content.
 *
 * @param DOMElement $container
 *   A DOMElement representing the container.
 *
 * @return string[]
 *   An array of strings representing Lucene queries... Likely to be used as
 *   filter queries. The array is empty when the container has no text, or
 *   when its type does not map to a Solr field.
 */
function islandora_manuscript_build_partial_query(DOMElement $container) {
  $subqueries = array();
  $field = islandora_manuscript_findingaid_get_solr_field($container->getAttribute('type'));

  // Without a field mapping the result would be ':"value"', which is not a
  // valid query; emit nothing instead.
  if ($field === FALSE) {
    return $subqueries;
  }

  $value = trim($container->nodeValue);
  if ($value !== '') {
    $subqueries[] = format_string('!field:"!value"', array(
      '!field' => $field,
      // Backslash-escape quotes and backslashes so the value cannot end the
      // quoted phrase early.
      '!value' => addcslashes($value, '"\\'),
    ));
  }

  return $subqueries;
}
| 136 | + |
/**
 * Build a query to select all items in a given part of a file.
 *
 * The result holds the queries of the container's ancestors (followed via
 * the "parent" attribute) first, then the container's own query, then a
 * query on the ID of the component the container belongs to, if any.
 *
 * @param DOMElement $container
 *   An EAD container element for which to build a (part of a) query.
 * @param DOMElement[] $all
 *   An array of all container elements in the EAD doc... 'Cause the "parent"
 *   attribute can reference any container element.
 * @param array $seen
 *   (internal) Container IDs already visited on the current parent chain,
 *   as array keys. Callers should leave this at its default; it exists so
 *   that circular "parent" references cannot recurse forever.
 *
 * @return string[]
 *   An array of Lucene-syntax Solr queries.
 */
function islandora_manuscript_build_subfile_query(DOMElement $container, array $all = array(), array $seen = array()) {
  $subqueries = islandora_manuscript_build_partial_query($container);

  if ($container->hasAttribute('id')) {
    $seen[$container->getAttribute('id')] = TRUE;
  }

  // DOMElement::getAttribute() gives an empty string for a missing
  // attribute. An empty reference is ignored, as it would otherwise match
  // every container lacking an ID.
  $parent_id = $container->getAttribute('parent');
  if ($parent_id !== '' && !isset($seen[$parent_id])) {
    foreach ($all as $element) {
      // Strict comparison: with "==", numeric-looking IDs such as "1" and
      // "1.0" would be treated as the same ID.
      if ($element->getAttribute('id') === $parent_id) {
        $subqueries = array_merge(islandora_manuscript_build_subfile_query($element, $all, $seen), $subqueries);
      }
    }
  }

  $component = islandora_manuscript_get_container_component($container);
  if ($component && $component->hasAttribute('id')) {
    $subqueries[] = format_string('!field:"!value"', array(
      '!field' => variable_get('islandora_manuscript_component_identifier_solr_field', 'dereffed_ead_component_id_ms'),
      '!value' => $component->getAttribute('id'),
    ));
  }

  return $subqueries;
}
| 170 | + |
/**
 * Get the component to which the given container belongs.
 *
 * The component is taken to be the element two levels above the container
 * (its parent's parent). If the given element has no parent node, the
 * tagged original is looked up first with islandora_manuscript_lookup_tag().
 *
 * @param DOMElement $container
 *   A container element.
 *
 * @return DOMElement|bool
 *   The parent component if we could find it; otherwise, FALSE. FALSE is
 *   also returned when the node two levels up is not an element (for
 *   example, the document itself).
 */
function islandora_manuscript_get_container_component(DOMElement $container) {
  $concrete_container = isset($container->parentNode) ?
    $container :
    islandora_manuscript_lookup_tag($container);
  if (!$concrete_container) {
    return FALSE;
  }

  $wrapper = $concrete_container->parentNode;
  $component = $wrapper ? $wrapper->parentNode : NULL;

  // Near the top of the tree the grandparent is the DOMDocument or NULL.
  // Callers go on to use DOMElement methods, so only hand back elements.
  return ($component instanceof DOMElement) ? $component : FALSE;
}
| 189 | + |
/**
 * Find the concrete container carrying the same "tag" ID as the given one.
 *
 * Certain versions of PHP provide element copies lacking references to
 * parent elements. To work around this, each container may have been
 * "tagged" with an attribute; that attribute is used here to get back to
 * the "real" element from which the copy was made.
 *
 * @param DOMElement $container
 *   A container element to lookup.
 *
 * @return DOMElement|bool
 *   The first container in the element's owner document bearing the same
 *   tag, if there is one; otherwise, FALSE.
 *
 * @see https://github.com/php/php-src/commit/6408a1a59e6d371cd488687e28e18815ea97984e#diff-258cc1cabc37df15d7f0ed40924f64efR283
 */
function islandora_manuscript_lookup_tag(DOMElement $container) {
  $tag_value = $container->getAttributeNS(ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI, 'id');

  $document_xpath = new DOMXPath($container->ownerDocument);
  $document_xpath->registerNamespace('ead', 'urn:isbn:1-931666-22-9');
  $document_xpath->registerNamespace('container-tag', ISLANDORA_MANUSCRIPT_CONTAINER_TAG_URI);

  $matches = $document_xpath->query("//ead:container[@container-tag:id='$tag_value']");
  if ($matches->length > 0) {
    return $matches->item(0);
  }
  return FALSE;
}
| 217 | + |
/**
 * Map a container "type" onto the Solr field used to query it.
 *
 * @param string $raw_type
 *   The raw type attribute value from the XML. A number of different formats
 *   have been seen in the wild, with boxes, for example:
 *   - Box
 *   - Boxes
 *   - box
 *   - boxes
 *   Matching is deliberately naive: the value is lowercased and must start
 *   with one of the recognized types, currently, just:
 *   - box
 *   - folder
 *
 * @return string|bool
 *   A string containing the name of a Solr field with which a query might be
 *   built, or FALSE if we do not have a mapping (in which case the
 *   unrecognized type is also logged through watchdog()).
 */
function islandora_manuscript_findingaid_get_solr_field($raw_type) {
  // Recognized type prefix => (variable holding the field name, default).
  $known_types = array(
    'box' => array(
      'islandora_manuscript_box_identifier_solr_field',
      'mods_relatedItem_host_part_detail_box_number_ms',
    ),
    'folder' => array(
      'islandora_manuscript_folder_identifier_solr_field',
      'mods_relatedItem_host_part_detail_folder_number_ms',
    ),
  );

  $normalized = strtolower($raw_type);
  foreach ($known_types as $prefix => $setting) {
    if (strncmp($normalized, $prefix, strlen($prefix)) === 0) {
      list($variable_name, $default_field) = $setting;
      return variable_get($variable_name, $default_field);
    }
  }

  watchdog('islandora_manuscript', 'Unrecognized type @type.', array('@type' => $raw_type));
  return FALSE;
}
| 250 | + |
/**
 * XSLT callback: build a search URL from a flat set of containers.
 *
 * @param DOMElement[] $containers
 *   An array of DOMElements, as provided by XSLTProcessor, each representing
 *   a "container" inside of an EAD document.
 *
 * @return string|null
 *   A string containing a URL to Solr search results for the given
 *   containers, or NULL when either no Islandora object is found at position
 *   2 of the current menu path or the containers yield no queries.
 */
function islandora_manuscript_build_flat_query_url(array $containers) {
  $object = menu_get_object('islandora_object', 2);
  $filters = islandora_manuscript_build_flat_subfile_query($containers);
  if (!$object || empty($filters)) {
    return NULL;
  }

  $path = "islandora/object/{$object->id}/manuscript/manuscripts";
  $options = array(
    'query' => array(
      'f' => $filters,
    ),
  );
  return url($path, $options);
}
| 273 | + |
/**
 * Collect the partial queries of several containers into one flat list.
 *
 * @param DOMElement[] $containers
 *   An array of containers at the same component level in the EAD.
 *
 * @return string[]
 *   An array of strings representing Lucene queries; empty when no
 *   containers are given.
 */
function islandora_manuscript_build_flat_subfile_query(array $containers) {
  $queries = array();
  foreach ($containers as $container) {
    // Each container contributes its own array of Lucene queries; append
    // them in order so the result is a single flat array.
    $queries = array_merge($queries, islandora_manuscript_build_partial_query($container));
  }
  return $queries;
}
0 commit comments