|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +module EventIndexHandler |
| 4 | + include RelationTypes |
| 5 | + extend ActiveSupport::Concern |
| 6 | + |
# Builds the document that represents this event in the search index.
#
# Every key is the name of a method on the record and maps to that method's
# return value, except "subj" and "obj": those embed the subject/object
# metadata hash extended with a :cache_key entry.
#
# @param _options [Hash] accepted for interface compatibility; not used
# @return [Hash{String => Object}] field name => value, in a fixed key order
def as_indexed_json(_options = {})
  document = {}

  %w[uuid subj_id obj_id].each { |field| document[field] = __send__(field) }

  # The embedded metadata gets its own cache key next to the original keys.
  document["subj"] = subj_hash.merge(cache_key: subj_cache_key)
  document["obj"] = obj_hash.merge(cache_key: obj_cache_key)

  %w[
    source_doi target_doi source_relation_type_id target_relation_type_id
    doi orcid issn prefix subtype citation_type source_id source_token
    message_action relation_type_id registrant_id access_method metric_type
    total license error_messages aasm_state state_event year_month
    created_at updated_at indexed_at occurred_at citation_id citation_year
    cache_key
  ].each { |field| document[field] = __send__(field) }

  document
end
| 47 | + |
# Cache key for the subject side of the event.
#
# Uses the subject metadata's "dateModified" value when it is set and the
# current time (ISO 8601) otherwise.
#
# @return [String] "objects/<subj_id>-<timestamp>"
def subj_cache_key
  stamp = subj_hash["dateModified"]
  stamp ||= Time.zone.now.iso8601

  format("objects/%s-%s", subj_id, stamp)
end
| 52 | + |
# Cache key for the object side of the event.
#
# Uses the object metadata's "dateModified" value when it is set and the
# current time (ISO 8601) otherwise.
#
# @return [String] "objects/<obj_id>-<timestamp>"
def obj_cache_key
  stamp = obj_hash["dateModified"]
  stamp ||= Time.zone.now.iso8601

  format("objects/%s-%s", obj_id, stamp)
end
| 57 | + |
# Collects the DOIs associated with the event, in this order:
#
# 1. "proxyIdentifiers" of the subject, then of the object, keeping only
#    the entries that look like a bare DOI ("10.<4-5 digits>/<suffix>");
# 2. DOIs extracted from the "@id" of each subject/object "funder";
# 3. DOIs extracted from subj_id and obj_id.
#
# Entries for which DoiUtilities.doi_from_url returns nil are dropped.
# Duplicates are kept.
#
# @return [Array<String>]
def doi
  doi_pattern = %r{\A10\.\d{4,5}/.+\z}

  # grep without a block returns the matching identifiers themselves. The
  # pattern has no capture group, so mapping each match to
  # Regexp.last_match(1) would turn every identifier into nil.
  Array.wrap(subj_hash["proxyIdentifiers"]).grep(doi_pattern) +
    Array.wrap(obj_hash["proxyIdentifiers"]).grep(doi_pattern) +
    Array.wrap(subj_hash["funder"]).map { |f| DoiUtilities.doi_from_url(f["@id"]) }.compact +
    Array.wrap(obj_hash["funder"]).map { |f| DoiUtilities.doi_from_url(f["@id"]) }.compact +
    [DoiUtilities.doi_from_url(subj_id), DoiUtilities.doi_from_url(obj_id)].compact
end
| 65 | + |
# Collects the ORCID iDs associated with the event: first those extracted
# from the "@id" of each subject "author", then of each object "author",
# then those extracted from subj_id and obj_id. Entries for which
# OrcidUtilities.orcid_from_url returns nil are dropped.
#
# @return [Array] extracted ORCID iDs; duplicates are kept
def orcid
  from_authors = [subj_hash, obj_hash].flat_map do |metadata|
    Array.wrap(metadata["author"])
         .map { |author| OrcidUtilities.orcid_from_url(author["@id"]) }
         .compact
  end

  from_identifiers = [subj_id, obj_id].map { |id| OrcidUtilities.orcid_from_url(id) }.compact

  from_authors + from_identifiers
end
| 71 | + |
# Collects the ISSNs found under "periodical" => "issn" in the subject and
# object metadata, subject first.
#
# Each side is handled on its own: a "periodical" value that is not a Hash
# (for example a plain title string) contributes nothing, and the other
# side's ISSNs are still returned. An Array is always returned.
#
# @return [Array] ISSN values with nil entries removed
def issn
  [subj_hash, obj_hash].flat_map do |metadata|
    periodical = metadata["periodical"]

    # Only a Hash can carry an "issn" key; anything else is skipped here
    # instead of raising and discarding the result for both sides.
    periodical.is_a?(Hash) ? Array.wrap(periodical["issn"]).compact : []
  end
end
| 78 | + |
# DOI prefixes (the part before the first "/") of every entry returned by
# #doi, in the same order.
#
# Returns a flat array. Entries that yield no prefix (nil or empty DOIs)
# are dropped; compact is applied to the mapped list itself so that it
# actually removes them.
#
# @return [Array<String>]
def prefix
  doi.map { |d| d.to_s.split("/", 2).first }.compact
end
| 82 | + |
# The "@type" values of the subject and object metadata, subject first,
# with missing values removed.
#
# Both sides are read through subj_hash / obj_hash, like every other
# metadata lookup in this module.
#
# @return [Array]
def subtype
  [subj_hash["@type"], obj_hash["@type"]].compact
end
| 86 | + |
# Combined type label for the two sides of the event.
#
# Returns nil when either side's "@type" is blank or is the generic
# "CreativeWork"; otherwise the two types sorted and joined with "-".
#
# @return [String, nil]
def citation_type
  subj_type = subj_hash["@type"]
  return if subj_type.blank? || subj_type == "CreativeWork"

  obj_type = obj_hash["@type"]
  return if obj_type.blank? || obj_type == "CreativeWork"

  [subj_type, obj_type].sort.join("-")
end
| 96 | + |
# Registrant and provider identifiers found in the metadata, in the order
# subject "registrantId", object "registrantId", subject "providerId",
# object "providerId". Missing values are removed.
#
# @return [Array]
def registrant_id
  %w[registrantId providerId]
    .flat_map { |key| [subj_hash[key], obj_hash[key]] }
    .compact
end
| 105 | + |
# For relation types that contain "requests" or "investigations", the last
# "-"-separated segment of relation_type_id; nil for any other relation
# type.
#
# @return [String, nil]
def access_method
  return unless /(requests|investigations)/.match?(relation_type_id.to_s)
  return unless relation_type_id.present?

  relation_type_id.split("-").last
end
| 111 | + |
# For relation types that contain "requests" or "investigations", the
# first three "-"-separated segments of relation_type_id joined with "-";
# nil for any other relation type.
#
# @return [String, nil]
def metric_type
  return unless /(requests|investigations)/.match?(relation_type_id.to_s)

  relation_type_id.split("-", 4).first(3).join("-")
end
| 118 | + |
# Year and month ("YYYY-MM") of occurred_at, taken after converting it to
# UTC; nil when occurred_at is not set.
#
# @return [String, nil]
def year_month
  return unless occurred_at.present?

  occurred_at.utc.iso8601.slice(0, 7)
end
| 122 | + |
# Direction-independent identifier for the subject/object pair: the two
# ids in sorted order, joined with "-". Swapping subj_id and obj_id gives
# the same value.
#
# @return [String]
def citation_id
  lower, higher = [subj_id, obj_id].minmax

  [lower, higher].join("-")
end
| 126 | + |
# Year in which the citation came into existence: the later of the
# subject's and the object's publication year.
#
# For each side the publication date is the first available of the
# metadata's "datePublished", the metadata's "date_published", the value
# returned by date_published(<id>), and finally year_month.
#
# @return [Integer, String] the later publication year (0 for a side with
#   no usable date), or "" when relation_type_id is in neither
#   INCLUDED_RELATION_TYPES nor RELATIONS_RELATION_TYPES
def citation_year
  return "" if (INCLUDED_RELATION_TYPES + RELATIONS_RELATION_TYPES).exclude?(relation_type_id)

  years = [[subj_hash, subj_id], [obj_hash, obj_id]].map do |metadata, pid|
    published = metadata["datePublished"] ||
                metadata["date_published"] ||
                date_published(pid) ||
                year_month

    # Coerce to String before slicing: the value may be nil (no date found
    # anywhere) or a non-String such as a Date or an Integer year, for
    # which [0..3] either raises or does not select the year digits.
    published.to_s[0..3].to_i
  end

  years.max
end
| 142 | + |
# Cache key for the event itself, built from uuid and updated_at (the
# current time when updated_at is not set), formatted as ISO 8601.
#
# @return [String] "events/<uuid>-<timestamp>"
def cache_key
  stamp = (updated_at || Time.zone.now).iso8601

  format("events/%s-%s", uuid, stamp)
end
| 148 | + |
# Looks up the Doi record for the given identifier and returns its
# :publication_date attribute.
#
# @param doi [String] identifier passed to
#   DoiUtilities.uppercase_doi_from_url before the lookup
# @return [Object, nil] the record's :publication_date, or nil when no
#   record is found
def date_published(doi)
  record = Doi.find_by(doi: DoiUtilities.uppercase_doi_from_url(doi))
  return unless record.present?

  record[:publication_date]
end
| 154 | +end |
0 commit comments