Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/update-ror-mappings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
BUCKET: ${{ secrets.ROR_ANALYSIS_S3_BUCKET }}
S3_FUNDER_KEY: ror_funder_mapping/funder_to_ror.json
S3_HIERARCHY_KEY: ror_funder_mapping/ror_hierarchy.json
S3_COUNTRIES_KEY: ror_funder_mapping/ror_to_countries.json
LOCAL_DIR: app/resources

steps:
Expand All @@ -40,6 +41,7 @@ jobs:
set -euo pipefail
aws s3 cp "s3://${BUCKET}/${S3_FUNDER_KEY}" funder_to_ror.json.new
aws s3 cp "s3://${BUCKET}/${S3_HIERARCHY_KEY}" ror_hierarchy.json.new
aws s3 cp "s3://${BUCKET}/${S3_COUNTRIES_KEY}" ror_to_countries.json.new

- name: Compare and update tracked files (semantic JSON)
id: update
Expand Down Expand Up @@ -73,6 +75,7 @@ jobs:

normalize_and_update funder_to_ror.json.new "${LOCAL_DIR}/funder_to_ror.json"
normalize_and_update ror_hierarchy.json.new "${LOCAL_DIR}/ror_hierarchy.json"
normalize_and_update ror_to_countries.json.new "${LOCAL_DIR}/ror_to_countries.json"

echo "changed=${changed}" >> "$GITHUB_OUTPUT"

Expand Down
2 changes: 2 additions & 0 deletions app/controllers/datacite_dois_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def index
client_type: params[:client_type],
funded_by: params[:funded_by],
include_funder_child_organizations: params[:include_funder_child_organizations],
affiliation_country: params[:affiliation_country],
)
end

Expand Down Expand Up @@ -338,6 +339,7 @@ def index
publisher: params[:publisher],
funded_by: params[:funded_by],
include_funder_child_organizations: params[:include_funder_child_organizations],
"affiliation-country" => params[:affiliation_country],
# The cursor link should be an array of values, but we want to encode it into a single string for the URL
"page[cursor]" =>
page[:cursor] ? make_cursor(results) : nil,
Expand Down
6 changes: 6 additions & 0 deletions app/models/concerns/rorable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,10 @@ def get_ror_parents(ror_id)
hierarchy = RorReferenceStore.ror_hierarchy(normalized_ror)
hierarchy&.dig("ancestors") || []
end

# Looks up the country codes recorded for a ROR identifier.
#
# The identifier is passed through +ror_from_url+, prefixed with "https://",
# and looked up via RorReferenceStore.ror_to_countries.
#
# @param ror_id [String] ROR identifier to look up
# @return [Array<String>] unique, upper-cased country codes; empty when the
#   store returns nothing for the identifier
def get_countries_from_ror(ror_id)
  normalized_ror = "https://#{ror_from_url(ror_id)}"
  countries = RorReferenceStore.ror_to_countries(normalized_ror)

  # Tolerate imperfect mapping data: nil entries are dropped instead of
  # raising NoMethodError on #upcase, and codes are stripped / blank codes
  # removed so indexed values line up with the strip + upcase + blank
  # rejection applied to the affiliation_country query option.
  Array.wrap(countries)
    .compact
    .map { |code| code.to_s.strip.upcase }
    .reject(&:empty?)
    .uniq
end
end
36 changes: 36 additions & 0 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def validate_publisher_obj?(doi)
indexes :related_dmp_organization_id, type: :keyword
indexes :funder_rors, type: :keyword
indexes :funder_parent_rors, type: :keyword
indexes :affiliation_countries, type: :keyword
indexes :client_id_and_name, type: :keyword
indexes :provider_id_and_name, type: :keyword
indexes :resource_type_id_and_name, type: :keyword
Expand Down Expand Up @@ -648,6 +649,7 @@ def as_indexed_json(_options = {})
"related_dmp_organization_id" => related_dmp_organization_and_affiliation_id,
"funder_rors" => funder_rors,
"funder_parent_rors" => funder_parent_rors,
"affiliation_countries" => affiliation_countries,
"affiliation_id_and_name" => affiliation_id_and_name,
"fair_affiliation_id_and_name" => fair_affiliation_id_and_name,
"media_ids" => media_ids,
Expand Down Expand Up @@ -1268,6 +1270,14 @@ def self.query(query, options = {})
minimum_should_match = 1
end

# Optional filter on the "affiliation_countries" field.
# options[:affiliation_country] is split on commas; each piece is stripped
# and upper-cased, and blank pieces are discarded.
if options[:affiliation_country].present?
country_codes = options[:affiliation_country]
.split(",")
.map { |c| c.strip.upcase }
.reject(&:blank?)
# Only add the terms clause when at least one usable code remains
# (e.g. the input was not just commas/whitespace).
filter << { terms: { "affiliation_countries" => country_codes } } if country_codes.any?
end

must_not << { terms: { agency: ["crossref", "kisti", "medra", "jalc", "istic", "airiti", "cnki", "op"] } } if options[:exclude_registration_agencies]

# ES query can be optionally defined in different ways
Expand Down Expand Up @@ -2035,6 +2045,32 @@ def funder_parent_rors
end
end

# Country codes derived from the affiliations of this record's creators and
# contributors: creator-derived codes first, then contributor-derived codes,
# with repeated codes removed.
def affiliation_countries
  from_creators = extract_countries_from_people(creators)
  from_contributors = extract_countries_from_people(contributors)

  # Array#| keeps first-seen order and drops duplicates across both lists.
  from_creators | from_contributors
end

private
# Gathers the results of get_countries_from_ror for every ROR-identified
# affiliation found in a list of creator/contributor entries.
#
# Contributes nothing: entries or affiliations that are not Hashes,
# affiliations whose "affiliationIdentifierScheme" is not "ROR", and
# affiliations whose "affiliationIdentifier" is blank.
#
# @param people [Array, Hash, nil] creator or contributor entries
# @return [Array] flattened lookup results in encounter order (duplicates kept)
def extract_countries_from_people(people)
  person_records = Array.wrap(people).select { |entry| entry.is_a?(Hash) }

  person_records.flat_map do |record|
    ror_affiliations = Array.wrap(record.fetch("affiliation", [])).select do |aff|
      aff.is_a?(Hash) && aff.fetch("affiliationIdentifierScheme", nil) == "ROR"
    end

    ror_affiliations
      .map { |aff| aff.fetch("affiliationIdentifier", nil) }
      .reject(&:blank?)
      .flat_map { |identifier| get_countries_from_ror(identifier) }
  end
end

public

# Returns the portion of +doi+ before its first "/", or nil when +doi+ is
# not present.
def prefix
  return unless doi.present?

  head, _remainder = doi.split("/", 2)
  head
end
Expand Down
6 changes: 6 additions & 0 deletions app/services/ror_reference_store.rb
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ def refresh!(mapping)

def download_from_s3(filename)
bucket = ENV["ROR_ANALYSIS_S3_BUCKET"]

if bucket.blank?
Rails.logger.warn "[RorReferenceStore] ROR_ANALYSIS_S3_BUCKET not configured – skipping S3 fetch for #{filename}"
return nil
end

object_key = "#{S3_PREFIX}#{filename}"

client = Aws::S3::Client.new
Expand Down
6 changes: 6 additions & 0 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,12 @@ paths:
description: Search creators.affiliation.affiliationIdentifier and contributors.affiliation.affiliationIdentifier for a ROR ID.
schema:
type: string
- in: query
name: affiliation-country
description: Filter DOIs by associated country inferred from ROR IDs in creators/contributors affiliations. Use comma-separated ISO 3166-1 alpha-2 country codes.
schema:
type: string
example: US,GB
- in: query
name: funded-by
description: Search fundingReferences.funderIdentifier for a ROR ID. Results also include DOIs containing a Crossref Funder ID in fundingReferences.funderIdentifier corresponding to the ROR ID.
Expand Down
33 changes: 33 additions & 0 deletions spec/concerns/rorable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
{ "ancestors" => ["https://ror.org/04cw6st05"] }
)
allow(RorReferenceStore).to receive(:ror_hierarchy).with("https://ror.org/doi.org/00a0jsq62").and_return(nil)
allow(RorReferenceStore).to receive(:ror_to_countries).with("https://ror.org/00k4n6c32").and_return(["US"])
allow(RorReferenceStore).to receive(:ror_to_countries).with("https://ror.org/00a0jsq62").and_return(["us"])
allow(RorReferenceStore).to receive(:ror_to_countries).with("https://ror.org/nonexistent").and_return(nil)
allow(RorReferenceStore).to receive(:ror_to_countries).with("https://ror.org/doi.org/00k4n6c32").and_return(nil)
end

describe "Crossref Funder ID to ROR mapping" do
Expand Down Expand Up @@ -61,4 +65,33 @@
expect(ancestors).to eq([])
end
end

# Exercises get_countries_from_ror on a Doi instance with several identifier
# forms (full URL, host-prefixed, bare suffix) plus unmapped/invalid inputs.
# NOTE(review): these examples rely on RorReferenceStore.ror_to_countries
# being stubbed for each normalized URL outside this block — confirm the
# stubs stay in sync with the identifiers used here.
describe "ROR to country mapping" do
let(:doi) { create(:doi) }

it "maps ROR URL to country codes" do
expect(doi.get_countries_from_ror("https://ror.org/00k4n6c32")).to eq(["US"])
end

it "maps incomplete ROR URL to country codes" do
expect(doi.get_countries_from_ror("ror.org/00k4n6c32")).to eq(["US"])
end

it "maps ROR suffix to country codes" do
expect(doi.get_countries_from_ror("00k4n6c32")).to eq(["US"])
end

# A non-ROR host in the identifier should yield no countries.
it "returns empty array for invalid ROR" do
expect(doi.get_countries_from_ror("doi.org/00k4n6c32")).to eq([])
end

# The store returning nil must surface as an empty array, not nil.
it "returns empty array for ROR not in mapping" do
expect(doi.get_countries_from_ror("https://ror.org/nonexistent")).to eq([])
end

it "normalizes country codes to uppercase" do
# Store returns lowercase "us" — method must upcase it
expect(doi.get_countries_from_ror("https://ror.org/00a0jsq62")).to eq(["US"])
end
end
end
181 changes: 122 additions & 59 deletions spec/models/doi_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2366,85 +2366,148 @@
end
end

# Verifies that Doi#affiliation_countries (and the "affiliation_countries"
# entry of as_indexed_json) reflect the countries of ROR-identified
# affiliations on creators and contributors.
describe "with affiliation ROR IDs" do
let(:datacite_ror) { "https://ror.org/00k4n6c32" }
let(:cambridge_ror) { "https://ror.org/04wxnsj81" }

# Stub the reference store so the two RORs resolve to fixed country codes.
before do
allow(RorReferenceStore).to receive(:ror_to_countries).with(datacite_ror).and_return(["US"])
allow(RorReferenceStore).to receive(:ror_to_countries).with(cambridge_ror).and_return(["GB"])
end

# One creator with two ROR affiliations and one contributor that repeats
# the DataCite ROR, so the same country is reachable via two paths.
let(:doi) do
create(
:doi,
creators: [
{
"name": "Garza, Kristian",
"givenName": "Kristian",
"familyName": "Garza",
"nameType": "Personal",
"affiliation": [
{
"name": "DataCite",
"affiliationIdentifier": datacite_ror,
"affiliationIdentifierScheme": "ROR",
},
{
"name": "University of Cambridge",
"affiliationIdentifier": cambridge_ror,
"affiliationIdentifierScheme": "ROR",
},
],
},
],
contributors: [
{
"name": "Smith, John",
"givenName": "John",
"familyName": "Smith",
"contributorType": "Editor",
"affiliation": [
{
"name": "DataCite",
"affiliationIdentifier": datacite_ror,
"affiliationIdentifierScheme": "ROR",
},
],
},
],
)
end

let(:expected_countries) { ["US", "GB"] }

# Order is not asserted (match_array), only membership.
it "has countries from ROR affiliations in affiliation_countries" do
expect(doi.affiliation_countries).to match_array(expected_countries)
expect(doi.as_indexed_json["affiliation_countries"]).to match_array(expected_countries)
end

# The DataCite ROR appears on both the creator and the contributor, so
# "US" must only be listed once.
it "deduplicates country codes from multiple affiliations" do
countries = doi.affiliation_countries
expect(countries).to eq(countries.uniq)
end
end

describe "enrichable" do
describe "#enrichment_field" do
let(:doi) { create(:doi, aasm_state: "findable", agency: "datacite") }
describe "#enrichment_field" do
let(:doi) { create(:doi, aasm_state: "findable", agency: "datacite") }

# The camelCase "alternateIdentifiers" key maps to the snake_case
# "alternate_identifiers" field name.
it "maps alternateIdentifiers to alternate_identifiers" do
expect(doi.enrichment_field("alternateIdentifiers")).to(eq("alternate_identifiers"))
end
# The camelCase "alternateIdentifiers" key maps to the snake_case
# "alternate_identifiers" field name.
it "maps alternateIdentifiers to alternate_identifiers" do
expect(doi.enrichment_field("alternateIdentifiers")).to(eq("alternate_identifiers"))
end

it "maps creators to creators" do
expect(doi.enrichment_field("creators")).to(eq("creators"))
end
it "maps creators to creators" do
expect(doi.enrichment_field("creators")).to(eq("creators"))
end

it "maps titles to titles" do
expect(doi.enrichment_field("titles")).to(eq("titles"))
end
it "maps titles to titles" do
expect(doi.enrichment_field("titles")).to(eq("titles"))
end

it "maps publisher to publisher" do
expect(doi.enrichment_field("publisher")).to(eq("publisher"))
end
it "maps publisher to publisher" do
expect(doi.enrichment_field("publisher")).to(eq("publisher"))
end

it "maps publicationYear to publication_year" do
expect(doi.enrichment_field("publicationYear")).to(eq("publication_year"))
end
it "maps publicationYear to publication_year" do
expect(doi.enrichment_field("publicationYear")).to(eq("publication_year"))
end

it "maps subjects to subjects" do
expect(doi.enrichment_field("subjects")).to(eq("subjects"))
end
it "maps subjects to subjects" do
expect(doi.enrichment_field("subjects")).to(eq("subjects"))
end

it "maps contributors to contributors" do
expect(doi.enrichment_field("contributors")).to(eq("contributors"))
end
it "maps contributors to contributors" do
expect(doi.enrichment_field("contributors")).to(eq("contributors"))
end

it "maps dates to dates" do
expect(doi.enrichment_field("dates")).to(eq("dates"))
end
it "maps dates to dates" do
expect(doi.enrichment_field("dates")).to(eq("dates"))
end

it "maps language to language" do
expect(doi.enrichment_field("language")).to(eq("language"))
end
it "maps language to language" do
expect(doi.enrichment_field("language")).to(eq("language"))
end

it "maps types to types" do
expect(doi.enrichment_field("types")).to(eq("types"))
end
it "maps types to types" do
expect(doi.enrichment_field("types")).to(eq("types"))
end

it "maps relatedIdentifiers to related_identifiers" do
expect(doi.enrichment_field("relatedIdentifiers")).to(eq("related_identifiers"))
end
it "maps relatedIdentifiers to related_identifiers" do
expect(doi.enrichment_field("relatedIdentifiers")).to(eq("related_identifiers"))
end

it "maps relatedItems to related_items" do
expect(doi.enrichment_field("relatedItems")).to(eq("related_items"))
end
it "maps relatedItems to related_items" do
expect(doi.enrichment_field("relatedItems")).to(eq("related_items"))
end

it "maps sizes to sizes" do
expect(doi.enrichment_field("sizes")).to(eq("sizes"))
end
it "maps sizes to sizes" do
expect(doi.enrichment_field("sizes")).to(eq("sizes"))
end

it "maps formats to formats" do
expect(doi.enrichment_field("formats")).to(eq("formats"))
end
it "maps formats to formats" do
expect(doi.enrichment_field("formats")).to(eq("formats"))
end

it "maps version to version" do
expect(doi.enrichment_field("version")).to(eq("version"))
end
it "maps version to version" do
expect(doi.enrichment_field("version")).to(eq("version"))
end

it "maps rightsList to rights_list" do
expect(doi.enrichment_field("rightsList")).to(eq("rights_list"))
end
it "maps rightsList to rights_list" do
expect(doi.enrichment_field("rightsList")).to(eq("rights_list"))
end

it "maps descriptions to descriptions" do
expect(doi.enrichment_field("descriptions")).to(eq("descriptions"))
end
it "maps descriptions to descriptions" do
expect(doi.enrichment_field("descriptions")).to(eq("descriptions"))
end

it "maps geoLocations to geo_locations" do
expect(doi.enrichment_field("geoLocations")).to(eq("geo_locations"))
end
it "maps geoLocations to geo_locations" do
expect(doi.enrichment_field("geoLocations")).to(eq("geo_locations"))
end

it "maps fundingReferences to funding_references" do
expect(doi.enrichment_field("fundingReferences")).to(eq("funding_references"))
end
it "maps fundingReferences to funding_references" do
expect(doi.enrichment_field("fundingReferences")).to(eq("funding_references"))
end
end
end
end
Loading
Loading