@@ -26,41 +26,75 @@ namespace :enrichment do
2626
desc "Ingest ARXIV data"
# Reads lib/data/arxiv_preprint_matching.csv (with a header row) and creates one
# Enrichment record per data row, for at most the first 2000 rows.
#
# Columns read from each row: doi, source, process, field, action,
# originalValue, enrichedValue (a JSON document) and produced.
# The outcome of every row is printed to stdout; a row that cannot be parsed or
# saved is reported and skipped so the remaining rows are still processed.
task ingest_arxiv: :environment do
  # CSV is not guaranteed to be loaded by the Rails environment, so require it
  # explicitly before use.
  require "csv"

  csv_path = Rails.root.join("lib/data/arxiv_preprint_matching.csv")
  # Upper bound on the number of data rows ingested in a single run.
  max_rows = 2_000

  CSV.foreach(csv_path, headers: true).with_index(1) do |row, row_number|
    break if row_number > max_rows

    doi = row["doi"]

    # An empty cell makes JSON.parse raise TypeError and a malformed one raises
    # JSON::ParserError. Report the row and move on instead of aborting the
    # whole ingest, mirroring how save failures are handled below.
    begin
      enriched_value = JSON.parse(row["enrichedValue"])
    rescue JSON::ParserError, TypeError => e
      puts("Failed to create enrichment for #{doi}")
      puts("Invalid enrichedValue: #{e.message}")
      next
    end

    # Use a single timestamp so created and updated are exactly equal.
    now = Time.current.utc

    enrichment = Enrichment.new(
      doi: doi,
      source: row["source"],
      process: row["process"],
      field: row["field"],
      action: row["action"],
      original_value: row["originalValue"],
      enriched_value: enriched_value,
      created: now,
      updated: now,
      produced: row["produced"],
    )

    if enrichment.save
      puts("Created enrichment for #{doi}")
    else
      puts("Failed to create enrichment for #{doi}")
      puts(enrichment.errors.full_messages.join(","))
    end
  end
end
6360
61+ # desc "Ingest ARXIV data"
62+ # task ingest_arxiv: :environment do
63+ # file = File.read("lib/data/20250615_arxiv_preprint_matching_results.json")
64+ # data = JSON.parse(file)
65+ # count = 0
66+
67+ # data.each do |item|
68+ # count += 1
69+
70+ # break if count == 2001
71+
72+ # enrichment = Enrichment.new(
73+ # doi: item["input_doi"],
74+ # source: "COMET",
75+ # process: "10.0000/FAKE.PROCESS",
76+ # field: "relatedIdentifiers",
77+ # action: "insert",
78+ # original_value: nil,
79+ # enriched_value: {
80+ # relationType: "Preprint",
81+ # relatedIdentifier: item["matched_doi"],
82+ # relatedIdentifierType: "DOI",
83+ # },
84+ # created: Time.current.utc,
85+ # updated: Time.current.utc,
86+ # produced: Time.current.utc - 5.days,
87+ # )
88+
89+ # if enrichment.save
90+ # puts("Created enrichment for #{item["input_doi"]}")
91+ # else
92+ # puts("Failed to create enrichment for #{item["input_doi"]}")
93+ # puts(enrichment.errors.full_messages.join(","))
94+ # end
95+ # end
96+ # end
97+
6498 desc "Ingest procedural resource type"
6599 task ingest_procedural_resource_type : :environment do
66100 file = File . read ( "lib/data/datacite_procedural_resource_type_general_reclassifications_datacite_lookup_format.json" )
0 commit comments