@@ -26,30 +26,36 @@ namespace :enrichment do
2626
2727 desc "Ingest ARXIV data"
2828 task ingest_arxiv : :environment do
# Ingest arXiv preprint matches: parse the JSON results file and create one
# Enrichment per item, each inserting a "Preprint" relatedIdentifier that
# points at the item's matched DOI. Success or failure is printed per record.
max_records = 2000 # upper bound on how many items from the file are ingested
data = JSON.parse(File.read("lib/data/20250615_arxiv_preprint_matching_results.json"))

# `first(n)` replaces the hand-rolled counter/break and processes the same
# leading items (at most `max_records`).
data.first(max_records).each do |item|
  doi = item["input_doi"]
  # Take a single timestamp per record so `created` and `updated` agree and
  # `produced` is exactly five days earlier.
  now = Time.current.utc

  enrichment = Enrichment.new(
    doi: doi,
    source: "COMET",
    process: "10.0000/FAKE.PROCESS",
    field: "relatedIdentifiers",
    action: "insert",
    original_value: nil,
    enriched_value: {
      relationType: "Preprint",
      relatedIdentifier: item["matched_doi"],
      relatedIdentifierType: "DOI",
    },
    created: now,
    updated: now,
    produced: now - 5.days,
  )

  if enrichment.save
    puts("Created enrichment for #{doi}")
  else
    # Report the failure and the validation messages, then continue with the next item.
    puts("Failed to create enrichment for #{doi}")
    puts(enrichment.errors.full_messages.join(","))
  end
end
0 commit comments