MITLibraries
diff --git a/‎Pipfile
Lines changed: 2 additions & 0 deletions b/‎Pipfile
Lines changed: 2 additions & 0 deletions
diff --git a/‎Pipfile.lock
Lines changed: 28 additions & 20 deletions b/‎Pipfile.lock
Lines changed: 28 additions & 20 deletions
diff --git a/‎docs/adrs/0001-springshare-source-naming.md
Lines changed: 68 additions & 0 deletions b/‎docs/adrs/0001-springshare-source-naming.md
Lines changed: 68 additions & 0 deletions
diff --git a/‎tests/fixtures/oai_dc/oaidc_record_all_fields.xml
Lines changed: 26 additions & 0 deletions b/‎tests/fixtures/oai_dc/oaidc_record_all_fields.xml
Lines changed: 26 additions & 0 deletions
diff --git a/‎tests/fixtures/oai_dc/oaidc_record_missing_required_fields.xml
Lines changed: 24 additions & 0 deletions b/‎tests/fixtures/oai_dc/oaidc_record_missing_required_fields.xml
Lines changed: 24 additions & 0 deletions
diff --git a/‎tests/fixtures/oai_dc/oaidc_record_optional_fields_blank.xml
Lines changed: 26 additions & 0 deletions b/‎tests/fixtures/oai_dc/oaidc_record_optional_fields_blank.xml
Lines changed: 26 additions & 0 deletions
diff --git a/‎tests/fixtures/oai_dc/oaidc_record_optional_fields_missing.xml
Lines changed: 20 additions & 0 deletions b/‎tests/fixtures/oai_dc/oaidc_record_optional_fields_missing.xml
Lines changed: 20 additions & 0 deletions
diff --git a/‎tests/fixtures/oai_dc/oaidc_record_valid_generic_date.xml
Lines changed: 26 additions & 0 deletions b/‎tests/fixtures/oai_dc/oaidc_record_valid_generic_date.xml
Lines changed: 26 additions & 0 deletions
diff --git a/‎tests/fixtures/oai_dc/springshare/libguides/libguides_record_all_fields.xml
Lines changed: 26 additions & 0 deletions b/‎tests/fixtures/oai_dc/springshare/libguides/libguides_record_all_fields.xml
Lines changed: 26 additions & 0 deletions
diff --git a/‎tests/fixtures/oai_dc/springshare/libguides/libguides_record_optional_fields_blank.xml
Lines changed: 26 additions & 0 deletions b/‎tests/fixtures/oai_dc/springshare/libguides/libguides_record_optional_fields_blank.xml
Lines changed: 26 additions & 0 deletions
diff --git a/‎tests/fixtures/oai_dc/springshare/libguides/libguides_record_optional_fields_missing.xml
Lines changed: 20 additions & 0 deletions b/‎tests/fixtures/oai_dc/springshare/libguides/libguides_record_optional_fields_missing.xml
Lines changed: 20 additions & 0 deletions
@@ -10,6 +10,8 @@ click = "*"
 lxml = "*"
 sentry-sdk = "*"
 smart-open = {version = "*", extras = ["s3"]}
+python-dateutil = "*"
+types-python-dateutil = "*"
 
 [dev-packages]
 bandit = "*"
 
@@ -0,0 +1,68 @@
+# 1. Springshare Source Naming
+
+Date: 2023-07-26
+
+## Status
+
+Proposed
+
+## Context
+
+While working on adding two new sources to the TIMDEX pipeline, there was some discussion of, and some constraints around, what source names should be used.
+
+Both data sources, Libguides and the AZ list of databases, are from Springshare and are retrieved via OAI-PMH.
+
+At this time, a source name is a string that accompanies the records throughout the TIMDEX pipeline:
+  * `transmogrifier`: defined in `transmogrifier.config.SOURCES`
+    * drives what transformer class to use
+    * saved to TIMDEX record as field
+    * used for S3 key (folder structure + filename) 
+  * `timdex-pipeline-lambdas`: defined in `lambdas.config.INDEX_ALIASES`
+    * promotes a newly created index to specific aliases if configured
+    * used for S3 key (folder structure + filename)
+  * `timdex-index-manager`: defined in `tim.config.VALID_SOURCES`
+    * prevents indexing of sources if not present in this list
+    * used for index name created in OpenSearch
+
+Two distinct areas of consideration emerged when deciding on a source name:
+  * **meaningful**
+    * does it suggest what the original data source is?
+    * does it have value or meaning to end users of the API?
+  * **technically viable**
+    * does it have special characters?  are they allowed?
+    * does it result in predictable S3 key naming conventions throughout?
+    * is it an allowed OpenSearch index name?
+
+## Decision
+
+The following source names were decided on:
+  * `libguides`: the Libguides data source
+    * oai set: `guides`
+  * `researchdatabases`: the AZ list databases
+    * oai set: `az`
+
+### `libguides` 
+
+Pretty self-explanatory, satisfies both "meaningful" and "technically viable" requirements.
+
+### `researchdatabases`
+
+This one was a bit thornier.
+
+It was suggested that `az` was not terribly helpful for understanding where the data came from, and was very unhelpful for end users.  
+
+The first agreed-upon alternative was `research_databases`.  `databases` was also floated, but could be ambiguous from the point of view of an end user.
+
+For a variety of reasons, every attempt to keep these words distinct in the name failed:  `research_databases`, `research-databases`, and `researchDatabases`.  The reasons are outlined in [this Jira ticket comment](https://mitlibraries.atlassian.net/browse/TIMX-19?focusedCommentId=107019):
+  * `research_databases`: index name not correctly parsed in `timdex-index-manager`
+  * `research-databases`: files not saved correctly to S3 in `timdex-pipeline-lambdas`
+  * `researchDatabases`: not a valid OpenSearch index name
+
+And so, the source name finally decided upon was `researchdatabases`: no hyphens, underscores, or camelCasing.
+
+## Consequences
+
+The source name `researchdatabases` reflects some compromises that must be made for sources:
+  * if the source name is meaningful to end users, it may lose fidelity about the source origin
+  * if the source name is technically viable, it may lose some human readability
+
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<records>
+  <record xmlns="http://www.openarchives.org/OAI/2.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header>
+      <identifier>oai:libguides.com:guides/175846</identifier>
+      <datestamp>2023-05-31T19:49:21Z</datestamp>
+      <setSpec>guides</setSpec>
+    </header>
+    <metadata>
+      <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+        <dc:title>Materials Science &amp; Engineering</dc:title>
+        <dc:creator>Ye Li</dc:creator>
+        <dc:subject>Engineering</dc:subject>
+        <dc:subject>Science</dc:subject>
+        <dc:description>Useful databases and other research tips for materials science.</dc:description>
+        <dc:publisher>MIT Libraries</dc:publisher>
+        <dc:date>2008-06-19 17:55:27</dc:date>
+        <dc:identifier>https://libguides.mit.edu/materials</dc:identifier>
+      </oai_dc:dc>
+    </metadata>
+  </record>
+</records>
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<records>
+  <record xmlns="http://www.openarchives.org/OAI/2.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header>
+      <identifier>oai:libguides.com:guides/175846</identifier>
+      <datestamp>2023-05-31T19:49:21Z</datestamp>
+      <setSpec>guides</setSpec>
+    </header>
+    <metadata>
+      <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+        <dc:creator>Ye Li</dc:creator>
+        <dc:subject>Engineering</dc:subject>
+        <dc:subject>Science</dc:subject>
+        <dc:description>Useful databases and other research tips for materials science.</dc:description>
+        <dc:publisher>MIT Libraries</dc:publisher>
+        <dc:date>2008-06-19T17:55:27</dc:date>
+      </oai_dc:dc>
+    </metadata>
+  </record>
+</records>
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<records>
+  <record xmlns="http://www.openarchives.org/OAI/2.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header>
+      <identifier>oai:libguides.com:guides/175846</identifier>
+      <datestamp>2023-05-31T19:49:21Z</datestamp>
+      <setSpec>guides</setSpec>
+    </header>
+    <metadata>
+      <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+        <dc:title>Materials Science &amp; Engineering</dc:title>
+        <dc:creator></dc:creator>
+        <dc:subject></dc:subject>
+        <dc:subject></dc:subject>
+        <dc:description></dc:description>
+        <dc:publisher></dc:publisher>
+        <dc:date></dc:date>
+        <dc:identifier>https://libguides.mit.edu/materials</dc:identifier>
+      </oai_dc:dc>
+    </metadata>
+  </record>
+</records>
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<records>
+  <record xmlns="http://www.openarchives.org/OAI/2.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header>
+      <identifier>oai:libguides.com:guides/175846</identifier>
+      <datestamp>2023-05-31T19:49:21Z</datestamp>
+      <setSpec>guides</setSpec>
+    </header>
+    <metadata>
+      <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+        <dc:title>Materials Science &amp; Engineering</dc:title>
+        <dc:identifier>https://libguides.mit.edu/materials</dc:identifier>
+      </oai_dc:dc>
+    </metadata>
+  </record>
+</records>
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<records>
+  <record xmlns="http://www.openarchives.org/OAI/2.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header>
+      <identifier>oai:libguides.com:guides/175846</identifier>
+      <datestamp>2023-05-31T19:49:21Z</datestamp>
+      <setSpec>guides</setSpec>
+    </header>
+    <metadata>
+      <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+        <dc:title>Materials Science &amp; Engineering</dc:title>
+        <dc:creator>Ye Li</dc:creator>
+        <dc:subject>Engineering</dc:subject>
+        <dc:subject>Science</dc:subject>
+        <dc:description>Useful databases and other research tips for materials science.</dc:description>
+        <dc:publisher>MIT Libraries</dc:publisher>
+        <dc:date>2008-06-19T17:55:27</dc:date>
+        <dc:identifier>https://libguides.mit.edu/materials</dc:identifier>
+      </oai_dc:dc>
+    </metadata>
+  </record>
+</records>
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<records>
+  <record xmlns="http://www.openarchives.org/OAI/2.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header>
+      <identifier>oai:libguides.com:guides/175846</identifier>
+      <datestamp>2023-05-31T19:49:21Z</datestamp>
+      <setSpec>guides</setSpec>
+    </header>
+    <metadata>
+      <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+        <dc:title>Materials Science &amp; Engineering</dc:title>
+        <dc:creator>Ye Li</dc:creator>
+        <dc:subject>Engineering</dc:subject>
+        <dc:subject>Science</dc:subject>
+        <dc:description>Useful databases and other research tips for materials science.</dc:description>
+        <dc:publisher>MIT Libraries</dc:publisher>
+        <dc:date>2008-06-19 17:55:27</dc:date>
+        <dc:identifier>https://libguides.mit.edu/materials</dc:identifier>
+      </oai_dc:dc>
+    </metadata>
+  </record>
+</records>
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<records>
+  <record xmlns="http://www.openarchives.org/OAI/2.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header>
+      <identifier>oai:libguides.com:guides/175846</identifier>
+      <datestamp>2023-05-31T19:49:21Z</datestamp>
+      <setSpec>guides</setSpec>
+    </header>
+    <metadata>
+      <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+        <dc:title>Materials Science &amp; Engineering</dc:title>
+        <dc:creator></dc:creator>
+        <dc:subject></dc:subject>
+        <dc:subject></dc:subject>
+        <dc:description></dc:description>
+        <dc:publisher></dc:publisher>
+        <dc:date></dc:date>
+        <dc:identifier>https://libguides.mit.edu/materials</dc:identifier>
+      </oai_dc:dc>
+    </metadata>
+  </record>
+</records>
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<records>
+  <record xmlns="http://www.openarchives.org/OAI/2.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header>
+      <identifier>oai:libguides.com:guides/175846</identifier>
+      <datestamp>2023-05-31T19:49:21Z</datestamp>
+      <setSpec>guides</setSpec>
+    </header>
+    <metadata>
+      <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+        <dc:title>Materials Science &amp; Engineering</dc:title>
+        <dc:identifier>https://libguides.mit.edu/materials</dc:identifier>
+      </oai_dc:dc>
+    </metadata>
+  </record>
+</records>