From 72ade47243e02af762487b810700cd964f8885ea Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Fri, 1 Aug 2025 14:43:00 -0400 Subject: [PATCH 01/23] Start of mechanism --- pages/people/+Page.ts | 141 ++++++++---------------------------------- 1 file changed, 27 insertions(+), 114 deletions(-) diff --git a/pages/people/+Page.ts b/pages/people/+Page.ts index d447d2169..c0b9d42b1 100644 --- a/pages/people/+Page.ts +++ b/pages/people/+Page.ts @@ -1,123 +1,36 @@ import { Image, Navbar, Footer, SearchBar } from "~/components/general"; import h from "./main.module.sass"; import { Card, Divider } from "@blueprintjs/core"; -import { useState } from "react"; +import { useState, useEffect } from "react"; import { ContentPage } from "~/layouts"; +import { fetchPGData } from "~/_utils"; export function Page() { const [input, setInput] = useState(""); const [tags, setTags] = useState([]); - const res = [ - { - name: "Shanan Peters", - role: "Professor, Database Developer", - email: "peters@geology.wisc.edu", - link: "http://strata.geology.wisc.edu", - image: "shanan.jpg", - }, - { - name: "Daven Quinn", - role: "Research Scientist, Developer", - email: "daven.quinn@wisc.edu", - link: "https://davenquinn.com", - image: "daven.jpg", - }, - { - name: "Evgeny Mazko", - role: "Graduate Student", - email: "mazko@wisc.edu", - link: null, - image: "evgeny.jpg", - }, - { - name: "Michael McClennen", - role: "Senior Programmer Analyst", - email: "mmcclenn@geology.wisc.edu", - link: "https://geoscience.wisc.edu/geoscience/people/staff/name/michael-mcclennen/", - image: "michael.jpg", - }, - { - name: "Casey Idzikowski", - role: "Research Specialist, Developer (former)", - email: null, - link: "https://idzikowski-casey.github.io/personal-site/", - image: "casey.jpg", - }, - { - name: "David Sklar", - role: "Undergrad Student", - email: "dsklar@wisc.edu", - link: null, - image: "david.jpg", - }, - { - name: "Amy Fromandi", - role: null, - email: "punkish@eidesis.org", - link: null, - image: "amy.jpg", - }, - { - name: "Daniel Segessenmen", - role: "Graduate Student (former)", - email: null, - link: "http://strata.geology.wisc.edu", - image: "daniel.jpg", - }, - { - name: "Shan Ye", - role: "Graduate Student (former)", - email: null, - link: "https://www.wisc.edu/directories/person.php?name=Victoria+Khoo&email=vkhoo%40wisc.edu&query=victoria%20khoo", - image: "shan.jpg", - }, - { - name: "Ben Linzmeier", - role: "Postdoctoral Scholar (former)", - email: null, - link: "http://strata.geology.wisc.edu", - image: "ben.jpg", - }, - { - name: "Afiqah Rafi", - role: "Undergrad Student (former)", - email: null, - link: "https://www.wisc.edu/directories/person.php?name=Victoria+Khoo&email=vkhoo%40wisc.edu&query=victoria%20khoo", - image: "afiqah.jpg", - }, - { - name: "Sharon McMullen", - role: "Researcher (former)", - email: null, - link: "http://geoscience.wisc.edu/geoscience/people/student/?id=1007", - image: "sharon.jpg", - }, - { - name: "Andrew Zaffos", - role: "Data Mobilization and Research Scientist", - email: "azaffos@email.arizona.edu", - link: "http://www.azstrata.org", - image: "andrew.jpg", - }, - { - name: "Jon Husson", - role: "Postdoctoral Researcher (former)", - email: "jhusson@uvic.ca", - link: "http://www.jonhusson.com", - image: "jon.jpg", - }, - ]; - console.log(tags); + const [res, setPeople] = useState([]); + const [tagList, setTagList] = useState([]); - const tagList = [ - "Student", - "Researcher", - "Developer", - "Postdoc", - "Research Scientist", - "Former", - ]; + useEffect(() => { + fetchPGData("/people", {}) + .then(setPeople) + .catch((err) => { + console.error("Failed to fetch people:", err); + }); + }, []); + + useEffect(() => { + fetchPGData("/roles", {}) + .then(data => setTagList(data.map(role => role.name))) + .catch((err) => { + console.error("Failed to fetch tags:", err); + }); + }, []); + + if(!res || !tagList) { + return h("div.loading", "Loading..."); + } const handleInputChange = (e) => { const value = e.target.value; @@ -192,12 +105,12 @@ export function Page() { ]); } -function PersonCard({ name, role, email, link, image }) { +function PersonCard({ name, roles, email, website, img_id }) { return h("div.person-info", [ - h(Image, { src: image, className: "back-img" }), + h(Image, { src: img_id, className: "back-img" }), h("div.description", [ - h("a.name", { href: link }, name), - role ? h("p.role", role) : null, + h("a.name", { href: website }, name), + roles ? h("p.role", roles.map(role => role.name).join(", ")) : null, email ? h("a.email", { href: "mailto: " + email }, email) : null, ]), ]); From bd2613aa7bb2e31f34be3625dc246f89af40a94a Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Fri, 1 Aug 2025 14:48:04 -0400 Subject: [PATCH 02/23] Filter works --- pages/people/+Page.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pages/people/+Page.ts b/pages/people/+Page.ts index c0b9d42b1..d8b8cf2e3 100644 --- a/pages/people/+Page.ts +++ b/pages/people/+Page.ts @@ -39,7 +39,7 @@ export function Page() { const filteredPeople = res.filter((person) => { const name = person.name.toLowerCase(); - const role = person.role ? person.role.toLowerCase() : ""; + const role = person.roles.map(role => role.name).join(", ").toLowerCase(); const email = person.email ? person.email.toLowerCase() : ""; const roleTags = tagList @@ -110,8 +110,8 @@ function PersonCard({ name, roles, email, website, img_id }) { h(Image, { src: img_id, className: "back-img" }), h("div.description", [ h("a.name", { href: website }, name), - roles ? h("p.role", roles.map(role => role.name).join(", ")) : null, - email ? h("a.email", { href: "mailto: " + email }, email) : null, + h("p.role", roles.map(role => role.name).join(", ")), + h.if(email)("a.email", { href: "mailto: " + email }, email), ]), ]); } From 24bff3f13393c48d5685a1c9bae90491e1b8ff4a Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Fri, 1 Aug 2025 14:51:14 -0400 Subject: [PATCH 03/23] Finished --- pages/people/+Page.ts | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pages/people/+Page.ts b/pages/people/+Page.ts index d8b8cf2e3..e0e5f89dd 100644 --- a/pages/people/+Page.ts +++ b/pages/people/+Page.ts @@ -13,12 +13,12 @@ export function Page() { const [tagList, setTagList] = useState([]); useEffect(() => { - fetchPGData("/people", {}) + fetchPGData("/people", { name: `ilike.*${input}*` }) .then(setPeople) .catch((err) => { console.error("Failed to fetch people:", err); }); - }, []); + }, [input]); useEffect(() => { fetchPGData("/roles", {}) @@ -32,11 +32,6 @@ export function Page() { return h("div.loading", "Loading..."); } - const handleInputChange = (e) => { - const value = e.target.value; - setInput(value); - }; - const filteredPeople = res.filter((person) => { const name = person.name.toLowerCase(); const role = person.roles.map(role => role.name).join(", ").toLowerCase(); @@ -69,7 +64,7 @@ export function Page() { ]), h(Card, { className: "search-bar" }, [ h(SearchBar, { - onChange: handleInputChange, + onChange: (e) => setInput(e), placeholder: "Search by name, role, or email", }), h("div.tags", [ From ff4d1f23c011e4e651bc78731dd5b22a43ab15e0 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Mon, 4 Aug 2025 12:47:59 -0400 Subject: [PATCH 04/23] Show date --- pages/people/+Page.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pages/people/+Page.ts b/pages/people/+Page.ts index e0e5f89dd..365171df5 100644 --- a/pages/people/+Page.ts +++ b/pages/people/+Page.ts @@ -100,12 +100,17 @@ export function Page() { ]); } -function PersonCard({ name, roles, email, website, img_id }) { +function PersonCard({ name, roles, email, website, img_id, active_start, active_end }) { + const start = new Date(active_start).toLocaleDateString(); + const end = new Date(active_end).toLocaleDateString(); + return h("div.person-info", [ h(Image, { src: img_id, className: "back-img" }), h("div.description", [ h("a.name", { href: website }, name), h("p.role", roles.map(role => role.name).join(", ")), + h.if(active_start && !active_end)("p.start", `Active since ${start}`), + h.if(active_end)("p.dates", `Active from ${start} to ${end}`), h.if(email)("a.email", { href: "mailto: " + email }, email), ]), ]); From 4ef0ddbdf90f0e6476cc4ea7ff3acc15b0658d10 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Mon, 4 Aug 2025 12:52:01 -0400 Subject: [PATCH 05/23] Add active and former --- pages/people/+Page.ts | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pages/people/+Page.ts b/pages/people/+Page.ts index 365171df5..875f22b3c 100644 --- a/pages/people/+Page.ts +++ b/pages/people/+Page.ts @@ -22,7 +22,9 @@ export function Page() { useEffect(() => { fetchPGData("/roles", {}) - .then(data => setTagList(data.map(role => role.name))) + .then(data => { + setTagList([...(data.map(role => role.name)), "Active", "Former"]); + }) .catch((err) => { console.error("Failed to fetch tags:", err); }); @@ -37,16 +39,14 @@ export function Page() { const role = person.roles.map(role => role.name).join(", ").toLowerCase(); const email = person.email ? person.email.toLowerCase() : ""; - const roleTags = tagList - .map((tag) => { - if (role.includes(tag.toLowerCase())) { - return tag; - } - return null; - }) - .filter((tag) => tag !== null); + const isActive = !person.active_end; + const personTags = [ + ...person.roles.map(role => role.name), + isActive ? "Active" : "Former", + ]; + const tagMatch = - tags.length === 0 || tags.every((tag) => roleTags.includes(tag)); + tags.length === 0 || tags.every((tag) => personTags.includes(tag)); return ( (name.includes(input) || role.includes(input) || email.includes(input)) && @@ -54,6 +54,7 @@ export function Page() { ); }); + return h("div", [ h(Navbar), h(ContentPage, { className: "people-page" }, [ From 4699bf7c2d4b92e14c5f08abf2914426f241250c Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Mon, 4 Aug 2025 13:40:19 -0400 Subject: [PATCH 06/23] Add roles --- pages/dev/add-people/+Page.client.ts | 215 ++++++++++++++++++++++++++ pages/dev/add-people/main.module.sass | 7 + src/components/general/index.ts | 14 +- 3 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 pages/dev/add-people/+Page.client.ts create mode 100644 pages/dev/add-people/main.module.sass diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts new file mode 100644 index 000000000..3548d6b43 --- /dev/null +++ b/pages/dev/add-people/+Page.client.ts @@ -0,0 +1,215 @@ +import h from "./main.module.sass"; + +import { BasePage } from "~/components/general"; +import { DataField } from "~/components/unit-details"; +import { fetchPGData } from "~/_utils"; + +import { SaveButton } from "@macrostrat/ui-components"; +import { MultiSelect } from "@blueprintjs/select"; +import { MenuItem } from "@blueprintjs/core"; + +import { useEffect, useState } from "react"; + +export function Page() { + const [form, setForm] = useState({ + name: "", + email: "", + title: "", + website: "", + profileImage: "", + startDate: "", + endDate: "", + roles: [], + }); + + console.log("Initial form state:", form); + + const disabled = !form.name || !form.email || !form.title || !form.profileImage; + + const handleChange = (field) => (value) => { + console.log(`${field} changed:`, value); + setForm({ ...form, [field]: value }); + }; + + return h(BasePage, { title: "Add people" }, [ + h("div.add-people-page", [ + h("p", "This page is meant to add people to the Macrostrat database. Please fill out the form below with the person's details."), + ]), + h('div.form', [ + h('div.inputs', [ + h(TextInput, { + label: "Name *", + value: form.name, + onChange: handleChange("name"), + required: true + }), + h(TextInput, { + label: "Email *", + value: form.email, + onChange: handleChange("email"), + required: true, + pattern: "^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$" + }), + h(TextInput, { + label: "Title *", + value: form.title, + onChange: handleChange("title"), + required: true + }), + h(RolesInput, { setForm }), + h(ImageInput, { + label: "Profile Image *", + value: form.profileImage, + onChange: handleChange("profileImage"), + required: true + }), + h(TextInput, { + label: "Website", + value: form.website, + onChange: handleChange("website"), + pattern: "https?://.+" + }), + h(DateInput, { + label: "Active Start Date", + value: form.startDate, + onChange: handleChange("startDate"), + required: true + }), + h(DateInput, { + label: "Active End Date", + value: form.endDate, + onChange: handleChange("endDate") + }), + ]), + h(SubmitButton, { disabled: false, form }), + h("p.note", h('em', "Fields marked with * are required")), + ]), + ]); +} + +// === Input Components === + +function TextInput({ label, value = "", onChange, required = false, pattern }) { + return h(DataField, { + label, + value: h("input.text-input", { + type: "text", + value, + required, + pattern, + onInput: (e) => onChange(e.target.value), + }) + }); +} + +function DateInput({ label, value = "", onChange, required = false }) { + return h(DataField, { + label, + value: h("input.date-input", { + type: "date", + value, + required, + onInput: (e) => onChange(e.target.value), + }) + }); +} + +function ImageInput({ label, onChange, required = false }) { + return h(DataField, { + label, + value: h("input.image-input", { + type: "file", + accept: "image/*", + required, + onChange: (e) => { + const file = e.target.files[0]; + if (file) { + const reader = new FileReader(); + reader.onload = (event) => onChange(event.target.result); + reader.readAsDataURL(file); + } + }, + }), + }); +} + +function SubmitButton({ disabled, form }) { + const text = disabled ? "Please fill out all required fields" : "Add person"; + + const handleSubmit = () => { + if (!disabled) { + // Convert empty strings in form to null + const formattedForm = Object.fromEntries( + Object.entries(form).map(([key, value]) => [key, value === "" ? null : value]) + ); + + console.log("Form submitted with data:", formattedForm); + // Your form submission logic here, using formattedForm + } + }; + + return h(SaveButton, { disabled, onClick: handleSubmit }, text) +} + +function RolesInput({setForm}) { + const [roles, setRoles] = useState([]); + const [selectedRoles, setSelectedRoles] = useState([]); + + useEffect(() => { + fetchPGData("/roles", {}) + .then(data => { + setRoles(data.map(role => role.name)); + }) + .catch((err) => { + console.error("Failed to fetch roles:", err); + }); + }, []); + + // Check if item is selected in selectedRoles (not roles) + const isItemSelected = (item) => selectedRoles.includes(item); + + const handleItemSelect = (item) => { + if (!isItemSelected(item)) { + setSelectedRoles([...selectedRoles, item]); + setForm((prev) => ({ ...prev, roles: [...prev.roles, item] })); + } + }; + + const handleItemDelete = (itemToDelete) => { + const next = selectedRoles.filter((item) => item !== itemToDelete); + setSelectedRoles(next); + setForm((prev) => ({ ...prev, roles: next })); + }; + + const itemPredicate = (query, item) => + item.toLowerCase().includes(query.toLowerCase()); + + const itemRenderer = (item, { handleClick, modifiers }) => { + if (!modifiers.matchesPredicate) return null; + + return h(MenuItem, { + key: item, + text: item, + onClick: handleClick, + active: modifiers.active, + shouldDismissPopover: false, + }); + }; + + const items = roles.filter((f) => !isItemSelected(f)); + + return h(DataField, { + label: "Roles *", + value: h(MultiSelect, { + items, + itemRenderer, + itemPredicate, + selectedItems: selectedRoles, + onItemSelect: handleItemSelect, + onRemove: handleItemDelete, + tagRenderer: (item) => item, + popoverProps: { minimal: true }, + fill: true, + }) + }); +} \ No newline at end of file diff --git a/pages/dev/add-people/main.module.sass b/pages/dev/add-people/main.module.sass new file mode 100644 index 000000000..e7eac775a --- /dev/null +++ b/pages/dev/add-people/main.module.sass @@ -0,0 +1,7 @@ +.form, .inputs + display: flex + flex-direction: column + gap: 1em + +.inputs + gap: .5em \ No newline at end of file diff --git a/src/components/general/index.ts b/src/components/general/index.ts index 3244c171f..752103e47 100644 --- a/src/components/general/index.ts +++ b/src/components/general/index.ts @@ -1,7 +1,9 @@ import h from "./layout.module.sass"; import { MacrostratIcon, StickyHeader } from "~/components"; import { Spinner, Icon, Card } from "@blueprintjs/core"; -import { useDarkMode } from "@macrostrat/ui-components"; +import { ContentPage } from "~/layouts"; +import { PageBreadcrumbs } from "~/components"; +import { DarkModeButton } from "@macrostrat/ui-components"; import classNames from "classnames"; export function Image({ src, className, width, height }) { @@ -160,3 +162,13 @@ export function StratTag({ isConcept, fontSize = ".75em" }) { export function IDTag({ id }) { return h("div.id-tag", "ID: #" + id); } + +export function BasePage({title, className, children}) { + return h("div", [ + h(ContentPage, { className }, [ + h(PageBreadcrumbs, { title }), + children, + ]), + h(Footer), + ]); +} \ No newline at end of file From 5cc6b8b7652e3f9aa555d2afd10c1306a3f1b2ce Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Mon, 4 Aug 2025 14:08:40 -0400 Subject: [PATCH 07/23] Store role ids --- pages/dev/add-people/+Page.client.ts | 131 ++++++++++++++++----------- 1 file changed, 79 insertions(+), 52 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index 3548d6b43..b4dea626c 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -5,6 +5,7 @@ import { DataField } from "~/components/unit-details"; import { fetchPGData } from "~/_utils"; import { SaveButton } from "@macrostrat/ui-components"; +import { postgrestPrefix } from "@macrostrat-web/settings"; import { MultiSelect } from "@blueprintjs/select"; import { MenuItem } from "@blueprintjs/core"; @@ -12,13 +13,13 @@ import { useEffect, useState } from "react"; export function Page() { const [form, setForm] = useState({ - name: "", - email: "", - title: "", - website: "", - profileImage: "", - startDate: "", - endDate: "", + name: null, + email: null, + title: null, + website: null, + img_id: null, + active_start: null, + active_end: null, roles: [], }); @@ -59,8 +60,8 @@ export function Page() { h(RolesInput, { setForm }), h(ImageInput, { label: "Profile Image *", - value: form.profileImage, - onChange: handleChange("profileImage"), + value: form.img_id, + onChange: handleChange("img_id"), required: true }), h(TextInput, { @@ -71,14 +72,14 @@ export function Page() { }), h(DateInput, { label: "Active Start Date", - value: form.startDate, - onChange: handleChange("startDate"), + value: form.active_start, + onChange: handleChange("active_start"), required: true }), h(DateInput, { label: "Active End Date", - value: form.endDate, - onChange: handleChange("endDate") + value: form.active_end, + onChange: handleChange("active_end") }), ]), h(SubmitButton, { disabled: false, form }), @@ -143,73 +144,99 @@ function SubmitButton({ disabled, form }) { Object.entries(form).map(([key, value]) => [key, value === "" ? null : value]) ); - console.log("Form submitted with data:", formattedForm); - // Your form submission logic here, using formattedForm + const { roles, ...personData } = formattedForm; + + fetch(postgrestPrefix + "/people", { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: personData + }) + .then(response => { + if (!response.ok) { + throw new Error(`HTTP error! Status: ${response.status}`); + } + return response.json(); // or response.text(), depending on your API + }) + .then(data => { + alert('Success: ' + form.name + ' has been added!'); + }) + .catch(error => { + console.warn('Error:', error); + }); } }; return h(SaveButton, { disabled, onClick: handleSubmit }, text) } -function RolesInput({setForm}) { +function RolesInput({ setForm }) { const [roles, setRoles] = useState([]); const [selectedRoles, setSelectedRoles] = useState([]); useEffect(() => { fetchPGData("/roles", {}) - .then(data => { - setRoles(data.map(role => role.name)); + .then((data) => { + setRoles(data); }) .catch((err) => { console.error("Failed to fetch roles:", err); }); }, []); - // Check if item is selected in selectedRoles (not roles) - const isItemSelected = (item) => selectedRoles.includes(item); + const isItemSelected = (item) => + selectedRoles.some((r) => r.id === item.id); const handleItemSelect = (item) => { if (!isItemSelected(item)) { - setSelectedRoles([...selectedRoles, item]); - setForm((prev) => ({ ...prev, roles: [...prev.roles, item] })); + const next = [...selectedRoles, item]; + setSelectedRoles(next); + setForm((prev) => ({ + ...prev, + roles: next.map((r) => r.id), + })); } }; const handleItemDelete = (itemToDelete) => { - const next = selectedRoles.filter((item) => item !== itemToDelete); + const next = selectedRoles.filter((item) => item.id !== itemToDelete.id); setSelectedRoles(next); - setForm((prev) => ({ ...prev, roles: next })); + setForm((prev) => ({ + ...prev, + roles: next.map((r) => r.id), + })); }; const itemPredicate = (query, item) => - item.toLowerCase().includes(query.toLowerCase()); - - const itemRenderer = (item, { handleClick, modifiers }) => { - if (!modifiers.matchesPredicate) return null; - - return h(MenuItem, { - key: item, - text: item, - onClick: handleClick, - active: modifiers.active, - shouldDismissPopover: false, - }); - }; + item.name.toLowerCase().includes(query.toLowerCase()); - const items = roles.filter((f) => !isItemSelected(f)); + const itemRenderer = (item, { handleClick, modifiers }) => { + if (!modifiers.matchesPredicate) return null; - return h(DataField, { - label: "Roles *", - value: h(MultiSelect, { - items, - itemRenderer, - itemPredicate, - selectedItems: selectedRoles, - onItemSelect: handleItemSelect, - onRemove: handleItemDelete, - tagRenderer: (item) => item, - popoverProps: { minimal: true }, - fill: true, - }) + return h(MenuItem, { + key: item.id, + text: item.name, + onClick: handleClick, + active: modifiers.active, + shouldDismissPopover: false, }); -} \ No newline at end of file + }; + + const items = roles.filter((role) => !isItemSelected(role)); + + return h(DataField, { + label: "Roles *", + value: h(MultiSelect, { + items, + itemRenderer, + itemPredicate, + selectedItems: selectedRoles, + onItemSelect: handleItemSelect, + onRemove: handleItemDelete, + tagRenderer: (item) => item.name, + popoverProps: { minimal: true }, + fill: true, + }), + }); +} From f31611610c1125d2c0fc111bd0486102f719ac1b Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Mon, 4 Aug 2025 14:10:06 -0400 Subject: [PATCH 08/23] Should handle everything --- pages/dev/add-people/+Page.client.ts | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index b4dea626c..9f8f0d66c 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -160,7 +160,22 @@ function SubmitButton({ disabled, form }) { return response.json(); // or response.text(), depending on your API }) .then(data => { - alert('Success: ' + form.name + ' has been added!'); + const personId = data.id; + + // Now handle roles + if (roles.length > 0) { + const rolePromises = roles.map(roleId => { + return fetch(postgrestPrefix + "/people_roles", { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ person_id: personId, role_id: roleId }) + }); + }); + + return Promise.all(rolePromises); + } }) .catch(error => { console.warn('Error:', error); From b309fab06038f37943c4ec6c2cfe84304fafdbb4 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Mon, 4 Aug 2025 14:34:12 -0400 Subject: [PATCH 09/23] Sample page (post doesnt work) --- pages/dev/add-people/+Page.client.ts | 22 ++++++++++++---------- pages/dev/add-people/main.module.sass | 5 ++++- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index 9f8f0d66c..e7cdfcdaa 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -23,12 +23,9 @@ export function Page() { roles: [], }); - console.log("Initial form state:", form); - - const disabled = !form.name || !form.email || !form.title || !form.profileImage; + const disabled = !form.name || !form.email || !form.title || !form.img_id || form.roles.length === 0; const handleChange = (field) => (value) => { - console.log(`${field} changed:`, value); setForm({ ...form, [field]: value }); }; @@ -82,7 +79,7 @@ export function Page() { onChange: handleChange("active_end") }), ]), - h(SubmitButton, { disabled: false, form }), + h(SubmitButton, { disabled, form }), h("p.note", h('em', "Fields marked with * are required")), ]), ]); @@ -144,18 +141,23 @@ function SubmitButton({ disabled, form }) { Object.entries(form).map(([key, value]) => [key, value === "" ? null : value]) ); - const { roles, ...personData } = formattedForm; + const { roles, img_id, ...personData } = formattedForm; + + console.log("Submitting person data:", personData); - fetch(postgrestPrefix + "/people", { + fetch(postgrestPrefix + "/people_table", { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: personData + body: JSON.stringify({ + ...personData, + img_id: "david.jpg" + }) }) .then(response => { if (!response.ok) { - throw new Error(`HTTP error! Status: ${response.status}`); + throw new Error(`HTTP error! Status: ${response.toString()}`); } return response.json(); // or response.text(), depending on your API }) @@ -165,7 +167,7 @@ function SubmitButton({ disabled, form }) { // Now handle roles if (roles.length > 0) { const rolePromises = roles.map(roleId => { - return fetch(postgrestPrefix + "/people_roles", { + return fetch(postgrestPrefix + "/people_roles_table", { method: 'POST', headers: { 'Content-Type': 'application/json' diff --git a/pages/dev/add-people/main.module.sass b/pages/dev/add-people/main.module.sass index e7eac775a..e629af01c 100644 --- a/pages/dev/add-people/main.module.sass +++ b/pages/dev/add-people/main.module.sass @@ -4,4 +4,7 @@ gap: 1em .inputs - gap: .5em \ No newline at end of file + gap: .5em + +.add-people-page + margin: 1em 0 \ No newline at end of file From a925e96f652bafb2a41255426af7ff8ed6059608 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Mon, 4 Aug 2025 15:12:21 -0400 Subject: [PATCH 10/23] Better error handling --- pages/dev/add-people/+Page.client.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index e7cdfcdaa..3a36832f2 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -157,9 +157,12 @@ function SubmitButton({ disabled, form }) { }) .then(response => { if (!response.ok) { - throw new Error(`HTTP error! Status: ${response.toString()}`); + // Wait for the response text before throwing an error + return response.text().then(text => { + throw new Error(`HTTP error! Status: ${response.status} - ${text}`); + }); } - return response.json(); // or response.text(), depending on your API + return response.json(); }) .then(data => { const personId = data.id; @@ -180,7 +183,7 @@ function SubmitButton({ disabled, form }) { } }) .catch(error => { - console.warn('Error:', error); + console.warn(error); }); } }; From 7aba0101f93875ce61b5ee5fb9b076fb06930da9 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Tue, 5 Aug 2025 14:29:53 -0400 Subject: [PATCH 11/23] People page works --- pages/people/+Page.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pages/people/+Page.ts b/pages/people/+Page.ts index 875f22b3c..6578bb6c6 100644 --- a/pages/people/+Page.ts +++ b/pages/people/+Page.ts @@ -13,7 +13,7 @@ export function Page() { const [tagList, setTagList] = useState([]); useEffect(() => { - fetchPGData("/people", { name: `ilike.*${input}*` }) + fetchPGData("/people_with_roles", { name: `ilike.*${input}*` }) .then(setPeople) .catch((err) => { console.error("Failed to fetch people:", err); From c5d8a772c3c129edd15dc293a27e9c2a42629f5e Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Tue, 5 Aug 2025 14:33:30 -0400 Subject: [PATCH 12/23] Add people (not roles) works --- pages/dev/add-people/+Page.client.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index 3a36832f2..92faa4894 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -145,7 +145,7 @@ function SubmitButton({ disabled, form }) { console.log("Submitting person data:", personData); - fetch(postgrestPrefix + "/people_table", { + fetch(postgrestPrefix + "/people", { method: 'POST', headers: { 'Content-Type': 'application/json' @@ -165,12 +165,12 @@ function SubmitButton({ disabled, form }) { return response.json(); }) .then(data => { - const personId = data.id; + const personId = data.person_id; // Now handle roles if (roles.length > 0) { const rolePromises = roles.map(roleId => { - return fetch(postgrestPrefix + "/people_roles_table", { + return fetch(postgrestPrefix + "/people_roles", { method: 'POST', headers: { 'Content-Type': 'application/json' @@ -180,7 +180,7 @@ function SubmitButton({ disabled, form }) { }); return Promise.all(rolePromises); - } + } }) .catch(error => { console.warn(error); From 281f7d016a189786fea5b6e864ce0075fd149d0b Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Tue, 5 Aug 2025 14:37:20 -0400 Subject: [PATCH 13/23] Trying roles --- pages/dev/add-people/+Page.client.ts | 104 +++++++++++++++------------ 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index 92faa4894..ac35767ef 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -135,59 +135,71 @@ function SubmitButton({ disabled, form }) { const text = disabled ? "Please fill out all required fields" : "Add person"; const handleSubmit = () => { - if (!disabled) { - // Convert empty strings in form to null - const formattedForm = Object.fromEntries( - Object.entries(form).map(([key, value]) => [key, value === "" ? null : value]) - ); + if (disabled) return; - const { roles, img_id, ...personData } = formattedForm; + // Convert empty strings to null + const formattedForm = Object.fromEntries( + Object.entries(form).map(([key, value]) => [key, value === "" ? null : value]) + ); - console.log("Submitting person data:", personData); + // Destructure roles and img_id, default img_id if missing + const { roles = [], img_id, ...personData } = formattedForm; - fetch(postgrestPrefix + "/people", { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ - ...personData, - img_id: "david.jpg" - }) - }) - .then(response => { - if (!response.ok) { - // Wait for the response text before throwing an error - return response.text().then(text => { - throw new Error(`HTTP error! Status: ${response.status} - ${text}`); - }); - } - return response.json(); - }) - .then(data => { - const personId = data.person_id; + console.log("Submitting person data:", personData); - // Now handle roles - if (roles.length > 0) { - const rolePromises = roles.map(roleId => { - return fetch(postgrestPrefix + "/people_roles", { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ person_id: personId, role_id: roleId }) - }); - }); + fetch(`${postgrestPrefix}/people`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + ...personData, + img_id: img_id || "david.jpg", // fallback to default + }), + }) + .then(response => { + if (!response.ok) { + return response.text().then(text => { + throw new Error(`Failed to create person: ${response.status} - ${text}`); + }); + } + return response.json(); + }) + .then(data => { + const personId = data.person_id; + if (!personId) { + throw new Error("Missing person_id in response"); + } - return Promise.all(rolePromises); - } - }) - .catch(error => { - console.warn(error); - }); - } + // Handle roles if present + if (roles.length > 0) { + const rolePromises = roles.map(roleId => + fetch(`${postgrestPrefix}/people_roles`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ person_id: personId, role_id: roleId }), + }) + .then(response => { + if (!response.ok) { + return response.text().then(text => { + throw new Error(`Failed to assign role: ${response.status} - ${text}`); + }); + } + return response.json(); + }) + ); + + return Promise.all(rolePromises); + } + }) + .catch(error => { + console.warn("Submission error:", error); + }); }; + return h(SaveButton, { disabled, onClick: handleSubmit }, text) } From 6df8ea25d797bdd644e8c3877d5bbe13b84ad69e Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 6 Aug 2025 12:29:51 -0400 Subject: [PATCH 14/23] Basically working --- pages/dev/add-people/+Page.client.ts | 88 ++++++++++++---------------- 1 file changed, 36 insertions(+), 52 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index ac35767ef..cc5d1552e 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -23,6 +23,8 @@ export function Page() { roles: [], }); + console.log('form', form); + const disabled = !form.name || !form.email || !form.title || !form.img_id || form.roles.length === 0; const handleChange = (field) => (value) => { @@ -79,7 +81,7 @@ export function Page() { onChange: handleChange("active_end") }), ]), - h(SubmitButton, { disabled, form }), + h(SubmitButton, { disabled: false, form }), h("p.note", h('em', "Fields marked with * are required")), ]), ]); @@ -137,70 +139,52 @@ function SubmitButton({ disabled, form }) { const handleSubmit = () => { if (disabled) return; - // Convert empty strings to null - const formattedForm = Object.fromEntries( - Object.entries(form).map(([key, value]) => [key, value === "" ? null : value]) - ); - // Destructure roles and img_id, default img_id if missing - const { roles = [], img_id, ...personData } = formattedForm; + const { roles, ...personData } = form; + const filteredPersonData = Object.fromEntries( + Object.entries(personData).filter(([_, v]) => v !== null && v !== undefined) + ); - console.log("Submitting person data:", personData); + const testBody = new URLSearchParams(filteredPersonData).toString(); fetch(`${postgrestPrefix}/people`, { - method: 'POST', + method: "POST", headers: { - 'Content-Type': 'application/json', + "Content-Type": "application/x-www-form-urlencoded", + "Prefer": "return=representation", }, - body: JSON.stringify({ - ...personData, - img_id: img_id || "david.jpg", // fallback to default - }), - }) - .then(response => { - if (!response.ok) { - return response.text().then(text => { - throw new Error(`Failed to create person: ${response.status} - ${text}`); - }); - } - return response.json(); + body: testBody, }) + .then(r => r.json()) .then(data => { - const personId = data.person_id; - if (!personId) { - throw new Error("Missing person_id in response"); - } + console.log("Test submission response:", data[0]); + const personId = data[0].person_id; + + console.log("Person ID:", personId, "roles:", roles); - // Handle roles if present - if (roles.length > 0) { - const rolePromises = roles.map(roleId => - fetch(`${postgrestPrefix}/people_roles`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ person_id: personId, role_id: roleId }), - }) - .then(response => { - if (!response.ok) { - return response.text().then(text => { - throw new Error(`Failed to assign role: ${response.status} - ${text}`); - }); - } - return response.json(); - }) - ); + roles.forEach(roleId => { + console.log("Assigning role:", roleId, "to person:", personId); + const body = new URLSearchParams({ person_id: personId, role_id: roleId }).toString(); - return Promise.all(rolePromises); - } + fetch(`${postgrestPrefix}/people_roles`, { + method: "POST", + headers: { + "Content-Type": "application/x-www-form-urlencoded", + "Prefer": "return=representation", + }, + body, + }) + .then(r => r.json()) + .then(roleData => { + console.log("Role assignment response:", roleData); + }) + .catch(e => console.error("Role assignment error:", e)); + }); }) - .catch(error => { - console.warn("Submission error:", error); - }); + .catch(e => console.error("Test submission error:", e)); }; - - return h(SaveButton, { disabled, onClick: handleSubmit }, text) + return h(SaveButton, { disabled, onClick: handleSubmit }, text); } function RolesInput({ setForm }) { From 4156a2e254ac708954dfe63bf4e7587ff14e5366 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 6 Aug 2025 12:32:20 -0400 Subject: [PATCH 15/23] Roles works --- pages/dev/add-people/+Page.client.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index cc5d1552e..a184f53d4 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -191,6 +191,8 @@ function RolesInput({ setForm }) { const [roles, setRoles] = useState([]); const [selectedRoles, setSelectedRoles] = useState([]); + console.log('RolesInput', roles, selectedRoles); + useEffect(() => { fetchPGData("/roles", {}) .then((data) => { @@ -202,25 +204,26 @@ function RolesInput({ setForm }) { }, []); const isItemSelected = (item) => - selectedRoles.some((r) => r.id === item.id); + selectedRoles.some((r) => r.role_id === item.role_id); const handleItemSelect = (item) => { if (!isItemSelected(item)) { const next = [...selectedRoles, item]; + console.log('Selected roles updated:', next.map((r) => r.role_id)); setSelectedRoles(next); setForm((prev) => ({ ...prev, - roles: next.map((r) => r.id), + roles: next.map((r) => r.role_id), })); } }; const handleItemDelete = (itemToDelete) => { - const next = selectedRoles.filter((item) => item.id !== itemToDelete.id); + const next = selectedRoles.filter((item) => item.role_id !== itemToDelete.role_id); setSelectedRoles(next); setForm((prev) => ({ ...prev, - roles: next.map((r) => r.id), + roles: next.map((r) => r.role_id), })); }; @@ -231,7 +234,7 @@ function RolesInput({ setForm }) { if (!modifiers.matchesPredicate) return null; return h(MenuItem, { - key: item.id, + key: item.role_id, text: item.name, onClick: handleClick, active: modifiers.active, From 80abfb7d1aa246ee23eecf3f3181ab9208a451c8 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 6 Aug 2025 12:39:57 -0400 Subject: [PATCH 16/23] Everything works except photo --- pages/dev/add-people/+Page.client.ts | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index a184f53d4..758b76c49 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -23,8 +23,6 @@ export function Page() { roles: [], }); - console.log('form', form); - const disabled = !form.name || !form.email || !form.title || !form.img_id || form.roles.length === 0; const handleChange = (field) => (value) => { @@ -157,11 +155,8 @@ function SubmitButton({ disabled, form }) { }) .then(r => r.json()) .then(data => { - console.log("Test submission response:", data[0]); const personId = data[0].person_id; - console.log("Person ID:", personId, "roles:", roles); - roles.forEach(roleId => { console.log("Assigning role:", roleId, "to person:", personId); const body = new URLSearchParams({ person_id: personId, role_id: roleId }).toString(); @@ -174,10 +169,6 @@ function SubmitButton({ disabled, form }) { }, body, }) - .then(r => r.json()) - .then(roleData => { - console.log("Role assignment response:", roleData); - }) .catch(e => console.error("Role assignment error:", e)); }); }) @@ -191,8 +182,6 @@ function RolesInput({ setForm }) { const [roles, setRoles] = useState([]); const [selectedRoles, setSelectedRoles] = useState([]); - console.log('RolesInput', roles, selectedRoles); - useEffect(() => { fetchPGData("/roles", {}) .then((data) => { From 19cd9eea42e45c1a199189b93177e36802ae0c91 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 6 Aug 2025 14:46:32 -0400 Subject: [PATCH 17/23] Map ingest version --- scripts/upload-photo/config.py | 41 ++ scripts/upload-photo/upload.py | 992 +++++++++++++++++++++++++++++++++ 2 files changed, 1033 insertions(+) create mode 100644 scripts/upload-photo/config.py create mode 100644 scripts/upload-photo/upload.py diff --git a/scripts/upload-photo/config.py b/scripts/upload-photo/config.py new file mode 100644 index 000000000..8c3afb70c --- /dev/null +++ b/scripts/upload-photo/config.py @@ -0,0 +1,41 @@ +""" +Settings that define the ingestion process. +""" + +from minio import Minio + +from macrostrat.core.config import settings # type: ignore[import-untyped] + +CHUNK_SIZE = 8 * 1024 * 1024 # 8 MB +TIMEOUT = 60 # seconds + +PG_DATABASE = getattr(settings, "pg_database") + +storage = getattr(settings, "storage", {}) +buckets = getattr(storage, "buckets", {}) + +S3_HOST = storage.get("endpoint", None) +S3_ACCESS_KEY = storage.get("access_key", None) +S3_SECRET_KEY = storage.get("secret_key", None) +S3_BUCKET = buckets.get("map-staging", None) + + +def get_minio_client(): + if not isinstance(S3_HOST, str): + raise ValueError("settings.storage.endpoint is not defined") + + host = S3_HOST + secure = None + if host.startswith("http://"): + host = host[7:] + secure = False + elif host.startswith("https://"): + host = host[8:] + secure = True + + return Minio( + endpoint=host, + access_key=S3_ACCESS_KEY, + secret_key=S3_SECRET_KEY, + secure=secure, + ) \ No newline at end of file diff --git a/scripts/upload-photo/upload.py b/scripts/upload-photo/upload.py new file mode 100644 index 000000000..f11549d71 --- /dev/null +++ b/scripts/upload-photo/upload.py @@ -0,0 +1,992 @@ +# pylint: disable=imports,too-many-arguments,too-many-branches,too-many-locals +""" +Ingest map data from archive files. + +A.k.a., a pipeline for ingesting maps into Macrostrat. +""" + +import csv +import datetime +import hashlib +import importlib +import os +import pathlib +import re +import shutil +import tarfile +import tempfile +import time +import zipfile +from contextlib import contextmanager +from typing import Annotated, Any, NoReturn, Optional + +import requests # type: ignore[import-untyped] +from rich.console import Console +from sqlalchemy import and_, insert, select, update +from sqlalchemy.orm import Session +from typer import Argument, Option + +from macrostrat.core.database import get_database +from macrostrat.core.schemas import ( # type: ignore[import-untyped] + IngestProcess, + IngestProcessTag, + IngestState, + Object, + ObjectGroup, + SchemeEnum, + Sources, +) +from macrostrat.map_integration import config +from macrostrat.map_integration.commands.ingest import ingest_map +from macrostrat.map_integration.commands.prepare_fields import prepare_fields +from macrostrat.map_integration.errors import IngestError +from macrostrat.map_integration.process.geometry import create_rgeom, create_webgeom +from macrostrat.map_integration.utils.map_info import MapInfo, get_map_info + +from .config import get_minio_client + +# Do this with importlib so we control the order +for mod in ["pylibmagic", "magic"]: + importlib.import_module("magic") + +# The list of arguments to upload_file that ingest_csv will look +# for in the CSV file given to it. +FIELDS = [ + "slug", + "name", + # "tag", # use the CLI, which supports applying multiple tags + "ref_title", + "ref_authors", + "ref_year", + "ref_source", + "ref_isbn_or_doi", + "scale", + "archive_url", + "website_url", + "raster_url", +] + +# The current terminal, with support for displaying rich text. +console = Console() + + +# -------------------------------------------------------------------------- +# Assorted helper functions. + +default_s3_bucket = config.S3_BUCKET + + +def normalize_slug(slug: str) -> str: + """ + Replace characters that are invalid in an sql table name with an underscore. + """ + return re.sub(r"\W", "_", slug).lower() + + +def strify_list(xs: list[Any]) -> list[str]: + """ + Convert the provided list to a list of strings. + """ + return [str(x) for x in xs] + + +def truncate_str(data: str, *, limit: int = 255) -> str: + """ + Replace the end of a string with "..." if its length exceeds some limit. + """ + if len(data) > limit: + data = data[: limit - 3] + "..." + return data[:limit] + + +def truncate_source_metadata(data: dict[str, Any]) -> dict[str, Any]: + """ + Ensure that metadata fields for a `maps.sources` record are not too long. + """ + data = data.copy() + for col in ["name", "url", "authors", "ref_source"]: + if col in data: + data[col] = truncate_str(data[col], limit=255) + for col in ["isbn_doi", "licence"]: + if col in data: + data[col] = truncate_str(data[col], limit=100) + return data + + +def raise_ingest_error( + ingest_process: IngestProcess, comments: str, source_exn: Optional[Exception] = None +) -> NoReturn: + """ + Set an ingest process to "failed", and then raise an Exception. + """ + record_ingest_error(ingest_process, comments) + raise IngestError(comments) from source_exn + + +def record_ingest_error(ingest_process: IngestProcess, comments: str) -> None: + """ + Set an ingest process to "failed". + """ + update_ingest_process( + ingest_process.id, state=IngestState.failed, comments=comments + ) + + +# -------------------------------------------------------------------------- +# Extracting and analyzing archive files. + + +def is_archive(file: pathlib.Path) -> bool: + """ + Return whether a file appears to be an archive, based on its name. + """ + return file.name.endswith((".tgz", ".tar.gz", ".zip")) + + +def extract_archive( + archive_file: pathlib.Path, + target_dir: pathlib.Path, + *, + ingest_process: Optional[IngestProcess] = None, + extract_subarchives: bool = True, +) -> None: + """ + Extract an archive file into a directory. + + By default, any extracted files that are themselves archives will be + expanded into the same directory. This might not result in the expected + layout for some archives. + + If provided, the ingest process will be used to report any errors. + """ + if archive_file.name.endswith((".tgz", ".tar.gz")): + with tarfile.open(archive_file) as tf: + tf.extractall(path=target_dir, filter="data") + elif archive_file.name.endswith(".zip"): + with zipfile.ZipFile(archive_file) as zf: + zf.extractall(path=target_dir) + elif ingest_process: + raise_ingest_error(ingest_process, "Unrecognized archive file format") + else: + raise IngestError("Unrecognized archive file format") + + if extract_subarchives: + sub_archives = set(target_dir.glob("**/*.tgz")) + sub_archives |= set(target_dir.glob("**/*.tar.gz")) + sub_archives |= set(target_dir.glob("**/*.zip")) + + for sub_archive in sub_archives - {archive_file}: + extract_archive( + sub_archive, + target_dir, + ingest_process=ingest_process, + extract_subarchives=False, + ) + + +def update_alaska_metadata(source: Sources, data_dir: pathlib.Path) -> None: + """ + Set metadata for an archive from the Alaska Division of Geological & Geophysical Surveys. + """ + metadata: dict[str, str] = {} + metadata_files = list(data_dir.glob("metadata/*.txt")) + + ## NOTE: The metadata file looks like it could be parsed as YAML, + ## but alas, it is not YAML. Some would-be hashes define a key multiple + ## times, and some values confuse PyYAML's parser. + + if len(metadata_files) != 1: + return + with open(metadata_files[0], encoding="utf-8") as fp: + raw_metadata = fp.readlines() + + ## Skip the first line ("Identification_Information:"). + + raw_metadata.pop(0) + + ## Scan for interesting lines until we reach the next section. + + for line in raw_metadata: + if not line.startswith(" ") or "Description:" in line: + break + line = line.strip() + + if match := re.match(r"(\s*)Originator:(\s*)", line): + author = match.group(2).strip() + if "authors" in metadata: + metadata["authors"] += f"; {author}" + else: + metadata["authors"] = author + if match := re.match(r"(\s*)Publication_Date:(\s*)", line): + metadata["ref_year"] = match.group(2).strip() + if match := re.match(r"(\s*)Title:(\s*)", line): + title = match.group(2).strip() + metadata["name"] = title + metadata["ref_title"] = title + if match := re.match(r"(\s*)Publisher:(\s*)", line): + metadata["ref_source"] = match.group(2).strip() + if match := re.match(r"(\s*)Online_Linkage:(\s*)", line): + metadata["isbn_doi"] = match.group(2).strip() + + if metadata: + update_source(source.source_id, **metadata) + + +# -------------------------------------------------------------------------- +# Querying the database. + + +def get_db_session(expire_on_commit=False) -> Session: + # NOTE: By default, let ORM objects persist past commits, and let + # consumers manage concurrent updates. + db = get_database() + return Session(db.engine, expire_on_commit=expire_on_commit) + + +def get_object(bucket: str, key: str) -> Optional[Object]: + with get_db_session() as session: + obj = session.scalar( + select(Object).where( + and_( + Object.scheme == SchemeEnum.s3, + Object.host == config.S3_HOST, + Object.bucket == bucket, + Object.key == key, + Object.deleted_on == None, + ) + ) + ) + return obj + + +def create_object(**data) -> Object: + data = data.copy() + data["created_on"] = datetime.datetime.utcnow() + with get_db_session() as session: + new_obj = session.scalar(insert(Object).values(**data).returning(Object)) + session.commit() + return new_obj + + +def update_object(id_: int, **data) -> Object: + data = data.copy() + data["updated_on"] = datetime.datetime.utcnow() + with get_db_session() as session: + new_obj = session.scalar( + update(Object).values(**data).where(Object.id == id_).returning(Object) + ) + session.commit() + return new_obj + + +def get_ingest_process_by_object_group_id(id_: int) -> Optional[IngestProcess]: + with get_db_session() as session: + ingest_process = session.scalar( + select(IngestProcess).where(IngestProcess.object_group_id == id_), + ) + return ingest_process + + +def get_ingest_process_by_source_id(id_: int) -> Optional[IngestProcess]: + with get_db_session() as session: + ingest_process = session.scalar( + select(IngestProcess).where(IngestProcess.source_id == id_), + ) + return ingest_process + + +def create_ingest_process(**data) -> IngestProcess: + data = data.copy() + data["created_on"] = datetime.datetime.utcnow() + with get_db_session() as session: + if not ( + object_group := session.scalar(insert(ObjectGroup).returning(ObjectGroup)) + ): + raise IngestError("Failed to create a new object group") + new_ingest_process = session.scalar( + insert(IngestProcess) + .values(object_group_id=object_group.id, **data) + .returning(IngestProcess) + ) + session.commit() + return new_ingest_process + + +def update_ingest_process(id_: int, **data) -> IngestProcess: + with get_db_session() as session: + new_ingest_process = session.scalar( + update(IngestProcess) + .values(**data) + .where(IngestProcess.id == id_) + .returning(IngestProcess) + ) + session.commit() + return new_ingest_process + + +def create_ingest_process_tag(ingest_process_id: int, tag: str) -> IngestProcessTag: + with get_db_session() as session: + new_ingest_process_tag = session.scalar( + insert(IngestProcessTag) + .values(ingest_process_id=ingest_process_id, tag=tag) + .returning(IngestProcessTag) + ) + session.commit() + return new_ingest_process_tag + + +def get_source_by_id(id_: int) -> Optional[Sources]: + with get_db_session() as session: + source = session.scalar(select(Sources).where(Sources.source_id == id_)) + return source + + +def get_source_by_slug(slug: str) -> Optional[Sources]: + with get_db_session() as session: + source = session.scalar(select(Sources).where(Sources.slug == slug)) + return source + + +def create_source(**data) -> Sources: + data = truncate_source_metadata(data) + print(data) + with get_db_session() as session: + new_source = session.scalar( + insert(Sources).values(**data).returning(Sources), + ) + session.commit() + return new_source + + +def update_source(id_: int, **data) -> Sources: + data = truncate_source_metadata(data) + with get_db_session() as session: + new_source = session.scalar( + update(Sources) + .values(**data) + .where(Sources.source_id == id_) + .returning(Sources), + ) + session.commit() + return new_source + + +# -------------------------------------------------------------------------- +# Creating and ingesting a single slug (a.k.a. map). + + +def create_slug( + slug: Annotated[ + str, + Argument(help="The slug to use for this map"), + ], + *, + name: Annotated[ + Optional[str], + Option(help="The map's name"), + ] = None, + tag: Annotated[ + Optional[list[str]], + Option(help="A tag to apply to the map"), + ] = None, + ref_title: Annotated[ + Optional[str], + Option(help="The map's report's title"), + ] = None, + ref_authors: Annotated[ + Optional[str], + Option(help="The map's report's authors"), + ] = None, + ref_year: Annotated[ + Optional[str], + Option(help="The map's report's year"), + ] = None, + ref_source: Annotated[ + Optional[str], + Option(help="The map's report's source"), + ] = None, + ref_isbn_or_doi: Annotated[ + Optional[str], + Option(help="The map's report's ISBN or DOI"), + ] = None, + scale: Annotated[ + str, + Option(help="The map's scale"), + ] = "large", + website_url: Annotated[ + Optional[str], + Option(help="The URL for the map's canonical landing page"), + ] = None, + raster_url: Annotated[ + Optional[str], + Option(help="The URL for the map's raster file"), + ] = None, +) -> tuple[Sources, IngestProcess]: + """ + Ensure that a map exists in the database with the provided metadata. + """ + + ## Normalize identifiers. + + slug = normalize_slug(slug) + console.print(f"Normalized the provided slug to {slug}") + + ## Create the `sources` record. + + metadata = { + "slug": slug, + "primary_table": f"{slug}_polygons", + "scale": scale, + } + if name: + metadata["name"] = name + if website_url: + metadata["url"] = website_url + if ref_title: + metadata["ref_title"] = ref_title + if ref_authors: + metadata["authors"] = ref_authors + if ref_year: + metadata["ref_year"] = ref_year + if ref_source: + metadata["ref_source"] = ref_source + if ref_isbn_or_doi: + metadata["isbn_doi"] = ref_isbn_or_doi + if raster_url: + metadata["raster_url"] = raster_url + + if source := get_source_by_slug(slug): + console.print(f"Found existing source ID {source.source_id} for slug {slug}") + source = update_source(source.source_id, **metadata) + else: + source = create_source(**metadata) + console.print(f"Created or updated source ID {source.source_id}") + + ## Create the `ingest_process` record. + + if not (ingest_process := get_ingest_process_by_source_id(source.source_id)): + ingest_process = create_ingest_process(source_id=source.source_id) + for t in tag or []: + create_ingest_process_tag(ingest_process.id, t) + console.print(f"Created or updated ingest process ID {ingest_process.id}") + + return (source, ingest_process) + + +def ingest_slug( + map_info: MapInfo, + *, + filter: Annotated[ + Optional[str], + Option(help="How to interpret the contents of the map's objects"), + ] = None, + embed: Annotated[bool, Option(help="Embed a shell for debugging")] = False, +) -> Sources: + """ + Ingest a map from its already uploaded files. + """ + source = get_source_by_id(map_info.id) + ingest_process = get_ingest_process_by_source_id(map_info.id) + + if not source or not ingest_process: + raise IngestError(f"Internal data model error for map {map_info}") + + with get_db_session() as session: + objs = session.scalars( + select(Object).where( + and_( + Object.object_group_id == ingest_process.object_group_id, + Object.deleted_on == None, + ) + ) + ).all() + + for i, obj in enumerate(objs): + append_data = i != 0 + try: + load_object( + obj.bucket, obj.key, filter=filter, append_data=append_data, embed=embed + ) + except Exception as exn: + raise_ingest_error(ingest_process, str(exn), exn) + + ## Prepare points, lines, and polygons tables for human review. + + console.print(f"Preparing map {map_info}") + try: + prepare_fields(map_info) + ingest_process = update_ingest_process( + ingest_process.id, state=IngestState.prepared + ) + create_rgeom(map_info) + create_webgeom(map_info) + ingest_process = update_ingest_process( + ingest_process.id, state=IngestState.ingested + ) + except Exception as exn: + raise_ingest_error(ingest_process, str(exn), exn) + + return source + + +# -------------------------------------------------------------------------- +# Working with files and objects. + + +def upload_file( + slug: Annotated[ + str, + Argument(help="The slug to use for this map"), + ], + local_file: Annotated[ + pathlib.Path, + Argument(help="The local archive file to upload"), + ], + *, + compress: Annotated[ + bool, + Option(help="Whether to compress the file before uploading"), + ] = False, + s3_prefix: Annotated[ + str, + Option(help="The prefix to use for the file's S3 key"), + ] = "", + s3_bucket: Annotated[ + str, + Option(help="The S3 bucket to upload the file to"), + ] = default_s3_bucket, + name: Annotated[ + Optional[str], + Option(help="The map's name"), + ] = None, + tag: Annotated[ + Optional[list[str]], + Option(help="A tag to apply to the map"), + ] = None, + ref_title: Annotated[ + Optional[str], + Option(help="The map's report's title"), + ] = None, + ref_authors: Annotated[ + Optional[str], + Option(help="The map's report's authors"), + ] = None, + ref_year: Annotated[ + Optional[str], + Option(help="The map's report's year"), + ] = None, + ref_source: Annotated[ + Optional[str], + Option(help="The map's report's source"), + ] = None, + ref_isbn_or_doi: Annotated[ + Optional[str], + Option(help="The map's report's ISBN or DOI"), + ] = None, + scale: Annotated[ + str, + Option(help="The map's scale"), + ] = "large", + archive_url: Annotated[ + Optional[str], + Option(help="The URL for the archive file"), + ] = None, + website_url: Annotated[ + Optional[str], + Option(help="The URL for the map's canonical landing page"), + ] = None, + raster_url: Annotated[ + Optional[str], + Option(help="The URL for the map's raster file"), + ] = None, +) -> Object: + """ + Upload a local archive file for a map to the object store. + """ + + s3 = get_minio_client() + bucket = s3_bucket + assert bucket is not None + + if s3_prefix.endswith("/"): + s3_prefix = s3_prefix[:-1] + + ## Normalize identifiers. + + slug = normalize_slug(slug) + console.print(f"Normalized the provided slug to {slug}") + + out_name = local_file.name + + if local_file.is_dir() and not compress: + # Special handling for Geodatabases + if local_file.suffix.endswith(".gdb"): + compress = True + + if local_file.is_dir() and not compress: + raise IngestError("Cannot ingest a directory") + + if compress: + console.print(f"Compressing {local_file}") + out_name = f"{local_file.name}.tar.gz" + with tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz") as tf: + with tarfile.open(tf.name, "w:gz") as tf: + tf.add(local_file, arcname=local_file.name) + local_file = pathlib.Path(tf.name) + + ## Create or update the `sources` and `ingest_process` records. + + (_, ingest_process) = create_slug( + slug, + name=name, + tag=tag, + ref_title=ref_title, + ref_authors=ref_authors, + ref_year=ref_year, + ref_source=ref_source, + ref_isbn_or_doi=ref_isbn_or_doi, + scale=scale, + website_url=website_url, + raster_url=raster_url, + ) + + console.print(f"Created record for map {slug}") + + ## Collect metadata for the archive file. + + mime_type = magic.Magic(mime=True).from_file(local_file) + hasher = hashlib.sha256() + with open(local_file, mode="rb") as fp: + while data := fp.read(config.CHUNK_SIZE): + hasher.update(data) + sha256_hash = hasher.hexdigest() + console.print(f"Detected {mime_type} with SHA-256 {sha256_hash}") + + ## Upload the file. + + bucket = s3_bucket + key = f"{s3_prefix}/{slug}/{out_name}" + + obj = get_object(bucket, key) + + if not obj or sha256_hash != obj.sha256_hash: + console.print(f"Uploading {out_name} to S3 as {bucket}/{key}") + s3.fput_object(bucket, key, str(local_file)) + ingest_process = update_ingest_process( + ingest_process.id, state=IngestState.pending + ) + console.print("Finished upload") + else: + console.print("Object with the same SHA-256 already present in S3") + + ## Create or update the object's DB entry. + + source_info = {} + if archive_url: + source_info["archive_url"] = archive_url + if raster_url: + source_info["raster_url"] = raster_url + if website_url: + source_info["website_url"] = website_url + + payload = { + "object_group_id": ingest_process.object_group_id, + "scheme": SchemeEnum.s3, + "host": config.S3_HOST, + "bucket": bucket, + "key": key, + "source": source_info, + "mime_type": mime_type, + "sha256_hash": sha256_hash, + } + + if obj: + obj = update_object(obj.id, **payload) + else: + obj = create_object(**payload) + console.print(f"Created or updated object ID {obj.id}") + + return obj + + +def load_object( + bucket: Annotated[ + str, + Argument(help="The object's bucket"), + ], + key: Annotated[ + str, + Argument(help="The object's key"), + ], + *, + filter: Annotated[ + Optional[str], + Option(help="How to interpret the contents of the object"), + ] = None, + append_data: Annotated[ + bool, + Option( + help="Whether to append data to the associated map when it already exists" + ), + ] = False, + embed: Annotated[bool, Option(help="Embed a shell for debugging")] = False, +) -> Object: + """ + Ingest an object in S3 containing a map into Macrostrat. + + Assumes that database records for the `sources` and `ingest_process` + tables have already been created. + """ + if not (obj := get_object(bucket, key)): + raise IngestError(f"No such object in the database: {bucket}/{key}") + if not ( + ingest_process := get_ingest_process_by_object_group_id(obj.object_group_id) + ): + raise IngestError(f"No ingest process in the database for object ID {obj.id}") + if not (source := get_source_by_id(ingest_process.source_id)): + raise_ingest_error( + ingest_process, + "No source ID in the database for ingest process ID {ingest_process.id}", + ) + + ## Normalize the filter. + + if filter: + filter = filter.lower() + + ## Download the object to a local, temporary file. + + s3 = get_minio_client() + + obj_basename = key.split("/")[-1] + fd, local_filename = tempfile.mkstemp(suffix=f"-{obj_basename}") + os.close(fd) + local_file = pathlib.Path(local_filename) + + console.print(f"Downloading archive into {local_file}") + s3.fget_object(bucket, key, str(local_file)) + console.print("Finished downloading archive") + + ## Process anything that might have points, lines, or polygons. + + try: + with ingestion_context(local_file, ignore_cleanup_errors=True) as tmp_dir: + + ## Locate files of interest. + + gis_files = ( + list(tmp_dir.glob("**/*.gdb")) + + list(tmp_dir.glob("**/*.geojson")) + + list(tmp_dir.glob("**/*.gpkg")) + + list(tmp_dir.glob("**/*.shp")) + ) + gis_data = [] + excluded_data = [] + + for gis_file in gis_files: + if filter == "polymer": + if ( + gis_file.name.startswith("polymer") + and "_bbox" not in gis_file.name + and "_legend" not in gis_file.name + ): + gis_data.append(gis_file) + else: + excluded_data.append(gis_file) + elif filter == "ta1": + if "_bbox" not in gis_file.name and "_legend" not in gis_file.name: + gis_data.append(gis_file) + else: + excluded_data.append(gis_file) + else: + gis_data.append(gis_file) + + if not gis_data: + raise_ingest_error(ingest_process, "Failed to locate GIS data") + + ## Process the GIS files. + + console.print(f"Loading into {source.slug}") + console.print(f"Loading {strify_list(gis_data)}") + if excluded_data: + console.print( + f"Skipping over / not loading {strify_list(excluded_data)}" + ) + console.print(f"Appending data? {append_data}") + try: + ingest_map( + source.slug, + gis_data, + if_exists="append" if append_data else "replace", + embed=embed, + ) + except Exception as exn: + raise_ingest_error(ingest_process, str(exn), exn) + + ## Process any other data of interest. + + try: + if filter == "alaska": + update_alaska_metadata(source, tmp_dir) + except Exception as exn: + raise_ingest_error(ingest_process, str(exn), exn) + except Exception as exn: + raise_ingest_error(ingest_process, str(exn), exn) + finally: + local_file.unlink() + + return obj + + +@contextmanager +def ingestion_context(local_file, *, ignore_cleanup_errors=False) -> list[pathlib.Path]: + """Copy or extract a local file into a temporary directory for ingestion.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=ignore_cleanup_errors) as td: + tmp_dir = pathlib.Path(td) + + if is_archive(local_file): + console.print(f"Extracting archive into {tmp_dir}") + extract_archive(local_file, tmp_dir) + else: + shutil.copy(local_file, tmp_dir) + + yield tmp_dir + + +# -------------------------------------------------------------------------- +# Creating and ingesting multiple slugs (a.k.a. maps). + + +def ingest_csv( + csv_file: Annotated[ + pathlib.Path, + Argument(help="CSV file containing arguments for upload-file"), + ], + download_dir: Annotated[ + pathlib.Path, + Option(help="Directory into which to download the maps' archive files"), + ], + *, + s3_bucket: Annotated[ + str, + Option(help="The S3 bucket to upload the files to"), + ] = default_s3_bucket, + s3_prefix: Annotated[ + str, + Option(help="The prefix, sans trailing slash, to use for the files' S3 keys"), + ] = None, + tag: Annotated[ + Optional[list[str]], + Option(help="A tag to apply to the maps"), + ] = None, + filter: Annotated[ + Optional[str], + Option(help="How to interpret the contents of the maps' files"), + ] = None, +) -> None: + """ + Ingest multiple maps from their descriptions in a CSV file. + + This command enables the bulk ingest of maps by specifying values for + arguments and options to the upload-file command, with each row in the + CSV file corresponding to one file. Once all files have been uploaded, + each resulting map will be processed with ingest-map. + + The first row of the CSV file should be a header listing the names of + arguments and options to the upload-file subcommand, with hyphens being + replaced by underscores. + + Instead of the "local_file" argument, there must be a column for + "archive_url", which is where to download the map's archive file from. + + There must also be a column for "slug". + """ + slugs_seen = [] + + with open(csv_file, mode="r", encoding="utf-8", newline="") as input_fp: + reader = csv.DictReader(input_fp) + + for row in reader: + url = row["archive_url"] + filename = url.split("/")[-1] + + download_dir_for_slug = download_dir / row["slug"] + download_dir_for_slug.mkdir(parents=True, exist_ok=True) + + partial_local_file = download_dir_for_slug / (filename + ".partial") + local_file = download_dir_for_slug / filename + + if not local_file.exists(): + console.print(f"Downloading {url}") + response = requests.get(url, stream=True, timeout=config.TIMEOUT) + + if not response.ok: + console.print(f"Failed to download {url}") + continue + + with open(partial_local_file, mode="wb") as local_fp: + for chunk in response.iter_content(chunk_size=config.CHUNK_SIZE): + local_fp.write(chunk) + partial_local_file.rename(local_file) + + kwargs = {} + for f in set(FIELDS) - {"slug"}: + if row.get(f): + kwargs[f] = row[f] + if tag: + kwargs["tag"] = tag + + upload_file( + row["slug"], + local_file, + s3_bucket=s3_bucket, + s3_prefix=s3_prefix, + **kwargs, # type: ignore[arg-type] + ) + slugs_seen.append(row["slug"]) + + ## Ingest only those maps with successful uploads. + db = get_database() + + for slug in set(slugs_seen): + try: + ingest_slug(get_map_info(db, slug), filter=filter) + except Exception as exn: + console.print(f"Exception while attempting to ingest a CSV file: {exn}") + + +def run_polling_loop( + polling_interval: Annotated[ + int, + Argument(help="How often to poll, in seconds"), + ] = 60, +) -> None: + """ + Poll for and process pending maps. + """ + while True: + console.print("Starting iteration of polling loop") + bad_pending = 0 + + db = get_database() + + with get_db_session() as session: + for ingest_process in session.scalars( + select(IngestProcess).where(IngestProcess.state == IngestState.pending) + ).unique(): + if ingest_process.source_id: + map_info = get_map_info(db, ingest_process.source_id) + console.print(f"Processing {map_info}") + try: + ingest_slug(map_info) + except Exception as exn: + record_ingest_error(ingest_process, str(exn)) + else: + bad_pending += 1 + + if bad_pending: + console.print( + f"Skipped {bad_pending} ingests because of a missing source_id" + ) + console.print("Finished iteration of polling loop") + time.sleep(polling_interval) \ No newline at end of file From 8eeb4b5f466e415a6f3abfe94fa52803f7ace3d8 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 6 Aug 2025 14:55:26 -0400 Subject: [PATCH 18/23] basic script --- scripts/upload-photo/Makefile | 2 + scripts/upload-photo/upload.py | 1045 ++------------------------------ 2 files changed, 66 insertions(+), 981 deletions(-) create mode 100644 scripts/upload-photo/Makefile diff --git a/scripts/upload-photo/Makefile b/scripts/upload-photo/Makefile new file mode 100644 index 000000000..290d0a4bf --- /dev/null +++ b/scripts/upload-photo/Makefile @@ -0,0 +1,2 @@ +run: + python3 upload.py \ No newline at end of file diff --git a/scripts/upload-photo/upload.py b/scripts/upload-photo/upload.py index f11549d71..ace1df8a0 100644 --- a/scripts/upload-photo/upload.py +++ b/scripts/upload-photo/upload.py @@ -1,992 +1,75 @@ -# pylint: disable=imports,too-many-arguments,too-many-branches,too-many-locals -""" -Ingest map data from archive files. - -A.k.a., a pipeline for ingesting maps into Macrostrat. -""" - -import csv -import datetime -import hashlib -import importlib +#!/usr/bin/env python3 import os -import pathlib -import re -import shutil -import tarfile -import tempfile -import time -import zipfile -from contextlib import contextmanager -from typing import Annotated, Any, NoReturn, Optional - -import requests # type: ignore[import-untyped] -from rich.console import Console -from sqlalchemy import and_, insert, select, update -from sqlalchemy.orm import Session -from typer import Argument, Option - -from macrostrat.core.database import get_database -from macrostrat.core.schemas import ( # type: ignore[import-untyped] - IngestProcess, - IngestProcessTag, - IngestState, - Object, - ObjectGroup, - SchemeEnum, - Sources, -) -from macrostrat.map_integration import config -from macrostrat.map_integration.commands.ingest import ingest_map -from macrostrat.map_integration.commands.prepare_fields import prepare_fields -from macrostrat.map_integration.errors import IngestError -from macrostrat.map_integration.process.geometry import create_rgeom, create_webgeom -from macrostrat.map_integration.utils.map_info import MapInfo, get_map_info - -from .config import get_minio_client - -# Do this with importlib so we control the order -for mod in ["pylibmagic", "magic"]: - importlib.import_module("magic") - -# The list of arguments to upload_file that ingest_csv will look -# for in the CSV file given to it. -FIELDS = [ - "slug", - "name", - # "tag", # use the CLI, which supports applying multiple tags - "ref_title", - "ref_authors", - "ref_year", - "ref_source", - "ref_isbn_or_doi", - "scale", - "archive_url", - "website_url", - "raster_url", -] - -# The current terminal, with support for displaying rich text. -console = Console() - - -# -------------------------------------------------------------------------- -# Assorted helper functions. - -default_s3_bucket = config.S3_BUCKET - - -def normalize_slug(slug: str) -> str: - """ - Replace characters that are invalid in an sql table name with an underscore. - """ - return re.sub(r"\W", "_", slug).lower() - - -def strify_list(xs: list[Any]) -> list[str]: - """ - Convert the provided list to a list of strings. - """ - return [str(x) for x in xs] - - -def truncate_str(data: str, *, limit: int = 255) -> str: - """ - Replace the end of a string with "..." if its length exceeds some limit. - """ - if len(data) > limit: - data = data[: limit - 3] + "..." - return data[:limit] - - -def truncate_source_metadata(data: dict[str, Any]) -> dict[str, Any]: - """ - Ensure that metadata fields for a `maps.sources` record are not too long. - """ - data = data.copy() - for col in ["name", "url", "authors", "ref_source"]: - if col in data: - data[col] = truncate_str(data[col], limit=255) - for col in ["isbn_doi", "licence"]: - if col in data: - data[col] = truncate_str(data[col], limit=100) - return data - - -def raise_ingest_error( - ingest_process: IngestProcess, comments: str, source_exn: Optional[Exception] = None -) -> NoReturn: - """ - Set an ingest process to "failed", and then raise an Exception. - """ - record_ingest_error(ingest_process, comments) - raise IngestError(comments) from source_exn - - -def record_ingest_error(ingest_process: IngestProcess, comments: str) -> None: - """ - Set an ingest process to "failed". - """ - update_ingest_process( - ingest_process.id, state=IngestState.failed, comments=comments - ) - - -# -------------------------------------------------------------------------- -# Extracting and analyzing archive files. - - -def is_archive(file: pathlib.Path) -> bool: - """ - Return whether a file appears to be an archive, based on its name. - """ - return file.name.endswith((".tgz", ".tar.gz", ".zip")) - - -def extract_archive( - archive_file: pathlib.Path, - target_dir: pathlib.Path, - *, - ingest_process: Optional[IngestProcess] = None, - extract_subarchives: bool = True, -) -> None: - """ - Extract an archive file into a directory. - - By default, any extracted files that are themselves archives will be - expanded into the same directory. This might not result in the expected - layout for some archives. - - If provided, the ingest process will be used to report any errors. - """ - if archive_file.name.endswith((".tgz", ".tar.gz")): - with tarfile.open(archive_file) as tf: - tf.extractall(path=target_dir, filter="data") - elif archive_file.name.endswith(".zip"): - with zipfile.ZipFile(archive_file) as zf: - zf.extractall(path=target_dir) - elif ingest_process: - raise_ingest_error(ingest_process, "Unrecognized archive file format") - else: - raise IngestError("Unrecognized archive file format") - - if extract_subarchives: - sub_archives = set(target_dir.glob("**/*.tgz")) - sub_archives |= set(target_dir.glob("**/*.tar.gz")) - sub_archives |= set(target_dir.glob("**/*.zip")) - - for sub_archive in sub_archives - {archive_file}: - extract_archive( - sub_archive, - target_dir, - ingest_process=ingest_process, - extract_subarchives=False, - ) - - -def update_alaska_metadata(source: Sources, data_dir: pathlib.Path) -> None: - """ - Set metadata for an archive from the Alaska Division of Geological & Geophysical Surveys. - """ - metadata: dict[str, str] = {} - metadata_files = list(data_dir.glob("metadata/*.txt")) - - ## NOTE: The metadata file looks like it could be parsed as YAML, - ## but alas, it is not YAML. Some would-be hashes define a key multiple - ## times, and some values confuse PyYAML's parser. - - if len(metadata_files) != 1: - return - with open(metadata_files[0], encoding="utf-8") as fp: - raw_metadata = fp.readlines() - - ## Skip the first line ("Identification_Information:"). - - raw_metadata.pop(0) - - ## Scan for interesting lines until we reach the next section. - - for line in raw_metadata: - if not line.startswith(" ") or "Description:" in line: - break - line = line.strip() - - if match := re.match(r"(\s*)Originator:(\s*)", line): - author = match.group(2).strip() - if "authors" in metadata: - metadata["authors"] += f"; {author}" - else: - metadata["authors"] = author - if match := re.match(r"(\s*)Publication_Date:(\s*)", line): - metadata["ref_year"] = match.group(2).strip() - if match := re.match(r"(\s*)Title:(\s*)", line): - title = match.group(2).strip() - metadata["name"] = title - metadata["ref_title"] = title - if match := re.match(r"(\s*)Publisher:(\s*)", line): - metadata["ref_source"] = match.group(2).strip() - if match := re.match(r"(\s*)Online_Linkage:(\s*)", line): - metadata["isbn_doi"] = match.group(2).strip() - - if metadata: - update_source(source.source_id, **metadata) - - -# -------------------------------------------------------------------------- -# Querying the database. - - -def get_db_session(expire_on_commit=False) -> Session: - # NOTE: By default, let ORM objects persist past commits, and let - # consumers manage concurrent updates. - db = get_database() - return Session(db.engine, expire_on_commit=expire_on_commit) - - -def get_object(bucket: str, key: str) -> Optional[Object]: - with get_db_session() as session: - obj = session.scalar( - select(Object).where( - and_( - Object.scheme == SchemeEnum.s3, - Object.host == config.S3_HOST, - Object.bucket == bucket, - Object.key == key, - Object.deleted_on == None, - ) - ) - ) - return obj - - -def create_object(**data) -> Object: - data = data.copy() - data["created_on"] = datetime.datetime.utcnow() - with get_db_session() as session: - new_obj = session.scalar(insert(Object).values(**data).returning(Object)) - session.commit() - return new_obj - - -def update_object(id_: int, **data) -> Object: - data = data.copy() - data["updated_on"] = datetime.datetime.utcnow() - with get_db_session() as session: - new_obj = session.scalar( - update(Object).values(**data).where(Object.id == id_).returning(Object) - ) - session.commit() - return new_obj - - -def get_ingest_process_by_object_group_id(id_: int) -> Optional[IngestProcess]: - with get_db_session() as session: - ingest_process = session.scalar( - select(IngestProcess).where(IngestProcess.object_group_id == id_), - ) - return ingest_process - - -def get_ingest_process_by_source_id(id_: int) -> Optional[IngestProcess]: - with get_db_session() as session: - ingest_process = session.scalar( - select(IngestProcess).where(IngestProcess.source_id == id_), - ) - return ingest_process - - -def create_ingest_process(**data) -> IngestProcess: - data = data.copy() - data["created_on"] = datetime.datetime.utcnow() - with get_db_session() as session: - if not ( - object_group := session.scalar(insert(ObjectGroup).returning(ObjectGroup)) - ): - raise IngestError("Failed to create a new object group") - new_ingest_process = session.scalar( - insert(IngestProcess) - .values(object_group_id=object_group.id, **data) - .returning(IngestProcess) - ) - session.commit() - return new_ingest_process - - -def update_ingest_process(id_: int, **data) -> IngestProcess: - with get_db_session() as session: - new_ingest_process = session.scalar( - update(IngestProcess) - .values(**data) - .where(IngestProcess.id == id_) - .returning(IngestProcess) - ) - session.commit() - return new_ingest_process - - -def create_ingest_process_tag(ingest_process_id: int, tag: str) -> IngestProcessTag: - with get_db_session() as session: - new_ingest_process_tag = session.scalar( - insert(IngestProcessTag) - .values(ingest_process_id=ingest_process_id, tag=tag) - .returning(IngestProcessTag) - ) - session.commit() - return new_ingest_process_tag - - -def get_source_by_id(id_: int) -> Optional[Sources]: - with get_db_session() as session: - source = session.scalar(select(Sources).where(Sources.source_id == id_)) - return source - - -def get_source_by_slug(slug: str) -> Optional[Sources]: - with get_db_session() as session: - source = session.scalar(select(Sources).where(Sources.slug == slug)) - return source - - -def create_source(**data) -> Sources: - data = truncate_source_metadata(data) - print(data) - with get_db_session() as session: - new_source = session.scalar( - insert(Sources).values(**data).returning(Sources), - ) - session.commit() - return new_source - - -def update_source(id_: int, **data) -> Sources: - data = truncate_source_metadata(data) - with get_db_session() as session: - new_source = session.scalar( - update(Sources) - .values(**data) - .where(Sources.source_id == id_) - .returning(Sources), - ) - session.commit() - return new_source - - -# -------------------------------------------------------------------------- -# Creating and ingesting a single slug (a.k.a. map). - - -def create_slug( - slug: Annotated[ - str, - Argument(help="The slug to use for this map"), - ], - *, - name: Annotated[ - Optional[str], - Option(help="The map's name"), - ] = None, - tag: Annotated[ - Optional[list[str]], - Option(help="A tag to apply to the map"), - ] = None, - ref_title: Annotated[ - Optional[str], - Option(help="The map's report's title"), - ] = None, - ref_authors: Annotated[ - Optional[str], - Option(help="The map's report's authors"), - ] = None, - ref_year: Annotated[ - Optional[str], - Option(help="The map's report's year"), - ] = None, - ref_source: Annotated[ - Optional[str], - Option(help="The map's report's source"), - ] = None, - ref_isbn_or_doi: Annotated[ - Optional[str], - Option(help="The map's report's ISBN or DOI"), - ] = None, - scale: Annotated[ - str, - Option(help="The map's scale"), - ] = "large", - website_url: Annotated[ - Optional[str], - Option(help="The URL for the map's canonical landing page"), - ] = None, - raster_url: Annotated[ - Optional[str], - Option(help="The URL for the map's raster file"), - ] = None, -) -> tuple[Sources, IngestProcess]: - """ - Ensure that a map exists in the database with the provided metadata. - """ - - ## Normalize identifiers. - - slug = normalize_slug(slug) - console.print(f"Normalized the provided slug to {slug}") - - ## Create the `sources` record. - - metadata = { - "slug": slug, - "primary_table": f"{slug}_polygons", - "scale": scale, - } - if name: - metadata["name"] = name - if website_url: - metadata["url"] = website_url - if ref_title: - metadata["ref_title"] = ref_title - if ref_authors: - metadata["authors"] = ref_authors - if ref_year: - metadata["ref_year"] = ref_year - if ref_source: - metadata["ref_source"] = ref_source - if ref_isbn_or_doi: - metadata["isbn_doi"] = ref_isbn_or_doi - if raster_url: - metadata["raster_url"] = raster_url - - if source := get_source_by_slug(slug): - console.print(f"Found existing source ID {source.source_id} for slug {slug}") - source = update_source(source.source_id, **metadata) - else: - source = create_source(**metadata) - console.print(f"Created or updated source ID {source.source_id}") - - ## Create the `ingest_process` record. - - if not (ingest_process := get_ingest_process_by_source_id(source.source_id)): - ingest_process = create_ingest_process(source_id=source.source_id) - for t in tag or []: - create_ingest_process_tag(ingest_process.id, t) - console.print(f"Created or updated ingest process ID {ingest_process.id}") - - return (source, ingest_process) - - -def ingest_slug( - map_info: MapInfo, - *, - filter: Annotated[ - Optional[str], - Option(help="How to interpret the contents of the map's objects"), - ] = None, - embed: Annotated[bool, Option(help="Embed a shell for debugging")] = False, -) -> Sources: - """ - Ingest a map from its already uploaded files. - """ - source = get_source_by_id(map_info.id) - ingest_process = get_ingest_process_by_source_id(map_info.id) - - if not source or not ingest_process: - raise IngestError(f"Internal data model error for map {map_info}") - - with get_db_session() as session: - objs = session.scalars( - select(Object).where( - and_( - Object.object_group_id == ingest_process.object_group_id, - Object.deleted_on == None, - ) - ) - ).all() - - for i, obj in enumerate(objs): - append_data = i != 0 - try: - load_object( - obj.bucket, obj.key, filter=filter, append_data=append_data, embed=embed - ) - except Exception as exn: - raise_ingest_error(ingest_process, str(exn), exn) - - ## Prepare points, lines, and polygons tables for human review. - - console.print(f"Preparing map {map_info}") - try: - prepare_fields(map_info) - ingest_process = update_ingest_process( - ingest_process.id, state=IngestState.prepared - ) - create_rgeom(map_info) - create_webgeom(map_info) - ingest_process = update_ingest_process( - ingest_process.id, state=IngestState.ingested - ) - except Exception as exn: - raise_ingest_error(ingest_process, str(exn), exn) - - return source - - -# -------------------------------------------------------------------------- -# Working with files and objects. - - -def upload_file( - slug: Annotated[ - str, - Argument(help="The slug to use for this map"), - ], - local_file: Annotated[ - pathlib.Path, - Argument(help="The local archive file to upload"), - ], - *, - compress: Annotated[ - bool, - Option(help="Whether to compress the file before uploading"), - ] = False, - s3_prefix: Annotated[ - str, - Option(help="The prefix to use for the file's S3 key"), - ] = "", - s3_bucket: Annotated[ - str, - Option(help="The S3 bucket to upload the file to"), - ] = default_s3_bucket, - name: Annotated[ - Optional[str], - Option(help="The map's name"), - ] = None, - tag: Annotated[ - Optional[list[str]], - Option(help="A tag to apply to the map"), - ] = None, - ref_title: Annotated[ - Optional[str], - Option(help="The map's report's title"), - ] = None, - ref_authors: Annotated[ - Optional[str], - Option(help="The map's report's authors"), - ] = None, - ref_year: Annotated[ - Optional[str], - Option(help="The map's report's year"), - ] = None, - ref_source: Annotated[ - Optional[str], - Option(help="The map's report's source"), - ] = None, - ref_isbn_or_doi: Annotated[ - Optional[str], - Option(help="The map's report's ISBN or DOI"), - ] = None, - scale: Annotated[ - str, - Option(help="The map's scale"), - ] = "large", - archive_url: Annotated[ - Optional[str], - Option(help="The URL for the archive file"), - ] = None, - website_url: Annotated[ - Optional[str], - Option(help="The URL for the map's canonical landing page"), - ] = None, - raster_url: Annotated[ - Optional[str], - Option(help="The URL for the map's raster file"), - ] = None, -) -> Object: - """ - Upload a local archive file for a map to the object store. - """ - - s3 = get_minio_client() - bucket = s3_bucket - assert bucket is not None - - if s3_prefix.endswith("/"): - s3_prefix = s3_prefix[:-1] - - ## Normalize identifiers. - - slug = normalize_slug(slug) - console.print(f"Normalized the provided slug to {slug}") - - out_name = local_file.name - - if local_file.is_dir() and not compress: - # Special handling for Geodatabases - if local_file.suffix.endswith(".gdb"): - compress = True - - if local_file.is_dir() and not compress: - raise IngestError("Cannot ingest a directory") +import sys +import hashlib +import magic +from minio import Minio +from minio.error import S3Error - if compress: - console.print(f"Compressing {local_file}") - out_name = f"{local_file.name}.tar.gz" - with tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz") as tf: - with tarfile.open(tf.name, "w:gz") as tf: - tf.add(local_file, arcname=local_file.name) - local_file = pathlib.Path(tf.name) +def main(): + # Load environment variables + endpoint = os.getenv("S3_ENDPOINT") + bucket = os.getenv("S3_BUCKET") + s3_path = os.getenv("S3_PATH") + access_key = os.getenv("S3_ACCESS_KEY") + secret_key = os.getenv("S3_SECRET_KEY") - ## Create or update the `sources` and `ingest_process` records. + - (_, ingest_process) = create_slug( - slug, - name=name, - tag=tag, - ref_title=ref_title, - ref_authors=ref_authors, - ref_year=ref_year, - ref_source=ref_source, - ref_isbn_or_doi=ref_isbn_or_doi, - scale=scale, - website_url=website_url, - raster_url=raster_url, - ) + if not all([endpoint, bucket, access_key, secret_key]): + print("Error: Please set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY, and S3_SECRET_KEY environment variables.") + sys.exit(1) - console.print(f"Created record for map {slug}") + photo_filename = "david.jpg" + local_path = os.path.join(os.getcwd(), photo_filename) - ## Collect metadata for the archive file. + if not os.path.isfile(local_path): + print(f"Error: File '{photo_filename}' not found in current directory.") + sys.exit(1) - mime_type = magic.Magic(mime=True).from_file(local_file) + # Calculate SHA256 hash hasher = hashlib.sha256() - with open(local_file, mode="rb") as fp: - while data := fp.read(config.CHUNK_SIZE): - hasher.update(data) + with open(local_path, "rb") as f: + while chunk := f.read(8192): + hasher.update(chunk) sha256_hash = hasher.hexdigest() - console.print(f"Detected {mime_type} with SHA-256 {sha256_hash}") - - ## Upload the file. - - bucket = s3_bucket - key = f"{s3_prefix}/{slug}/{out_name}" - - obj = get_object(bucket, key) - - if not obj or sha256_hash != obj.sha256_hash: - console.print(f"Uploading {out_name} to S3 as {bucket}/{key}") - s3.fput_object(bucket, key, str(local_file)) - ingest_process = update_ingest_process( - ingest_process.id, state=IngestState.pending - ) - console.print("Finished upload") - else: - console.print("Object with the same SHA-256 already present in S3") - - ## Create or update the object's DB entry. - - source_info = {} - if archive_url: - source_info["archive_url"] = archive_url - if raster_url: - source_info["raster_url"] = raster_url - if website_url: - source_info["website_url"] = website_url - - payload = { - "object_group_id": ingest_process.object_group_id, - "scheme": SchemeEnum.s3, - "host": config.S3_HOST, - "bucket": bucket, - "key": key, - "source": source_info, - "mime_type": mime_type, - "sha256_hash": sha256_hash, - } - - if obj: - obj = update_object(obj.id, **payload) - else: - obj = create_object(**payload) - console.print(f"Created or updated object ID {obj.id}") - - return obj - - -def load_object( - bucket: Annotated[ - str, - Argument(help="The object's bucket"), - ], - key: Annotated[ - str, - Argument(help="The object's key"), - ], - *, - filter: Annotated[ - Optional[str], - Option(help="How to interpret the contents of the object"), - ] = None, - append_data: Annotated[ - bool, - Option( - help="Whether to append data to the associated map when it already exists" - ), - ] = False, - embed: Annotated[bool, Option(help="Embed a shell for debugging")] = False, -) -> Object: - """ - Ingest an object in S3 containing a map into Macrostrat. - - Assumes that database records for the `sources` and `ingest_process` - tables have already been created. - """ - if not (obj := get_object(bucket, key)): - raise IngestError(f"No such object in the database: {bucket}/{key}") - if not ( - ingest_process := get_ingest_process_by_object_group_id(obj.object_group_id) - ): - raise IngestError(f"No ingest process in the database for object ID {obj.id}") - if not (source := get_source_by_id(ingest_process.source_id)): - raise_ingest_error( - ingest_process, - "No source ID in the database for ingest process ID {ingest_process.id}", - ) - - ## Normalize the filter. - - if filter: - filter = filter.lower() - - ## Download the object to a local, temporary file. - - s3 = get_minio_client() - - obj_basename = key.split("/")[-1] - fd, local_filename = tempfile.mkstemp(suffix=f"-{obj_basename}") - os.close(fd) - local_file = pathlib.Path(local_filename) - - console.print(f"Downloading archive into {local_file}") - s3.fget_object(bucket, key, str(local_file)) - console.print("Finished downloading archive") - - ## Process anything that might have points, lines, or polygons. + print(f"SHA-256: {sha256_hash}") + + # Detect MIME type + mime = magic.Magic(mime=True) + mime_type = mime.from_file(local_path) + print(f"MIME type: {mime_type}") + + # Prepare object key (S3 path) + # Remove trailing slash if any + s3_path = s3_path.rstrip("/") + object_key = f"{s3_path}/{photo_filename}" if s3_path else photo_filename + + # Connect to MinIO/S3 + client = Minio( + endpoint, + access_key=access_key, + secret_key=secret_key, + secure=endpoint.startswith("https"), + ) + # Ensure bucket exists (optional) try: - with ingestion_context(local_file, ignore_cleanup_errors=True) as tmp_dir: - - ## Locate files of interest. - - gis_files = ( - list(tmp_dir.glob("**/*.gdb")) - + list(tmp_dir.glob("**/*.geojson")) - + list(tmp_dir.glob("**/*.gpkg")) - + list(tmp_dir.glob("**/*.shp")) - ) - gis_data = [] - excluded_data = [] - - for gis_file in gis_files: - if filter == "polymer": - if ( - gis_file.name.startswith("polymer") - and "_bbox" not in gis_file.name - and "_legend" not in gis_file.name - ): - gis_data.append(gis_file) - else: - excluded_data.append(gis_file) - elif filter == "ta1": - if "_bbox" not in gis_file.name and "_legend" not in gis_file.name: - gis_data.append(gis_file) - else: - excluded_data.append(gis_file) - else: - gis_data.append(gis_file) - - if not gis_data: - raise_ingest_error(ingest_process, "Failed to locate GIS data") - - ## Process the GIS files. - - console.print(f"Loading into {source.slug}") - console.print(f"Loading {strify_list(gis_data)}") - if excluded_data: - console.print( - f"Skipping over / not loading {strify_list(excluded_data)}" - ) - console.print(f"Appending data? {append_data}") - try: - ingest_map( - source.slug, - gis_data, - if_exists="append" if append_data else "replace", - embed=embed, - ) - except Exception as exn: - raise_ingest_error(ingest_process, str(exn), exn) - - ## Process any other data of interest. - - try: - if filter == "alaska": - update_alaska_metadata(source, tmp_dir) - except Exception as exn: - raise_ingest_error(ingest_process, str(exn), exn) - except Exception as exn: - raise_ingest_error(ingest_process, str(exn), exn) - finally: - local_file.unlink() - - return obj - - -@contextmanager -def ingestion_context(local_file, *, ignore_cleanup_errors=False) -> list[pathlib.Path]: - """Copy or extract a local file into a temporary directory for ingestion.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=ignore_cleanup_errors) as td: - tmp_dir = pathlib.Path(td) - - if is_archive(local_file): - console.print(f"Extracting archive into {tmp_dir}") - extract_archive(local_file, tmp_dir) - else: - shutil.copy(local_file, tmp_dir) - - yield tmp_dir - - -# -------------------------------------------------------------------------- -# Creating and ingesting multiple slugs (a.k.a. maps). - - -def ingest_csv( - csv_file: Annotated[ - pathlib.Path, - Argument(help="CSV file containing arguments for upload-file"), - ], - download_dir: Annotated[ - pathlib.Path, - Option(help="Directory into which to download the maps' archive files"), - ], - *, - s3_bucket: Annotated[ - str, - Option(help="The S3 bucket to upload the files to"), - ] = default_s3_bucket, - s3_prefix: Annotated[ - str, - Option(help="The prefix, sans trailing slash, to use for the files' S3 keys"), - ] = None, - tag: Annotated[ - Optional[list[str]], - Option(help="A tag to apply to the maps"), - ] = None, - filter: Annotated[ - Optional[str], - Option(help="How to interpret the contents of the maps' files"), - ] = None, -) -> None: - """ - Ingest multiple maps from their descriptions in a CSV file. - - This command enables the bulk ingest of maps by specifying values for - arguments and options to the upload-file command, with each row in the - CSV file corresponding to one file. Once all files have been uploaded, - each resulting map will be processed with ingest-map. - - The first row of the CSV file should be a header listing the names of - arguments and options to the upload-file subcommand, with hyphens being - replaced by underscores. - - Instead of the "local_file" argument, there must be a column for - "archive_url", which is where to download the map's archive file from. - - There must also be a column for "slug". - """ - slugs_seen = [] - - with open(csv_file, mode="r", encoding="utf-8", newline="") as input_fp: - reader = csv.DictReader(input_fp) - - for row in reader: - url = row["archive_url"] - filename = url.split("/")[-1] - - download_dir_for_slug = download_dir / row["slug"] - download_dir_for_slug.mkdir(parents=True, exist_ok=True) - - partial_local_file = download_dir_for_slug / (filename + ".partial") - local_file = download_dir_for_slug / filename - - if not local_file.exists(): - console.print(f"Downloading {url}") - response = requests.get(url, stream=True, timeout=config.TIMEOUT) - - if not response.ok: - console.print(f"Failed to download {url}") - continue - - with open(partial_local_file, mode="wb") as local_fp: - for chunk in response.iter_content(chunk_size=config.CHUNK_SIZE): - local_fp.write(chunk) - partial_local_file.rename(local_file) - - kwargs = {} - for f in set(FIELDS) - {"slug"}: - if row.get(f): - kwargs[f] = row[f] - if tag: - kwargs["tag"] = tag - - upload_file( - row["slug"], - local_file, - s3_bucket=s3_bucket, - s3_prefix=s3_prefix, - **kwargs, # type: ignore[arg-type] - ) - slugs_seen.append(row["slug"]) - - ## Ingest only those maps with successful uploads. - db = get_database() - - for slug in set(slugs_seen): - try: - ingest_slug(get_map_info(db, slug), filter=filter) - except Exception as exn: - console.print(f"Exception while attempting to ingest a CSV file: {exn}") - - -def run_polling_loop( - polling_interval: Annotated[ - int, - Argument(help="How often to poll, in seconds"), - ] = 60, -) -> None: - """ - Poll for and process pending maps. - """ - while True: - console.print("Starting iteration of polling loop") - bad_pending = 0 - - db = get_database() - - with get_db_session() as session: - for ingest_process in session.scalars( - select(IngestProcess).where(IngestProcess.state == IngestState.pending) - ).unique(): - if ingest_process.source_id: - map_info = get_map_info(db, ingest_process.source_id) - console.print(f"Processing {map_info}") - try: - ingest_slug(map_info) - except Exception as exn: - record_ingest_error(ingest_process, str(exn)) - else: - bad_pending += 1 - - if bad_pending: - console.print( - f"Skipped {bad_pending} ingests because of a missing source_id" - ) - console.print("Finished iteration of polling loop") - time.sleep(polling_interval) \ No newline at end of file + if not client.bucket_exists(bucket): + print(f"Bucket '{bucket}' does not exist. Creating it.") + client.make_bucket(bucket) + except S3Error as e: + print(f"Error checking or creating bucket: {e}") + sys.exit(1) + + # Upload the file + try: + print(f"Uploading {photo_filename} to bucket '{bucket}' at '{object_key}' ...") + client.fput_object(bucket, object_key, local_path, content_type=mime_type) + print("Upload complete!") + except S3Error as e: + print(f"Upload failed: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() From 297bd0e55fbf521a2c03a9801b3b87fda5894127 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 6 Aug 2025 15:43:12 -0400 Subject: [PATCH 19/23] Upload does work, just need to choose a path and credentials for it --- scripts/upload-photo/Makefile | 5 ++- scripts/upload-photo/config.py | 41 ------------------- scripts/upload-photo/list.sh | 43 +++++++++++++++++++ scripts/upload-photo/upload.py | 75 ---------------------------------- scripts/upload-photo/upload.sh | 46 +++++++++++++++++++++ 5 files changed, 93 insertions(+), 117 deletions(-) delete mode 100644 scripts/upload-photo/config.py create mode 100755 scripts/upload-photo/list.sh delete mode 100644 scripts/upload-photo/upload.py create mode 100755 scripts/upload-photo/upload.sh diff --git a/scripts/upload-photo/Makefile b/scripts/upload-photo/Makefile index 290d0a4bf..24d3380a8 100644 --- a/scripts/upload-photo/Makefile +++ b/scripts/upload-photo/Makefile @@ -1,2 +1,5 @@ run: - python3 upload.py \ No newline at end of file + ./upload.sh + +list: + ./list.sh \ No newline at end of file diff --git a/scripts/upload-photo/config.py b/scripts/upload-photo/config.py deleted file mode 100644 index 8c3afb70c..000000000 --- a/scripts/upload-photo/config.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Settings that define the ingestion process. -""" - -from minio import Minio - -from macrostrat.core.config import settings # type: ignore[import-untyped] - -CHUNK_SIZE = 8 * 1024 * 1024 # 8 MB -TIMEOUT = 60 # seconds - -PG_DATABASE = getattr(settings, "pg_database") - -storage = getattr(settings, "storage", {}) -buckets = getattr(storage, "buckets", {}) - -S3_HOST = storage.get("endpoint", None) -S3_ACCESS_KEY = storage.get("access_key", None) -S3_SECRET_KEY = storage.get("secret_key", None) -S3_BUCKET = buckets.get("map-staging", None) - - -def get_minio_client(): - if not isinstance(S3_HOST, str): - raise ValueError("settings.storage.endpoint is not defined") - - host = S3_HOST - secure = None - if host.startswith("http://"): - host = host[7:] - secure = False - elif host.startswith("https://"): - host = host[8:] - secure = True - - return Minio( - endpoint=host, - access_key=S3_ACCESS_KEY, - secret_key=S3_SECRET_KEY, - secure=secure, - ) \ No newline at end of file diff --git a/scripts/upload-photo/list.sh b/scripts/upload-photo/list.sh new file mode 100755 index 000000000..620307434 --- /dev/null +++ b/scripts/upload-photo/list.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -euo pipefail + +# Load .env file from two directories up +ENV_PATH="$(dirname "$(dirname "$PWD")")/.env" +if [[ -f "$ENV_PATH" ]]; then + set -a + source "$ENV_PATH" + set +a +else + echo "❌ .env file not found at $ENV_PATH" + exit 1 +fi + +# Check required env vars +REQUIRED_VARS=(S3_ENDPOINT S3_BUCKET S3_PATH S3_ACCESS_KEY S3_SECRET_KEY) +missing=() +for var in "${REQUIRED_VARS[@]}"; do + if [[ -z "${!var:-}" ]]; then + missing+=("$var") + fi +done +if (( ${#missing[@]} > 0 )); then + echo "❌ Missing required environment variables: ${missing[*]}" + exit 1 +fi + +# Configure rclone using environment variables (no config file needed) +export RCLONE_CONFIG_S3_TYPE="s3" +export RCLONE_CONFIG_S3_PROVIDER="Minio" +export RCLONE_CONFIG_S3_ACCESS_KEY_ID="$S3_ACCESS_KEY" +export RCLONE_CONFIG_S3_SECRET_ACCESS_KEY="$S3_SECRET_KEY" +export RCLONE_CONFIG_S3_ENDPOINT="$S3_ENDPOINT" +export RCLONE_CONFIG_S3_ENV_AUTH="false" + +REMOTE_PATH="s3:${S3_BUCKET}/assets" + +echo "🔍 Listing files in '$REMOTE_PATH'..." + +rclone ls "$REMOTE_PATH" --log-level DEBUG + +rclone copy "s3:macrostrat-sites/assets" ./ diff --git a/scripts/upload-photo/upload.py b/scripts/upload-photo/upload.py deleted file mode 100644 index ace1df8a0..000000000 --- a/scripts/upload-photo/upload.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys -import hashlib -import magic -from minio import Minio -from minio.error import S3Error - -def main(): - # Load environment variables - endpoint = os.getenv("S3_ENDPOINT") - bucket = os.getenv("S3_BUCKET") - s3_path = os.getenv("S3_PATH") - access_key = os.getenv("S3_ACCESS_KEY") - secret_key = os.getenv("S3_SECRET_KEY") - - - - if not all([endpoint, bucket, access_key, secret_key]): - print("Error: Please set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY, and S3_SECRET_KEY environment variables.") - sys.exit(1) - - photo_filename = "david.jpg" - local_path = os.path.join(os.getcwd(), photo_filename) - - if not os.path.isfile(local_path): - print(f"Error: File '{photo_filename}' not found in current directory.") - sys.exit(1) - - # Calculate SHA256 hash - hasher = hashlib.sha256() - with open(local_path, "rb") as f: - while chunk := f.read(8192): - hasher.update(chunk) - sha256_hash = hasher.hexdigest() - print(f"SHA-256: {sha256_hash}") - - # Detect MIME type - mime = magic.Magic(mime=True) - mime_type = mime.from_file(local_path) - print(f"MIME type: {mime_type}") - - # Prepare object key (S3 path) - # Remove trailing slash if any - s3_path = s3_path.rstrip("/") - object_key = f"{s3_path}/{photo_filename}" if s3_path else photo_filename - - # Connect to MinIO/S3 - client = Minio( - endpoint, - access_key=access_key, - secret_key=secret_key, - secure=endpoint.startswith("https"), - ) - - # Ensure bucket exists (optional) - try: - if not client.bucket_exists(bucket): - print(f"Bucket '{bucket}' does not exist. Creating it.") - client.make_bucket(bucket) - except S3Error as e: - print(f"Error checking or creating bucket: {e}") - sys.exit(1) - - # Upload the file - try: - print(f"Uploading {photo_filename} to bucket '{bucket}' at '{object_key}' ...") - client.fput_object(bucket, object_key, local_path, content_type=mime_type) - print("Upload complete!") - except S3Error as e: - print(f"Upload failed: {e}") - sys.exit(1) - -if __name__ == "__main__": - main() diff --git a/scripts/upload-photo/upload.sh b/scripts/upload-photo/upload.sh new file mode 100755 index 000000000..c9734cec8 --- /dev/null +++ b/scripts/upload-photo/upload.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +set -euo pipefail + +# Load .env file from two directories up +ENV_PATH="$(dirname "$(dirname "$PWD")")/.env" +if [[ -f "$ENV_PATH" ]]; then + # Use `set -a` to automatically export all variables + set -a + source "$ENV_PATH" + set +a +else + echo "❌ .env file not found at $ENV_PATH" + exit 1 +fi + +# Check required env vars +REQUIRED_VARS=(S3_ENDPOINT S3_BUCKET S3_PATH S3_ACCESS_KEY S3_SECRET_KEY) +for var in "${REQUIRED_VARS[@]}"; do + if [[ -z "${!var:-}" ]]; then + echo "❌ Missing required environment variable: $var" + exit 1 + fi +done + +# File to upload +FILE="david.jpg" +if [[ ! -f "$FILE" ]]; then + echo "❌ File '$FILE' not found in current directory." + exit 1 +fi + +# Configure rclone using environment variables (no config file needed) +export RCLONE_CONFIG_S3_TYPE="s3" +export RCLONE_CONFIG_S3_PROVIDER="Minio" +export RCLONE_CONFIG_S3_ACCESS_KEY_ID="$S3_ACCESS_KEY" +export RCLONE_CONFIG_S3_SECRET_ACCESS_KEY="$S3_SECRET_KEY" +export RCLONE_CONFIG_S3_ENDPOINT="$S3_ENDPOINT" +export RCLONE_CONFIG_S3_ENV_AUTH="false" + +# Final destination path +DESTINATION="s3:/${S3_PATH}" +echo "⬆️ Uploading '$FILE' to '$DESTINATION'..." +rclone copy "$FILE" "$DESTINATION" --s3-no-check-bucket --s3-upload-concurrency=4 --progress + +echo "✅ Upload complete!" From ca8a7c06e08c8ecd35ddb95d2800a5a3a40042a0 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 13 Aug 2025 15:22:19 -0400 Subject: [PATCH 20/23] Image upload works --- pages/dev/add-people/+Page.client.ts | 59 +++++++++++++++++++--------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index 758b76c49..59aabbc7f 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -29,6 +29,8 @@ export function Page() { setForm({ ...form, [field]: value }); }; + console.log("form", form) + return h(BasePage, { title: "Add people" }, [ h("div.add-people-page", [ h("p", "This page is meant to add people to the Macrostrat database. Please fill out the form below with the person's details."), @@ -112,32 +114,52 @@ function DateInput({ label, value = "", onChange, required = false }) { }); } -function ImageInput({ label, onChange, required = false }) { - return h(DataField, { - label, - value: h("input.image-input", { - type: "file", - accept: "image/*", - required, - onChange: (e) => { - const file = e.target.files[0]; - if (file) { - const reader = new FileReader(); - reader.onload = (event) => onChange(event.target.result); - reader.readAsDataURL(file); - } - }, - }), - }); +function ImageInput({ label, value = null, onChange, required = false }) { + return h(DataField, { + label, + value: h("input.image-input", { + type: "file", + accept: "image/*", + required, + onChange: (e) => { + const file = e.target.files[0]; + if (file) { + onChange(file); + } + }, + }), + }); } + function SubmitButton({ disabled, form }) { const text = disabled ? "Please fill out all required fields" : "Add person"; const handleSubmit = () => { if (disabled) return; - // Destructure roles and img_id, default img_id if missing + // Upload image + const APIURL = "http://localhost:8000/image_upload"; + const formData = new FormData(); + formData.append("file", form.img_id); + + return fetch(APIURL, { + method: "POST", + body: formData, + }) + .then(res => { + if (!res.ok) throw new Error(`Image upload failed: ${res.statusText}`); + return res.json(); + }) + .then(data => { + console.log("Image uploaded successfully:", data); + return data; + }) + .catch(err => console.error("Image upload error:", err)); + + + /* + // Upload person const { roles, ...personData } = form; const filteredPersonData = Object.fromEntries( Object.entries(personData).filter(([_, v]) => v !== null && v !== undefined) @@ -173,6 +195,7 @@ function SubmitButton({ disabled, form }) { }); }) .catch(e => console.error("Test submission error:", e)); + */ }; return h(SaveButton, { disabled, onClick: handleSubmit }, text); From 018064a2c009d711ba8ff6907ac512b651fbaec7 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 13 Aug 2025 15:30:50 -0400 Subject: [PATCH 21/23] Upload person works!! --- pages/dev/add-people/+Page.client.ts | 47 ++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index 59aabbc7f..ffb249ce7 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -133,8 +133,11 @@ function ImageInput({ label, value = null, onChange, required = false }) { function SubmitButton({ disabled, form }) { + const [img, setImg] = useState(null); const text = disabled ? "Please fill out all required fields" : "Add person"; + console.log("Image: ", img); + const handleSubmit = () => { if (disabled) return; @@ -152,12 +155,50 @@ function SubmitButton({ disabled, form }) { return res.json(); }) .then(data => { - console.log("Image uploaded successfully:", data); - return data; + // Upload person + const { roles, img_id, ...personData } = form; + const filteredPersonData = Object.fromEntries( + Object.entries(personData).filter(([_, v]) => v !== null && v !== undefined) + ); + + const fullData = { + ...filteredPersonData, + img_id: data.filename + } + + const body = new URLSearchParams(fullData).toString(); + + fetch(`${postgrestPrefix}/people`, { + method: "POST", + headers: { + "Content-Type": "application/x-www-form-urlencoded", + "Prefer": "return=representation", + }, + body, + }) + .then(r => r.json()) + .then(data => { + const personId = data[0].person_id; + + roles.forEach(roleId => { + console.log("Assigning role:", roleId, "to person:", personId); + const body = new URLSearchParams({ person_id: personId, role_id: roleId }).toString(); + + fetch(`${postgrestPrefix}/people_roles`, { + method: "POST", + headers: { + "Content-Type": "application/x-www-form-urlencoded", + "Prefer": "return=representation", + }, + body, + }) + .catch(e => console.error("Role assignment error:", e)); + }); + }) + .catch(e => console.error("Test submission error:", e)); }) .catch(err => console.error("Image upload error:", err)); - /* // Upload person const { roles, ...personData } = form; From 8d45ca365cd4a33587600a003d445562b472e8de Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 13 Aug 2025 15:33:39 -0400 Subject: [PATCH 22/23] People page works too --- pages/people/+Page.ts | 4 ++-- src/components/general/index.ts | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pages/people/+Page.ts b/pages/people/+Page.ts index 6578bb6c6..5adcddc63 100644 --- a/pages/people/+Page.ts +++ b/pages/people/+Page.ts @@ -1,4 +1,4 @@ -import { Image, Navbar, Footer, SearchBar } from "~/components/general"; +import { PersonImage, Navbar, Footer, SearchBar } from "~/components/general"; import h from "./main.module.sass"; import { Card, Divider } from "@blueprintjs/core"; import { useState, useEffect } from "react"; @@ -106,7 +106,7 @@ function PersonCard({ name, roles, email, website, img_id, active_start, active_ const end = new Date(active_end).toLocaleDateString(); return h("div.person-info", [ - h(Image, { src: img_id, className: "back-img" }), + h(PersonImage, { src: img_id, className: "back-img" }), h("div.description", [ h("a.name", { href: website }, name), h("p.role", roles.map(role => role.name).join(", ")), diff --git a/src/components/general/index.ts b/src/components/general/index.ts index af816dc46..2d0295341 100644 --- a/src/components/general/index.ts +++ b/src/components/general/index.ts @@ -12,6 +12,12 @@ export function Image({ src, className, width, height }) { return h("img", { src: srcWithAddedPrefix, className, width, height }); } +export function PersonImage({ src, className, width, height }) { + const srcWithAddedPrefix = + "https://storage.macrostrat.org/macrostrat-sites/test/" + src; + return h("img", { src: srcWithAddedPrefix, className, width, height }); +} + export function NavListItem({ href, children }) { return h( "li.nav-list-item", From 435707185007b5bf4f36538a5ff43c7bf5b6ea46 Mon Sep 17 00:00:00 2001 From: davidsklar99 Date: Wed, 13 Aug 2025 15:52:37 -0400 Subject: [PATCH 23/23] Formatting change --- pages/dev/add-people/+Page.client.ts | 147 ++++++++++++--------------- pages/people/+Page.ts | 4 +- src/components/general/index.ts | 2 +- 3 files changed, 66 insertions(+), 87 deletions(-) diff --git a/pages/dev/add-people/+Page.client.ts b/pages/dev/add-people/+Page.client.ts index ffb249ce7..47aba492f 100644 --- a/pages/dev/add-people/+Page.client.ts +++ b/pages/dev/add-people/+Page.client.ts @@ -81,7 +81,7 @@ export function Page() { onChange: handleChange("active_end") }), ]), - h(SubmitButton, { disabled: false, form }), + h(SubmitButton, { disabled, form, setForm }), h("p.note", h('em', "Fields marked with * are required")), ]), ]); @@ -132,14 +132,13 @@ function ImageInput({ label, value = null, onChange, required = false }) { } -function SubmitButton({ disabled, form }) { - const [img, setImg] = useState(null); +function SubmitButton({ disabled, form, setForm }) { + const [inProgress, setInProgress] = useState(false); const text = disabled ? "Please fill out all required fields" : "Add person"; - console.log("Image: ", img); - const handleSubmit = () => { if (disabled) return; + setInProgress(true); // Upload image const APIURL = "http://localhost:8000/image_upload"; @@ -156,90 +155,27 @@ function SubmitButton({ disabled, form }) { }) .then(data => { // Upload person - const { roles, img_id, ...personData } = form; - const filteredPersonData = Object.fromEntries( - Object.entries(personData).filter(([_, v]) => v !== null && v !== undefined) - ); - - const fullData = { - ...filteredPersonData, - img_id: data.filename - } - - const body = new URLSearchParams(fullData).toString(); - - fetch(`${postgrestPrefix}/people`, { - method: "POST", - headers: { - "Content-Type": "application/x-www-form-urlencoded", - "Prefer": "return=representation", - }, - body, - }) - .then(r => r.json()) - .then(data => { - const personId = data[0].person_id; - - roles.forEach(roleId => { - console.log("Assigning role:", roleId, "to person:", personId); - const body = new URLSearchParams({ person_id: personId, role_id: roleId }).toString(); - - fetch(`${postgrestPrefix}/people_roles`, { - method: "POST", - headers: { - "Content-Type": "application/x-www-form-urlencoded", - "Prefer": "return=representation", - }, - body, - }) - .catch(e => console.error("Role assignment error:", e)); - }); + uploadPerson({ data, form }); + }) + .then(() => { + // Handle successful upload + alert("Person added successfully!"); + setForm({ + name: null, + email: null, + title: null, + website: null, + img_id: null, + active_start: null, + active_end: null, + roles: [], }) - .catch(e => console.error("Test submission error:", e)); + setInProgress(false); }) .catch(err => console.error("Image upload error:", err)); - - /* - // Upload person - const { roles, ...personData } = form; - const filteredPersonData = Object.fromEntries( - Object.entries(personData).filter(([_, v]) => v !== null && v !== undefined) - ); - - const testBody = new URLSearchParams(filteredPersonData).toString(); - - fetch(`${postgrestPrefix}/people`, { - method: "POST", - headers: { - "Content-Type": "application/x-www-form-urlencoded", - "Prefer": "return=representation", - }, - body: testBody, - }) - .then(r => r.json()) - .then(data => { - const personId = data[0].person_id; - - roles.forEach(roleId => { - console.log("Assigning role:", roleId, "to person:", personId); - const body = new URLSearchParams({ person_id: personId, role_id: roleId }).toString(); - - fetch(`${postgrestPrefix}/people_roles`, { - method: "POST", - headers: { - "Content-Type": "application/x-www-form-urlencoded", - "Prefer": "return=representation", - }, - body, - }) - .catch(e => console.error("Role assignment error:", e)); - }); - }) - .catch(e => console.error("Test submission error:", e)); - */ }; - return h(SaveButton, { disabled, onClick: handleSubmit }, text); + return h(SaveButton, { disabled, onClick: handleSubmit, inProgress }, text); } function RolesInput({ setForm }) { @@ -312,3 +248,46 @@ function RolesInput({ setForm }) { }), }); } + +function uploadPerson({ data, form }) { + const { roles, img_id, ...personData } = form; + const filteredPersonData = Object.fromEntries( + Object.entries(personData).filter(([_, v]) => v !== null && v !== undefined) + ); + + const fullData = { + ...filteredPersonData, + img_id: data.filename + } + + const body = new URLSearchParams(fullData).toString(); + + fetch(`${postgrestPrefix}/people`, { + method: "POST", + headers: { + "Content-Type": "application/x-www-form-urlencoded", + "Prefer": "return=representation", + }, + body, + }) + .then(r => r.json()) + .then(data => { + const personId = data[0].person_id; + + roles.forEach(roleId => { + console.log("Assigning role:", roleId, "to person:", personId); + const body = new URLSearchParams({ person_id: personId, role_id: roleId }).toString(); + + fetch(`${postgrestPrefix}/people_roles`, { + method: "POST", + headers: { + "Content-Type": "application/x-www-form-urlencoded", + "Prefer": "return=representation", + }, + body, + }) + .catch(e => console.error("Role assignment error:", e)); + }); + }) + .catch(e => console.error("Test submission error:", e)); +} \ No newline at end of file diff --git a/pages/people/+Page.ts b/pages/people/+Page.ts index 5adcddc63..8754f4aaf 100644 --- a/pages/people/+Page.ts +++ b/pages/people/+Page.ts @@ -102,8 +102,8 @@ export function Page() { } function PersonCard({ name, roles, email, website, img_id, active_start, active_end }) { - const start = new Date(active_start).toLocaleDateString(); - const end = new Date(active_end).toLocaleDateString(); + const start = new Date(active_start).toLocaleDateString('en-US', { timeZone: 'UTC', year: 'numeric', month: 'long', day: 'numeric' }); + const end = new Date(active_end).toLocaleDateString('en-US', { timeZone: 'UTC', year: 'numeric', month: 'long', day: 'numeric' }); return h("div.person-info", [ h(PersonImage, { src: img_id, className: "back-img" }), diff --git a/src/components/general/index.ts b/src/components/general/index.ts index 2d0295341..56456e266 100644 --- a/src/components/general/index.ts +++ b/src/components/general/index.ts @@ -14,7 +14,7 @@ export function Image({ src, className, width, height }) { export function PersonImage({ src, className, width, height }) { const srcWithAddedPrefix = - "https://storage.macrostrat.org/macrostrat-sites/test/" + src; + "https://storage.macrostrat.org/macrostrat-sites/people/" + src; return h("img", { src: srcWithAddedPrefix, className, width, height }); }