Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions services/madoc-ts/src/extensions/enrichment/extension.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Topic, TopicType, TopicTypeListResponse } from '../../types/schemas/topics';
import { BaseDjangoExtension } from './base-django-extension';
import { EnrichmentIndexPayload } from './types';
import { EnrichmentIndexPayload, EnrichmentPlaintext, EnrichmentTask } from './types';
import { ApiKey } from '../../types/api-key';
import { SearchQuery, SearchResponse } from '../../types/search';
import {
Expand Down Expand Up @@ -75,11 +75,35 @@ export class EnrichmentExtension extends BaseDjangoExtension {
}

getAllEnrichmentTasks(page = 1) {
return this.api.request(`/api/enrichment/task_log?page=${page}`);
return this.api.request(`/api/enrichment/task_log/?page=${page}`);
}

getEnrichmentTask(id: string) {
return this.api.request(`/api/enrichment/task_log/${id}`);
return this.api.request<EnrichmentTask>(`/api/enrichment/task_log/${id}/`);
}
/**
 * Requests a single plaintext record from the enrichment API.
 *
 * @param id Identifier placed in the `/api/enrichment/plaintext/{id}/` path.
 * @returns The result of `this.api.request`, typed as `EnrichmentPlaintext`.
 */
getEnrichmentPlaintext(id: string) {
  const endpoint = `/api/enrichment/plaintext/${id}/`;
  return this.api.request<EnrichmentPlaintext>(endpoint);
}

/**
 * Sends a POST to the Madoc manifest enrichment route for the given manifest.
 *
 * @param id Manifest id placed in the `/api/madoc/iiif/manifests/{id}/enrichment` path.
 * @returns The result of `this.api.request`, typed as `EnrichmentTask`.
 */
enrichManifest(id: number) {
  const endpoint = `/api/madoc/iiif/manifests/${id}/enrichment`;
  return this.api.request<EnrichmentTask>(endpoint, { method: 'POST' });
}

/**
 * Sends a POST to the enrichment service's internal
 * `madoc_manifest_enrichment_pipeline` task endpoint.
 *
 * @param id Manifest id, sent as the task subject `urn:madoc:manifest:{id}`.
 * @param callback Optional URL sent as `callback_url` in the task parameters.
 * @returns The result of `this.api.request`, typed as `EnrichmentTask`.
 */
enrichManifestInternal(id: number, callback?: string) {
  // Task description expected under the `task` key of the request body.
  const task = {
    subject: `urn:madoc:manifest:${id}`,
    parameters: [{ callback_url: callback }],
  };

  return this.api.request<EnrichmentTask>(
    `/api/enrichment/internal/madoc/tasks/madoc_manifest_enrichment_pipeline/`,
    { method: 'POST', body: { task } }
  );
}

allTasks = [
Expand Down
35 changes: 35 additions & 0 deletions services/madoc-ts/src/extensions/enrichment/types.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Canvas, Collection, InternationalString, Manifest } from '@iiif/presentation-3';
import { BaseTask } from '../../gateway/tasks/base-task';

export type EnrichmentIndexPayload = {
madoc_id: string;
Expand All @@ -16,3 +17,37 @@ export interface DjangoPagination<T> {
previous: string;
results: T[];
}

/**
 * Abbreviated description of an enrichment task. It is the type of
 * `EnrichmentTask.parent_task` and the element type of `EnrichmentTask.child_tasks`.
 */
export interface EnrichmentTaskSnippet {
url: string;
id: string;
// NOTE(review): `created` / `modified` are presumably timestamp strings — confirm the format.
created: string;
modified: string;
name: string;
// Untyped; the shape of the task state is not described by this interface.
state: any;
// Numeric status code; presumably described by `status_text` — confirm against the enrichment API.
status: number;
status_text: string;
task_type: string;
task_class: string;
}

/**
 * Full enrichment task. Reuses `BaseTask` without its `parent_task`, `type` and `subtasks`
 * members and declares the fields below alongside the remaining ones.
 */
export interface EnrichmentTask extends Omit<BaseTask, 'parent_task' | 'type' | 'subtasks'> {
url: string;
// Presumably stands in for the omitted `BaseTask.type` — confirm.
task_type: string; // Type
// Declared here as a snippet object, replacing the omitted `BaseTask.parent_task`.
parent_task: EnrichmentTaskSnippet;
errors: string[];
// Presumably the counterpart of the omitted `BaseTask.subtasks` — confirm.
child_tasks: EnrichmentTaskSnippet[];
task_class: string;
}

/**
 * Plaintext record of the enrichment service; used as the response type of
 * `EnrichmentExtension.getEnrichmentPlaintext`.
 */
export interface EnrichmentPlaintext {
url: string;
id: string;
created: string;
modified: string;
source: string;
// NOTE(review): `ocr_backend` / `ocr_format` presumably describe how the text was produced — confirm.
ocr_backend: string;
ocr_format: string;
// The text itself.
plaintext: string;
// NOTE(review): typed as a single string despite the `_list` suffix — confirm the encoding.
plaintext_list: string;
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export const ManifestSearchIndex = createUniversalComponent<ManifestSearchIndexT

const api = useApi();
const [invokeEnrichment, { isLoading: enrichLoading }] = useMutation(async () => {
await api.triggerSearchIndex(Number(id), 'manifest');
await api.enrichment.enrichManifest(Number(id));
await refetch();
});

Expand Down
15 changes: 14 additions & 1 deletion services/madoc-ts/src/gateway/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,14 @@ export class ApiClient {
// Enrichment
this.authority = new AuthorityExtension(this);
this.enrichment = new EnrichmentExtension(this);
this.webhooks = new WebhookExtension(this);
this.search = new SearchExtension(this);

if (options.withoutExtensions) {
this.crowdsourcing = new CrowdsourcingApi(this, null, captureModelDataSources);
return;
}

this.webhooks = new WebhookExtension(this);
this.pageBlocks = new PageBlockExtension(this, getDefaultPageBlockDefinitions());
this.media = new MediaExtension(this);
this.system = new SystemExtension(this);
Expand Down Expand Up @@ -1077,6 +1077,13 @@ export class ApiClient {
return this.request<{ found: boolean; transcription: string }>(`/api/madoc/iiif/canvases/${id}/plaintext`);
}

/**
 * Sends a POST with the given plaintext to the canvas plaintext route.
 *
 * @param id Canvas id placed in the `/api/madoc/iiif/canvases/{id}/plaintext` path.
 * @param plaintext Text sent as the `plaintext` property of the request body.
 * @returns The result of `this.request`, typed as `{ success, empty }`.
 */
async updateCanvasPlaintext(id: number, plaintext: string) {
  const endpoint = `/api/madoc/iiif/canvases/${id}/plaintext`;
  return this.request<{ success: boolean; empty: boolean }>(endpoint, {
    method: 'POST',
    body: { plaintext },
  });
}

async getCanvasDeletionSummary(id: number) {
return this.request<CanvasDeletionSummary>(`/api/madoc/iiif/canvases/${id}/deletion-summary`);
}
Expand Down Expand Up @@ -1757,6 +1764,12 @@ export class ApiClient {
});
}

/**
 * Sends a DELETE request for a single linking property.
 *
 * @param id Linking property id placed in the `/api/madoc/iiif/linking/{id}` path.
 * @returns The result of `this.request`.
 */
async deleteLinkingProperty(id: number) {
  const endpoint = `/api/madoc/iiif/linking/${id}`;
  return this.request(endpoint, { method: 'DELETE' });
}

async getStorageRaw(bucket: string, fileName: string, isPublic = false) {
return this.request<Response>(
isPublic ? `/api/storage/data/${bucket}/public/${fileName}` : `/api/storage/data/${bucket}/${fileName}`,
Expand Down
4 changes: 4 additions & 0 deletions services/madoc-ts/src/router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import { keyRegenerate } from './routes/admin/key-regenerate';
import { listApiKeys } from './routes/admin/list-api-keys';
import { getProjectAnnotationStyle } from './routes/annotation-styles/get-project-annotation-style';
import { annotationStyles } from './routes/annotation-styles/index';
import { manifestEnrichmentPipeline } from "./routes/enrichment/manifest-enrichment-pipeline";
import { searchAllUsers } from './routes/global/search-all-users';
import { systemCheck } from './routes/global/system-check';
import { addPlaintext } from './routes/iiif/linking/add-plaintext';
import { getAutomatedUsers } from './routes/manage-site/get-automated-users';
import { createProjectExport } from './routes/projects/create-project-export';
import { getProjectRawData } from './routes/projects/get-project-raw-data';
Expand Down Expand Up @@ -415,6 +417,7 @@ export const router = new TypedRouter({
'get-manifest-linking': [TypedRouter.GET, '/api/madoc/iiif/manifests/:id/linking', getLinking],
'get-manifest-canvas-linking': [TypedRouter.GET, '/api/madoc/iiif/manifests/:id/canvas-linking', getParentLinking],
'search-index-manifest': [TypedRouter.POST, '/api/madoc/iiif/manifests/:id/index', indexManifest],
'search-enrich-manifest': [TypedRouter.POST, '/api/madoc/iiif/manifests/:id/enrichment', manifestEnrichmentPipeline],

// Canvas API
'list-canvases': [TypedRouter.GET, '/api/madoc/iiif/canvases', listCanvases],
Expand All @@ -432,6 +435,7 @@ export const router = new TypedRouter({
'search-index-canvas': [TypedRouter.POST, '/api/madoc/iiif/canvases/:id/index', indexCanvas],
'convert-linking-property': [TypedRouter.POST, '/api/madoc/iiif/linking/:id/convert', convertLinking],
'get-canvas-plaintext': [TypedRouter.GET, '/api/madoc/iiif/canvases/:id/plaintext', getCanvasPlaintext],
'update-canvas-plaintext': [TypedRouter.POST, '/api/madoc/iiif/canvases/:id/plaintext', addPlaintext],
'get-canvas-source': [TypedRouter.GET, '/api/madoc/iiif/canvas-source', getCanvasReference],
'get-canvas-deletion-summary': [
TypedRouter.GET,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import invariant from 'tiny-invariant';
import { api } from '../../gateway/api.server';
import { RouteMiddleware } from '../../types/route-middleware';
import { parseUrn } from '../../utility/parse-urn';
import { userWithScope } from '../../utility/user-with-scope';
import { IncomingWebhook, WebhookEventType } from '../../webhooks/webhook-types';

/**
 * Route handler that starts the enrichment pipeline for the manifest in `context.params.id`.
 *
 * Requires the `site.admin` scope. A webhook URL for the
 * `manifestEnrichmentPipelineEvent` event is generated and passed to the enrichment
 * service as the callback; the service's response becomes the response body.
 */
export const manifestEnrichmentPipeline: RouteMiddleware<{ id: number }> = async context => {
  const { siteId } = userWithScope(context, ['site.admin']);
  const manifestId = context.params.id;

  const site = await context.siteManager.getSiteById(siteId);
  const siteApi = api.asUser({ siteId });

  // 12-hour token.
  const tokenLifetime = 12 * 3600;
  const callbackUrl = await context.webhookExtension.generateWebhookUrl(
    site,
    manifestEnrichmentPipelineEvent.event_id,
    tokenLifetime,
    true
  );

  const enrichmentTask = await siteApi.enrichment.enrichManifestInternal(manifestId, callbackUrl);
  context.response.body = enrichmentTask;
};

/**
 * Webhook event definition for the manifest enrichment pipeline. Its `event_id` is used by
 * `manifestEnrichmentPipeline` when generating the callback URL and is the same id that
 * `manifestEnrichmentHook` listens for. The hook reads the `id` body variable.
 */
export const manifestEnrichmentPipelineEvent: WebhookEventType = {
event_id: 'manifest-enrichment-pipeline.complete',
body_variables: ['id'],
};

/**
 * Incoming webhook for the manifest enrichment pipeline event.
 *
 * `execute` loads the enrichment task named by `resp.id`, requires it to have a subject and
 * to be complete, and — for `ocr_madoc_resource` tasks on a canvas — copies the first OCR
 * plaintext resource onto that canvas via `siteApi.updateCanvasPlaintext`.
 *
 * The returned object always carries `warnings`, `taskId`, `subject` and `task_type`;
 * `plaintext` is set when text was written, and `empty` is set for unknown task types.
 * Every path that ends without writing plaintext records a warning explaining why.
 *
 * Throws (through `invariant`) when `resp.id` is missing, the task has no subject, the task
 * is not complete, the subject cannot be parsed, or the subject is not a canvas.
 */
export const manifestEnrichmentHook: IncomingWebhook = {
  type: 'manifest-enrichment-pipeline-task-ingest',
  // Reuse the event definition so the hook and the generated webhook URL cannot drift apart.
  event_id: manifestEnrichmentPipelineEvent.event_id,
  is_outgoing: false,
  execute: async (resp, siteApi) => {
    // Status value a task must report before its results are ingested.
    const completedStatus = 3;
    const response: any = { warnings: [] };

    invariant(resp.id, 'Expected response to contain `id`');

    const task = await siteApi.enrichment.getEnrichmentTask(resp.id);

    invariant(task.subject, 'Missing subject on task');
    invariant(task.status === completedStatus, 'Task is not yet complete');

    response.taskId = task.id;
    response.subject = task.subject;
    response.task_type = task.task_type;

    // Only OCR tasks are ingested; anything else is reported and ignored.
    if (task.task_type !== 'ocr_madoc_resource') {
      response.warnings.push(`Unknown task ${task.task_type}`);
      response.empty = true;
      return response;
    }

    const parsed = parseUrn(task.subject);
    invariant(parsed, 'Invalid subject');
    invariant(parsed.type === 'canvas', 'Can only process canvases');

    if (!task.state) {
      response.warnings.push(`Task state not found`);
    }

    const ocrResources = task.state?.ocr_resources;
    if (!ocrResources) {
      response.warnings.push(`Expected "ocr_resources" in state`);
      return response;
    }

    // Only the first OCR resource is used. An empty list previously returned silently.
    const first = ocrResources[0];
    if (!first) {
      response.warnings.push(`No OCR resources found in state`);
      return response;
    }

    const enrichmentPlaintext = await siteApi.enrichment.getEnrichmentPlaintext(first);
    invariant(enrichmentPlaintext, 'Missing plaintext from enrichment');

    if (!enrichmentPlaintext.plaintext) {
      response.warnings.push(`Plaintext not found`);
      return response;
    }

    // NOTE(review): assumes the id parsed from the URN is the Madoc canvas id — confirm.
    const canvasId = parsed.id;
    response.plaintext = await siteApi.updateCanvasPlaintext(canvasId, enrichmentPlaintext.plaintext);
    return response;
  },
};
53 changes: 53 additions & 0 deletions services/madoc-ts/src/routes/iiif/linking/add-plaintext.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { api } from '../../../gateway/api.server';
import { RouteMiddleware } from '../../../types/route-middleware';
import { userWithScope } from '../../../utility/user-with-scope';
import { linkHash } from './convert-linking';

/**
 * Route handler for `POST /api/madoc/iiif/canvases/:id/plaintext`.
 *
 * Replaces the plaintext of a canvas: every existing `seeAlso` link with format
 * `text/plain` is deleted, the new text is saved to the `plaintext` storage bucket under a
 * content-hashed file name, and a new `seeAlso` link pointing at that file is added.
 *
 * Requires the `site.admin` scope. Responds with `{ success: true, empty }`, where `empty`
 * is `true` when the submitted text was missing or blank and nothing was changed.
 */
export const addPlaintext: RouteMiddleware<{ id: number }, { plaintext: string }> = async context => {
  const { siteId } = userWithScope(context, ['site.admin']);
  // The canvas id is the `:id` path segment of the route, not a query-string value.
  const canvasId = Number(context.params.id);
  const plaintext = context.requestBody.plaintext;

  const siteApi = api.asUser({ siteId });
  const linking = await siteApi.getCanvasLinking(canvasId);

  // Missing or whitespace-only text: report success without touching existing links.
  if (!plaintext || !plaintext.trim()) {
    context.response.status = 200;
    context.response.body = { success: true, empty: true };
    return;
  }

  const matchingPlaintexts = linking.linking.filter(singleLink => {
    return singleLink.property === 'seeAlso' && singleLink.link.format === 'text/plain';
  });

  // Remove every previous plaintext link so the canvas ends up with exactly one.
  for (const matchingPlaintext of matchingPlaintexts) {
    await siteApi.deleteLinkingProperty(matchingPlaintext.id);
  }

  // Create new plaintext and insert it. The file name is derived from the content hash.
  const bucket = 'plaintext';
  const filePath = `public/${canvasId}/${linkHash(plaintext)}.txt`;

  await siteApi.saveStoragePlainText(bucket, filePath, plaintext, true);

  await siteApi.addLinkToResource({
    label: 'Plaintext',
    link: {
      id: `/public/storage/urn:madoc:site:${siteId}/${bucket}/${filePath}`,
      format: 'text/plain',
      label: 'Plaintext',
      type: 'Text',
      file_path: filePath,
      file_bucket: bucket,
    },
    resource_id: canvasId as any,
    property: 'seeAlso',
  });

  context.response.status = 200;
  context.response.body = { success: true, empty: false };
};
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { RequestError } from '../../../utility/errors/request-error';
import { userWithScope } from '../../../utility/user-with-scope';
import contentType from 'content-type';

function linkHash(uri: string) {
export function linkHash(uri: string) {
return createHash('sha1')
.update(uri)
.digest('hex');
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { generateId } from '../../frontend/shared/capture-models/helpers/generate-id';
import { api } from '../../gateway/api.server';
import { RouteMiddleware } from '../../types/route-middleware';
import { optionalUserWithScope } from '../../utility/user-with-scope';
import { WebhookCallRow } from '../webhook-types';
Expand All @@ -14,6 +15,8 @@ export const executeWebhookInternal: RouteMiddleware<{ event_id: string }> = asy
const results = { success: 0, fail: 0 };
const callId = generateId();

const siteApi = api.asUser({ siteId }, {}, true);

const databaseHooks = await context.webhooks.listWebhooksByEvent(eventId, siteId);

for (const databaseHook of databaseHooks) {
Expand Down Expand Up @@ -64,7 +67,7 @@ export const executeWebhookInternal: RouteMiddleware<{ event_id: string }> = asy
continue;
} else {
// Do internal thing.
result.response = (await hook.execute(body)) || {};
result.response = (await hook.execute(body, siteApi)) || {};
}
results.success++;
} catch (e) {
Expand Down
3 changes: 2 additions & 1 deletion services/madoc-ts/src/webhooks/webhook-events.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { manifestEnrichmentPipelineEvent } from '../routes/enrichment/manifest-enrichment-pipeline';
import { WebhookEventType } from './webhook-types';

// Sample event definition: id `test-event` with a single body variable, `hello`.
const testEvent = {
event_id: 'test-event',
body_variables: ['hello'] as const,
};

export const webhookEvents: WebhookEventType[] = [testEvent];
export const webhookEvents: WebhookEventType[] = [testEvent, manifestEnrichmentPipelineEvent];
9 changes: 6 additions & 3 deletions services/madoc-ts/src/webhooks/webhook-server-extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import invariant from 'tiny-invariant';
import { RegistryExtension } from '../extensions/registry-extension';
import { generateId } from '../frontend/shared/capture-models/helpers/generate-id';
import { apiGateway, gatewayHost } from '../gateway/api.server';
import { manifestEnrichmentHook } from '../routes/enrichment/manifest-enrichment-pipeline';
import { getPem, getPublicPem } from '../utility/get-pem';
import { IncomingWebhook, OutgoingWebhook } from './webhook-types';
import { JWK, JWS } from 'jose';
Expand All @@ -13,12 +14,14 @@ export class WebhookServerExtension extends RegistryExtension<IncomingWebhook |
registryName: 'webhook',
});

WebhookServerExtension.register(manifestEnrichmentHook);

WebhookServerExtension.register({
is_outgoing: false,
type: 'example-test',
event_id: 'test-event',
execute: body => {
console.log('Did this work?', body);
console.log('WebHooks - test event:', body);
},
});
}
Expand All @@ -41,7 +44,7 @@ export class WebhookServerExtension extends RegistryExtension<IncomingWebhook |
code: await this.sign({ eventId, expires, siteId: site.id }),
};

return `${internal ? apiGateway : gatewayHost}/s/${site.slug}/madoc/api/webhook?${stringify(query)}}`;
return `${internal ? apiGateway : gatewayHost}/s/${site.slug}/madoc/api/webhook?${stringify(query)}`;
}

getHooksForEvents(eventId: string, siteId: number): Array<IncomingWebhook | OutgoingWebhook> {
Expand Down Expand Up @@ -94,7 +97,7 @@ export class WebhookServerExtension extends RegistryExtension<IncomingWebhook |
invariant(payload.expires, 'Invalid webhook');
invariant(siteId === payload.siteId);
invariant(eventId === payload.eventId, 'Invalid webhook');
invariant(payload.created + payload.expires < time, 'Webhook has expired');
invariant(payload.created + payload.expires > time, 'Webhook has expired');

return true;
}
Expand Down
Loading