From ff570d0dc323b3a0674fddd1b73566e3daf8dfd3 Mon Sep 17 00:00:00 2001 From: Sascha Date: Fri, 15 May 2026 13:09:35 +0200 Subject: [PATCH 1/5] feat: extended metadata downloader --- README.md | 16 + sake/.env.docker.selfhosted | 5 + sake/.env.example | 7 +- .../services/ExternalBookMetadataService.ts | 8 +- .../services/ManagedBookCoverService.ts | 94 ++++- .../services/MetadataDescriptionSanitizer.ts | 45 +++ .../ApplyMetadataCandidateUseCase.ts | 334 ++++++++++++++++ .../googleBooksMetadataProvider.ts | 5 +- .../hardcoverMetadataProvider.ts | 5 +- .../isbndbMetadataProvider.ts | 292 ++++++++++++++ .../metadataProviderFactory.ts | 4 +- .../openLibraryMetadataProvider.ts | 5 +- sake/src/lib/utils/author.ts | 12 + .../library/managedBookCoverService.test.ts | 182 ++++++++- .../applyMetadataCandidateUseCase.test.ts | 364 ++++++++++++++++++ .../metadata/isbndbMetadataProvider.test.ts | 169 ++++++++ .../metadataDescriptionSanitizer.test.ts | 27 ++ 17 files changed, 1543 insertions(+), 31 deletions(-) create mode 100644 sake/src/lib/server/application/services/MetadataDescriptionSanitizer.ts create mode 100644 sake/src/lib/server/application/use-cases/ApplyMetadataCandidateUseCase.ts create mode 100644 sake/src/lib/server/infrastructure/metadata-providers/isbndbMetadataProvider.ts create mode 100644 sake/src/lib/utils/author.ts create mode 100644 sake/tests/metadata/applyMetadataCandidateUseCase.test.ts create mode 100644 sake/tests/metadata/isbndbMetadataProvider.test.ts create mode 100644 sake/tests/metadata/metadataDescriptionSanitizer.test.ts diff --git a/README.md b/README.md index 4d90727..42d764a 100644 --- a/README.md +++ b/README.md @@ -185,9 +185,15 @@ Copy `sake/.env.example` to `sake/.env` and fill in the values you need. 
- `VITE_ALLOWED_HOSTS` - comma-separated host overrides for Vite/dev setups - `ACTIVATED_PROVIDERS` - comma-separated search providers +- `ACTIVATED_METADATA_PROVIDERS` - comma-separated metadata providers, for example `googlebooks,openlibrary,hardcover,isbndb` +- `GOOGLE_BOOKS_API_KEY` - optional Google Books key for higher rate limits +- `HARDCOVER_API_TOKEN` - optional server-wide token required for the Hardcover metadata provider +- `ISBNDB_API_KEY` - optional paid API key required for the ISBNdb metadata provider +- `METADATA_PROVIDER_TIMEOUT_MS` - optional metadata provider timeout in milliseconds - `BODY_SIZE_LIMIT` - upload/body size limit If `ACTIVATED_PROVIDERS` is unset, blank, or contains no valid values, search stays disabled and the search UI remains hidden. +If `ACTIVATED_METADATA_PROVIDERS` is unset, blank, or contains no valid values, on-demand metadata lookup stays disabled and the metadata update UI remains hidden. Accepted provider names: @@ -210,6 +216,11 @@ S3_SECRET_ACCESS_KEY=your-secret-access-key S3_FORCE_PATH_STYLE=false ACTIVATED_PROVIDERS=anna,openlib,gutenberg +ACTIVATED_METADATA_PROVIDERS=googlebooks,openlibrary +GOOGLE_BOOKS_API_KEY= +HARDCOVER_API_TOKEN= +ISBNDB_API_KEY= +METADATA_PROVIDER_TIMEOUT_MS= VITE_ALLOWED_HOSTS= BODY_SIZE_LIMIT=Infinity ``` @@ -228,6 +239,11 @@ S3_SECRET_ACCESS_KEY=sakeadminsecret S3_FORCE_PATH_STYLE=true ACTIVATED_PROVIDERS=anna,openlib,gutenberg +ACTIVATED_METADATA_PROVIDERS=googlebooks,openlibrary +GOOGLE_BOOKS_API_KEY= +HARDCOVER_API_TOKEN= +ISBNDB_API_KEY= +METADATA_PROVIDER_TIMEOUT_MS= VITE_ALLOWED_HOSTS= BODY_SIZE_LIMIT=Infinity ``` diff --git a/sake/.env.docker.selfhosted b/sake/.env.docker.selfhosted index c1de2cc..5b55b39 100644 --- a/sake/.env.docker.selfhosted +++ b/sake/.env.docker.selfhosted @@ -12,4 +12,9 @@ S3_FORCE_PATH_STYLE=true VITE_ALLOWED_HOSTS= ACTIVATED_PROVIDERS=zlib,anna,openlib,gutenberg +ACTIVATED_METADATA_PROVIDERS= +GOOGLE_BOOKS_API_KEY= +HARDCOVER_API_TOKEN= +ISBNDB_API_KEY= 
+METADATA_PROVIDER_TIMEOUT_MS= BODY_SIZE_LIMIT=Infinity diff --git a/sake/.env.example b/sake/.env.example index a206446..cc7535e 100644 --- a/sake/.env.example +++ b/sake/.env.example @@ -13,4 +13,9 @@ S3_FORCE_PATH_STYLE= VITE_ALLOWED_HOSTS= ACTIVATED_PROVIDERS= -BODY_SIZE_LIMIT=Infinity \ No newline at end of file +ACTIVATED_METADATA_PROVIDERS= +GOOGLE_BOOKS_API_KEY= +HARDCOVER_API_TOKEN= +ISBNDB_API_KEY= +METADATA_PROVIDER_TIMEOUT_MS= +BODY_SIZE_LIMIT=Infinity diff --git a/sake/src/lib/server/application/services/ExternalBookMetadataService.ts b/sake/src/lib/server/application/services/ExternalBookMetadataService.ts index 5daefc0..08e1111 100644 --- a/sake/src/lib/server/application/services/ExternalBookMetadataService.ts +++ b/sake/src/lib/server/application/services/ExternalBookMetadataService.ts @@ -1,4 +1,6 @@ import { MetadataAggregatorService } from '$lib/server/application/services/MetadataAggregatorService'; +import { sanitizeMetadataDescription } from '$lib/server/application/services/MetadataDescriptionSanitizer'; +import type { MetadataCandidate } from '$lib/server/application/ports/MetadataProviderPort'; import { GoogleBooksMetadataProvider } from '$lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider'; import { OpenLibraryMetadataProvider } from '$lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider'; @@ -49,6 +51,10 @@ function extractAmazonAsin(identifier: string | null): string | null { return null; } +function candidateDescription(candidate: MetadataCandidate): string | null { + return sanitizeMetadataDescription(candidate.description, candidate.descriptionFormat); +} + export class ExternalBookMetadataService { private readonly aggregator: MetadataAggregatorService; @@ -83,7 +89,7 @@ export class ExternalBookMetadataService { openLibraryKey: olCandidate?.identifiers.openLibraryKey ?? 
null, amazonAsin: extractAmazonAsin(input.identifier), cover: bestCoverUrl, - description: pickFirst(...candidates.map((c) => c.description)), + description: pickFirst(...candidates.map(candidateDescription)), publisher: pickFirst(...candidates.map((c) => c.publisher)), series: pickFirst(...candidates.map((c) => c.series)), volume: null, diff --git a/sake/src/lib/server/application/services/ManagedBookCoverService.ts b/sake/src/lib/server/application/services/ManagedBookCoverService.ts index d587c7f..6fea073 100644 --- a/sake/src/lib/server/application/services/ManagedBookCoverService.ts +++ b/sake/src/lib/server/application/services/ManagedBookCoverService.ts @@ -21,6 +21,7 @@ const IMAGE_CONTENT_TYPE_TO_EXTENSION = new Map([ ['image/avif', 'avif'] ]); +export const MIN_MANAGED_BOOK_COVER_BYTES = 1024; export const MAX_MANAGED_BOOK_COVER_BYTES = 10 * 1024 * 1024; export interface ManagedBookCoverResult { @@ -146,6 +147,48 @@ function extensionFromImageContentType(contentType: string | null): string | nul return IMAGE_CONTENT_TYPE_TO_EXTENSION.get(contentType) ?? 
/**
 * Reports whether `buffer` begins with the file signature ("magic bytes") of
 * the image format named by `contentType`.
 *
 * Recognised content types are `image/jpeg` (and the non-standard alias
 * `image/jpg`), `image/png`, `image/gif`, `image/webp` and `image/avif`.
 * Any other content type yields `false`.
 *
 * AVIF is detected through the leading ISO-BMFF `ftyp` box: the file is
 * accepted when either the major brand or one of the compatible brands listed
 * inside that box is `avif` / `avis`. Checking only the major brand rejects
 * valid AVIF files whose major brand is a generic HEIF brand such as `mif1`.
 *
 * @param buffer - Raw bytes of the candidate image.
 * @param contentType - Declared MIME type, compared verbatim (lower-case, no parameters).
 * @returns `true` when the leading bytes match the declared type.
 */
function hasImageMagicBytes(buffer: Buffer, contentType: string): boolean {
	switch (contentType) {
		case 'image/jpeg':
		case 'image/jpg':
			// SOI marker followed by the first byte of the next marker.
			return buffer.length >= 3 && buffer[0] === 0xff && buffer[1] === 0xd8 && buffer[2] === 0xff;
		case 'image/png':
			return (
				buffer.length >= 8 &&
				buffer[0] === 0x89 &&
				buffer[1] === 0x50 &&
				buffer[2] === 0x4e &&
				buffer[3] === 0x47 &&
				buffer[4] === 0x0d &&
				buffer[5] === 0x0a &&
				buffer[6] === 0x1a &&
				buffer[7] === 0x0a
			);
		case 'image/gif': {
			if (buffer.length < 6) {
				return false;
			}
			const signature = buffer.subarray(0, 6).toString('ascii');
			return signature === 'GIF87a' || signature === 'GIF89a';
		}
		case 'image/webp':
			// RIFF container: bytes 4-7 hold the chunk size, bytes 8-11 the form type.
			return (
				buffer.length >= 12 &&
				buffer.subarray(0, 4).toString('ascii') === 'RIFF' &&
				buffer.subarray(8, 12).toString('ascii') === 'WEBP'
			);
		case 'image/avif': {
			if (buffer.length < 12 || buffer.subarray(4, 8).toString('ascii') !== 'ftyp') {
				return false;
			}
			const isAvifBrandAt = (offset: number): boolean => {
				const brand = buffer.subarray(offset, offset + 4).toString('ascii');
				return brand === 'avif' || brand === 'avis';
			};
			// Major brand sits directly after the box type.
			if (isAvifBrandAt(8)) {
				return true;
			}
			// Compatible brands follow major_brand (4 bytes) and minor_version (4 bytes)
			// and run to the end of the ftyp box. The declared box size is clamped to
			// the buffer; the special sizes 0 and 1 simply yield no compatible-brand
			// scan, which keeps the check conservative.
			const boxEnd = Math.min(buffer.readUInt32BE(0), buffer.length);
			for (let offset = 16; offset + 4 <= boxEnd; offset += 4) {
				if (isAvifBrandAt(offset)) {
					return true;
				}
			}
			return false;
		}
		default:
			return false;
	}
}
input.coverBuffer.byteLength }, - 'Managed cover buffer was empty or too large' + 'Managed cover buffer was too small or too large' + ); + return { managedUrl: null, sourceUrl: null }; + } + + if (!hasImageMagicBytes(input.coverBuffer, contentType)) { + this.serviceLogger.warn( + { + event: 'library.cover.buffer.invalid_signature', + bookStorageKey: input.bookStorageKey, + contentType, + byteLength: input.coverBuffer.byteLength + }, + 'Managed cover buffer did not match the declared image type' ); return { managedUrl: null, sourceUrl: null }; } @@ -411,7 +479,10 @@ export class ManagedBookCoverService { response, maxBytes: MAX_MANAGED_BOOK_COVER_BYTES }); - if (coverRead.exceededLimit || coverRead.byteLength === 0) { + if ( + coverRead.exceededLimit || + coverRead.byteLength < MIN_MANAGED_BOOK_COVER_BYTES + ) { this.serviceLogger.warn( { event: 'library.cover.fetch.invalid_size', @@ -420,7 +491,22 @@ export class ManagedBookCoverService { sourceUrl: resolvedSourceUrl, byteLength: coverRead.byteLength }, - 'Managed cover fetch returned an empty or oversized payload' + 'Managed cover fetch returned a too-small or oversized payload' + ); + return { managedUrl: null, sourceUrl: resolvedSourceUrl }; + } + + if (!hasImageMagicBytes(coverRead.buffer, contentType)) { + this.serviceLogger.warn( + { + event: 'library.cover.fetch.invalid_signature', + bookStorageKey: input.bookStorageKey, + provider: input.provider, + sourceUrl: resolvedSourceUrl, + contentType, + byteLength: coverRead.byteLength + }, + 'Managed cover fetch returned bytes that did not match the declared image type' ); return { managedUrl: null, sourceUrl: resolvedSourceUrl }; } diff --git a/sake/src/lib/server/application/services/MetadataDescriptionSanitizer.ts b/sake/src/lib/server/application/services/MetadataDescriptionSanitizer.ts new file mode 100644 index 0000000..9906382 --- /dev/null +++ b/sake/src/lib/server/application/services/MetadataDescriptionSanitizer.ts @@ -0,0 +1,45 @@ +import type { 
/** Tags that survive sanitisation; they are re-emitted without any attributes. */
const ALLOWED_HTML_TAGS = new Set(['p', 'br', 'em', 'strong', 'i', 'b']);
/** Elements whose entire content (not just the tag) must be removed. */
const DANGEROUS_HTML_BLOCK_REGEX =
	/<\s*(script|style|iframe|object|embed|svg|math)\b[^>]*>[\s\S]*?<\s*\/\s*\1\s*>/gi;
/** Unpaired opening/self-closing occurrences of the same dangerous elements. */
const DANGEROUS_VOID_TAG_REGEX =
	/<\s*(script|style|iframe|object|embed|svg|math)\b[^>]*(?:\/\s*)?>/gi;
const HTML_COMMENT_REGEX = /<!--[\s\S]*?-->/g;
const HTML_TAG_REGEX = /<\s*(\/?)\s*([a-zA-Z][a-zA-Z0-9:-]*)(?:\s[^>]*)?\s*(\/?)\s*>/g;

/**
 * Upper bound on strip passes. Every pass that changes the text either
 * shortens it or only normalises an allowed tag, so real-world input settles
 * after two or three passes; the cap merely bounds work on hostile input.
 */
const MAX_SANITIZE_PASSES = 32;

/**
 * Runs one stripping pass: removes comments and dangerous elements, drops
 * every tag that is not allow-listed, and rewrites allow-listed tags without
 * attributes.
 */
function stripDisallowedHtml(html: string): string {
	return html
		.replace(HTML_COMMENT_REGEX, '')
		.replace(DANGEROUS_HTML_BLOCK_REGEX, '')
		.replace(DANGEROUS_VOID_TAG_REGEX, '')
		.replace(HTML_TAG_REGEX, (_match, closing: string, tagName: string) => {
			const tag = tagName.toLowerCase();
			if (!ALLOWED_HTML_TAGS.has(tag)) {
				return '';
			}
			if (tag === 'br') {
				return '<br>';
			}
			return closing ? `</${tag}>` : `<${tag}>`;
		});
}

/**
 * Normalises a provider-supplied description.
 *
 * Non-string, empty and whitespace-only input yields `null`. Descriptions
 * whose `format` is anything other than `'html'` are returned trimmed and
 * otherwise untouched. HTML descriptions are reduced to a small allow-list of
 * attribute-free formatting tags.
 *
 * Stripping is repeated until the output no longer changes. A single pass is
 * not sufficient: removing an inner tag can splice the surrounding fragments
 * into a new tag (for example `<im<x>g src=x onerror=...>` collapses to
 * `<img src=x onerror=...>`), which would then be emitted unsanitised.
 *
 * @param description - Raw description text from a metadata provider.
 * @param format - Declared format of `description`; only `'html'` triggers sanitisation.
 * @returns The cleaned description, or `null` when nothing meaningful remains.
 */
export function sanitizeMetadataDescription(
	description: string | null | undefined,
	format: MetadataCandidate['descriptionFormat']
): string | null {
	if (typeof description !== 'string') {
		return null;
	}

	const trimmed = description.trim();
	if (!trimmed) {
		return null;
	}

	if (format !== 'html') {
		return trimmed;
	}

	let sanitized = trimmed;
	let stable = false;
	for (let pass = 0; pass < MAX_SANITIZE_PASSES && !stable; pass += 1) {
		const next = stripDisallowedHtml(sanitized);
		stable = next === sanitized;
		sanitized = next;
	}

	if (!stable) {
		// Fail closed: input that still changes after the pass budget is treated
		// as hostile, so no markup at all is allowed through.
		sanitized = sanitized.replace(/</g, '&lt;');
	}

	sanitized = sanitized.trim();
	return sanitized.length > 0 ? sanitized : null;
}
seriesIndex: number | null; + edition: string | null; + identifier: string | null; + pages: number | null; + description: string | null; + googleBooksId: string | null; + openLibraryKey: string | null; + amazonAsin: string | null; + externalRating: number | null; + externalRatingCount: number | null; + cover: string | null; + extension: string | null; + filesize: number | null; + language: string | null; + year: number | null; + month: number | null; + day: number | null; + createdAt: string | null; +} + +export interface ApplyMetadataCandidateResult { + success: true; + book: AppliedMetadataBook; +} + +type MetadataPatch = Partial<{ + title: string; + author: string | null; + publisher: string | null; + series: string | null; + seriesIndex: number | null; + identifier: string | null; + pages: number | null; + description: string | null; + language: string | null; + year: number | null; + month: number | null; + day: number | null; + googleBooksId: string | null; + openLibraryKey: string | null; + amazonAsin: string | null; + externalRating: number | null; + externalRatingCount: number | null; + cover: string | null; +}>; + +const APPLY_METADATA_FIELD_SELECTION_SET = new Set(APPLY_METADATA_FIELD_SELECTIONS); + +export function isApplyMetadataFieldSelection(value: string): value is ApplyMetadataFieldSelection { + return APPLY_METADATA_FIELD_SELECTION_SET.has(value); +} + +function trimToNull(value: string | null | undefined): string | null { + if (typeof value !== 'string') { + return null; + } + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : null; +} + +function finiteNumberOrNull(value: number | null | undefined): number | null { + return typeof value === 'number' && Number.isFinite(value) ? value : null; +} + +function authorsToAuthorField(authors: string[]): string | null { + const joined = authors.flatMap((author) => normalizeAuthor(author) ?? []).join(', '); + return joined.length > 0 ? 
joined : null; +} + +function preferredIdentifier(candidate: MetadataCandidate): string | null { + return trimToNull(candidate.identifiers.isbn13) ?? trimToNull(candidate.identifiers.isbn10); +} + +function buildPatch( + candidate: MetadataCandidate, + fieldSelections: ApplyMetadataFieldSelection[] +): MetadataPatch { + const patch: MetadataPatch = {}; + + for (const field of fieldSelections) { + switch (field) { + case 'title': + patch.title = candidate.title.trim(); + break; + case 'author': + patch.author = authorsToAuthorField(candidate.authors); + break; + case 'publisher': + patch.publisher = trimToNull(candidate.publisher); + break; + case 'series': + patch.series = trimToNull(candidate.series); + break; + case 'seriesIndex': + patch.seriesIndex = finiteNumberOrNull(candidate.seriesIndex); + break; + case 'identifier': + patch.identifier = preferredIdentifier(candidate); + break; + case 'pages': + patch.pages = finiteNumberOrNull(candidate.pageCount); + break; + case 'description': + patch.description = sanitizeMetadataDescription( + candidate.description, + candidate.descriptionFormat + ); + break; + case 'language': + patch.language = trimToNull(candidate.language); + break; + case 'publishedDate': + patch.year = finiteNumberOrNull(candidate.publishedDate.year); + patch.month = finiteNumberOrNull(candidate.publishedDate.month); + patch.day = finiteNumberOrNull(candidate.publishedDate.day); + break; + case 'year': + patch.year = finiteNumberOrNull(candidate.publishedDate.year); + break; + case 'month': + patch.month = finiteNumberOrNull(candidate.publishedDate.month); + break; + case 'day': + patch.day = finiteNumberOrNull(candidate.publishedDate.day); + break; + case 'googleBooksId': + patch.googleBooksId = trimToNull(candidate.identifiers.googleBooksId); + break; + case 'openLibraryKey': + patch.openLibraryKey = trimToNull(candidate.identifiers.openLibraryKey); + break; + case 'amazonAsin': + patch.amazonAsin = trimToNull(candidate.identifiers.asin); + 
/**
 * Merges the date parts of `patch` over those of `existing` and enforces the
 * year → month → day hierarchy.
 *
 * A part left `undefined` in the patch keeps the existing value; an explicit
 * `null` clears it. Clearing the year also clears month and day, and clearing
 * the month also clears the day, so a finer-grained part never outlives the
 * coarser part it depends on.
 *
 * @param existing - Book whose current `year`/`month`/`day` act as fallbacks.
 * @param patch - Partial update; only its `year`/`month`/`day` are read.
 * @returns The resolved publication date parts.
 */
function normalizePublicationDatePatch(
	existing: Book,
	patch: MetadataPatch
): { year: number | null; month: number | null; day: number | null } {
	// `undefined` means "not selected"; `null` is a deliberate clear and must win.
	const resolvePart = (part: 'year' | 'month' | 'day'): number | null => {
		const patched = patch[part];
		return patched !== undefined ? patched : existing[part];
	};

	const year = resolvePart('year');
	const month = year === null ? null : resolvePart('month');
	const day = month === null ? null : resolvePart('day');

	return { year, month, day };
}
/**
 * Applies user-selected fields (and optionally a cover) from a metadata
 * candidate onto an existing book.
 */
export class ApplyMetadataCandidateUseCase {
	constructor(
		private readonly bookRepository: BookRepositoryPort,
		private readonly managedBookCoverService: Pick<ManagedBookCoverService, 'storeFromExternalUrl'>
	) {}

	/**
	 * Validates the request, merges the selected candidate fields into the
	 * book, imports the chosen cover and persists the result.
	 *
	 * All validation that does not depend on the cover runs BEFORE the cover
	 * is imported, so a request that is going to be rejected with a 400 never
	 * writes a cover to storage first.
	 *
	 * @param input - Target book id, the candidate, the fields to copy and an optional cover choice.
	 * @returns `400` when nothing was selected, the resulting title is empty or the
	 *          publication date is invalid; `404` when the book does not exist; `502`
	 *          when the cover could not be imported; otherwise the updated book.
	 */
	async execute(
		input: ApplyMetadataCandidateInput
	): Promise<ApiResult<ApplyMetadataCandidateResult>> {
		if (input.fieldSelections.length === 0 && !input.coverChoice) {
			return apiError('At least one field selection or cover choice is required', 400);
		}

		const existing = await this.bookRepository.getById(input.bookId);
		if (!existing) {
			return apiError('Book not found', 404);
		}

		// Duplicated selections are harmless but pointless; collapse them first.
		const patch = buildPatch(input.candidate, [...new Set(input.fieldSelections)]);
		const validated = buildUpdateMetadataInput(existing, patch);

		if (validated.title.trim().length === 0) {
			return apiError('title cannot be empty', 400);
		}

		const publicationDateError = validatePublicationDateParts({
			year: validated.year,
			month: validated.month,
			day: validated.day
		});
		if (publicationDateError) {
			return apiError(publicationDateError, 400);
		}

		// Only now touch storage: everything that could reject the request for
		// reasons unrelated to the cover has already passed.
		let metadata: UpdateBookMetadataInput = validated;
		if (input.coverChoice) {
			const importedCover = await this.managedBookCoverService.storeFromExternalUrl({
				bookStorageKey: existing.s3_storage_key,
				coverUrl: input.coverChoice.url
			});
			if (!importedCover.managedUrl) {
				return apiError('Failed to import cover image', 502);
			}
			metadata = { ...validated, cover: importedCover.managedUrl };
		}

		const updated = await this.bookRepository.updateMetadata(input.bookId, metadata);

		return apiOk({
			success: true,
			book: toAppliedMetadataBook(updated)
		});
	}
}
items)[number]): number => { @@ -113,7 +114,7 @@ export class GoogleBooksMetadataProvider implements MetadataProviderPort { const hasTitleMatch = normalizedTitle.length > 0 && title.includes(normalizedTitle); const hasAuthorMatch = normalizedAuthor.length > 0 && - authors.some((a) => normalizeForMatch(a).includes(normalizedAuthor)); + authors.some((a) => normalizeAuthorForMatch(a).includes(normalizedAuthor)); const pages = asPositiveNumber(item.volumeInfo?.pageCount); const langScoreVal = languageScore(targetLangTokens, [item.volumeInfo?.language]); return (hasTitleMatch ? 5 : 0) + (hasAuthorMatch ? 3 : 0) + (pages ? 2 : 0) + langScoreVal; diff --git a/sake/src/lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider.ts b/sake/src/lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider.ts index f83d303..7ceebf5 100644 --- a/sake/src/lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider.ts +++ b/sake/src/lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider.ts @@ -15,6 +15,7 @@ import { normalizeForMatch, parseProviderPublicationDate } from './metadataProviderUtils'; +import { normalizeAuthorForMatch } from '$lib/utils/author'; // --------------------------------------------------------------------------- // Smoothed rate limiter — 60 requests per minute (Hardcover's stated limit) @@ -194,14 +195,14 @@ function mapBookToCandidate(book: HardcoverBook, query: MetadataQuery): Metadata const subjects = parseTags(book.cached_tags); const normalizedTitle = normalizeForMatch(query.title); - const normalizedAuthor = normalizeForMatch(query.author); + const normalizedAuthor = normalizeAuthorForMatch(query.author); const targetLangTokens = languageTokens(query.language); const titleMatch = normalizedTitle.length > 0 && normalizeForMatch(book.title).includes(normalizedTitle); const authorMatch = normalizedAuthor.length > 0 && - authors.some((a) => normalizeForMatch(a).includes(normalizedAuthor)); + authors.some((a) 
=> normalizeAuthorForMatch(a).includes(normalizedAuthor)); const langScoreVal = languageScore(targetLangTokens, [edition?.language?.language]); const providerScore = diff --git a/sake/src/lib/server/infrastructure/metadata-providers/isbndbMetadataProvider.ts b/sake/src/lib/server/infrastructure/metadata-providers/isbndbMetadataProvider.ts new file mode 100644 index 0000000..169c1cf --- /dev/null +++ b/sake/src/lib/server/infrastructure/metadata-providers/isbndbMetadataProvider.ts @@ -0,0 +1,292 @@ +import { apiError, apiOk, type ApiResult } from '$lib/server/http/api'; +import type { + MetadataCandidate, + MetadataProviderCapabilities, + MetadataProviderPort, + MetadataQuery +} from '$lib/server/application/ports/MetadataProviderPort'; +import type { MetadataProviderId } from '$lib/types/Metadata/Provider'; +import { + asString, + languageScore, + languageTokens, + normalizeForMatch, + parseProviderPublicationDate +} from './metadataProviderUtils'; +import { normalizeAuthorForMatch } from '$lib/utils/author'; + +const ISBNDB_API_BASE_URL = 'https://api.isbndb.com'; +const DEFAULT_QUERY_LIMIT = 5; +const MAX_QUERY_LIMIT = 10; +const USER_AGENT = 'Sake/1.0 (+https://github.com/Sudashiii/Sake)'; + +const TOUCHED_FIELDS = new Set([ + 'title', + 'authors', + 'description', + 'publisher', + 'publishedDate', + 'pageCount', + 'covers', + 'subjects', + 'language', + 'identifiers' +]); + +interface IsbnDbBook { + authors?: unknown; + date_published?: unknown; + excerpt?: unknown; + image?: unknown; + cover?: unknown; + isbn?: unknown; + isbn13?: unknown; + language?: unknown; + overview?: unknown; + pages?: unknown; + publisher?: unknown; + subjects?: unknown; + synopsis?: unknown; + synopsys?: unknown; + title?: unknown; + title_long?: unknown; +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function stringArray(value: unknown): string[] { + if (Array.isArray(value)) { + return 
/**
 * Coerces a loosely-typed provider value into a strictly positive, finite
 * number.
 *
 * Numbers are taken as-is; strings are read with base-10 `Number.parseInt`,
 * so a leading integer is extracted and any trailing text or fraction is
 * ignored. Every other input type, and any result that is not finite or not
 * greater than zero, yields `null`.
 *
 * @param value - Raw value from a provider payload.
 * @returns The positive number, or `null` when none can be derived.
 */
function positiveNumber(value: unknown): number | null {
	let candidate: number;

	switch (typeof value) {
		case 'number':
			candidate = value;
			break;
		case 'string':
			candidate = Number.parseInt(value, 10);
			break;
		default:
			return null;
	}

	return Number.isFinite(candidate) && candidate > 0 ? candidate : null;
}
''; + const authors = stringArray(book.authors); + const targetLangTokens = languageTokens(query.language); + + const normalizedTitle = normalizeForMatch(query.title); + const normalizedAuthor = normalizeAuthorForMatch(query.author); + const hasTitleMatch = + normalizedTitle.length > 0 && normalizeForMatch(title).includes(normalizedTitle); + const hasAuthorMatch = + normalizedAuthor.length > 0 && + authors.some((author) => normalizeAuthorForMatch(author).includes(normalizedAuthor)); + const hasIsbnMatch = + targetIsbn.length > 0 && (targetIsbn === isbn10 || targetIsbn === isbn13); + const hasCover = Boolean(asString(book.image) ?? asString(book.cover)); + const hasDescription = Boolean(firstDescription(book)); + + return ( + (hasIsbnMatch ? 10 : 0) + + (hasTitleMatch ? 5 : 0) + + (hasAuthorMatch ? 3 : 0) + + languageScore(targetLangTokens, [asString(book.language)]) + + (positiveNumber(book.pages) ? 2 : 0) + + (hasCover ? 1 : 0) + + (hasDescription ? 1 : 0) + ); +} + +function mapBookToCandidate(book: IsbnDbBook, query: MetadataQuery): MetadataCandidate { + const isbn10 = asString(book.isbn); + const isbn13 = asString(book.isbn13); + const title = asString(book.title_long) ?? asString(book.title) ?? ''; + const imageUrl = asString(book.image) ?? asString(book.cover); + const sourceIsbn = isbn13 ?? isbn10; + + return { + providerId: 'isbndb', + providerScore: scoreBook(book, query), + identifiers: { + isbn10, + isbn13, + asin: null, + googleBooksId: null, + openLibraryKey: null, + hardcoverId: null + }, + title, + subtitle: null, + authors: stringArray(book.authors), + description: firstDescription(book), + descriptionFormat: 'text', + subjects: stringArray(book.subjects).slice(0, 20), + series: null, + seriesIndex: null, + publisher: asString(book.publisher), + publishedDate: parseProviderPublicationDate(asString(book.date_published)), + language: asString(book.language), + pageCount: positiveNumber(book.pages), + covers: imageUrl ? 
/**
 * Metadata provider backed by the ISBNdb REST API.
 *
 * Lookup strategy: when the query carries an ISBN, the single-book endpoint is
 * tried first; if that yields nothing and a title is available, the title
 * search endpoint is used as a fallback.
 */
export class IsbnDbMetadataProvider implements MetadataProviderPort {
	readonly id: MetadataProviderId = 'isbndb';

	readonly capabilities: MetadataProviderCapabilities = {
		touchedFields: TOUCHED_FIELDS,
		hasCover: true,
		hasRating: false,
		requiresIsbn: false
	};

	/**
	 * Resolves candidates for `query`.
	 *
	 * @returns `503` when `ISBNDB_API_KEY` is not configured, `400` when the query
	 *          has neither ISBN nor title, any upstream failure from the ISBN
	 *          lookup or title search, otherwise candidates sorted by descending
	 *          `providerScore`.
	 */
	async lookup(query: MetadataQuery): Promise<ApiResult<MetadataCandidate[]>> {
		const apiKey = process.env.ISBNDB_API_KEY?.trim();
		if (!apiKey) {
			return apiError('ISBNDB_API_KEY is not configured', 503);
		}

		const limit = normalizeLimit(query.limit);

		if (query.isbn) {
			const result = await this.lookupByIsbn(apiKey, query.isbn, query);
			// Fall through to the title search only when the ISBN lookup succeeded
			// but found nothing and there is a title to search with.
			if (!result.ok || result.value.length > 0 || !query.title) {
				return result;
			}
		}

		if (query.title) {
			return this.searchByTitle(apiKey, query, limit);
		}

		return apiError('No query terms provided for ISBNdb lookup', 400);
	}

	/**
	 * Fetches a single book by ISBN.
	 *
	 * The ISBN is normalised (separators removed, check character upper-cased)
	 * before it is placed in the URL, matching the normalised comparison used
	 * when scoring. An ISBN with no usable characters short-circuits to an
	 * empty result instead of issuing a request against `/book/`.
	 */
	private async lookupByIsbn(
		apiKey: string,
		isbn: string,
		query: MetadataQuery
	): Promise<ApiResult<MetadataCandidate[]>> {
		const normalizedIsbn = normalizeIsbn(isbn);
		if (normalizedIsbn.length === 0) {
			return apiOk([]);
		}

		const url = `${ISBNDB_API_BASE_URL}/book/${encodeURIComponent(normalizedIsbn)}`;
		const result = await fetchJson(url, apiKey);
		if (!result.ok) {
			return result;
		}

		return apiOk(this.toRankedCandidates(result.value, query));
	}

	/** Searches books by title, optionally narrowed by author, limited to one page of `limit` results. */
	private async searchByTitle(
		apiKey: string,
		query: MetadataQuery,
		limit: number
	): Promise<ApiResult<MetadataCandidate[]>> {
		const url = new URL(`${ISBNDB_API_BASE_URL}/books/${encodeURIComponent(query.title ?? '')}`);
		url.searchParams.set('page', '1');
		url.searchParams.set('pageSize', String(limit));
		if (query.author) {
			url.searchParams.set('author', query.author);
		}

		const result = await fetchJson(url.toString(), apiKey);
		if (!result.ok) {
			return result;
		}

		return apiOk(this.toRankedCandidates(result.value, query));
	}

	/** Maps a raw API payload to candidates ordered by descending provider score. */
	private toRankedCandidates(payload: unknown, query: MetadataQuery): MetadataCandidate[] {
		return extractBooks(payload)
			.map((book) => mapBookToCandidate(book, query))
			.sort((a, b) => b.providerScore - a.providerScore);
	}
}
new HardcoverMetadataProvider() : null; case 'isbndb': - return null; // not yet implemented — Phase 6 + // Only instantiate when key is configured; silently skipped otherwise + return process.env.ISBNDB_API_KEY?.trim() ? new IsbnDbMetadataProvider() : null; default: { const exhaustiveId: never = providerId; throw new Error(`Unsupported metadata provider: ${exhaustiveId}`); diff --git a/sake/src/lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider.ts b/sake/src/lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider.ts index f0349a8..5f1f386 100644 --- a/sake/src/lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider.ts +++ b/sake/src/lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider.ts @@ -14,6 +14,7 @@ import { languageTokens, normalizeForMatch } from './metadataProviderUtils'; +import { normalizeAuthorForMatch } from '$lib/utils/author'; const TOUCHED_FIELDS = new Set([ 'title', @@ -94,7 +95,7 @@ export class OpenLibraryMetadataProvider implements MetadataProviderPort { } const normalizedTitle = normalizeForMatch(query.title); - const normalizedAuthor = normalizeForMatch(query.author); + const normalizedAuthor = normalizeAuthorForMatch(query.author); const scoreDoc = (doc: (typeof docs)[number]): number => { const title = normalizeForMatch(doc.title); @@ -102,7 +103,7 @@ export class OpenLibraryMetadataProvider implements MetadataProviderPort { const hasTitleMatch = normalizedTitle.length > 0 && title.includes(normalizedTitle); const hasAuthorMatch = normalizedAuthor.length > 0 && - authors.some((a) => normalizeForMatch(a).includes(normalizedAuthor)); + authors.some((a) => normalizeAuthorForMatch(a).includes(normalizedAuthor)); const pages = asPositiveNumber(doc.number_of_pages_median); const langScoreVal = languageScore(targetLangTokens, doc.language ?? []); return (hasTitleMatch ? 5 : 0) + (hasAuthorMatch ? 3 : 0) + (pages ? 
2 : 0) + langScoreVal; diff --git a/sake/src/lib/utils/author.ts b/sake/src/lib/utils/author.ts new file mode 100644 index 0000000..e403c24 --- /dev/null +++ b/sake/src/lib/utils/author.ts @@ -0,0 +1,12 @@ +export function normalizeAuthor(raw: string | null | undefined): string | null { + if (typeof raw !== 'string') { + return null; + } + + const normalized = raw.replace(/\s+/g, ' ').trim(); + return normalized.length > 0 ? normalized : null; +} + +export function normalizeAuthorForMatch(raw: string | null | undefined): string { + return normalizeAuthor(raw)?.toLowerCase().replace(/[^a-z0-9]+/g, ' ').trim() ?? ''; +} diff --git a/sake/tests/library/managedBookCoverService.test.ts b/sake/tests/library/managedBookCoverService.test.ts index 2291c15..d03ec42 100644 --- a/sake/tests/library/managedBookCoverService.test.ts +++ b/sake/tests/library/managedBookCoverService.test.ts @@ -4,6 +4,7 @@ import { buildManagedBookCoverVersionToken, buildVersionedManagedBookCoverUrl, ManagedBookCoverService, + MIN_MANAGED_BOOK_COVER_BYTES, MAX_MANAGED_BOOK_COVER_BYTES } from '$lib/server/application/services/ManagedBookCoverService'; import type { StoragePort } from '$lib/server/application/ports/StoragePort'; @@ -12,6 +13,32 @@ function toArrayBuffer(buffer: Buffer): ArrayBuffer { return Uint8Array.from(buffer).buffer as ArrayBuffer; } +function createImageBuffer(contentType: string, fill = 1): Buffer { + const buffer = Buffer.alloc(MIN_MANAGED_BOOK_COVER_BYTES, fill); + switch (contentType) { + case 'image/jpeg': + buffer.set([0xff, 0xd8, 0xff], 0); + break; + case 'image/png': + buffer.set([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a], 0); + break; + case 'image/gif': + buffer.write('GIF89a', 0, 'ascii'); + break; + case 'image/webp': + buffer.write('RIFF', 0, 'ascii'); + buffer.write('WEBP', 8, 'ascii'); + break; + case 'image/avif': + buffer.write('ftyp', 4, 'ascii'); + buffer.write('avif', 8, 'ascii'); + break; + default: + throw new Error(`Unsupported test image content 
type: ${contentType}`); + } + return buffer; +} + function createResponse(input: { url: string; status?: number; @@ -42,6 +69,7 @@ function createResponse(input: { describe('ManagedBookCoverService', () => { test('downloads trusted OpenLibrary covers into managed storage', async () => { + const coverBuffer = createImageBuffer('image/jpeg'); const stored: Array<{ key: string; body: Buffer | Uint8Array | NodeJS.ReadableStream; @@ -67,9 +95,9 @@ describe('ManagedBookCoverService', () => { url: 'https://covers.openlibrary.org/b/id/123-L.jpg', headers: { 'content-type': 'image/jpeg', - 'content-length': '3' + 'content-length': String(coverBuffer.byteLength) }, - body: Buffer.from([1, 2, 3]) + body: coverBuffer }); }); @@ -82,14 +110,14 @@ describe('ManagedBookCoverService', () => { assert.deepEqual(result, { managedUrl: buildVersionedManagedBookCoverUrl( 'example.epub.jpg', - buildManagedBookCoverVersionToken(Buffer.from([1, 2, 3])) + buildManagedBookCoverVersionToken(coverBuffer) ), sourceUrl: 'https://covers.openlibrary.org/b/id/123-L.jpg' }); assert.equal(stored.length, 1); assert.equal(stored[0]?.key, 'covers/example.epub.jpg'); assert.equal(stored[0]?.contentType, 'image/jpeg'); - assert.deepEqual(Buffer.from(stored[0]?.body as Buffer), Buffer.from([1, 2, 3])); + assert.deepEqual(Buffer.from(stored[0]?.body as Buffer), coverBuffer); }); test('skips untrusted source URLs before fetch', async () => { @@ -200,8 +228,85 @@ describe('ManagedBookCoverService', () => { assert.equal(putCalls, 0); }); + test('falls back when a fetched cover is smaller than the minimum size', async () => { + let putCalls = 0; + + const storage = { + async put(): Promise { + putCalls += 1; + }, + async get(): Promise { + throw new Error('not implemented'); + }, + async delete(): Promise {}, + async list(): Promise<[]> { + return []; + } + } as StoragePort; + + const service = new ManagedBookCoverService(storage, async () => + createResponse({ + url: 
'https://books.google.com/books/content?id=test-cover', + headers: { + 'content-type': 'image/jpeg' + }, + body: Buffer.from([0xff, 0xd8, 0xff]) + }) + ); + + const result = await service.storeFromExternalUrl({ + bookStorageKey: 'example.epub', + coverUrl: 'https://books.google.com/books/content?id=test-cover' + }); + + assert.deepEqual(result, { + managedUrl: null, + sourceUrl: 'https://books.google.com/books/content?id=test-cover' + }); + assert.equal(putCalls, 0); + }); + + test('falls back when fetched cover bytes do not match the declared image type', async () => { + let putCalls = 0; + + const storage = { + async put(): Promise { + putCalls += 1; + }, + async get(): Promise { + throw new Error('not implemented'); + }, + async delete(): Promise {}, + async list(): Promise<[]> { + return []; + } + } as StoragePort; + + const service = new ManagedBookCoverService(storage, async () => + createResponse({ + url: 'https://books.google.com/books/content?id=test-cover', + headers: { + 'content-type': 'image/jpeg' + }, + body: Buffer.alloc(MIN_MANAGED_BOOK_COVER_BYTES, 7) + }) + ); + + const result = await service.storeFromExternalUrl({ + bookStorageKey: 'example.epub', + coverUrl: 'https://books.google.com/books/content?id=test-cover' + }); + + assert.deepEqual(result, { + managedUrl: null, + sourceUrl: 'https://books.google.com/books/content?id=test-cover' + }); + assert.equal(putCalls, 0); + }); + test('normalizes relative Z-Library covers and forwards auth cookies', async () => { let requestHeaders: Headers | null = null; + const coverBuffer = createImageBuffer('image/webp', 4); const storage = { async put(): Promise {}, @@ -222,7 +327,7 @@ describe('ManagedBookCoverService', () => { headers: { 'content-type': 'image/webp' }, - body: Buffer.from([4, 5, 6]) + body: coverBuffer }); }); @@ -239,7 +344,7 @@ describe('ManagedBookCoverService', () => { assert.deepEqual(result, { managedUrl: buildVersionedManagedBookCoverUrl( 'example.epub.webp', - 
buildManagedBookCoverVersionToken(Buffer.from([4, 5, 6])) + buildManagedBookCoverVersionToken(coverBuffer) ), sourceUrl: 'https://1lib.sk/covers/books/123.webp' }); @@ -253,6 +358,7 @@ describe('ManagedBookCoverService', () => { test('accepts protocol-relative Z-Library CDN cover URLs without leaking auth cookies', async () => { let requestHeaders: Headers | null = null; + const coverBuffer = createImageBuffer('image/jpeg', 7); const storage = { async put(): Promise {}, @@ -273,7 +379,7 @@ describe('ManagedBookCoverService', () => { headers: { 'content-type': 'image/jpeg' }, - body: Buffer.from([7, 8, 9]) + body: coverBuffer }); }); @@ -290,7 +396,7 @@ describe('ManagedBookCoverService', () => { assert.deepEqual(result, { managedUrl: buildVersionedManagedBookCoverUrl( 'example.epub.jpg', - buildManagedBookCoverVersionToken(Buffer.from([7, 8, 9])) + buildManagedBookCoverVersionToken(coverBuffer) ), sourceUrl: 'https://cdn.example.com/covers/books/123.jpg' }); @@ -303,6 +409,7 @@ describe('ManagedBookCoverService', () => { test('imports arbitrary external HTTPS cover URLs for manual library metadata actions', async () => { let requestedUrl: string | null = null; + const coverBuffer = createImageBuffer('image/jpeg', 9); const storage = { async put(): Promise {}, @@ -322,7 +429,7 @@ describe('ManagedBookCoverService', () => { headers: { 'content-type': 'image/jpeg' }, - body: Buffer.from([9, 8, 7]) + body: coverBuffer }); }); @@ -335,7 +442,7 @@ describe('ManagedBookCoverService', () => { assert.deepEqual(result, { managedUrl: buildVersionedManagedBookCoverUrl( 'example.epub.jpg', - buildManagedBookCoverVersionToken(Buffer.from([9, 8, 7])) + buildManagedBookCoverVersionToken(coverBuffer) ), sourceUrl: 'https://books.google.com/books/content?id=test-cover' }); @@ -343,6 +450,7 @@ describe('ManagedBookCoverService', () => { test('imports arbitrary external HTTP cover URLs for manual library metadata actions', async () => { let requestedUrl: string | null = null; + const 
coverBuffer = createImageBuffer('image/jpeg', 8); const storage = { async put(): Promise {}, @@ -362,7 +470,7 @@ describe('ManagedBookCoverService', () => { headers: { 'content-type': 'image/jpeg' }, - body: Buffer.from([9, 8, 7]) + body: coverBuffer }); }); @@ -375,13 +483,14 @@ describe('ManagedBookCoverService', () => { assert.deepEqual(result, { managedUrl: buildVersionedManagedBookCoverUrl( 'example.epub.jpg', - buildManagedBookCoverVersionToken(Buffer.from([9, 8, 7])) + buildManagedBookCoverVersionToken(coverBuffer) ), sourceUrl: 'http://books.google.com/books/content?id=test-cover' }); }); test('stores embedded cover buffers into managed storage', async () => { + const coverBuffer = createImageBuffer('image/png'); const stored: Array<{ key: string; body: Buffer | Uint8Array | NodeJS.ReadableStream; @@ -404,21 +513,21 @@ describe('ManagedBookCoverService', () => { const service = new ManagedBookCoverService(storage); const result = await service.storeFromBuffer({ bookStorageKey: 'example.epub', - coverBuffer: Buffer.from([1, 2, 3, 4]), + coverBuffer, contentType: 'image/png' }); assert.deepEqual(result, { managedUrl: buildVersionedManagedBookCoverUrl( 'example.epub.png', - buildManagedBookCoverVersionToken(Buffer.from([1, 2, 3, 4])) + buildManagedBookCoverVersionToken(coverBuffer) ), sourceUrl: null }); assert.equal(stored.length, 1); assert.equal(stored[0]?.key, 'covers/example.epub.png'); assert.equal(stored[0]?.contentType, 'image/png'); - assert.deepEqual(Buffer.from(stored[0]?.body as Buffer), Buffer.from([1, 2, 3, 4])); + assert.deepEqual(Buffer.from(stored[0]?.body as Buffer), coverBuffer); }); test('rejects embedded cover buffers with unsupported content types', async () => { @@ -438,7 +547,7 @@ describe('ManagedBookCoverService', () => { const service = new ManagedBookCoverService(storage); const result = await service.storeFromBuffer({ bookStorageKey: 'example.epub', - coverBuffer: Buffer.from([1, 2, 3, 4]), + coverBuffer: 
createImageBuffer('image/png'), contentType: 'image/svg+xml' }); @@ -448,6 +557,41 @@ describe('ManagedBookCoverService', () => { }); }); + test('rejects embedded cover buffers that are too small or have an invalid signature', async () => { + const storage = { + async put(): Promise { + throw new Error('should not upload'); + }, + async get(): Promise { + throw new Error('not implemented'); + }, + async delete(): Promise {}, + async list(): Promise<[]> { + return []; + } + } as StoragePort; + + const service = new ManagedBookCoverService(storage); + + assert.deepEqual( + await service.storeFromBuffer({ + bookStorageKey: 'example.epub', + coverBuffer: Buffer.from([0xff, 0xd8, 0xff]), + contentType: 'image/jpeg' + }), + { managedUrl: null, sourceUrl: null } + ); + + assert.deepEqual( + await service.storeFromBuffer({ + bookStorageKey: 'example.epub', + coverBuffer: Buffer.alloc(MIN_MANAGED_BOOK_COVER_BYTES, 1), + contentType: 'image/jpeg' + }), + { managedUrl: null, sourceUrl: null } + ); + }); + test('returns a fresh managed URL when replacing a cover with the same file extension', async () => { const storedKeys: string[] = []; @@ -465,14 +609,16 @@ describe('ManagedBookCoverService', () => { } as StoragePort; const service = new ManagedBookCoverService(storage); + const firstBuffer = createImageBuffer('image/jpeg', 3); + const secondBuffer = createImageBuffer('image/jpeg', 6); const first = await service.storeFromBuffer({ bookStorageKey: 'example.epub', - coverBuffer: Buffer.from([1, 2, 3]), + coverBuffer: firstBuffer, contentType: 'image/jpeg' }); const second = await service.storeFromBuffer({ bookStorageKey: 'example.epub', - coverBuffer: Buffer.from([4, 5, 6]), + coverBuffer: secondBuffer, contentType: 'image/jpeg' }); diff --git a/sake/tests/metadata/applyMetadataCandidateUseCase.test.ts b/sake/tests/metadata/applyMetadataCandidateUseCase.test.ts new file mode 100644 index 0000000..5d99e71 --- /dev/null +++ 
b/sake/tests/metadata/applyMetadataCandidateUseCase.test.ts @@ -0,0 +1,364 @@ +import assert from 'node:assert/strict'; +import { describe, test } from 'node:test'; +import type { BookRepositoryPort } from '$lib/server/application/ports/BookRepositoryPort'; +import type { + MetadataCandidate, + MetadataCoverCandidate +} from '$lib/server/application/ports/MetadataProviderPort'; +import type { ManagedBookCoverResult } from '$lib/server/application/services/ManagedBookCoverService'; +import { ApplyMetadataCandidateUseCase } from '$lib/server/application/use-cases/ApplyMetadataCandidateUseCase'; +import type { Book, UpdateBookMetadataInput } from '$lib/server/domain/entities/Book'; + +function createBook(overrides: Partial = {}): Book { + return { + id: 1, + zLibId: null, + s3_storage_key: 'dune.epub', + title: 'Dune', + author: 'Frank Herbert', + publisher: null, + series: null, + volume: null, + series_index: null, + edition: null, + identifier: null, + pages: null, + description: null, + google_books_id: null, + open_library_key: null, + amazon_asin: null, + external_rating: null, + external_rating_count: null, + cover: 'https://books.google.com/books/content?id=old', + extension: 'epub', + filesize: 10, + language: null, + year: null, + month: null, + day: null, + progress_storage_key: null, + progress_updated_at: null, + progress_percent: null, + progress_before_read: null, + rating: null, + read_at: null, + archived_at: null, + exclude_from_new_books: false, + createdAt: '2026-01-01T00:00:00.000Z', + deleted_at: null, + trash_expires_at: null, + ...overrides + }; +} + +function createCandidate(overrides: Partial = {}): MetadataCandidate { + return { + providerId: 'googlebooks', + providerScore: 1, + identifiers: { + isbn10: '0441172695', + isbn13: '9780441172696', + asin: 'B000FC1BN8', + googleBooksId: 'gb-dune-messiah', + openLibraryKey: 'OL893415W', + hardcoverId: null + }, + title: 'Dune Messiah', + subtitle: null, + authors: ['Frank Herbert'], + description: 
'The second Dune novel.', + descriptionFormat: 'text', + subjects: ['Science Fiction'], + series: 'Dune', + seriesIndex: 2, + publisher: 'Ace', + publishedDate: { + year: 1969, + month: 7, + day: 1 + }, + language: 'en', + pageCount: 352, + covers: [ + { + url: 'https://books.google.com/books/content?id=gb-dune-messiah', + source: 'googlebooks' + } + ], + rating: { + average: 4.1, + count: 12345 + }, + sourceUrl: 'https://books.google.com/books?id=gb-dune-messiah', + ...overrides + }; +} + +function applyMetadataToBook(book: Book, metadata: UpdateBookMetadataInput): Book { + return { + ...book, + zLibId: metadata.zLibId, + title: metadata.title, + author: metadata.author, + publisher: metadata.publisher, + series: metadata.series, + volume: metadata.volume, + series_index: metadata.series_index, + edition: metadata.edition, + identifier: metadata.identifier, + pages: metadata.pages, + description: metadata.description, + google_books_id: metadata.google_books_id, + open_library_key: metadata.open_library_key, + amazon_asin: metadata.amazon_asin, + external_rating: metadata.external_rating, + external_rating_count: metadata.external_rating_count, + cover: metadata.cover, + extension: metadata.extension, + filesize: metadata.filesize, + language: metadata.language, + year: metadata.year, + month: metadata.month, + day: metadata.day, + createdAt: metadata.createdAt + }; +} + +describe('ApplyMetadataCandidateUseCase', () => { + test('applies selected candidate fields and imports a selected cover', async () => { + const book = createBook(); + let capturedUpdate: UpdateBookMetadataInput | null = null; + let capturedCoverInput: + | { + bookStorageKey: string; + coverUrl: string | null | undefined; + } + | null = null; + const coverChoice: MetadataCoverCandidate = { + url: 'https://books.google.com/books/content?id=gb-dune-messiah', + source: 'googlebooks' + }; + + const repository = { + async getById(): Promise { + return book; + }, + async updateMetadata(_id: number, 
metadata: UpdateBookMetadataInput): Promise { + capturedUpdate = metadata; + return applyMetadataToBook(book, metadata); + } + } as unknown as BookRepositoryPort; + + const useCase = new ApplyMetadataCandidateUseCase(repository, { + async storeFromExternalUrl(input): Promise { + capturedCoverInput = input; + return { + managedUrl: '/api/library/covers/dune.epub.jpg?v=123', + sourceUrl: input.coverUrl ?? null + }; + } + }); + + const result = await useCase.execute({ + bookId: 1, + candidate: createCandidate(), + fieldSelections: [ + 'title', + 'author', + 'publisher', + 'series', + 'seriesIndex', + 'identifier', + 'pages', + 'description', + 'language', + 'publishedDate', + 'googleBooksId', + 'openLibraryKey', + 'amazonAsin', + 'externalRating', + 'externalRatingCount' + ], + coverChoice + }); + + assert.equal(result.ok, true); + assert.deepEqual(capturedCoverInput, { + bookStorageKey: 'dune.epub', + coverUrl: 'https://books.google.com/books/content?id=gb-dune-messiah' + }); + if (capturedUpdate === null) { + throw new Error('Expected metadata update'); + } + const update = capturedUpdate as UpdateBookMetadataInput; + assert.equal(update.title, 'Dune Messiah'); + assert.equal(update.author, 'Frank Herbert'); + assert.equal(update.publisher, 'Ace'); + assert.equal(update.series, 'Dune'); + assert.equal(update.series_index, 2); + assert.equal(update.identifier, '9780441172696'); + assert.equal(update.pages, 352); + assert.equal(update.description, 'The second Dune novel.'); + assert.equal(update.language, 'en'); + assert.equal(update.year, 1969); + assert.equal(update.month, 7); + assert.equal(update.day, 1); + assert.equal(update.google_books_id, 'gb-dune-messiah'); + assert.equal(update.open_library_key, 'OL893415W'); + assert.equal(update.amazon_asin, 'B000FC1BN8'); + assert.equal(update.external_rating, 4.1); + assert.equal(update.external_rating_count, 12345); + assert.equal(update.cover, '/api/library/covers/dune.epub.jpg?v=123'); + if (!result.ok) { + throw new 
Error('Expected a successful result'); + } + assert.equal(result.value.book.title, 'Dune Messiah'); + assert.equal(result.value.book.cover, '/api/library/covers/dune.epub.jpg?v=123'); + }); + + test('returns 404 when the book does not exist', async () => { + let updateCalled = false; + let coverCalled = false; + const repository = { + async getById(): Promise { + return undefined; + }, + async updateMetadata(): Promise { + updateCalled = true; + throw new Error('should not be called'); + } + } as unknown as BookRepositoryPort; + + const useCase = new ApplyMetadataCandidateUseCase(repository, { + async storeFromExternalUrl(): Promise { + coverCalled = true; + throw new Error('should not be called'); + } + }); + + const result = await useCase.execute({ + bookId: 404, + candidate: createCandidate(), + fieldSelections: ['title'] + }); + + assert.equal(result.ok, false); + if (result.ok) { + throw new Error('Expected missing book to fail'); + } + assert.equal(result.error.status, 404); + assert.equal(result.error.message, 'Book not found'); + assert.equal(updateCalled, false); + assert.equal(coverCalled, false); + }); + + test('rejects empty field selections without a cover choice', async () => { + let getByIdCalled = false; + const repository = { + async getById(): Promise { + getByIdCalled = true; + return createBook(); + } + } as unknown as BookRepositoryPort; + + const useCase = new ApplyMetadataCandidateUseCase(repository, { + async storeFromExternalUrl(): Promise { + throw new Error('should not be called'); + } + }); + + const result = await useCase.execute({ + bookId: 1, + candidate: createCandidate(), + fieldSelections: [] + }); + + assert.equal(result.ok, false); + if (result.ok) { + throw new Error('Expected empty selection to fail'); + } + assert.equal(result.error.status, 400); + assert.equal(result.error.message, 'At least one field selection or cover choice is required'); + assert.equal(getByIdCalled, false); + }); + + test('does not update fields when 
cover import fails', async () => { + let updateCalled = false; + const repository = { + async getById(): Promise { + return createBook(); + }, + async updateMetadata(): Promise { + updateCalled = true; + throw new Error('should not be called'); + } + } as unknown as BookRepositoryPort; + + const useCase = new ApplyMetadataCandidateUseCase(repository, { + async storeFromExternalUrl(): Promise { + return { + managedUrl: null, + sourceUrl: 'https://example.com/cover.jpg' + }; + } + }); + + const result = await useCase.execute({ + bookId: 1, + candidate: createCandidate(), + fieldSelections: ['title'], + coverChoice: { + url: 'https://example.com/cover.jpg', + source: 'manual' + } + }); + + assert.equal(result.ok, false); + if (result.ok) { + throw new Error('Expected cover import failure'); + } + assert.equal(result.error.status, 502); + assert.equal(result.error.message, 'Failed to import cover image'); + assert.equal(updateCalled, false); + }); + + test('sanitizes HTML descriptions and normalizes author names before persisting', async () => { + const book = createBook(); + let capturedUpdate: UpdateBookMetadataInput | null = null; + + const repository = { + async getById(): Promise { + return book; + }, + async updateMetadata(_id: number, metadata: UpdateBookMetadataInput): Promise { + capturedUpdate = metadata; + return applyMetadataToBook(book, metadata); + } + } as unknown as BookRepositoryPort; + + const useCase = new ApplyMetadataCandidateUseCase(repository, { + async storeFromExternalUrl(): Promise { + throw new Error('should not be called'); + } + }); + + const result = await useCase.execute({ + bookId: 1, + candidate: createCandidate({ + authors: [' Frank Herbert ', ' '], + description: + '

Safe textlink

', + descriptionFormat: 'html' + }), + fieldSelections: ['author', 'description'] + }); + + assert.equal(result.ok, true); + if (capturedUpdate === null) { + throw new Error('Expected metadata update'); + } + const update = capturedUpdate as UpdateBookMetadataInput; + assert.equal(update.author, 'Frank Herbert'); + assert.equal(update.description, '

Safe textlink

'); + }); +}); diff --git a/sake/tests/metadata/isbndbMetadataProvider.test.ts b/sake/tests/metadata/isbndbMetadataProvider.test.ts new file mode 100644 index 0000000..81869c2 --- /dev/null +++ b/sake/tests/metadata/isbndbMetadataProvider.test.ts @@ -0,0 +1,169 @@ +import assert from 'node:assert/strict'; +import { afterEach, describe, test } from 'node:test'; +import { createMetadataProvider } from '$lib/server/infrastructure/metadata-providers/metadataProviderFactory'; +import { IsbnDbMetadataProvider } from '$lib/server/infrastructure/metadata-providers/isbndbMetadataProvider'; + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { 'content-type': 'application/json' } + }); +} + +describe('IsbnDbMetadataProvider', () => { + const originalFetch = globalThis.fetch; + const originalApiKey = process.env.ISBNDB_API_KEY; + + afterEach(() => { + globalThis.fetch = originalFetch; + if (originalApiKey === undefined) { + delete process.env.ISBNDB_API_KEY; + } else { + process.env.ISBNDB_API_KEY = originalApiKey; + } + }); + + test('maps an ISBN lookup response to a metadata candidate', async () => { + process.env.ISBNDB_API_KEY = 'test-key'; + + globalThis.fetch = async ( + input: RequestInfo | URL, + init?: RequestInit + ): Promise => { + assert.equal(String(input), 'https://api.isbndb.com/book/9780441172719'); + const headers = init?.headers as Record; + assert.equal(headers['x-api-key'], 'test-key'); + + return jsonResponse({ + book: { + authors: ['Frank Herbert'], + date_published: '1965-08-01T00:00:00.000Z', + image: 'https://images.isbndb.com/covers/dune.jpg', + isbn: '0441172717', + isbn13: '9780441172719', + language: 'en', + overview: 'A desert planet and a very complicated inheritance.', + pages: 688, + publisher: 'Ace', + subjects: ['Science Fiction'], + title: 'Dune', + title_long: 'Dune: Deluxe Edition' + } + }); + }; + + const provider = new IsbnDbMetadataProvider(); + const 
result = await provider.lookup({ + isbn: '9780441172719', + title: 'Dune', + author: 'Frank Herbert', + language: 'en' + }); + + assert.equal(result.ok, true); + if (!result.ok) return; + + assert.equal(result.value.length, 1); + const candidate = result.value[0]; + assert.equal(candidate?.providerId, 'isbndb'); + assert.equal(candidate?.title, 'Dune: Deluxe Edition'); + assert.deepEqual(candidate?.authors, ['Frank Herbert']); + assert.equal(candidate?.identifiers.isbn10, '0441172717'); + assert.equal(candidate?.identifiers.isbn13, '9780441172719'); + assert.equal(candidate?.description, 'A desert planet and a very complicated inheritance.'); + assert.equal(candidate?.descriptionFormat, 'text'); + assert.equal(candidate?.publisher, 'Ace'); + assert.equal(candidate?.publishedDate.year, 1965); + assert.equal(candidate?.pageCount, 688); + assert.equal(candidate?.language, 'en'); + assert.deepEqual(candidate?.subjects, ['Science Fiction']); + assert.deepEqual(candidate?.covers, [ + { url: 'https://images.isbndb.com/covers/dune.jpg', source: 'isbndb' } + ]); + assert.equal(candidate?.rating.average, null); + assert.equal(candidate?.sourceUrl, 'https://isbndb.com/book/9780441172719'); + assert.ok((candidate?.providerScore ?? 
0) > 10); + }); + + test('searches by title and author when no ISBN is available', async () => { + process.env.ISBNDB_API_KEY = 'test-key'; + + globalThis.fetch = async (input: RequestInfo | URL): Promise => { + const requestUrl = new URL(String(input)); + assert.equal(requestUrl.origin, 'https://api.isbndb.com'); + assert.equal(requestUrl.pathname, '/books/Dune'); + assert.equal(requestUrl.searchParams.get('author'), 'Frank Herbert'); + assert.equal(requestUrl.searchParams.get('page'), '1'); + assert.equal(requestUrl.searchParams.get('pageSize'), '3'); + + return jsonResponse({ + books: [ + { + authors: ['Frank Herbert'], + cover: 'https://images.isbndb.com/covers/dune-list.jpg', + isbn: '0441172717', + isbn13: '9780441172719', + language: 'en', + pages: '688', + publisher: 'Ace', + synopsys: 'A list-search description.', + title: 'Dune' + } + ] + }); + }; + + const provider = new IsbnDbMetadataProvider(); + const result = await provider.lookup({ + title: 'Dune', + author: 'Frank Herbert', + language: 'english', + limit: 3 + }); + + assert.equal(result.ok, true); + if (!result.ok) return; + + assert.equal(result.value.length, 1); + assert.equal(result.value[0]?.title, 'Dune'); + assert.equal(result.value[0]?.pageCount, 688); + assert.equal(result.value[0]?.description, 'A list-search description.'); + assert.equal(result.value[0]?.covers[0]?.url, 'https://images.isbndb.com/covers/dune-list.jpg'); + }); + + test('skips factory creation when ISBNDB_API_KEY is missing', () => { + delete process.env.ISBNDB_API_KEY; + + assert.equal(createMetadataProvider('isbndb'), null); + + process.env.ISBNDB_API_KEY = 'test-key'; + assert.ok(createMetadataProvider('isbndb') instanceof IsbnDbMetadataProvider); + }); + + test('treats 404 as an empty result set', async () => { + process.env.ISBNDB_API_KEY = 'test-key'; + globalThis.fetch = async (): Promise => new Response('', { status: 404 }); + + const provider = new IsbnDbMetadataProvider(); + const result = await provider.lookup({ 
isbn: '9780000000000' }); + + assert.equal(result.ok, true); + if (!result.ok) return; + assert.deepEqual(result.value, []); + }); + + test('returns an auth error when the API key is rejected', async () => { + process.env.ISBNDB_API_KEY = 'test-key'; + globalThis.fetch = async (): Promise => new Response('', { status: 401 }); + + const provider = new IsbnDbMetadataProvider(); + const result = await provider.lookup({ isbn: '9780441172719' }); + + assert.equal(result.ok, false); + if (result.ok) { + throw new Error('Expected rejected key to fail'); + } + assert.equal(result.error.status, 401); + assert.equal(result.error.message, 'ISBNdb API key was rejected'); + }); +}); diff --git a/sake/tests/metadata/metadataDescriptionSanitizer.test.ts b/sake/tests/metadata/metadataDescriptionSanitizer.test.ts new file mode 100644 index 0000000..8668ead --- /dev/null +++ b/sake/tests/metadata/metadataDescriptionSanitizer.test.ts @@ -0,0 +1,27 @@ +import assert from 'node:assert/strict'; +import { describe, test } from 'node:test'; +import { sanitizeMetadataDescription } from '$lib/server/application/services/MetadataDescriptionSanitizer'; +import { normalizeAuthor, normalizeAuthorForMatch } from '$lib/utils/author'; + +describe('metadata safety helpers', () => { + test('sanitizes HTML descriptions with a strict allowlist', () => { + const sanitized = sanitizeMetadataDescription( + `

Hello worldlink

`, + 'html' + ); + + assert.equal(sanitized, '

Hello worldlink

'); + }); + + test('keeps text and markdown descriptions as trimmed text', () => { + assert.equal(sanitizeMetadataDescription(' **Hello** ', 'markdown'), '**Hello**'); + assert.equal(sanitizeMetadataDescription(' plain text ', 'text'), 'plain text'); + assert.equal(sanitizeMetadataDescription(' ', 'html'), null); + }); + + test('normalizes author strings for display and matching', () => { + assert.equal(normalizeAuthor(' Frank Herbert '), 'Frank Herbert'); + assert.equal(normalizeAuthor(' '), null); + assert.equal(normalizeAuthorForMatch('Frank Herbert, Jr.'), 'frank herbert jr'); + }); +}); From b23133a2285cac434165087882b07edecad5928a Mon Sep 17 00:00:00 2001 From: Sascha Date: Fri, 15 May 2026 13:15:56 +0200 Subject: [PATCH 2/5] Added section to readme --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 42d764a..a654283 100644 --- a/README.md +++ b/README.md @@ -195,6 +195,13 @@ Copy `sake/.env.example` to `sake/.env` and fill in the values you need. If `ACTIVATED_PROVIDERS` is unset, blank, or contains no valid values, search stays disabled and the search UI remains hidden. If `ACTIVATED_METADATA_PROVIDERS` is unset, blank, or contains no valid values, on-demand metadata lookup stays disabled and the metadata update UI remains hidden. +Metadata provider notes: + +- `googlebooks` works without a key; `GOOGLE_BOOKS_API_KEY` only improves rate limits. +- `openlibrary` works without a key. +- `hardcover` is skipped unless `HARDCOVER_API_TOKEN` is set. +- `isbndb` is skipped unless `ISBNDB_API_KEY` is set. ISBNdb is a paid provider. 
+ Accepted provider names: - `anna`, `annas`, `annas-archive`, or `annasarchive` From 56f738c89f0f5ba0ad489806673e0c1588c411be Mon Sep 17 00:00:00 2001 From: Sascha Date: Tue, 19 May 2026 18:37:55 +0200 Subject: [PATCH 3/5] made logs --- .../services/MetadataAggregatorService.ts | 72 +++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/sake/src/lib/server/application/services/MetadataAggregatorService.ts b/sake/src/lib/server/application/services/MetadataAggregatorService.ts index 91ff987..54e385f 100644 --- a/sake/src/lib/server/application/services/MetadataAggregatorService.ts +++ b/sake/src/lib/server/application/services/MetadataAggregatorService.ts @@ -5,6 +5,7 @@ import type { } from '$lib/server/application/ports/MetadataProviderPort'; import type { MetadataProviderId } from '$lib/types/Metadata/Provider'; import { languageTokens, normalizeForMatch } from '$lib/server/infrastructure/metadata-providers/metadataProviderUtils'; +import { createChildLogger } from '$lib/server/infrastructure/logging/logger'; const DEFAULT_TIMEOUT_MS = 8_000; @@ -13,6 +14,13 @@ export interface MetadataAggregatorResult { providerErrors: Array<{ providerId: MetadataProviderId; message: string }>; } +interface MetadataProviderLookupLogResult { + providerId: MetadataProviderId; + status: 'ok' | 'empty' | 'error'; + candidateCount: number; + message?: string; +} + function readTimeoutMs(): number { const raw = process.env.METADATA_PROVIDER_TIMEOUT_MS; if (!raw) { @@ -72,10 +80,33 @@ function withTimeout(promise: Promise, ms: number, label: string): Promise } export class MetadataAggregatorService { + private readonly serviceLogger = createChildLogger({ service: 'MetadataAggregatorService' }); + constructor(private readonly providers: MetadataProviderPort[]) {} async lookup(query: MetadataQuery): Promise { const timeoutMs = readTimeoutMs(); + const providerIds = this.providers.map((provider) => provider.id); + + this.serviceLogger.info( + { + event: 
'metadata.lookup.started', + providerIds, + providerCount: providerIds.length, + timeoutMs, + query: { + hasTitle: Boolean(query.title?.trim()), + hasAuthor: Boolean(query.author?.trim()), + hasIsbn: Boolean(query.isbn?.trim()), + hasLanguage: Boolean(query.language?.trim()), + hasGoogleBooksId: Boolean(query.googleBooksId?.trim()), + hasOpenLibraryKey: Boolean(query.openLibraryKey?.trim()), + hasHardcoverId: Boolean(query.hardcoverId?.trim()), + limit: query.limit ?? null + } + }, + 'Metadata lookup started' + ); const settled = await Promise.allSettled( this.providers.map((provider) => @@ -89,6 +120,7 @@ export class MetadataAggregatorService { const allCandidates: MetadataCandidate[] = []; const providerErrors: Array<{ providerId: MetadataProviderId; message: string }> = []; + const providerResults: MetadataProviderLookupLogResult[] = []; for (let i = 0; i < settled.length; i++) { const result = settled[i]; @@ -96,12 +128,19 @@ export class MetadataAggregatorService { if (!provider) continue; if (result.status === 'rejected') { + const message = + result.reason instanceof Error + ? result.reason.message + : String(result.reason); providerErrors.push({ providerId: provider.id, - message: - result.reason instanceof Error - ? result.reason.message - : String(result.reason) + message + }); + providerResults.push({ + providerId: provider.id, + status: 'error', + candidateCount: 0, + message }); continue; } @@ -112,10 +151,21 @@ export class MetadataAggregatorService { providerId: provider.id, message: apiResult.error.message }); + providerResults.push({ + providerId: provider.id, + status: 'error', + candidateCount: 0, + message: apiResult.error.message + }); continue; } allCandidates.push(...apiResult.value); + providerResults.push({ + providerId: provider.id, + status: apiResult.value.length > 0 ? 
'ok' : 'empty', + candidateCount: apiResult.value.length + }); } const ranked = allCandidates @@ -131,6 +181,20 @@ export class MetadataAggregatorService { }) .map(({ candidate }) => candidate); + this.serviceLogger.info( + { + event: 'metadata.lookup.completed', + providerIds, + providerResults, + providersWithCandidates: providerResults + .filter((result) => result.candidateCount > 0) + .map((result) => result.providerId), + candidateCount: ranked.length, + providerErrorCount: providerErrors.length + }, + 'Metadata lookup completed' + ); + return { candidates: ranked, providerErrors }; } } From 3ed092f6708868e853179c796f2a21bd42e8e0d5 Mon Sep 17 00:00:00 2001 From: Sascha Date: Tue, 19 May 2026 19:47:50 +0200 Subject: [PATCH 4/5] Fixed hardcover --- README.md | 6 +- sake/.env.docker.selfhosted | 1 - sake/.env.example | 1 - .../src/lib/server/application/composition.ts | 18 +- .../services/ExternalBookMetadataService.ts | 9 +- .../googleBooksMetadataProvider.ts | 4 +- .../hardcoverMetadataProvider.ts | 241 ++++++------------ .../isbndbMetadataProvider.ts | 4 +- .../metadataProviderFactory.ts | 37 ++- .../openLibraryMetadataProvider.ts | 149 ++++++++--- .../externalBookMetadataService.test.ts | 102 ++++++++ .../hardcoverMetadataProvider.test.ts | 112 ++++++++ .../openLibraryMetadataProvider.test.ts | 112 ++++++++ 13 files changed, 564 insertions(+), 232 deletions(-) create mode 100644 sake/tests/metadata/externalBookMetadataService.test.ts create mode 100644 sake/tests/metadata/hardcoverMetadataProvider.test.ts create mode 100644 sake/tests/metadata/openLibraryMetadataProvider.test.ts diff --git a/README.md b/README.md index a654283..a389ae6 100644 --- a/README.md +++ b/README.md @@ -185,10 +185,9 @@ Copy `sake/.env.example` to `sake/.env` and fill in the values you need. 
- `VITE_ALLOWED_HOSTS` - comma-separated host overrides for Vite/dev setups - `ACTIVATED_PROVIDERS` - comma-separated search providers -- `ACTIVATED_METADATA_PROVIDERS` - comma-separated metadata providers, for example `googlebooks,openlibrary,hardcover,isbndb` +- `ACTIVATED_METADATA_PROVIDERS` - comma-separated metadata providers, for example `googlebooks,openlibrary,hardcover` - `GOOGLE_BOOKS_API_KEY` - optional Google Books key for higher rate limits - `HARDCOVER_API_TOKEN` - optional server-wide token required for the Hardcover metadata provider -- `ISBNDB_API_KEY` - optional paid API key required for the ISBNdb metadata provider - `METADATA_PROVIDER_TIMEOUT_MS` - optional metadata provider timeout in milliseconds - `BODY_SIZE_LIMIT` - upload/body size limit @@ -200,7 +199,6 @@ Metadata provider notes: - `googlebooks` works without a key; `GOOGLE_BOOKS_API_KEY` only improves rate limits. - `openlibrary` works without a key. - `hardcover` is skipped unless `HARDCOVER_API_TOKEN` is set. -- `isbndb` is skipped unless `ISBNDB_API_KEY` is set. ISBNdb is a paid provider. 
Accepted provider names: @@ -226,7 +224,6 @@ ACTIVATED_PROVIDERS=anna,openlib,gutenberg ACTIVATED_METADATA_PROVIDERS=googlebooks,openlibrary GOOGLE_BOOKS_API_KEY= HARDCOVER_API_TOKEN= -ISBNDB_API_KEY= METADATA_PROVIDER_TIMEOUT_MS= VITE_ALLOWED_HOSTS= BODY_SIZE_LIMIT=Infinity @@ -249,7 +246,6 @@ ACTIVATED_PROVIDERS=anna,openlib,gutenberg ACTIVATED_METADATA_PROVIDERS=googlebooks,openlibrary GOOGLE_BOOKS_API_KEY= HARDCOVER_API_TOKEN= -ISBNDB_API_KEY= METADATA_PROVIDER_TIMEOUT_MS= VITE_ALLOWED_HOSTS= BODY_SIZE_LIMIT=Infinity diff --git a/sake/.env.docker.selfhosted b/sake/.env.docker.selfhosted index 5b55b39..def7fcc 100644 --- a/sake/.env.docker.selfhosted +++ b/sake/.env.docker.selfhosted @@ -15,6 +15,5 @@ ACTIVATED_PROVIDERS=zlib,anna,openlib,gutenberg ACTIVATED_METADATA_PROVIDERS= GOOGLE_BOOKS_API_KEY= HARDCOVER_API_TOKEN= -ISBNDB_API_KEY= METADATA_PROVIDER_TIMEOUT_MS= BODY_SIZE_LIMIT=Infinity diff --git a/sake/.env.example b/sake/.env.example index cc7535e..ccb7e2d 100644 --- a/sake/.env.example +++ b/sake/.env.example @@ -16,6 +16,5 @@ ACTIVATED_PROVIDERS= ACTIVATED_METADATA_PROVIDERS= GOOGLE_BOOKS_API_KEY= HARDCOVER_API_TOKEN= -ISBNDB_API_KEY= METADATA_PROVIDER_TIMEOUT_MS= BODY_SIZE_LIMIT=Infinity diff --git a/sake/src/lib/server/application/composition.ts b/sake/src/lib/server/application/composition.ts index 6fba98a..c080d34 100644 --- a/sake/src/lib/server/application/composition.ts +++ b/sake/src/lib/server/application/composition.ts @@ -1,3 +1,4 @@ +import { env } from '$env/dynamic/private'; import { ZLibraryClient } from '$lib/server/infrastructure/clients/ZLibraryClient'; import { S3Storage } from '$lib/server/infrastructure/storage/S3Storage'; import { BookRepository } from '$lib/server/infrastructure/repositories/BookRepository'; @@ -86,8 +87,6 @@ import { getActivatedSearchProviders } from '$lib/server/config/activatedProvide import { SEARCH_PROVIDER_IDS } from '$lib/types/Search/Provider'; import { MetadataAggregatorService } from 
'$lib/server/application/services/MetadataAggregatorService'; import { ExternalBookMetadataService } from '$lib/server/application/services/ExternalBookMetadataService'; -import { GoogleBooksMetadataProvider } from '$lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider'; -import { OpenLibraryMetadataProvider } from '$lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider'; import { createMetadataProviders } from '$lib/server/infrastructure/metadata-providers/metadataProviderFactory'; import { getActivatedMetadataProviders } from '$lib/server/config/activatedMetadataProviders'; import { SearchMetadataCandidatesUseCase } from '$lib/server/application/use-cases/SearchMetadataCandidatesUseCase'; @@ -120,17 +119,16 @@ export const deviceProgressDownloadRepository = new DeviceProgressDownloadReposi export const bookProgressHistoryRepository = new BookProgressHistoryRepository(); export const managedBookCoverService = new ManagedBookCoverService(storage); -export const baselineMetadataAggregator = new MetadataAggregatorService([ - new GoogleBooksMetadataProvider(), - new OpenLibraryMetadataProvider() -]); +export const activatedMetadataProviders = createMetadataProviders(getActivatedMetadataProviders(), { + googleBooksApiKey: env.GOOGLE_BOOKS_API_KEY, + hardcoverApiToken: env.HARDCOVER_API_TOKEN, + isbnDbApiKey: env.ISBNDB_API_KEY +}); +export const activatedMetadataAggregator = new MetadataAggregatorService(activatedMetadataProviders); export const externalBookMetadataService = new ExternalBookMetadataService( - baselineMetadataAggregator + activatedMetadataAggregator ); -export const activatedMetadataProviders = createMetadataProviders(getActivatedMetadataProviders()); -export const activatedMetadataAggregator = new MetadataAggregatorService(activatedMetadataProviders); - export const downloadBookUseCase = new DownloadBookUseCase( zlibraryClient, bookRepository, diff --git 
a/sake/src/lib/server/application/services/ExternalBookMetadataService.ts b/sake/src/lib/server/application/services/ExternalBookMetadataService.ts index 08e1111..61b56af 100644 --- a/sake/src/lib/server/application/services/ExternalBookMetadataService.ts +++ b/sake/src/lib/server/application/services/ExternalBookMetadataService.ts @@ -1,8 +1,6 @@ import { MetadataAggregatorService } from '$lib/server/application/services/MetadataAggregatorService'; import { sanitizeMetadataDescription } from '$lib/server/application/services/MetadataDescriptionSanitizer'; import type { MetadataCandidate } from '$lib/server/application/ports/MetadataProviderPort'; -import { GoogleBooksMetadataProvider } from '$lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider'; -import { OpenLibraryMetadataProvider } from '$lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider'; export interface ExternalBookMetadata { googleBooksId: string | null; @@ -59,12 +57,7 @@ export class ExternalBookMetadataService { private readonly aggregator: MetadataAggregatorService; constructor(aggregator?: MetadataAggregatorService) { - this.aggregator = - aggregator ?? - new MetadataAggregatorService([ - new GoogleBooksMetadataProvider(), - new OpenLibraryMetadataProvider() - ]); + this.aggregator = aggregator ?? 
new MetadataAggregatorService([]); } async lookup(input: ExternalBookMetadataLookupInput): Promise { diff --git a/sake/src/lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider.ts b/sake/src/lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider.ts index 5e2ee75..99986ab 100644 --- a/sake/src/lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider.ts +++ b/sake/src/lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider.ts @@ -33,6 +33,8 @@ const TOUCHED_FIELDS = new Set([ export class GoogleBooksMetadataProvider implements MetadataProviderPort { readonly id: MetadataProviderId = 'googlebooks'; + constructor(private readonly apiKey?: string | null) {} + readonly capabilities: MetadataProviderCapabilities = { touchedFields: TOUCHED_FIELDS, hasCover: true, @@ -45,7 +47,7 @@ export class GoogleBooksMetadataProvider implements MetadataProviderPort { } private async fetchCandidates(query: MetadataQuery): Promise> { - const apiKey = process.env.GOOGLE_BOOKS_API_KEY?.trim() || ''; + const apiKey = this.apiKey?.trim() || process.env.GOOGLE_BOOKS_API_KEY?.trim() || ''; const limit = query.limit ?? 5; const queryParts: string[] = []; diff --git a/sake/src/lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider.ts b/sake/src/lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider.ts index 7ceebf5..b2e850a 100644 --- a/sake/src/lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider.ts +++ b/sake/src/lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider.ts @@ -10,8 +10,6 @@ import { asNonNegativeNumber, asPositiveNumber, asString, - languageScore, - languageTokens, normalizeForMatch, parseProviderPublicationDate } from './metadataProviderUtils'; @@ -45,64 +43,9 @@ const rateLimiter = new RequestRateLimiter(); const SEARCH_QUERY = /* GraphQL */ ` query SakeMetadataSearch($query: String!, $limit: Int!) 
{ - search(query: $query, query_type: "Book", per_page: $limit) { - results { - hit { - id - title - description - rating - ratings_count - slug - cached_contributors - cached_tags - default_edition_id - default_edition { - isbn_13 - isbn_10 - pages - release_date - image { url width height } - publisher { name } - language { language } - } - book_series { - position - series { name } - } - } - } - } - } -`; - -const ISBN_LOOKUP_QUERY = /* GraphQL */ ` - query SakeMetadataByISBN($isbn: String!, $limit: Int!) { - books( - where: { editions: { _or: [{ isbn_13: { _eq: $isbn } }, { isbn_10: { _eq: $isbn } }] } } - limit: $limit - ) { - id - title - description - rating - ratings_count - slug - cached_contributors - cached_tags - default_edition { - isbn_13 - isbn_10 - pages - release_date - image { url width height } - publisher { name } - language { language } - } - book_series { - position - series { name } - } + search(query: $query, query_type: "Book", per_page: $limit, page: 1) { + ids + results } } `; @@ -111,48 +54,36 @@ const ISBN_LOOKUP_QUERY = /* GraphQL */ ` // Response shape helpers // --------------------------------------------------------------------------- -interface HardcoverEdition { - isbn_13?: string | null; - isbn_10?: string | null; - pages?: number | null; - release_date?: string | null; - image?: { url?: string | null; width?: number | null; height?: number | null } | null; - publisher?: { name?: string | null } | null; - language?: { language?: string | null } | null; -} - -interface HardcoverSeries { - position?: number | null; - series?: { name?: string | null } | null; -} - -interface HardcoverBook { - id?: number | null; +interface HardcoverSearchDocument { + id?: string | number | null; title?: string | null; + subtitle?: string | null; + author_names?: string[] | null; description?: string | null; rating?: number | null; ratings_count?: number | null; slug?: string | null; - cached_contributors?: string | null; - cached_tags?: string | 
null; - default_edition?: HardcoverEdition | null; - book_series?: HardcoverSeries[] | null; + image?: { url?: string | null; width?: number | null; height?: number | null } | null; + isbns?: string[] | null; + pages?: number | null; + release_date?: string | null; + release_year?: number | null; + genres?: string[] | null; + tags?: string[] | null; + series_names?: string[] | null; } interface HardcoverSearchResult { data?: { search?: { - results?: Array<{ hit?: HardcoverBook | null } | null> | null; + ids?: Array | null; + results?: { + hits?: Array<{ document?: HardcoverSearchDocument | null } | null> | null; + } | null; } | null; } | null; } -interface HardcoverISBNResult { - data?: { - books?: HardcoverBook[] | null; - } | null; -} - interface GraphQLError { message?: string; } @@ -161,61 +92,51 @@ interface GraphQLError { // Field helpers // --------------------------------------------------------------------------- -function parseContributors(cached: string | null | undefined): string[] { - if (!cached) return []; - try { - const parsed = JSON.parse(cached) as Array<{ name?: string; author?: { name?: string } }>; - if (!Array.isArray(parsed)) return []; - return parsed.flatMap((entry) => { - const name = asString(entry.name ?? entry.author?.name); - return name ? [name] : []; - }); - } catch { - return []; +function firstMatchingIsbn(isbns: string[], length: 10 | 13, preferred: string | null | undefined): string | null { + const normalizedPreferred = preferred?.replace(/[^0-9X]/gi, '').toUpperCase() ?? ''; + const matchingPreferred = isbns.find((isbn) => { + const normalized = isbn.replace(/[^0-9X]/gi, '').toUpperCase(); + return normalized.length === length && normalized === normalizedPreferred; + }); + if (matchingPreferred) { + return matchingPreferred; } + + return isbns.find((isbn) => isbn.replace(/[^0-9X]/gi, '').length === length) ?? 
null; } -function parseTags(cached: string | null | undefined): string[] { - if (!cached) return []; - try { - const parsed = JSON.parse(cached) as Array<{ tag?: string; name?: string }>; - if (!Array.isArray(parsed)) return []; - return parsed.flatMap((entry) => { - const tag = asString(entry.tag ?? entry.name); - return tag ? [tag] : []; - }); - } catch { - return []; - } +function uniqueStrings(values: Array): string[] { + return [...new Set(values.flatMap((value) => { + const stringValue = asString(value); + return stringValue ? [stringValue] : []; + }))]; } -function mapBookToCandidate(book: HardcoverBook, query: MetadataQuery): MetadataCandidate { - const edition = book.default_edition; - const authors = parseContributors(book.cached_contributors); - const subjects = parseTags(book.cached_tags); +function mapBookToCandidate(book: HardcoverSearchDocument, query: MetadataQuery): MetadataCandidate { + const authors = uniqueStrings(book.author_names ?? []); + const subjects = uniqueStrings([...(book.genres ?? []), ...(book.tags ?? [])]); + const isbns = uniqueStrings(book.isbns ?? []); + const isbn13 = firstMatchingIsbn(isbns, 13, query.isbn); + const isbn10 = firstMatchingIsbn(isbns, 10, query.isbn); const normalizedTitle = normalizeForMatch(query.title); const normalizedAuthor = normalizeAuthorForMatch(query.author); - const targetLangTokens = languageTokens(query.language); const titleMatch = normalizedTitle.length > 0 && normalizeForMatch(book.title).includes(normalizedTitle); const authorMatch = normalizedAuthor.length > 0 && authors.some((a) => normalizeAuthorForMatch(a).includes(normalizedAuthor)); - const langScoreVal = languageScore(targetLangTokens, [edition?.language?.language]); const providerScore = (titleMatch ? 5 : 0) + (authorMatch ? 3 : 0) + - (asPositiveNumber(edition?.pages) ? 2 : 0) + - langScoreVal; + (asPositiveNumber(book.pages) ? 2 : 0); - const imageUrl = asString(edition?.image?.url); - const imageWidth = edition?.image?.width ?? 
undefined; - const imageHeight = edition?.image?.height ?? undefined; + const imageUrl = asString(book.image?.url); + const imageWidth = book.image?.width ?? undefined; + const imageHeight = book.image?.height ?? undefined; - const firstSeries = book.book_series?.[0]; const sourceUrl = book.slug ? `https://hardcover.app/books/${book.slug}` : null; @@ -224,25 +145,27 @@ function mapBookToCandidate(book: HardcoverBook, query: MetadataQuery): Metadata providerId: 'hardcover', providerScore, identifiers: { - isbn10: asString(edition?.isbn_10), - isbn13: asString(edition?.isbn_13), + isbn10: asString(isbn10), + isbn13: asString(isbn13), asin: null, googleBooksId: null, openLibraryKey: null, hardcoverId: book.id != null ? String(book.id) : null }, title: asString(book.title) ?? '', - subtitle: null, + subtitle: asString(book.subtitle), authors, description: asString(book.description), descriptionFormat: 'markdown', subjects, - series: asString(firstSeries?.series?.name), - seriesIndex: asNonNegativeNumber(firstSeries?.position), - publisher: asString(edition?.publisher?.name), - publishedDate: parseProviderPublicationDate(edition?.release_date ?? null), - language: asString(edition?.language?.language), - pageCount: asPositiveNumber(edition?.pages), + series: asString(book.series_names?.[0]), + seriesIndex: null, + publisher: null, + publishedDate: parseProviderPublicationDate( + book.release_date ?? (book.release_year != null ? String(book.release_year) : null) + ), + language: null, + pageCount: asPositiveNumber(book.pages), covers: imageUrl ? 
[ { @@ -285,17 +208,19 @@ async function graphqlFetch( ): Promise { const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), UPSTREAM_TIMEOUT_MS); + const body = { query, variables }; + const headers = { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}`, + 'User-Agent': USER_AGENT + }; try { const response = await fetch(HARDCOVER_API_URL, { method: 'POST', signal: controller.signal, - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${token}`, - 'User-Agent': USER_AGENT - }, - body: JSON.stringify({ query, variables }) + headers, + body: JSON.stringify(body) }); if (!response.ok) { @@ -335,6 +260,8 @@ const TOUCHED_FIELDS = new Set([ export class HardcoverMetadataProvider implements MetadataProviderPort { readonly id: MetadataProviderId = 'hardcover'; + constructor(private readonly apiToken?: string | null) {} + readonly capabilities: MetadataProviderCapabilities = { touchedFields: TOUCHED_FIELDS, hasCover: true, @@ -347,7 +274,7 @@ export class HardcoverMetadataProvider implements MetadataProviderPort { } private async fetchCandidates(query: MetadataQuery): Promise> { - const token = process.env.HARDCOVER_API_TOKEN?.trim(); + const token = this.apiToken?.trim() || process.env.HARDCOVER_API_TOKEN?.trim(); if (!token) { return apiError('HARDCOVER_API_TOKEN is not configured', 503); } @@ -359,29 +286,11 @@ export class HardcoverMetadataProvider implements MetadataProviderPort { const limit = normalizeLimit(query.limit); try { - let books: HardcoverBook[]; - - if (query.isbn) { - const data = await graphqlFetch( - token, - ISBN_LOOKUP_QUERY, - { isbn: query.isbn, limit } - ); - books = data?.books ?? 
[]; - - // If ISBN lookup returns nothing, fall through to title search - if (books.length === 0 && query.title) { - if (!rateLimiter.tryConsume()) { - // We're rate-limited on the fallback call — return empty rather than error - return apiOk([]); - } - books = await this.searchByTitle(token, query.title, limit); - } - } else if (query.title) { - books = await this.searchByTitle(token, query.title, limit); - } else { + const searchQuery = query.title?.trim() || query.isbn?.trim(); + if (!searchQuery) { return apiError('No query terms provided for Hardcover lookup', 400); } + const books = await this.search(token, searchQuery, limit); if (books.length === 0) { return apiOk([]); @@ -399,17 +308,17 @@ export class HardcoverMetadataProvider implements MetadataProviderPort { } } - private async searchByTitle( + private async search( token: string, - title: string, + searchQuery: string, limit: number - ): Promise { + ): Promise { const data = await graphqlFetch( token, SEARCH_QUERY, - { query: title, limit } + { query: searchQuery, limit } ); - const hits = data?.search?.results ?? []; - return hits.flatMap((r) => (r?.hit ? [r.hit] : [])); + const hits = data?.search?.results?.hits ?? []; + return hits.flatMap((hit) => (hit?.document ? 
[hit.document] : [])); } } diff --git a/sake/src/lib/server/infrastructure/metadata-providers/isbndbMetadataProvider.ts b/sake/src/lib/server/infrastructure/metadata-providers/isbndbMetadataProvider.ts index 169c1cf..7f1c8b0 100644 --- a/sake/src/lib/server/infrastructure/metadata-providers/isbndbMetadataProvider.ts +++ b/sake/src/lib/server/infrastructure/metadata-providers/isbndbMetadataProvider.ts @@ -223,6 +223,8 @@ async function fetchJson(url: string, apiKey: string): Promise> { - const apiKey = process.env.ISBNDB_API_KEY?.trim(); + const apiKey = this.apiKey?.trim() || process.env.ISBNDB_API_KEY?.trim(); if (!apiKey) { return apiError('ISBNDB_API_KEY is not configured', 503); } diff --git a/sake/src/lib/server/infrastructure/metadata-providers/metadataProviderFactory.ts b/sake/src/lib/server/infrastructure/metadata-providers/metadataProviderFactory.ts index f9ee4b4..a1fafc3 100644 --- a/sake/src/lib/server/infrastructure/metadata-providers/metadataProviderFactory.ts +++ b/sake/src/lib/server/infrastructure/metadata-providers/metadataProviderFactory.ts @@ -5,18 +5,36 @@ import { OpenLibraryMetadataProvider } from './openLibraryMetadataProvider'; import { HardcoverMetadataProvider } from './hardcoverMetadataProvider'; import { IsbnDbMetadataProvider } from './isbndbMetadataProvider'; -export function createMetadataProvider(providerId: MetadataProviderId): MetadataProviderPort | null { +export interface MetadataProviderRuntimeConfig { + googleBooksApiKey?: string | null; + hardcoverApiToken?: string | null; + isbnDbApiKey?: string | null; +} + +function configuredValue(value: string | null | undefined): string | null { + const trimmed = value?.trim(); + return trimmed ? 
trimmed : null; +} + +export function createMetadataProvider( + providerId: MetadataProviderId, + config: MetadataProviderRuntimeConfig = {} +): MetadataProviderPort | null { switch (providerId) { case 'googlebooks': - return new GoogleBooksMetadataProvider(); + return new GoogleBooksMetadataProvider(config.googleBooksApiKey); case 'openlibrary': return new OpenLibraryMetadataProvider(); - case 'hardcover': + case 'hardcover': { // Only instantiate when token is configured; silently skipped otherwise - return process.env.HARDCOVER_API_TOKEN?.trim() ? new HardcoverMetadataProvider() : null; - case 'isbndb': + const token = configuredValue(config.hardcoverApiToken ?? process.env.HARDCOVER_API_TOKEN); + return token ? new HardcoverMetadataProvider(token) : null; + } + case 'isbndb': { // Only instantiate when key is configured; silently skipped otherwise - return process.env.ISBNDB_API_KEY?.trim() ? new IsbnDbMetadataProvider() : null; + const apiKey = configuredValue(config.isbnDbApiKey ?? process.env.ISBNDB_API_KEY); + return apiKey ? new IsbnDbMetadataProvider(apiKey) : null; + } default: { const exhaustiveId: never = providerId; throw new Error(`Unsupported metadata provider: ${exhaustiveId}`); @@ -24,9 +42,12 @@ export function createMetadataProvider(providerId: MetadataProviderId): Metadata } } -export function createMetadataProviders(providerIds: MetadataProviderId[]): MetadataProviderPort[] { +export function createMetadataProviders( + providerIds: MetadataProviderId[], + config: MetadataProviderRuntimeConfig = {} +): MetadataProviderPort[] { return providerIds.flatMap((id) => { - const provider = createMetadataProvider(id); + const provider = createMetadataProvider(id, config); return provider ? 
[provider] : []; }); } diff --git a/sake/src/lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider.ts b/sake/src/lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider.ts index 5f1f386..46074bb 100644 --- a/sake/src/lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider.ts +++ b/sake/src/lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider.ts @@ -29,6 +29,73 @@ const TOUCHED_FIELDS = new Set([ 'identifiers' ]); +interface OpenLibraryDoc { + key?: string; + title?: string; + author_name?: string[]; + language?: string[]; + cover_i?: number; + isbn?: string[]; + publisher?: string[]; + first_sentence?: string | { value?: string }; + ratings_average?: number; + ratings_count?: number; + number_of_pages_median?: number; + subject?: string[]; +} + +interface OpenLibraryPayload { + docs?: OpenLibraryDoc[]; +} + +interface OpenLibraryQueryVariant { + id: 'title' | 'title-language' | 'title-author' | 'title-author-language'; + queryText: string; + rank: number; +} + +interface OpenLibraryVariantResult { + variant: OpenLibraryQueryVariant; + docs: OpenLibraryDoc[]; +} + +const OPEN_LIBRARY_FIELDS = + 'key,title,author_name,language,cover_i,isbn,publisher,first_sentence,ratings_average,ratings_count,number_of_pages_median,subject'; + +function buildOpenLibraryVariants(input: { + title: string; + author: string | null | undefined; + preferredLanguage: string; +}): OpenLibraryQueryVariant[] { + const variants: OpenLibraryQueryVariant[] = [ + { + id: 'title', + queryText: input.title, + rank: 1 + } + ]; + + if (input.preferredLanguage) { + variants.push({ + id: 'title-language', + queryText: `${input.title} language:${input.preferredLanguage}`, + rank: 2 + }); + } + + if (input.author?.trim()) { + variants.push({ + id: input.preferredLanguage ? 'title-author-language' : 'title-author', + queryText: `${input.title} ${input.author.trim()}${ + input.preferredLanguage ? 
` language:${input.preferredLanguage}` : '' + }`, + rank: input.preferredLanguage ? 3 : 2 + }); + } + + return variants; +} + export class OpenLibraryMetadataProvider implements MetadataProviderPort { readonly id: MetadataProviderId = 'openlibrary'; @@ -51,49 +118,47 @@ export class OpenLibraryMetadataProvider implements MetadataProviderPort { targetLangTokens.find((t) => t.length === 2) ?? ''; - const queryBase = - `${query.title ?? ''}${query.author ? ` ${query.author}` : ''}`.trim(); + const queryTitle = query.title?.trim() ?? ''; - if (!queryBase) { + if (!queryTitle) { return apiError('No query terms provided', 400); } - const q = encodeURIComponent( - preferredLanguage ? `${queryBase} language:${preferredLanguage}` : queryBase - ); - const url = - `https://openlibrary.org/search.json?q=${q}&limit=${limit}&fields=key,title,author_name,language,cover_i,isbn,publisher,first_sentence,ratings_average,ratings_count,number_of_pages_median,subject`; + const variants = buildOpenLibraryVariants({ + title: queryTitle, + author: query.author, + preferredLanguage + }); try { - const response = await fetch(url, { - headers: { 'User-Agent': 'Sake/1.0 (+https://github.com/Sudashiii/Sake)' } - }); - if (!response.ok) { - return apiError(`OpenLibrary API returned ${response.status}`, 502); + const settled = await Promise.allSettled( + variants.map((variant) => this.fetchVariant(variant, limit)) + ); + const successfulResults: OpenLibraryVariantResult[] = []; + const failedResults: string[] = []; + + for (const result of settled) { + if (result.status === 'fulfilled') { + successfulResults.push(result.value); + } else { + failedResults.push( + result.reason instanceof Error ? 
result.reason.message : String(result.reason) + ); + } } - const payload = (await response.json()) as { - docs?: Array<{ - key?: string; - title?: string; - author_name?: string[]; - language?: string[]; - cover_i?: number; - isbn?: string[]; - publisher?: string[]; - first_sentence?: string | { value?: string }; - ratings_average?: number; - ratings_count?: number; - number_of_pages_median?: number; - subject?: string[]; - }>; - }; + const selectedResult = successfulResults + .filter((result) => result.docs.length > 0) + .sort((a, b) => b.variant.rank - a.variant.rank)[0]; - const docs = payload.docs ?? []; - if (docs.length === 0) { + if (!selectedResult) { + if (successfulResults.length === 0 && failedResults.length > 0) { + return apiError(failedResults[0] ?? 'OpenLibrary lookup failed', 502); + } return apiOk([]); } + const docs = selectedResult.docs; const normalizedTitle = normalizeForMatch(query.title); const normalizedAuthor = normalizeAuthorForMatch(query.author); @@ -164,4 +229,26 @@ export class OpenLibraryMetadataProvider implements MetadataProviderPort { return apiError('OpenLibrary lookup failed', 502); } } + + private async fetchVariant( + variant: OpenLibraryQueryVariant, + limit: number + ): Promise { + const url = + `https://openlibrary.org/search.json?q=${encodeURIComponent(variant.queryText)}&limit=${limit}&fields=${OPEN_LIBRARY_FIELDS}`; + + const response = await fetch(url, { + headers: { 'User-Agent': 'Sake/1.0 (+https://github.com/Sudashiii/Sake)' } + }); + if (!response.ok) { + throw new Error(`OpenLibrary API returned ${response.status}`); + } + + const payload = (await response.json()) as OpenLibraryPayload; + + return { + variant, + docs: payload.docs ?? 
[] + }; + } } diff --git a/sake/tests/metadata/externalBookMetadataService.test.ts b/sake/tests/metadata/externalBookMetadataService.test.ts new file mode 100644 index 0000000..5df1ffd --- /dev/null +++ b/sake/tests/metadata/externalBookMetadataService.test.ts @@ -0,0 +1,102 @@ +import assert from 'node:assert/strict'; +import { describe, test } from 'node:test'; +import type { + MetadataCandidate, + MetadataProviderCapabilities, + MetadataProviderPort, + MetadataQuery +} from '$lib/server/application/ports/MetadataProviderPort'; +import { MetadataAggregatorService } from '$lib/server/application/services/MetadataAggregatorService'; +import { ExternalBookMetadataService } from '$lib/server/application/services/ExternalBookMetadataService'; +import { apiOk, type ApiResult } from '$lib/server/http/api'; +import type { MetadataProviderId } from '$lib/types/Metadata/Provider'; + +function candidate(providerId: MetadataProviderId, title: string): MetadataCandidate { + return { + providerId, + providerScore: 1, + identifiers: { + isbn10: null, + isbn13: '9780593135211', + asin: null, + googleBooksId: providerId === 'googlebooks' ? 'gb-project-hail-mary' : null, + openLibraryKey: providerId === 'openlibrary' ? 'OLproject' : null, + hardcoverId: providerId === 'hardcover' ? 
'123' : null + }, + title, + subtitle: null, + authors: ['Andy Weir'], + description: 'A rescue mission in deep space.', + descriptionFormat: 'text', + subjects: [], + series: null, + seriesIndex: null, + publisher: 'Ballantine Books', + publishedDate: { year: 2021, month: 5, day: 4 }, + language: 'en', + pageCount: 496, + covers: [], + rating: { average: 4.5, count: 100 }, + sourceUrl: null + }; +} + +class CapturingProvider implements MetadataProviderPort { + readonly capabilities: MetadataProviderCapabilities = { + touchedFields: new Set(['title']), + hasCover: false, + hasRating: true, + requiresIsbn: false + }; + query: MetadataQuery | null = null; + + constructor(readonly id: MetadataProviderId) {} + + async lookup(query: MetadataQuery): Promise> { + this.query = query; + return apiOk([candidate(this.id, query.title ?? 'Unknown')]); + } +} + +describe('ExternalBookMetadataService', () => { + test('uses the injected metadata aggregator providers', async () => { + const hardcoverProvider = new CapturingProvider('hardcover'); + const service = new ExternalBookMetadataService( + new MetadataAggregatorService([hardcoverProvider]) + ); + + const metadata = await service.lookup({ + title: 'Project Hail Mary', + author: 'Andy Weir', + identifier: '9780593135211', + language: 'en' + }); + + assert.deepEqual(hardcoverProvider.query, { + title: 'Project Hail Mary', + author: 'Andy Weir', + isbn: '9780593135211', + language: 'en' + }); + assert.equal(metadata.identifier, '9780593135211'); + assert.equal(metadata.publisher, 'Ballantine Books'); + assert.equal(metadata.googleBooksId, null); + assert.equal(metadata.openLibraryKey, null); + }); + + test('has no hardcoded provider fallback when no aggregator is injected', async () => { + const service = new ExternalBookMetadataService(); + + const metadata = await service.lookup({ + title: 'Project Hail Mary', + author: 'Andy Weir', + identifier: '9780593135211', + language: 'en' + }); + + assert.equal(metadata.identifier, 
'9780593135211'); + assert.equal(metadata.publisher, null); + assert.equal(metadata.googleBooksId, null); + assert.equal(metadata.openLibraryKey, null); + }); +}); diff --git a/sake/tests/metadata/hardcoverMetadataProvider.test.ts b/sake/tests/metadata/hardcoverMetadataProvider.test.ts new file mode 100644 index 0000000..2b5391e --- /dev/null +++ b/sake/tests/metadata/hardcoverMetadataProvider.test.ts @@ -0,0 +1,112 @@ +import assert from 'node:assert/strict'; +import { afterEach, describe, test } from 'node:test'; +import { HardcoverMetadataProvider } from '$lib/server/infrastructure/metadata-providers/hardcoverMetadataProvider'; + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { 'content-type': 'application/json' } + }); +} + +describe('HardcoverMetadataProvider', () => { + const originalFetch = globalThis.fetch; + const originalApiToken = process.env.HARDCOVER_API_TOKEN; + + afterEach(() => { + globalThis.fetch = originalFetch; + if (originalApiToken === undefined) { + delete process.env.HARDCOVER_API_TOKEN; + } else { + process.env.HARDCOVER_API_TOKEN = originalApiToken; + } + }); + + test('maps Hardcover search result documents to metadata candidates', async () => { + globalThis.fetch = async ( + input: RequestInfo | URL, + init?: RequestInit + ): Promise => { + assert.equal(String(input), 'https://api.hardcover.app/v1/graphql'); + const headers = init?.headers as Record; + assert.equal(headers.Authorization, 'Bearer test-token'); + const body = JSON.parse(String(init?.body)) as { + query: string; + variables: { query: string; limit: number }; + }; + assert.match(body.query, /search\(query: \$query, query_type: "Book"/); + assert.equal(body.variables.query, 'Project Hail Mary'); + assert.equal(body.variables.limit, 5); + + return jsonResponse({ + data: { + search: { + ids: [427578], + results: { + hits: [ + { + document: { + id: '427578', + title: 'Project Hail Mary', + 
subtitle: 'A Novel', + author_names: ['Andy Weir'], + description: 'A rescue mission in deep space.', + genres: ['Science Fiction', 'Fiction'], + tags: ['Space'], + image: { + url: 'https://assets.hardcover.app/editions/3274049/project.jpg', + width: 994, + height: 1500 + }, + isbns: ['0593135210', '9780593135211'], + pages: 496, + rating: 4.496923319659198, + ratings_count: 6338, + release_date: '2021-01-01', + series_names: [], + slug: 'project-hail-mary' + } + } + ] + } + } + } + }); + }; + + const provider = new HardcoverMetadataProvider('test-token'); + const result = await provider.lookup({ + title: 'Project Hail Mary', + author: 'Andy Weir', + isbn: '9780593135211', + language: 'en' + }); + + assert.equal(result.ok, true); + if (!result.ok) return; + + const candidate = result.value[0]; + assert.equal(candidate?.providerId, 'hardcover'); + assert.equal(candidate?.identifiers.hardcoverId, '427578'); + assert.equal(candidate?.identifiers.isbn13, '9780593135211'); + assert.equal(candidate?.identifiers.isbn10, '0593135210'); + assert.equal(candidate?.title, 'Project Hail Mary'); + assert.equal(candidate?.subtitle, 'A Novel'); + assert.deepEqual(candidate?.authors, ['Andy Weir']); + assert.equal(candidate?.description, 'A rescue mission in deep space.'); + assert.equal(candidate?.pageCount, 496); + assert.equal(candidate?.publishedDate.year, 2021); + assert.deepEqual(candidate?.covers, [ + { + url: 'https://assets.hardcover.app/editions/3274049/project.jpg', + source: 'hardcover', + width: 994, + height: 1500 + } + ]); + assert.equal(candidate?.rating.average, 4.496923319659198); + assert.equal(candidate?.rating.count, 6338); + assert.equal(candidate?.sourceUrl, 'https://hardcover.app/books/project-hail-mary'); + assert.ok((candidate?.providerScore ?? 
0) >= 10); + }); +}); diff --git a/sake/tests/metadata/openLibraryMetadataProvider.test.ts b/sake/tests/metadata/openLibraryMetadataProvider.test.ts new file mode 100644 index 0000000..66175a2 --- /dev/null +++ b/sake/tests/metadata/openLibraryMetadataProvider.test.ts @@ -0,0 +1,112 @@ +import assert from 'node:assert/strict'; +import { afterEach, describe, test } from 'node:test'; +import { OpenLibraryMetadataProvider } from '$lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider'; + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { 'content-type': 'application/json' } + }); +} + +describe('OpenLibraryMetadataProvider', () => { + const originalFetch = globalThis.fetch; + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + test('runs fallback query variants and prefers the richest non-empty result', async () => { + const requestedQueries: string[] = []; + + globalThis.fetch = async (input: RequestInfo | URL): Promise => { + const requestUrl = new URL(String(input)); + assert.equal(requestUrl.origin, 'https://openlibrary.org'); + assert.equal(requestUrl.pathname, '/search.json'); + requestedQueries.push(requestUrl.searchParams.get('q') ?? ''); + + const query = requestUrl.searchParams.get('q') ?? ''; + if (query === 'Harry Potter und der Stein der Weisen') { + return jsonResponse({ + docs: [ + { + key: '/works/OL-title-only', + title: 'Harry Potter und der Stein der Weisen', + author_name: ['Different Author'], + language: ['ger'] + } + ] + }); + } + + if (query === 'Harry Potter und der Stein der Weisen language:de') { + return jsonResponse({ + docs: [ + { + key: '/works/OL-title-language', + title: 'Harry Potter und der Stein der Weisen', + author_name: ['J. K. Rowling'], + language: ['ger'] + } + ] + }); + } + + if (query === 'Harry Potter und der Stein der Weisen Joanne K. 
Rowling language:de') { + return jsonResponse({ + docs: [ + { + key: '/works/OL-richest', + title: 'Harry Potter und der Stein der Weisen', + author_name: ['J. K. Rowling'], + language: ['ger'], + isbn: ['9783551551672'], + cover_i: 123 + } + ] + }); + } + + throw new Error(`Unexpected query: ${query}`); + }; + + const provider = new OpenLibraryMetadataProvider(); + const result = await provider.lookup({ + title: 'Harry Potter und der Stein der Weisen', + author: 'Joanne K. Rowling', + isbn: null, + language: 'de' + }); + + assert.equal(result.ok, true); + if (!result.ok) return; + + assert.deepEqual(requestedQueries.sort(), [ + 'Harry Potter und der Stein der Weisen', + 'Harry Potter und der Stein der Weisen Joanne K. Rowling language:de', + 'Harry Potter und der Stein der Weisen language:de' + ]); + assert.equal(result.value.length, 1); + assert.equal(result.value[0]?.identifiers.openLibraryKey, '/works/OL-richest'); + assert.equal(result.value[0]?.identifiers.isbn13, '9783551551672'); + assert.equal(result.value[0]?.covers[0]?.url, 'https://covers.openlibrary.org/b/id/123-L.jpg'); + }); + + test('skips language and author variants when their query inputs are missing', async () => { + const requestedQueries: string[] = []; + + globalThis.fetch = async (input: RequestInfo | URL): Promise => { + const requestUrl = new URL(String(input)); + requestedQueries.push(requestUrl.searchParams.get('q') ?? 
''); + return jsonResponse({ docs: [] }); + }; + + const provider = new OpenLibraryMetadataProvider(); + const result = await provider.lookup({ + title: 'Dune' + }); + + assert.equal(result.ok, true); + assert.deepEqual(requestedQueries, ['Dune']); + }); +}); From 745e44f7d78c6e6336c266c85cbde4eca9439f43 Mon Sep 17 00:00:00 2001 From: Sascha Date: Tue, 19 May 2026 20:46:10 +0200 Subject: [PATCH 5/5] fix --- sake/src/lib/server/application/composition.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sake/src/lib/server/application/composition.ts b/sake/src/lib/server/application/composition.ts index c080d34..4914740 100644 --- a/sake/src/lib/server/application/composition.ts +++ b/sake/src/lib/server/application/composition.ts @@ -142,7 +142,9 @@ export const queueDownloadUseCase = new QueueDownloadUseCase(downloadQueue); export const queueSearchBookUseCase = new QueueSearchBookUseCase(downloadQueue); export const getQueueStatusUseCase = new GetQueueStatusUseCase(downloadQueue); export const zlibrarySearchUseCase = new ZLibrarySearchUseCase(zlibraryClient); -export const lookupSearchBookMetadataUseCase = new LookupSearchBookMetadataUseCase(); +export const lookupSearchBookMetadataUseCase = new LookupSearchBookMetadataUseCase( + externalBookMetadataService +); const activeSearchProviders = getActivatedSearchProviders(); const searchProviderDependencies = { zlibrary: zlibraryClient }; const activeSearchProviderInstances = createSearchProviders( @@ -208,7 +210,9 @@ export const uploadLibraryBookCoverUseCase = new UploadLibraryBookCoverUseCase( export const putLibraryFileUseCase = new PutLibraryFileUseCase( storage, bookRepository, - managedBookCoverService + managedBookCoverService, + undefined, + externalBookMetadataService ); export const exportDeviceLibraryBookUseCase = new ExportDeviceLibraryBookUseCase( bookRepository,