diff --git a/sake/src/lib/server/application/composition.ts b/sake/src/lib/server/application/composition.ts index 1cc62a7..68a62c2 100644 --- a/sake/src/lib/server/application/composition.ts +++ b/sake/src/lib/server/application/composition.ts @@ -84,6 +84,10 @@ import { DeleteDeviceUseCase } from '$lib/server/application/use-cases/DeleteDev import { GetAppVersionUseCase } from '$lib/server/application/use-cases/GetAppVersionUseCase'; import { getActivatedSearchProviders } from '$lib/server/config/activatedProviders'; import { SEARCH_PROVIDER_IDS } from '$lib/types/Search/Provider'; +import { MetadataAggregatorService } from '$lib/server/application/services/MetadataAggregatorService'; +import { ExternalBookMetadataService } from '$lib/server/application/services/ExternalBookMetadataService'; +import { GoogleBooksMetadataProvider } from '$lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider'; +import { OpenLibraryMetadataProvider } from '$lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider'; import { ManagedBookCoverService } from '$lib/server/application/services/ManagedBookCoverService'; import { GetLibraryCoverUseCase } from '$lib/server/application/use-cases/GetLibraryCoverUseCase'; import { ImportLibraryBookCoverUseCase } from '$lib/server/application/use-cases/ImportLibraryBookCoverUseCase'; @@ -113,12 +117,22 @@ export const deviceProgressDownloadRepository = new DeviceProgressDownloadReposi export const bookProgressHistoryRepository = new BookProgressHistoryRepository(); export const managedBookCoverService = new ManagedBookCoverService(storage); +export const baselineMetadataAggregator = new MetadataAggregatorService([ + new GoogleBooksMetadataProvider(), + new OpenLibraryMetadataProvider() +]); +export const externalBookMetadataService = new ExternalBookMetadataService( + baselineMetadataAggregator +); + export const downloadBookUseCase = new DownloadBookUseCase( zlibraryClient, bookRepository, storage, () => 
DavUploadServiceFactory.createS3(), - managedBookCoverService + managedBookCoverService, + undefined, + externalBookMetadataService ); export const queueDownloadUseCase = new QueueDownloadUseCase(downloadQueue); export const queueSearchBookUseCase = new QueueSearchBookUseCase(downloadQueue); @@ -152,7 +166,8 @@ export const getLibraryBookDetailUseCase = new GetLibraryBookDetailUseCase( shelfRepository ); export const refetchLibraryBookMetadataUseCase = new RefetchLibraryBookMetadataUseCase( - bookRepository + bookRepository, + externalBookMetadataService ); export const getNewBooksForDeviceUseCase = new GetNewBooksForDeviceUseCase(bookRepository); export const confirmDownloadUseCase = new ConfirmDownloadUseCase(deviceDownloadRepository); diff --git a/sake/src/lib/server/application/ports/MetadataProviderPort.ts b/sake/src/lib/server/application/ports/MetadataProviderPort.ts new file mode 100644 index 0000000..a574e7f --- /dev/null +++ b/sake/src/lib/server/application/ports/MetadataProviderPort.ts @@ -0,0 +1,61 @@ +import type { ApiResult } from '$lib/server/http/api'; +import type { MetadataProviderId } from '$lib/types/Metadata/Provider'; + +export interface MetadataQuery { + title?: string | null; + author?: string | null; + isbn?: string | null; + language?: string | null; + googleBooksId?: string | null; + openLibraryKey?: string | null; + hardcoverId?: string | null; + limit?: number; +} + +export interface MetadataCoverCandidate { + url: string; + width?: number; + height?: number; + source: string; +} + +export interface MetadataCandidate { + providerId: MetadataProviderId; + providerScore: number; + identifiers: { + isbn10: string | null; + isbn13: string | null; + asin: string | null; + googleBooksId: string | null; + openLibraryKey: string | null; + hardcoverId: string | null; + }; + title: string; + subtitle: string | null; + authors: string[]; + description: string | null; + descriptionFormat: 'text' | 'html' | 'markdown'; + subjects: string[]; + 
series: string | null; + seriesIndex: number | null; + publisher: string | null; + publishedDate: { year: number | null; month: number | null; day: number | null }; + language: string | null; + pageCount: number | null; + covers: MetadataCoverCandidate[]; + rating: { average: number | null; count: number | null }; + sourceUrl: string | null; +} + +export interface MetadataProviderCapabilities { + touchedFields: ReadonlySet; + hasCover: boolean; + hasRating: boolean; + requiresIsbn: boolean; +} + +export interface MetadataProviderPort { + readonly id: MetadataProviderId; + readonly capabilities: MetadataProviderCapabilities; + lookup(query: MetadataQuery): Promise>; +} diff --git a/sake/src/lib/server/application/services/ExternalBookMetadataService.ts b/sake/src/lib/server/application/services/ExternalBookMetadataService.ts index a20268d..5daefc0 100644 --- a/sake/src/lib/server/application/services/ExternalBookMetadataService.ts +++ b/sake/src/lib/server/application/services/ExternalBookMetadataService.ts @@ -1,4 +1,6 @@ -import { parsePublicationDateString } from '$lib/utils/publicationDate'; +import { MetadataAggregatorService } from '$lib/server/application/services/MetadataAggregatorService'; +import { GoogleBooksMetadataProvider } from '$lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider'; +import { OpenLibraryMetadataProvider } from '$lib/server/infrastructure/metadata-providers/openLibraryMetadataProvider'; export interface ExternalBookMetadata { googleBooksId: string | null; @@ -20,23 +22,13 @@ export interface ExternalBookMetadata { day: number | null; } -const googleBooksApiKey = process.env.GOOGLE_BOOKS_API_KEY?.trim() || ''; - -interface LookupInput { +export interface ExternalBookMetadataLookupInput { title: string; author: string | null; identifier: string | null; language?: string | null; } -function asString(value: unknown): string | null { - return typeof value === 'string' && value.trim().length > 0 ? 
value.trim() : null; -} - -function asNumber(value: unknown): number | null { - return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : null; -} - function pickFirst(...values: Array): T | null { for (const value of values) { if (value !== null && value !== undefined) { @@ -46,393 +38,64 @@ function pickFirst(...values: Array): T | null { return null; } -function parseExternalPublicationDate(value: string | null | undefined): { - year: number | null; - month: number | null; - day: number | null; -} { - return ( - parsePublicationDateString(value) ?? { - year: null, - month: null, - day: null - } - ); -} - -function normalizeForMatch(value: string | null | undefined): string { - if (!value) { - return ''; - } - - return value.toLowerCase().replace(/[^a-z0-9]+/g, ' ').trim(); -} - -function languageTokens(input: string | null | undefined): string[] { - if (!input) { - return []; - } - - const normalized = input.trim().toLowerCase(); - if (!normalized) { - return []; - } - - const mapped = new Set(); - const add = (token: string) => mapped.add(token.toLowerCase()); - - const mapByName: Record = { - english: ['en', 'eng'], - german: ['de', 'deu', 'ger'], - deutsch: ['de', 'deu', 'ger'], - french: ['fr', 'fra', 'fre'], - spanish: ['es', 'spa'], - italian: ['it', 'ita'], - portuguese: ['pt', 'por'], - dutch: ['nl', 'nld', 'dut'], - polish: ['pl', 'pol'], - russian: ['ru', 'rus'], - japanese: ['ja', 'jpn'], - chinese: ['zh', 'zho', 'chi'] - }; - - add(normalized); - for (const token of normalized.split(/[^a-z0-9]+/g)) { - if (token) { - add(token); - } - } - for (const token of mapByName[normalized] ?? []) { - add(token); - } - - return [...mapped]; -} - -function normalizeLanguageToken(value: string | null | undefined): string { - if (!value) { - return ''; - } - - const lower = value.toLowerCase().trim(); - if (!lower) { - return ''; - } - - const parts = lower.split('/').filter(Boolean); - return parts[parts.length - 1] ?? 
lower; -} - -function languageScore(targetLanguageTokens: string[], candidateLanguages: Array): number { - if (targetLanguageTokens.length === 0) { - return 0; +function extractAmazonAsin(identifier: string | null): string | null { + if (!identifier) { + return null; } - - const normalizedCandidates = candidateLanguages - .map((value) => normalizeLanguageToken(value)) - .filter((token) => token.length > 0); - - if (normalizedCandidates.length === 0) { - return 0; + const trimmed = identifier.trim(); + if (/^[A-Z0-9]{10}$/i.test(trimmed)) { + return trimmed.toUpperCase(); } - - const matched = normalizedCandidates.some((token) => { - if (targetLanguageTokens.includes(token)) { - return true; - } - if (token.length >= 2 && targetLanguageTokens.includes(token.slice(0, 2))) { - return true; - } - return false; - }); - - return matched ? 4 : -4; + return null; } export class ExternalBookMetadataService { - async lookup(input: LookupInput): Promise { - const [google, openLibrary] = await Promise.all([ - this.lookupGoogleBooks(input), - this.lookupOpenLibrary(input) - ]); - - const amazonAsin = this.extractAmazonAsin(input.identifier); + private readonly aggregator: MetadataAggregatorService; - return { - googleBooksId: google.id, - openLibraryKey: openLibrary.key, - amazonAsin, - cover: pickFirst(google.cover, openLibrary.cover), - description: pickFirst(google.description, openLibrary.description), - publisher: pickFirst(google.publisher, openLibrary.publisher), - series: pickFirst(google.series, openLibrary.series), - volume: pickFirst(google.volume, openLibrary.volume), - seriesIndex: pickFirst(google.seriesIndex, openLibrary.seriesIndex), - edition: pickFirst(google.edition, openLibrary.edition), - identifier: pickFirst(google.identifier, openLibrary.identifier, input.identifier), - pages: pickFirst(google.pages, openLibrary.pages), - externalRating: pickFirst(google.externalRating, openLibrary.externalRating), - externalRatingCount: 
pickFirst(google.externalRatingCount, openLibrary.externalRatingCount), - year: pickFirst(google.year, openLibrary.year), - month: pickFirst(google.month, openLibrary.month), - day: pickFirst(google.day, openLibrary.day) - }; + constructor(aggregator?: MetadataAggregatorService) { + this.aggregator = + aggregator ?? + new MetadataAggregatorService([ + new GoogleBooksMetadataProvider(), + new OpenLibraryMetadataProvider() + ]); } - private async lookupGoogleBooks(input: LookupInput): Promise<{ - id: string | null; - cover: string | null; - description: string | null; - publisher: string | null; - series: string | null; - volume: string | null; - seriesIndex: number | null; - edition: string | null; - identifier: string | null; - pages: number | null; - externalRating: number | null; - externalRatingCount: number | null; - year: number | null; - month: number | null; - day: number | null; - }> { - const queryParts = [`intitle:${input.title}`]; - if (input.author) { - queryParts.push(`inauthor:${input.author}`); - } - if (input.identifier) { - queryParts.push(`isbn:${input.identifier}`); - } - const query = encodeURIComponent(queryParts.join(' ')); - const langRestrict = languageTokens(input.language).find((token) => token.length === 2) ?? ''; - const langPart = langRestrict ? `&langRestrict=${encodeURIComponent(langRestrict)}` : ''; - const keyPart = googleBooksApiKey ? 
`&key=${encodeURIComponent(googleBooksApiKey)}` : ''; - const url = `https://www.googleapis.com/books/v1/volumes?q=${query}&maxResults=5${langPart}${keyPart}`; - - try { - const response = await fetch(url); - if (!response.ok) { - return this.emptyGoogle(); - } + async lookup(input: ExternalBookMetadataLookupInput): Promise { + const { candidates } = await this.aggregator.lookup({ + title: input.title, + author: input.author, + isbn: input.identifier, + language: input.language + }); - const payload = (await response.json()) as { - items?: Array<{ - id?: string; - volumeInfo?: { - title?: string; - subtitle?: string; - authors?: string[]; - language?: string; - publisher?: string; - description?: string; - pageCount?: number; - averageRating?: number; - ratingsCount?: number; - publishedDate?: string; - imageLinks?: { thumbnail?: string; smallThumbnail?: string }; - industryIdentifiers?: Array<{ type?: string; identifier?: string }>; - }; - }>; - }; + const googleCandidate = candidates.find((c) => c.providerId === 'googlebooks'); + const olCandidate = candidates.find((c) => c.providerId === 'openlibrary'); - const items = payload.items ?? []; - if (items.length === 0) { - return this.emptyGoogle(); - } + const bestIsbn13 = pickFirst(...candidates.map((c) => c.identifiers.isbn13)); + const bestIsbn10 = pickFirst(...candidates.map((c) => c.identifiers.isbn10)); + const bestIsbn = pickFirst(bestIsbn13, bestIsbn10, input.identifier); - const normalizedTitle = normalizeForMatch(input.title); - const normalizedAuthor = normalizeForMatch(input.author); - const targetLanguages = languageTokens(input.language); - const scoreGoogleItem = (item: (typeof items)[number]): number => { - const title = normalizeForMatch(item.volumeInfo?.title); - const authors = item.volumeInfo?.authors ?? 
[]; - const hasTitleMatch = normalizedTitle.length > 0 && title.includes(normalizedTitle); - const hasAuthorMatch = - normalizedAuthor.length > 0 && - authors.some((author) => normalizeForMatch(author).includes(normalizedAuthor)); - const pages = asNumber(item.volumeInfo?.pageCount); - const langScore = languageScore(targetLanguages, [item.volumeInfo?.language]); - return (hasTitleMatch ? 5 : 0) + (hasAuthorMatch ? 3 : 0) + (pages ? 2 : 0) + langScore; - }; + const bestCoverUrl = pickFirst(...candidates.map((c) => c.covers[0]?.url ?? null)); - const best = [...items].sort((a, b) => scoreGoogleItem(b) - scoreGoogleItem(a))[0] ?? items[0]; - const pageSource = items.find((item) => asNumber(item.volumeInfo?.pageCount) !== null) ?? best; - const publishedDate = parseExternalPublicationDate(best.volumeInfo?.publishedDate); - - const identifiers = best.volumeInfo?.industryIdentifiers ?? []; - const isbn13 = identifiers.find((item) => item.type === 'ISBN_13')?.identifier; - const isbn10 = identifiers.find((item) => item.type === 'ISBN_10')?.identifier; - return { - id: asString(best.id), - cover: - asString(best.volumeInfo?.imageLinks?.thumbnail) ?? - asString(best.volumeInfo?.imageLinks?.smallThumbnail), - description: asString(best.volumeInfo?.description), - publisher: asString(best.volumeInfo?.publisher), - series: null, - volume: null, - seriesIndex: null, - edition: asString(best.volumeInfo?.subtitle), - identifier: asString(isbn13) ?? asString(isbn10), - pages: asNumber(pageSource.volumeInfo?.pageCount), - externalRating: asNumber(best.volumeInfo?.averageRating), - externalRatingCount: asNumber(best.volumeInfo?.ratingsCount), - year: publishedDate.year, - month: publishedDate.month, - day: publishedDate.day - }; - } catch { - return this.emptyGoogle(); - } - } - - private emptyGoogle() { return { - id: null, - cover: null, - description: null, - publisher: null, - series: null, + googleBooksId: googleCandidate?.identifiers.googleBooksId ?? 
null, + openLibraryKey: olCandidate?.identifiers.openLibraryKey ?? null, + amazonAsin: extractAmazonAsin(input.identifier), + cover: bestCoverUrl, + description: pickFirst(...candidates.map((c) => c.description)), + publisher: pickFirst(...candidates.map((c) => c.publisher)), + series: pickFirst(...candidates.map((c) => c.series)), volume: null, - seriesIndex: null, - edition: null, - identifier: null, - pages: null, - externalRating: null, - externalRatingCount: null, - year: null, - month: null, - day: null + seriesIndex: pickFirst(...candidates.map((c) => c.seriesIndex)), + edition: pickFirst(...candidates.map((c) => c.subtitle)), + identifier: bestIsbn, + pages: pickFirst(...candidates.map((c) => c.pageCount)), + externalRating: pickFirst(...candidates.map((c) => c.rating.average)), + externalRatingCount: pickFirst(...candidates.map((c) => c.rating.count)), + year: pickFirst(...candidates.map((c) => c.publishedDate.year)), + month: pickFirst(...candidates.map((c) => c.publishedDate.month)), + day: pickFirst(...candidates.map((c) => c.publishedDate.day)) }; } - - private async lookupOpenLibrary(input: LookupInput): Promise<{ - key: string | null; - cover: string | null; - description: string | null; - publisher: string | null; - series: string | null; - volume: string | null; - seriesIndex: number | null; - edition: string | null; - identifier: string | null; - pages: number | null; - externalRating: number | null; - externalRatingCount: number | null; - year: number | null; - month: number | null; - day: number | null; - }> { - const targetLanguages = languageTokens(input.language); - const preferredLanguage = - targetLanguages.find((token) => token.length === 3) ?? - targetLanguages.find((token) => token.length === 2) ?? - ''; - const queryBase = `${input.title}${input.author ? ` ${input.author}` : ''}`.trim(); - const query = encodeURIComponent( - preferredLanguage ? 
`${queryBase} language:${preferredLanguage}` : queryBase - ); - const url = - `https://openlibrary.org/search.json?q=${query}&limit=5&fields=key,title,author_name,language,cover_i,isbn,publisher,first_sentence,ratings_average,ratings_count,number_of_pages_median`; - try { - const response = await fetch(url); - if (!response.ok) { - return this.emptyOpenLibrary(); - } - - const payload = (await response.json()) as { - docs?: Array<{ - key?: string; - title?: string; - author_name?: string[]; - language?: string[]; - cover_i?: number; - isbn?: string[]; - publisher?: string[]; - first_sentence?: string | { value?: string }; - ratings_average?: number; - ratings_count?: number; - number_of_pages_median?: number; - }>; - }; - const docs = payload.docs ?? []; - if (docs.length === 0) { - return this.emptyOpenLibrary(); - } - - const normalizedTitle = normalizeForMatch(input.title); - const normalizedAuthor = normalizeForMatch(input.author); - const scoreOpenLibraryDoc = (doc: (typeof docs)[number]): number => { - const title = normalizeForMatch(doc.title); - const authors = doc.author_name ?? []; - const hasTitleMatch = normalizedTitle.length > 0 && title.includes(normalizedTitle); - const hasAuthorMatch = - normalizedAuthor.length > 0 && - authors.some((author) => normalizeForMatch(author).includes(normalizedAuthor)); - const pages = asNumber(doc.number_of_pages_median); - const langScore = languageScore(targetLanguages, doc.language ?? []); - return (hasTitleMatch ? 5 : 0) + (hasAuthorMatch ? 3 : 0) + (pages ? 2 : 0) + langScore; - }; - - const best = [...docs].sort((a, b) => scoreOpenLibraryDoc(b) - scoreOpenLibraryDoc(a))[0] ?? docs[0]; - const pageSource = docs.find((doc) => asNumber(doc.number_of_pages_median) !== null) ?? best; - - const firstSentence = - typeof best.first_sentence === 'string' - ? best.first_sentence - : asString(best.first_sentence?.value); - - return { - key: asString(best.key), - cover: typeof best.cover_i === 'number' ? 
`https://covers.openlibrary.org/b/id/${best.cover_i}-L.jpg` : null, - description: asString(firstSentence), - publisher: asString(best.publisher?.[0]), - series: null, - volume: null, - seriesIndex: null, - edition: null, - identifier: asString(best.isbn?.[0]), - pages: asNumber(pageSource.number_of_pages_median), - externalRating: asNumber(best.ratings_average), - externalRatingCount: asNumber(best.ratings_count), - year: null, - month: null, - day: null - }; - } catch { - return this.emptyOpenLibrary(); - } - } - - private emptyOpenLibrary() { - return { - key: null, - cover: null, - description: null, - publisher: null, - series: null, - volume: null, - seriesIndex: null, - edition: null, - identifier: null, - pages: null, - externalRating: null, - externalRatingCount: null, - year: null, - month: null, - day: null - }; - } - - private extractAmazonAsin(identifier: string | null): string | null { - if (!identifier) { - return null; - } - - const trimmed = identifier.trim(); - if (/^[A-Z0-9]{10}$/i.test(trimmed)) { - return trimmed.toUpperCase(); - } - return null; - } } diff --git a/sake/src/lib/server/application/services/MetadataAggregatorService.ts b/sake/src/lib/server/application/services/MetadataAggregatorService.ts new file mode 100644 index 0000000..91ff987 --- /dev/null +++ b/sake/src/lib/server/application/services/MetadataAggregatorService.ts @@ -0,0 +1,136 @@ +import type { + MetadataCandidate, + MetadataProviderPort, + MetadataQuery +} from '$lib/server/application/ports/MetadataProviderPort'; +import type { MetadataProviderId } from '$lib/types/Metadata/Provider'; +import { languageTokens, normalizeForMatch } from '$lib/server/infrastructure/metadata-providers/metadataProviderUtils'; + +const DEFAULT_TIMEOUT_MS = 8_000; + +export interface MetadataAggregatorResult { + candidates: MetadataCandidate[]; + providerErrors: Array<{ providerId: MetadataProviderId; message: string }>; +} + +function readTimeoutMs(): number { + const raw = 
process.env.METADATA_PROVIDER_TIMEOUT_MS; + if (!raw) { + return DEFAULT_TIMEOUT_MS; + } + const parsed = Number.parseInt(raw, 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_TIMEOUT_MS; +} + +function aggregatorScore(candidate: MetadataCandidate, query: MetadataQuery): number { + let score = 0; + + if (query.isbn) { + const { isbn10, isbn13 } = candidate.identifiers; + if ((isbn13 && isbn13 === query.isbn) || (isbn10 && isbn10 === query.isbn)) { + score += 4; + } + } + + if (candidate.covers.length > 0) { + score += 2; + } + + if (query.language && candidate.language) { + const targetTokens = languageTokens(query.language); + const candidateTokens = languageTokens(candidate.language); + score += candidateTokens.some((t) => targetTokens.includes(t)) ? 2 : -1; + } + + if (query.title && candidate.title) { + const normalizedQuery = normalizeForMatch(query.title); + const normalizedTitle = normalizeForMatch(candidate.title); + if (normalizedQuery.length > 0 && normalizedTitle.includes(normalizedQuery)) { + score += 1; + } + } + + if (candidate.description) { + score += Math.min(candidate.description.length / 1000, 1); + } + + return score; +} + +function withTimeout(promise: Promise, ms: number, label: string): Promise { + let timeoutId: ReturnType | null = null; + + const timeoutPromise = new Promise((_, reject) => { + timeoutId = setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms); + }); + + return Promise.race([promise, timeoutPromise]).finally(() => { + if (timeoutId !== null) { + clearTimeout(timeoutId); + } + }); +} + +export class MetadataAggregatorService { + constructor(private readonly providers: MetadataProviderPort[]) {} + + async lookup(query: MetadataQuery): Promise { + const timeoutMs = readTimeoutMs(); + + const settled = await Promise.allSettled( + this.providers.map((provider) => + withTimeout( + provider.lookup(query), + timeoutMs, + `Metadata provider '${provider.id}'` + ) + ) + ); + + const 
allCandidates: MetadataCandidate[] = []; + const providerErrors: Array<{ providerId: MetadataProviderId; message: string }> = []; + + for (let i = 0; i < settled.length; i++) { + const result = settled[i]; + const provider = this.providers[i]; + if (!provider) continue; + + if (result.status === 'rejected') { + providerErrors.push({ + providerId: provider.id, + message: + result.reason instanceof Error + ? result.reason.message + : String(result.reason) + }); + continue; + } + + const apiResult = result.value; + if (!apiResult.ok) { + providerErrors.push({ + providerId: provider.id, + message: apiResult.error.message + }); + continue; + } + + allCandidates.push(...apiResult.value); + } + + const ranked = allCandidates + .map((candidate) => ({ + candidate, + score: aggregatorScore(candidate, query) + })) + .sort((a, b) => { + if (b.score !== a.score) { + return b.score - a.score; + } + return b.candidate.providerScore - a.candidate.providerScore; + }) + .map(({ candidate }) => candidate); + + return { candidates: ranked, providerErrors }; + } +} diff --git a/sake/src/lib/server/config/activatedMetadataProviders.ts b/sake/src/lib/server/config/activatedMetadataProviders.ts new file mode 100644 index 0000000..ccf9a0f --- /dev/null +++ b/sake/src/lib/server/config/activatedMetadataProviders.ts @@ -0,0 +1,47 @@ +import { env } from '$env/dynamic/private'; +import { METADATA_PROVIDER_IDS, type MetadataProviderId } from '$lib/types/Metadata/Provider'; + +const PROVIDER_ALIASES: Record = { + googlebooks: 'googlebooks', + google: 'googlebooks', + 'google-books': 'googlebooks', + openlibrary: 'openlibrary', + openlib: 'openlibrary', + 'open-library': 'openlibrary', + hardcover: 'hardcover', + isbndb: 'isbndb', + isbn: 'isbndb' +}; + +function normalizeMetadataProviderToken(value: string): MetadataProviderId | null { + const normalized = value.trim().toLowerCase(); + if (!normalized) { + return null; + } + return PROVIDER_ALIASES[normalized] ?? 
null; +} + +export function parseActivatedMetadataProviders( + rawValue: string | undefined | null +): MetadataProviderId[] { + if (!rawValue) { + return []; + } + + const parsed = rawValue + .split(',') + .map((entry) => normalizeMetadataProviderToken(entry)) + .filter((entry): entry is MetadataProviderId => entry !== null); + + return [...new Set(parsed)]; +} + +export function getActivatedMetadataProviders(): MetadataProviderId[] { + return parseActivatedMetadataProviders(env.ACTIVATED_METADATA_PROVIDERS); +} + +export function isMetadataLookupEnabled(): boolean { + return getActivatedMetadataProviders().length > 0; +} + +export { METADATA_PROVIDER_IDS }; diff --git a/sake/src/lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider.ts b/sake/src/lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider.ts new file mode 100644 index 0000000..2577caf --- /dev/null +++ b/sake/src/lib/server/infrastructure/metadata-providers/googleBooksMetadataProvider.ts @@ -0,0 +1,171 @@ +import { apiError, apiOk, type ApiResult } from '$lib/server/http/api'; +import type { + MetadataCandidate, + MetadataProviderCapabilities, + MetadataProviderPort, + MetadataQuery +} from '$lib/server/application/ports/MetadataProviderPort'; +import type { MetadataProviderId } from '$lib/types/Metadata/Provider'; +import { + asNonNegativeNumber, + asPositiveNumber, + asString, + languageScore, + languageTokens, + normalizeForMatch, + parseProviderPublicationDate +} from './metadataProviderUtils'; + +const TOUCHED_FIELDS = new Set([ + 'title', + 'subtitle', + 'authors', + 'description', + 'publisher', + 'publishedDate', + 'pageCount', + 'covers', + 'rating', + 'identifiers' +]); + +export class GoogleBooksMetadataProvider implements MetadataProviderPort { + readonly id: MetadataProviderId = 'googlebooks'; + + readonly capabilities: MetadataProviderCapabilities = { + touchedFields: TOUCHED_FIELDS, + hasCover: true, + hasRating: true, + requiresIsbn: false + }; + + 
lookup(query: MetadataQuery): Promise> { + return this.fetchCandidates(query); + } + + private async fetchCandidates(query: MetadataQuery): Promise> { + const apiKey = process.env.GOOGLE_BOOKS_API_KEY?.trim() || ''; + const limit = query.limit ?? 5; + + const queryParts: string[] = []; + if (query.title) { + queryParts.push(`intitle:${query.title}`); + } + if (query.author) { + queryParts.push(`inauthor:${query.author}`); + } + if (query.isbn) { + queryParts.push(`isbn:${query.isbn}`); + } + + if (queryParts.length === 0) { + return apiError('No query terms provided', 400); + } + + const q = encodeURIComponent(queryParts.join(' ')); + const langTokens = languageTokens(query.language); + const langRestrict = langTokens.find((t) => t.length === 2) ?? ''; + const langPart = langRestrict ? `&langRestrict=${encodeURIComponent(langRestrict)}` : ''; + const keyPart = apiKey ? `&key=${encodeURIComponent(apiKey)}` : ''; + const url = `https://www.googleapis.com/books/v1/volumes?q=${q}&maxResults=${limit}${langPart}${keyPart}`; + + try { + const response = await fetch(url, { + headers: { 'User-Agent': 'Sake/1.0 (+https://github.com/Sudashiii/Sake)' } + }); + if (!response.ok) { + return apiError(`Google Books API returned ${response.status}`, 502); + } + + const payload = (await response.json()) as { + items?: Array<{ + id?: string; + volumeInfo?: { + title?: string; + subtitle?: string; + authors?: string[]; + language?: string; + publisher?: string; + description?: string; + pageCount?: number; + averageRating?: number; + ratingsCount?: number; + publishedDate?: string; + imageLinks?: { thumbnail?: string; smallThumbnail?: string }; + industryIdentifiers?: Array<{ type?: string; identifier?: string }>; + infoLink?: string; + }; + }>; + }; + + const items = payload.items ?? 
[]; + if (items.length === 0) { + return apiOk([]); + } + + const normalizedTitle = normalizeForMatch(query.title); + const normalizedAuthor = normalizeForMatch(query.author); + const targetLangTokens = languageTokens(query.language); + + const scoreItem = (item: (typeof items)[number]): number => { + const title = normalizeForMatch(item.volumeInfo?.title); + const authors = item.volumeInfo?.authors ?? []; + const hasTitleMatch = normalizedTitle.length > 0 && title.includes(normalizedTitle); + const hasAuthorMatch = + normalizedAuthor.length > 0 && + authors.some((a) => normalizeForMatch(a).includes(normalizedAuthor)); + const pages = asPositiveNumber(item.volumeInfo?.pageCount); + const langScoreVal = languageScore(targetLangTokens, [item.volumeInfo?.language]); + return (hasTitleMatch ? 5 : 0) + (hasAuthorMatch ? 3 : 0) + (pages ? 2 : 0) + langScoreVal; + }; + + const candidates: MetadataCandidate[] = items.map((item) => { + const providerScore = scoreItem(item); + const pubDate = parseProviderPublicationDate(item.volumeInfo?.publishedDate); + const identifiers = item.volumeInfo?.industryIdentifiers ?? []; + const isbn13 = asString(identifiers.find((id) => id.type === 'ISBN_13')?.identifier); + const isbn10 = asString(identifiers.find((id) => id.type === 'ISBN_10')?.identifier); + + const coverUrl = + asString(item.volumeInfo?.imageLinks?.thumbnail) ?? + asString(item.volumeInfo?.imageLinks?.smallThumbnail); + + return { + providerId: 'googlebooks', + providerScore, + identifiers: { + isbn10, + isbn13, + asin: null, + googleBooksId: asString(item.id), + openLibraryKey: null, + hardcoverId: null + }, + title: item.volumeInfo?.title ?? '', + subtitle: asString(item.volumeInfo?.subtitle), + authors: item.volumeInfo?.authors ?? 
[], + description: asString(item.volumeInfo?.description), + descriptionFormat: 'html', + subjects: [], + series: null, + seriesIndex: null, + publisher: asString(item.volumeInfo?.publisher), + publishedDate: pubDate, + language: asString(item.volumeInfo?.language), + pageCount: asPositiveNumber(item.volumeInfo?.pageCount), + covers: coverUrl ? [{ url: coverUrl, source: 'googlebooks' }] : [], + rating: { + average: asNonNegativeNumber(item.volumeInfo?.averageRating), + count: asNonNegativeNumber(item.volumeInfo?.ratingsCount) + }, + sourceUrl: asString(item.volumeInfo?.infoLink) + } satisfies MetadataCandidate; + }); + + candidates.sort((a, b) => b.providerScore - a.providerScore); + return apiOk(candidates); + } catch { + return apiError('Google Books lookup failed', 502); + } + } +} diff --git a/sake/src/lib/server/infrastructure/metadata-providers/metadataProviderFactory.ts b/sake/src/lib/server/infrastructure/metadata-providers/metadataProviderFactory.ts new file mode 100644 index 0000000..6669dfc --- /dev/null +++ b/sake/src/lib/server/infrastructure/metadata-providers/metadataProviderFactory.ts @@ -0,0 +1,25 @@ +import type { MetadataProviderPort } from '$lib/server/application/ports/MetadataProviderPort'; +import type { MetadataProviderId } from '$lib/types/Metadata/Provider'; +import { GoogleBooksMetadataProvider } from './googleBooksMetadataProvider'; +import { OpenLibraryMetadataProvider } from './openLibraryMetadataProvider'; + +export function createMetadataProvider(providerId: MetadataProviderId): MetadataProviderPort { + switch (providerId) { + case 'googlebooks': + return new GoogleBooksMetadataProvider(); + case 'openlibrary': + return new OpenLibraryMetadataProvider(); + case 'hardcover': + throw new Error('Hardcover metadata provider is not yet implemented'); + case 'isbndb': + throw new Error('ISBNdb metadata provider is not yet implemented'); + default: { + const exhaustiveId: never = providerId; + throw new Error(`Unsupported metadata provider: 
/**
 * Coerces an untyped provider field to a trimmed, non-empty string.
 *
 * @param value - Raw value taken from a provider payload.
 * @returns The trimmed string, or `null` when `value` is not a string or is blank.
 */
export function asString(value: unknown): string | null {
	if (typeof value !== 'string') {
		return null;
	}
	const trimmed = value.trim();
	return trimmed.length > 0 ? trimmed : null;
}

/**
 * Coerces an untyped provider field to a finite number strictly greater than zero.
 *
 * @param value - Raw value taken from a provider payload.
 * @returns `value` itself, or `null` for non-numbers, `NaN`, infinities, zero and negatives.
 */
export function asPositiveNumber(value: unknown): number | null {
	return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : null;
}

/**
 * Coerces an untyped provider field to a finite number greater than or equal to zero.
 *
 * @param value - Raw value taken from a provider payload.
 * @returns `value` itself, or `null` for non-numbers, `NaN`, infinities and negatives.
 */
export function asNonNegativeNumber(value: unknown): number | null {
	return typeof value === 'number' && Number.isFinite(value) && value >= 0 ? value : null;
}

/**
 * Returns the first argument that is neither `null` nor `undefined`.
 *
 * Falsy-but-present values (`0`, `''`, `false`) count as values and are returned.
 *
 * @param values - Candidates in priority order.
 * @returns The first present value, or `null` when there is none.
 */
export function pickFirstValue<T>(...values: Array<T | null | undefined>): T | null {
	for (const value of values) {
		if (value !== null && value !== undefined) {
			return value;
		}
	}
	return null;
}

/**
 * Parses a provider-supplied publication date into year/month/day parts.
 *
 * Delegates to `parsePublicationDateString`; when that yields `null`/`undefined`
 * the result is an object whose three parts are all `null`, so callers always
 * receive the same shape.
 *
 * @param value - Raw date string from the provider, if any.
 * @returns The parsed parts, or all-`null` parts when nothing could be parsed.
 */
export function parseProviderPublicationDate(value: string | null | undefined): {
	year: number | null;
	month: number | null;
	day: number | null;
} {
	return (
		parsePublicationDateString(value) ?? {
			year: null,
			month: null,
			day: null
		}
	);
}
/**
 * Language names recognised by `languageTokens`, mapped to the language codes
 * that are added as extra tokens for that name.
 *
 * A `Map` is used instead of an object literal so that inputs such as
 * `constructor` or `__proto__` cannot resolve to inherited `Object.prototype`
 * members.
 */
const LANGUAGE_CODES_BY_NAME: ReadonlyMap<string, readonly string[]> = new Map<
	string,
	readonly string[]
>([
	['english', ['en', 'eng']],
	['german', ['de', 'deu', 'ger']],
	['deutsch', ['de', 'deu', 'ger']],
	['french', ['fr', 'fra', 'fre']],
	['spanish', ['es', 'spa']],
	['italian', ['it', 'ita']],
	['portuguese', ['pt', 'por']],
	['dutch', ['nl', 'nld', 'dut']],
	['polish', ['pl', 'pol']],
	['russian', ['ru', 'rus']],
	['japanese', ['ja', 'jpn']],
	['chinese', ['zh', 'zho', 'chi']]
]);

/**
 * Normalises free text (titles, author names) for loose substring matching.
 *
 * The value is NFKC-normalised and lower-cased, and every run of characters
 * that is not a letter, combining mark or digit in any script collapses to a
 * single space. ASCII-only input yields the same result as an ASCII-only
 * `[a-z0-9]` filter would; accented, Cyrillic, CJK, etc. text is kept instead
 * of being stripped.
 *
 * @param value - Text to normalise.
 * @returns The normalised text, or `''` for `null`, `undefined` or empty input.
 */
export function normalizeForMatch(value: string | null | undefined): string {
	if (!value) {
		return '';
	}
	return value
		.normalize('NFKC')
		.toLowerCase()
		.replace(/[^\p{L}\p{M}\p{N}]+/gu, ' ')
		.trim();
}

/**
 * Expands a language hint into the set of tokens it may be matched against.
 *
 * Tokens are returned de-duplicated in insertion order:
 * 1. the trimmed, lower-cased input itself,
 * 2. each `[a-z0-9]` fragment of it (so `en-US` also yields `en` and `us`),
 * 3. the known codes for the input when it is a recognised language name.
 *
 * @param input - Language name or code as supplied by the caller.
 * @returns The token list, or `[]` for `null`, `undefined` or blank input.
 */
export function languageTokens(input: string | null | undefined): string[] {
	const normalized = input?.trim().toLowerCase() ?? '';
	if (!normalized) {
		return [];
	}

	const tokens = new Set<string>([normalized]);

	for (const fragment of normalized.split(/[^a-z0-9]+/g)) {
		if (fragment) {
			tokens.add(fragment);
		}
	}

	for (const code of LANGUAGE_CODES_BY_NAME.get(normalized) ?? []) {
		tokens.add(code.toLowerCase());
	}

	return [...tokens];
}
/**
 * Reduces a provider language value to a bare lower-case token.
 *
 * Path-like values keep only their last non-empty segment, so
 * `/languages/eng` becomes `eng`. Values with no usable segment (blank, or
 * made of slashes only) yield `''` so that they are treated as "no language".
 */
function normalizeLanguageToken(value: string | null | undefined): string {
	const lower = value?.toLowerCase().trim() ?? '';
	if (!lower) {
		return '';
	}

	const segments = lower.split('/').filter(Boolean);
	return segments[segments.length - 1] ?? '';
}

/**
 * Scores how well a candidate's languages agree with the requested language.
 *
 * A candidate language matches when its normalised token, or the first two
 * characters of that token, is one of `targetLanguageTokens`.
 *
 * @param targetLanguageTokens - Tokens describing the wanted language.
 * @param candidateLanguages - Languages reported for the candidate; entries may
 *   be `null`/`undefined`/blank and are then ignored.
 * @returns `0` when either side carries no usable language information, `4`
 *   when at least one candidate language matches, `-4` when none does.
 */
export function languageScore(
	targetLanguageTokens: readonly string[],
	candidateLanguages: ReadonlyArray<string | null | undefined>
): number {
	if (targetLanguageTokens.length === 0) {
		return 0;
	}

	const candidateTokens = candidateLanguages
		.map((language) => normalizeLanguageToken(language))
		.filter((token) => token.length > 0);

	if (candidateTokens.length === 0) {
		return 0;
	}

	const matched = candidateTokens.some(
		(token) =>
			targetLanguageTokens.includes(token) ||
			// Lets longer forms such as `en-us` or `eng` match a two-letter target.
			(token.length >= 2 && targetLanguageTokens.includes(token.slice(0, 2)))
	);

	return matched ? 4 : -4;
}
const TOUCHED_FIELDS = new Set([
	'title',
	'authors',
	'description',
	'publisher',
	'pageCount',
	'covers',
	'rating',
	'subjects',
	'language',
	'identifiers'
]);

/**
 * ISO 639-2/T codes whose MARC (bibliographic) code differs.
 *
 * NOTE(review): Open Library appears to key languages by MARC code
 * (`/languages/ger`, `/languages/fre`, …), so the terminology code would match
 * nothing in a `language:` filter — confirm against the Search API docs.
 */
const OPEN_LIBRARY_LANGUAGE_CODES: ReadonlyMap<string, string> = new Map<string, string>([
	['deu', 'ger'],
	['fra', 'fre'],
	['nld', 'dut'],
	['zho', 'chi']
]);

/** Subset of an Open Library `search.json` document requested via `fields=`. */
interface OpenLibrarySearchDoc {
	key?: string;
	title?: string;
	author_name?: string[];
	language?: string[];
	cover_i?: number;
	isbn?: string[];
	publisher?: string[];
	// NOTE(review): the search endpoint is believed to return an array of
	// strings here; the string and `{ value }` shapes are kept for safety.
	first_sentence?: string | string[] | { value?: string };
	ratings_average?: number;
	ratings_count?: number;
	number_of_pages_median?: number;
	subject?: string[];
}

/**
 * Metadata provider backed by the Open Library search endpoint.
 *
 * Candidates are ranked by a local score: +5 when the normalised query title is
 * contained in the document title, +3 when the normalised query author is
 * contained in one of the document's authors, +2 when a page count is present,
 * plus the `languageScore` of the document's languages.
 */
export class OpenLibraryMetadataProvider implements MetadataProviderPort {
	readonly id: MetadataProviderId = 'openlibrary';

	readonly capabilities: MetadataProviderCapabilities = {
		touchedFields: TOUCHED_FIELDS,
		hasCover: true,
		hasRating: true,
		requiresIsbn: false
	};

	/**
	 * Looks up candidates for `query`.
	 *
	 * @param query - Search terms; only `title`, `author`, `language` and `limit` are used.
	 * @returns Candidates sorted by descending score, a 400 error when neither
	 *   title nor author is given, or a 502 error when the request fails.
	 */
	lookup(query: MetadataQuery): Promise<ApiResult<MetadataCandidate[]>> {
		return this.fetchCandidates(query);
	}

	/** Performs the HTTP request and maps the returned documents to candidates. */
	private async fetchCandidates(query: MetadataQuery): Promise<ApiResult<MetadataCandidate[]>> {
		const limit = query.limit ?? 5;
		const targetLangTokens = languageTokens(query.language);

		// Prefer a three-letter code, translated to the form Open Library indexes;
		// fall back to a two-letter code, then to no language filter at all.
		const threeLetterCode = targetLangTokens.find((t) => t.length === 3);
		const preferredLanguage =
			(threeLetterCode !== undefined
				? (OPEN_LIBRARY_LANGUAGE_CODES.get(threeLetterCode) ?? threeLetterCode)
				: undefined) ??
			targetLangTokens.find((t) => t.length === 2) ??
			'';

		const queryBase = `${query.title ?? ''}${query.author ? ` ${query.author}` : ''}`.trim();

		if (!queryBase) {
			return apiError('No query terms provided', 400);
		}

		const q = encodeURIComponent(
			preferredLanguage ? `${queryBase} language:${preferredLanguage}` : queryBase
		);
		const url = `https://openlibrary.org/search.json?q=${q}&limit=${limit}&fields=key,title,author_name,language,cover_i,isbn,publisher,first_sentence,ratings_average,ratings_count,number_of_pages_median,subject`;

		try {
			const response = await fetch(url, {
				headers: { 'User-Agent': 'Sake/1.0 (+https://github.com/Sudashiii/Sake)' }
			});
			if (!response.ok) {
				return apiError(`OpenLibrary API returned ${response.status}`, 502);
			}

			const payload = (await response.json()) as { docs?: OpenLibrarySearchDoc[] };

			const docs = payload.docs ?? [];
			if (docs.length === 0) {
				return apiOk([]);
			}

			const normalizedTitle = normalizeForMatch(query.title);
			const normalizedAuthor = normalizeForMatch(query.author);

			const scoreDoc = (doc: OpenLibrarySearchDoc): number => {
				const title = normalizeForMatch(doc.title);
				const authors = doc.author_name ?? [];
				const hasTitleMatch = normalizedTitle.length > 0 && title.includes(normalizedTitle);
				const hasAuthorMatch =
					normalizedAuthor.length > 0 &&
					authors.some((a) => normalizeForMatch(a).includes(normalizedAuthor));
				const pages = asPositiveNumber(doc.number_of_pages_median);
				const langScoreVal = languageScore(targetLangTokens, doc.language ?? []);
				return (hasTitleMatch ? 5 : 0) + (hasAuthorMatch ? 3 : 0) + (pages ? 2 : 0) + langScoreVal;
			};

			const candidates: MetadataCandidate[] = docs.map((doc) => {
				const providerScore = scoreDoc(doc);

				// Accept every shape the field may take; `asString` below rejects
				// anything that is not a non-blank string.
				const rawFirstSentence = doc.first_sentence;
				let firstSentence: unknown;
				if (typeof rawFirstSentence === 'string') {
					firstSentence = rawFirstSentence;
				} else if (Array.isArray(rawFirstSentence)) {
					firstSentence = rawFirstSentence[0];
				} else {
					firstSentence = rawFirstSentence?.value;
				}

				const coverUrl =
					typeof doc.cover_i === 'number'
						? `https://covers.openlibrary.org/b/id/${doc.cover_i}-L.jpg`
						: null;

				// ISBNs are told apart purely by length.
				const isbns = doc.isbn ?? [];
				const isbn13 = asString(isbns.find((isbn) => isbn.length === 13));
				const isbn10 = asString(isbns.find((isbn) => isbn.length === 10));

				return {
					providerId: 'openlibrary',
					providerScore,
					identifiers: {
						isbn10,
						isbn13,
						asin: null,
						googleBooksId: null,
						openLibraryKey: asString(doc.key),
						hardcoverId: null
					},
					title: doc.title ?? '',
					subtitle: null,
					authors: doc.author_name ?? [],
					description: asString(firstSentence),
					descriptionFormat: 'text',
					subjects: doc.subject?.slice(0, 20) ?? [],
					series: null,
					seriesIndex: null,
					publisher: asString(doc.publisher?.[0]),
					publishedDate: { year: null, month: null, day: null },
					language: asString(doc.language?.[0]),
					pageCount: asPositiveNumber(doc.number_of_pages_median),
					covers: coverUrl ? [{ url: coverUrl, source: 'openlibrary' }] : [],
					rating: {
						average: asNonNegativeNumber(doc.ratings_average),
						count: asNonNegativeNumber(doc.ratings_count)
					},
					sourceUrl: doc.key ? `https://openlibrary.org${doc.key}` : null
				} satisfies MetadataCandidate;
			});

			candidates.sort((a, b) => b.providerScore - a.providerScore);
			return apiOk(candidates);
		} catch {
			// Network failures and malformed JSON are reported as a gateway error
			// rather than thrown, so callers only ever deal with an ApiResult.
			return apiError('OpenLibrary lookup failed', 502);
		}
	}
}