sjdonado
diff --git a/‎.env.test‎
Lines changed: 1 addition & 0 deletions b/‎.env.test‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎build.config.ts‎
Lines changed: 5 additions & 3 deletions b/‎build.config.ts‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎src/adapters/pandora.ts‎
Lines changed: 130 additions & 0 deletions b/‎src/adapters/pandora.ts‎
Lines changed: 130 additions & 0 deletions
diff --git a/‎src/config/constants.ts‎
Lines changed: 4 additions & 1 deletion b/‎src/config/constants.ts‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/config/enum.ts‎
Lines changed: 3 additions & 0 deletions b/‎src/config/enum.ts‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/config/env.ts‎
Lines changed: 3 additions & 0 deletions b/‎src/config/env.ts‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/parsers/link.ts‎
Lines changed: 7 additions & 0 deletions b/‎src/parsers/link.ts‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/parsers/pandora.ts‎
Lines changed: 132 additions & 0 deletions b/‎src/parsers/pandora.ts‎
Lines changed: 132 additions & 0 deletions
diff --git a/‎src/schemas/web.schema.ts‎
Lines changed: 1 addition & 1 deletion b/‎src/schemas/web.schema.ts‎
Lines changed: 1 addition & 1 deletion
@@ -30,6 +30,7 @@ BANDCAMP_BASE_URL=https://bandcamp.com/
 DEEZER_API_URL=https://api.deezer.com/search
 APPLE_MUSIC_API_URL=https://music.apple.com/ca
 SOUNDCLOUD_BASE_URL=https://soundcloud.com
+PANDORA_API_URL=https://www.pandora.com/api/v3/sod/search
 
 URL_SHORTENER_API_URL=http://localhost:4000/api/links
 URL_SHORTENER_API_KEY=url_shortener_api_key
 
@@ -18,6 +18,7 @@ Adapters represent the streaming services supported by the Web App and the Rayca
 | SoundCloud       | Yes             | No                     | Yes            |
 | Qobuz            | Yes             | No                     | Yes            |
 | Bandcamp         | Yes             | No                     | Yes            |
+| Pandora          | Yes             | No                     | Yes            |
 
 ## Disclaimer: Search-based results
 
 
@@ -135,17 +135,19 @@ async function buildAssets(options: BuildOptions = {}) {
   if (options.watch) {
     console.log('👀 Starting watch mode...');
 
-    // Initial build
+    // Initial builds
     try {
+      // Build CSS once before starting watcher
+      await buildCSS({ ...options, watch: false });
       await buildJS(options);
-      console.log('✅ Initial JS build completed');
+      console.log('✅ Initial build completed');
     } catch (error) {
       console.error('Initial build failed:', error);
       process.exit(1);
     }
 
     // Start watchers (both return processes that stay alive)
-    const cssProc = await buildCSS(options);
+    const cssProc = await buildCSS({ ...options, watch: true });
     const jsProc = await watchJS(options);
 
     console.log('✅ Watch mode started - files will rebuild on changes');
 
@@ -0,0 +1,130 @@
+import { compareTwoStrings } from 'string-similarity';
+
+import {
+  ADAPTERS_QUERY_LIMIT,
+  RESPONSE_COMPARE_MIN_INCLUSION_SCORE,
+  RESPONSE_COMPARE_MIN_SCORE,
+} from '~/config/constants';
+import { Adapter, MetadataType } from '~/config/enum';
+import { ENV } from '~/config/env';
+import { cacheSearchResultLink, getCachedSearchResultLink } from '~/services/cache';
+import type { SearchMetadata, SearchResultLink } from '~/services/search';
+import HttpClient from '~/utils/http-client';
+import { logger } from '~/utils/logger';
+
+/**
+ * Two-letter Pandora type codes keyed by `MetadataType`.
+ * `getPandoraLink` sends the matching code in the `types` field of the search request
+ * and returns `null` for any metadata type that has no entry here.
+ */
+const PANDORA_SEARCH_TYPES = {
+  [MetadataType.Song]: 'TR',
+  [MetadataType.Album]: 'AL',
+  [MetadataType.Playlist]: 'PL',
+  [MetadataType.Artist]: 'AR',
+  [MetadataType.Show]: 'PC',
+  [MetadataType.Podcast]: 'PE',
+};
+
+/**
+ * Shape of the Pandora search response as used by this adapter.
+ * Each entry in `results` is looked up as a key in `annotations` to get that item's details.
+ */
+interface PandoraSearchResponse {
+  searchToken: string;
+  // These results would be needlessly verbose to model, and the fields vary a lot by type...
+  annotations: Record<string, any>;
+  results: string[];
+}
+
+/**
+ * JSON body that `getPandoraLink` POSTs to the Pandora search endpoint.
+ * NOTE(review): the meaning of each field is inferred from its name only — confirm against
+ * the API's observed behavior before changing any of the fixed values.
+ */
+interface PandoraSearchRequest {
+  query: string;
+  types: string[];
+  listener: null;
+  start: number;
+  count: number;
+  annotate: true;
+  searchTime: number;
+  annotationRecipe: "CLASS_OF_2019"; // Adorable
+}
+
+/**
+ * Searches Pandora for the item that best matches `query` and returns a link to it.
+ *
+ * The search is restricted to the Pandora type that corresponds to `metadata.type`.
+ * Every returned item is scored against the query with `compareTwoStrings`, and the
+ * highest-scoring item becomes the result. Results are cached under a synthetic URL
+ * built from the query and the search type.
+ *
+ * @param query - Free-text search query (typically title plus artist).
+ * @param metadata - Metadata of the source item; only `type` is read here.
+ * @returns The best matching link, or `null` when the type is unsupported, nothing
+ *   usable was found, or the request failed (failures are logged, not thrown).
+ */
+export async function getPandoraLink(query: string, metadata: SearchMetadata) {
+  const searchType = PANDORA_SEARCH_TYPES[metadata.type];
+  if (!searchType) return null;
+
+  const params: PandoraSearchRequest = {
+    query,
+    types: [searchType],
+    listener: null,
+    start: 0,
+    count: Number(ADAPTERS_QUERY_LIMIT),
+    annotate: true,
+    searchTime: 0,
+    annotationRecipe: 'CLASS_OF_2019',
+  };
+
+  // We're going to POST to the API, so our URL doesn't contain any query-specific information
+  // Here we'll just construct a fake one for caching purposes
+  const cacheurl = new URL('https://pandora.com/');
+  cacheurl.search = new URLSearchParams({ q: query, t: searchType }).toString();
+
+  const cache = await getCachedSearchResultLink(cacheurl);
+  if (cache) {
+    logger.info(`[Pandora] (${cacheurl}) cache hit`);
+    return cache;
+  }
+
+  const url = new URL(ENV.adapters.pandora.apiUrl);
+  const body = JSON.stringify(params);
+
+  try {
+    const response = await HttpClient.post<PandoraSearchResponse>(url.toString(), body, {
+      headers: {
+        Accept: 'application/json',
+        'Content-Type': 'application/json',
+      },
+    });
+
+    // The response is untyped JSON at runtime, so tolerate missing collections
+    const results = response.results ?? [];
+    const annotations = response.annotations ?? {};
+
+    if (results.length === 0) {
+      throw new Error(`No results found: ${JSON.stringify(response)}`);
+    }
+
+    // Loop-invariant: normalize the query once instead of once per result
+    const normalizedQuery = query.toLowerCase();
+
+    let bestMatch: SearchResultLink | null = null;
+    let highestScore = 0;
+
+    for (const key of results) {
+      const item = annotations[key];
+
+      // Skip results with no annotation, and annotations with no shareable path:
+      // without the path we would build a broken "https://www.pandora.comundefined" link
+      if (!item?.shareableUrlPath) continue;
+
+      // Build the comparison title the same way the queries are built: name + artist/program
+      let title: string = item.name || '';
+      if ((item.type === 'AL' || item.type === 'TR') && item.artistName) {
+        title += ` ${item.artistName}`;
+      }
+      if (item.type === 'PE' && item.programName) {
+        title += ` ${item.programName}`;
+      }
+
+      const score = compareTwoStrings(title.toLowerCase(), normalizedQuery);
+
+      if (score > highestScore) {
+        highestScore = score;
+        bestMatch = {
+          type: Adapter.Pandora,
+          url: `https://www.pandora.com${item.shareableUrlPath}`,
+          isVerified: score >= RESPONSE_COMPARE_MIN_SCORE,
+          notAvailable: score < RESPONSE_COMPARE_MIN_INCLUSION_SCORE,
+        };
+      }
+    }
+
+    // Nothing scored above zero (or nothing was usable)
+    if (!bestMatch) {
+      throw new Error('No valid matches found.');
+    }
+
+    logger.info(
+      `[Pandora] Best match score: ${highestScore.toFixed(3)} (verified: ${bestMatch.isVerified ? 'yes' : 'no'}, available: ${!bestMatch.notAvailable ? 'yes' : 'no'})`
+    );
+
+    await cacheSearchResultLink(cacheurl, bestMatch);
+
+    return bestMatch;
+  } catch (error) {
+    // Best-effort adapter: log and report "no link" rather than failing the whole search
+    logger.error(`[Pandora] (${url}) ${error}`);
+    return null;
+  }
+}
@@ -30,7 +30,10 @@ export const QOBUZ_LINK_REGEX =
 export const BANDCAMP_LINK_REGEX =
   /^https:\/\/([^\.]+)\.bandcamp\.com\/(album|track)?\/?([^/?]+)?\/?$/;
 
-export const ALLOWED_LINKS_REGEX = `${SPOTIFY_LINK_REGEX.source}|${YOUTUBE_LINK_REGEX.source}|${APPLE_MUSIC_LINK_REGEX.source}|${DEEZER_LINK_REGEX.source}|${SOUNDCLOUD_LINK_REGEX.source}|${TIDAL_LINK_REGEX.source}|${QOBUZ_LINK_REGEX.source}|${GOOGLE_LINK_REGEX.source}`;
+/**
+ * Matches Pandora links under `/playlist/`, `/podcast/` or `/artist/`.
+ *
+ * Capture groups:
+ *   [1] the top-level section (`playlist`, `podcast` or `artist`)
+ *   [2] an optional path segment including its trailing slash (read by the Pandora parser
+ *       as the episode slug); undefined when that segment is absent
+ *   [3] the ID, which must start with `AL`, `AR`, `TR`, `PC` or `PE`
+ *
+ * NOTE(review): group [3] ends in a greedy `.+`, so it also captures a trailing slash or
+ * anything else after the ID prefix, and there is no `PL` alternative for playlist IDs —
+ * confirm both are intended.
+ */
+export const PANDORA_LINK_REGEX =
+  /^https:\/\/(?:www\.)?pandora\.com\/(playlist|podcast|artist)\/(?:[^/]+\/)?([^/]+\/)?(?:[^/]+\/)?((?:AL|AR|TR|PC|PE).+)\/?$/;
+
+// Alternation of every supported service's link pattern source, kept as a string
+export const ALLOWED_LINKS_REGEX = `${SPOTIFY_LINK_REGEX.source}|${YOUTUBE_LINK_REGEX.source}|${APPLE_MUSIC_LINK_REGEX.source}|${DEEZER_LINK_REGEX.source}|${SOUNDCLOUD_LINK_REGEX.source}|${TIDAL_LINK_REGEX.source}|${QOBUZ_LINK_REGEX.source}|${BANDCAMP_LINK_REGEX.source}|${PANDORA_LINK_REGEX.source}|${GOOGLE_LINK_REGEX.source}`;
 
 export const ADAPTERS_QUERY_LIMIT = 4;
 export const RESPONSE_COMPARE_MIN_SCORE = 0.7;
 
@@ -8,6 +8,7 @@ export enum StreamingService {
   Google = 'google',
   Qobuz = 'qobuz',
   Bandcamp = 'bandcamp',
+  Pandora = 'pandora',
 }
 
 export enum Adapter {
@@ -19,6 +20,7 @@ export enum Adapter {
   Tidal = StreamingService.Tidal,
   Qobuz = StreamingService.Qobuz,
   Bandcamp = StreamingService.Bandcamp,
+  Pandora = StreamingService.Pandora,
 }
 
 export enum Parser {
@@ -31,6 +33,7 @@ export enum Parser {
   Google = StreamingService.Google,
   Qobuz = StreamingService.Qobuz,
   Bandcamp = StreamingService.Bandcamp,
+  Pandora = StreamingService.Pandora,
 }
 
 export type StreamingServiceType = Adapter & Parser;
 
@@ -40,6 +40,9 @@ export const ENV = {
       apiUrl: Bun.env['BANDCAMP_API_URL']!,
       baseUrl: Bun.env['BANDCAMP_BASE_URL']!,
     },
+    pandora: {
+      apiUrl: Bun.env['PANDORA_API_URL']!,
+    },
   },
   services: {
     urlShortener: {
 
@@ -3,6 +3,7 @@ import {
   BANDCAMP_LINK_REGEX,
   DEEZER_LINK_REGEX,
   GOOGLE_LINK_REGEX,
+  PANDORA_LINK_REGEX,
   QOBUZ_LINK_REGEX,
   SOUNDCLOUD_LINK_REGEX,
   SPOTIFY_LINK_REGEX,
@@ -91,6 +92,12 @@ export const getSearchParser = (link?: string, searchId?: string) => {
     type = Parser.Bandcamp;
   }
 
+  const pandoraId = source.match(PANDORA_LINK_REGEX)?.[3];
+  if (pandoraId) {
+    id = pandoraId;
+    type = Parser.Pandora;
+  }
+
   const googleMatch = source.match(GOOGLE_LINK_REGEX);
   if (googleMatch) {
     // For gasearch URLs, capture group [1] is undefined, so use the full path
 
@@ -0,0 +1,132 @@
+import { PANDORA_LINK_REGEX } from '~/config/constants';
+import { MetadataType, Parser } from '~/config/enum';
+import { cacheSearchMetadata, getCachedSearchMetadata } from '~/services/cache';
+import { fetchMetadata } from '~/services/metadata';
+import type { SearchMetadata } from '~/services/search';
+import { logger } from '~/utils/logger';
+import { getCheerioDoc, linkedDataScript, metaTagContent } from '~/utils/scraper';
+
+/**
+ * Two-letter type prefixes found at the start of Pandora IDs.
+ * `getPandoraMetadata` reads the first two characters of the ID to pick one of these.
+ */
+enum PandoraMetadataType {
+  Song = 'TR',
+  Album = 'AL',
+  Artist = 'AR',
+  Podcast = 'PE',
+  Show = 'PC',
+}
+
+// Translates a Pandora ID prefix into the app-wide `MetadataType` stored on the parsed metadata
+const PANDORA_METADATA_TO_METADATA_TYPE = {
+  [PandoraMetadataType.Song]: MetadataType.Song,
+  [PandoraMetadataType.Album]: MetadataType.Album,
+  [PandoraMetadataType.Artist]: MetadataType.Artist,
+  [PandoraMetadataType.Podcast]: MetadataType.Podcast,
+  [PandoraMetadataType.Show]: MetadataType.Show,
+};
+
+/**
+ * Scrapes a Pandora page and returns its search metadata (title, description, type, image).
+ *
+ * Music pages (album/artist/track) are read from the page's JSON linked-data script;
+ * podcast pages (show/episode) are read from Open Graph meta tags, with the episode
+ * title recovered from the URL slug.
+ *
+ * @param id - ID captured from the link by `PANDORA_LINK_REGEX`; its first two characters
+ *   select the page type. Also used as the cache key.
+ * @param link - The Pandora URL to fetch.
+ * @returns The parsed metadata (from cache when available).
+ * @throws Error wrapping any failure, prefixed with the function name and the link.
+ */
+export const getPandoraMetadata = async (id: string, link: string) => {
+  // Pandora's IDs are predictable and prefixed with their type: ${two-letter-type}:${actual-id}
+  // For some URLs (Podcasts and Shows), the captured ID from the URL is correct/not transformed
+  // For Albums, Tracks, and Artists, the ID is hashed and I haven't been able to identify it....
+  // e.g.: ALcdVpX6J57q54q (URL) -> AL:49608296 (actual)
+
+  const cached = await getCachedSearchMetadata(id, Parser.Pandora);
+  if (cached) {
+    logger.info(`[Pandora] (${id}) metadata cache hit`);
+    return cached;
+  }
+
+  try {
+    const type = id.slice(0, 2);
+
+    const html = await fetchMetadata(link);
+
+    const doc = getCheerioDoc(html);
+
+    // ID stored on the metadata. Kept separate from `id` (the URL-derived cache key) so the
+    // cache lookup above and the cache write below agree on the same key.
+    let metadataId = id;
+    let title, description, image;
+
+    // Pandora's codebase must be such a rat's nest...
+    if (['AL', 'AR', 'TR'].includes(type)) {
+      // === Music Page ===
+
+      // There's a helpfully quite complete JSON Linked Data script node right at the top of the page,
+      // And it's *much* more straightforward for getting some of our structured data than regexing the og tags
+      const atts = linkedDataScript(doc);
+
+      // Free up a handful of bytes of memory
+      delete atts.potentialAction;
+
+      // Debug
+      // logger.info(JSON.stringify(atts, null, 2));
+
+      // Instead of fussing with the ID from the URL and checking whether it's hashed or not
+      // Just grab a known-good one from our linked data (falling back to the URL ID if absent)
+      metadataId = atts['@id'] || id;
+
+      title = atts.name;
+      image = atts.image;
+
+      // There is no `og:description` tag and `twitter:description` tags are inconsistently available
+      // (and not populated with different info most of the time anyway)
+      // So we're just going to use the `description` field to bake in the Artist for the eventual query
+      const artistName = atts.byArtist?.name;
+      description = artistName ? [title, artistName].join(' ') : title;
+    } else if (['PC', 'PE'].includes(type)) {
+      // === Podcast Page ===
+
+      // The Linked Data node is present, but empty for podcast links :|
+
+      // This `title` tag will be the name of the Podcast on the main Podcast page (good)
+      // ...and also still the name of the Podcast on any individual Episode page (very bad)
+      title = metaTagContent(doc, 'og:title', 'property');
+      image = metaTagContent(doc, 'og:image', 'property');
+
+      // Podcasts seem to have even fewer meta tags for some reason
+      description = `Listen to the ${title} podcast on Pandora.`;
+
+      // The Episode title can be scraped from the HTML of the page layout: `[data-qa="header_static_text_title"]`
+      // But it doesn't exist cleanly in any tag or script anywhere in the document's HEAD
+      // So our only options are to pull it from the HTML or accept the slugified version from the URL
+      if (type === 'PE') {
+        // Group 2 is optional in the regex, so it can be undefined even when the link matches
+        const episodeTitle = link
+          .match(PANDORA_LINK_REGEX)?.[2]
+          ?.replace(/[^\w]/g, ' ')
+          .trim();
+
+        // Drop missing parts so two absent titles yield '' (rejected below) instead of ' '
+        title = [episodeTitle, title].filter(Boolean).join(' ');
+      }
+    } else {
+      throw new Error('Unknown Pandora type (or malformed ID).');
+    }
+
+    if (!title || !image) {
+      throw new Error('Pandora metadata not found');
+    }
+
+    const parsedTitle = title.trim();
+
+    const metadata = {
+      id: metadataId,
+      title: parsedTitle,
+      description,
+      type: PANDORA_METADATA_TO_METADATA_TYPE[type as PandoraMetadataType],
+      image,
+    } as SearchMetadata;
+
+    // Cache under the URL-derived ID so the lookup at the top of this function can hit
+    await cacheSearchMetadata(id, Parser.Pandora, metadata);
+
+    // Also keep the entry keyed by the linked-data ID, as before, for any reader that uses it
+    if (metadataId !== id) {
+      await cacheSearchMetadata(metadataId, Parser.Pandora, metadata);
+    }
+
+    return metadata;
+  } catch (err) {
+    throw new Error(`[${getPandoraMetadata.name}] (${link}) ${err}`);
+  }
+};
+
+/**
+ * Picks the text to search other services with for a Pandora item.
+ * Albums and songs use `description` (title plus artist); everything else uses `title`.
+ */
+export const getPandoraQueryFromMetadata = (metadata: SearchMetadata) => {
+  const hasArtistInDescription =
+    metadata.type === MetadataType.Album || metadata.type === MetadataType.Song;
+
+  return hasArtistInDescription ? metadata.description : metadata.title;
+};
@@ -12,7 +12,7 @@ export const searchRouteSchema = z.object({
   body: z.object({
     link: z.string().regex(new RegExp(ALLOWED_LINKS_REGEX), {
       message:
-        'Invalid link, please try with Spotify, YouTube, Apple Music, Deezer, SoundCloud, Tidal, Qobuz, Bandcamp, or Google Music Share links.',
+        'Invalid link, please try with Spotify, YouTube, Apple Music, Deezer, SoundCloud, Tidal, Qobuz, Bandcamp, Pandora, or Google Music Share links.',
     }),
   }),
 });
Original file line number	Diff line number	Diff line change
`@@ -8,6 +8,7 @@ export enum StreamingService {`
`8`	`8`	`Google = 'google',`
`9`	`9`	`Qobuz = 'qobuz',`
`10`	`10`	`Bandcamp = 'bandcamp',`
	`11`	`+ Pandora = 'pandora',`
`11`	`12`	`}`
`12`	`13`
`13`	`14`	`export enum Adapter {`
`@@ -19,6 +20,7 @@ export enum Adapter {`
`19`	`20`	`Tidal = StreamingService.Tidal,`
`20`	`21`	`Qobuz = StreamingService.Qobuz,`
`21`	`22`	`Bandcamp = StreamingService.Bandcamp,`
	`23`	`+ Pandora = StreamingService.Pandora,`
`22`	`24`	`}`
`23`	`25`
`24`	`26`	`export enum Parser {`
`@@ -31,6 +33,7 @@ export enum Parser {`
`31`	`33`	`Google = StreamingService.Google,`
`32`	`34`	`Qobuz = StreamingService.Qobuz,`
`33`	`35`	`Bandcamp = StreamingService.Bandcamp,`
	`36`	`+ Pandora = StreamingService.Pandora,`
`34`	`37`	`}`
`35`	`38`
`36`	`39`	`export type StreamingServiceType = Adapter & Parser;`