Skip to content

Commit 369b1f7

Browse files
authored
Merge pull request #76 from sjdonado/feat-pandora
Feat pandora
2 parents d797ac4 + f17de66 commit 369b1f7

20 files changed

Lines changed: 464 additions & 36 deletions

File tree

.env.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ BANDCAMP_BASE_URL=https://bandcamp.com/
3030
DEEZER_API_URL=https://api.deezer.com/search
3131
APPLE_MUSIC_API_URL=https://music.apple.com/ca
3232
SOUNDCLOUD_BASE_URL=https://soundcloud.com
33+
PANDORA_API_URL=https://www.pandora.com/api/v3/sod/search
3334

3435
URL_SHORTENER_API_URL=http://localhost:4000/api/links
3536
URL_SHORTENER_API_KEY=url_shortener_api_key

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Adapters represent the streaming services supported by the Web App and the Rayca
1818
| SoundCloud | Yes | No | Yes |
1919
| Qobuz | Yes | No | Yes |
2020
| Bandcamp | Yes | No | Yes |
21+
| Pandora | Yes | No | Yes |
2122

2223
## Disclaimer: Search-based results
2324

build.config.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,17 +135,19 @@ async function buildAssets(options: BuildOptions = {}) {
135135
if (options.watch) {
136136
console.log('👀 Starting watch mode...');
137137

138-
// Initial build
138+
// Initial builds
139139
try {
140+
// Build CSS once before starting watcher
141+
await buildCSS({ ...options, watch: false });
140142
await buildJS(options);
141-
console.log('✅ Initial JS build completed');
143+
console.log('✅ Initial build completed');
142144
} catch (error) {
143145
console.error('Initial build failed:', error);
144146
process.exit(1);
145147
}
146148

147149
// Start watchers (both return processes that stay alive)
148-
const cssProc = await buildCSS(options);
150+
const cssProc = await buildCSS({ ...options, watch: true });
149151
const jsProc = await watchJS(options);
150152

151153
console.log('✅ Watch mode started - files will rebuild on changes');

src/adapters/pandora.ts

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import { compareTwoStrings } from 'string-similarity';
2+
3+
import {
4+
ADAPTERS_QUERY_LIMIT,
5+
RESPONSE_COMPARE_MIN_INCLUSION_SCORE,
6+
RESPONSE_COMPARE_MIN_SCORE,
7+
} from '~/config/constants';
8+
import { Adapter, MetadataType } from '~/config/enum';
9+
import { ENV } from '~/config/env';
10+
import { cacheSearchResultLink, getCachedSearchResultLink } from '~/services/cache';
11+
import type { SearchMetadata, SearchResultLink } from '~/services/search';
12+
import HttpClient from '~/utils/http-client';
13+
import { logger } from '~/utils/logger';
14+
15+
/**
 * Maps each supported `MetadataType` to the two-letter type code sent in the
 * `types` field of a Pandora search request.
 */
const PANDORA_SEARCH_TYPES = {
  // Music
  [MetadataType.Artist]: 'AR',
  [MetadataType.Album]: 'AL',
  [MetadataType.Song]: 'TR',
  [MetadataType.Playlist]: 'PL',

  // Podcasts
  [MetadataType.Podcast]: 'PE',
  [MetadataType.Show]: 'PC',
};
23+
24+
/** Shape of the JSON body read back from the Pandora search endpoint. */
interface PandoraSearchResponse {
  /** Keys into `annotations`, iterated in the order the API returned them. */
  results: string[];
  /**
   * Per-result details, keyed by the entries of `results`.
   * Left loosely typed on purpose: the fields vary a lot by result type and
   * would be needlessly verbose to model.
   */
  annotations: Record<string, any>;
  searchToken: string;
}
30+
31+
/** JSON payload POSTed to the Pandora search endpoint. */
interface PandoraSearchRequest {
  /** Free-text search string. */
  query: string;
  /** Two-letter Pandora type codes to search for (e.g. `TR`, `AL`). */
  types: string[];
  listener: null;
  /** Offset of the first result to return. */
  start: number;
  /** Maximum number of results to return. */
  count: number;
  /** Always `true`; the adapter reads the `annotations` map of the response. */
  annotate: true;
  searchTime: number;
  /** The only recipe name this adapter sends. */
  annotationRecipe: 'CLASS_OF_2019';
}
41+
42+
/**
 * Searches Pandora for `query` and returns a link to the closest result.
 *
 * The best candidate is the result whose name (plus artist or program name,
 * where present) is most similar to `query` according to `compareTwoStrings`.
 * Successful lookups are cached under a synthetic URL built from the query and
 * the Pandora type code.
 *
 * @param query - Text to search for.
 * @param metadata - Metadata of the source item; only `type` is read, to pick
 *   the Pandora type code.
 * @returns The cached or freshly resolved link, or `null` when the metadata
 *   type has no Pandora equivalent, the request fails, or no usable result
 *   is found.
 */
export async function getPandoraLink(query: string, metadata: SearchMetadata) {
  const searchType = PANDORA_SEARCH_TYPES[metadata.type];
  if (!searchType) return null;

  const params: PandoraSearchRequest = {
    query,
    types: [searchType],
    listener: null,
    start: 0,
    count: Number(ADAPTERS_QUERY_LIMIT),
    annotate: true,
    searchTime: 0,
    annotationRecipe: 'CLASS_OF_2019',
  };

  // The search is a POST, so the API URL carries no query-specific information.
  // Build a synthetic URL that does, purely to serve as the cache key.
  const cacheurl = new URL('https://pandora.com/');
  cacheurl.search = new URLSearchParams({ q: query, t: searchType }).toString();

  const cache = await getCachedSearchResultLink(cacheurl);
  if (cache) {
    logger.info(`[Pandora] (${cacheurl}) cache hit`);
    return cache;
  }

  const url = new URL(ENV.adapters.pandora.apiUrl);
  const body = JSON.stringify(params);

  try {
    const response = await HttpClient.post<PandoraSearchResponse>(url.toString(), body, {
      headers: {
        Accept: 'application/json',
        'Content-Type': 'application/json',
      },
    });

    if (response.results.length === 0) {
      throw new Error(`No results found: ${JSON.stringify(response)}`);
    }

    // Loop-invariant: normalise the query once instead of per candidate.
    const normalizedQuery = query.toLowerCase();

    let bestMatch: SearchResultLink | null = null;
    let highestScore = 0;

    for (const key of response.results) {
      if (!(key in response.annotations)) continue;

      const item = response.annotations[key];

      // A candidate without a shareable path cannot be turned into a working
      // link (it would become "https://www.pandora.comundefined"), so skip it
      // rather than risk caching a broken URL.
      if (!item || typeof item.shareableUrlPath !== 'string' || !item.shareableUrlPath) {
        continue;
      }

      // Compare against "name artist" / "name program" where those fields exist.
      let title = item.name || '';
      if ((item.type === 'AL' || item.type === 'TR') && 'artistName' in item) {
        title += ` ${item.artistName}`;
      }
      if (item.type === 'PE' && 'programName' in item) {
        title += ` ${item.programName}`;
      }

      const score = compareTwoStrings(title.toLowerCase(), normalizedQuery);

      // Strictly greater: a candidate scoring 0 never becomes the best match.
      if (score > highestScore) {
        highestScore = score;
        bestMatch = {
          type: Adapter.Pandora,
          url: `https://www.pandora.com${item.shareableUrlPath}`,
          isVerified: score >= RESPONSE_COMPARE_MIN_SCORE,
          notAvailable: score < RESPONSE_COMPARE_MIN_INCLUSION_SCORE,
        };
      }
    }

    if (!bestMatch) {
      throw new Error('No valid matches found.');
    }

    logger.info(
      `[Pandora] Best match score: ${highestScore.toFixed(3)} (verified: ${bestMatch.isVerified ? 'yes' : 'no'}, available: ${!bestMatch.notAvailable ? 'yes' : 'no'})`
    );

    await cacheSearchResultLink(cacheurl, bestMatch);

    return bestMatch;
  } catch (error) {
    // Best effort: a failed Pandora lookup is logged and reported as "no link".
    logger.error(`[Pandora] (${url}) ${error}`);
    return null;
  }
}

src/config/constants.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ export const QOBUZ_LINK_REGEX =
3030
export const BANDCAMP_LINK_REGEX =
3131
/^https:\/\/([^\.]+)\.bandcamp\.com\/(album|track)?\/?([^/?]+)?\/?$/;
3232

33-
export const ALLOWED_LINKS_REGEX = `${SPOTIFY_LINK_REGEX.source}|${YOUTUBE_LINK_REGEX.source}|${APPLE_MUSIC_LINK_REGEX.source}|${DEEZER_LINK_REGEX.source}|${SOUNDCLOUD_LINK_REGEX.source}|${TIDAL_LINK_REGEX.source}|${QOBUZ_LINK_REGEX.source}|${GOOGLE_LINK_REGEX.source}`;
33+
/**
 * Matches Pandora share links.
 *
 * Capture groups:
 *  1. top-level section (`playlist`, `podcast` or `artist`)
 *  2. optional second path segment after the section, including its trailing
 *     slash (for episode links this is the episode slug)
 *  3. the Pandora ID, starting with its two-letter type (`AL`, `AR`, `TR`,
 *     `PC` or `PE`)
 *
 * The ID and the path segments stop at `/`, `?` and `#`, so a trailing slash,
 * query string or fragment is accepted but never ends up inside a capture.
 */
export const PANDORA_LINK_REGEX =
  /^https:\/\/(?:www\.)?pandora\.com\/(playlist|podcast|artist)\/(?:[^/?#]+\/)?([^/?#]+\/)?(?:[^/?#]+\/)?((?:AL|AR|TR|PC|PE)[^/?#]+)\/?(?:[?#].*)?$/;
35+
36+
/**
 * Source of a single pattern that accepts a link from any supported service:
 * the per-service patterns joined as alternatives, in the order listed.
 */
export const ALLOWED_LINKS_REGEX = [
  SPOTIFY_LINK_REGEX,
  YOUTUBE_LINK_REGEX,
  APPLE_MUSIC_LINK_REGEX,
  DEEZER_LINK_REGEX,
  SOUNDCLOUD_LINK_REGEX,
  TIDAL_LINK_REGEX,
  QOBUZ_LINK_REGEX,
  BANDCAMP_LINK_REGEX,
  PANDORA_LINK_REGEX,
  GOOGLE_LINK_REGEX,
]
  .map(linkRegex => linkRegex.source)
  .join('|');
3437

3538
export const ADAPTERS_QUERY_LIMIT = 4;
3639
export const RESPONSE_COMPARE_MIN_SCORE = 0.7;

src/config/enum.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export enum StreamingService {
88
Google = 'google',
99
Qobuz = 'qobuz',
1010
Bandcamp = 'bandcamp',
11+
Pandora = 'pandora',
1112
}
1213

1314
export enum Adapter {
@@ -19,6 +20,7 @@ export enum Adapter {
1920
Tidal = StreamingService.Tidal,
2021
Qobuz = StreamingService.Qobuz,
2122
Bandcamp = StreamingService.Bandcamp,
23+
Pandora = StreamingService.Pandora,
2224
}
2325

2426
export enum Parser {
@@ -31,6 +33,7 @@ export enum Parser {
3133
Google = StreamingService.Google,
3234
Qobuz = StreamingService.Qobuz,
3335
Bandcamp = StreamingService.Bandcamp,
36+
Pandora = StreamingService.Pandora,
3437
}
3538

3639
export type StreamingServiceType = Adapter & Parser;

src/config/env.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ export const ENV = {
4040
apiUrl: Bun.env['BANDCAMP_API_URL']!,
4141
baseUrl: Bun.env['BANDCAMP_BASE_URL']!,
4242
},
43+
pandora: {
44+
apiUrl: Bun.env['PANDORA_API_URL']!,
45+
},
4346
},
4447
services: {
4548
urlShortener: {

src/parsers/link.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
BANDCAMP_LINK_REGEX,
44
DEEZER_LINK_REGEX,
55
GOOGLE_LINK_REGEX,
6+
PANDORA_LINK_REGEX,
67
QOBUZ_LINK_REGEX,
78
SOUNDCLOUD_LINK_REGEX,
89
SPOTIFY_LINK_REGEX,
@@ -91,6 +92,12 @@ export const getSearchParser = (link?: string, searchId?: string) => {
9192
type = Parser.Bandcamp;
9293
}
9394

95+
const pandoraId = source.match(PANDORA_LINK_REGEX)?.[3];
96+
if (pandoraId) {
97+
id = pandoraId;
98+
type = Parser.Pandora;
99+
}
100+
94101
const googleMatch = source.match(GOOGLE_LINK_REGEX);
95102
if (googleMatch) {
96103
// For gasearch URLs, capture group [1] is undefined, so use the full path

src/parsers/pandora.ts

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import { PANDORA_LINK_REGEX } from '~/config/constants';
2+
import { MetadataType, Parser } from '~/config/enum';
3+
import { cacheSearchMetadata, getCachedSearchMetadata } from '~/services/cache';
4+
import { fetchMetadata } from '~/services/metadata';
5+
import type { SearchMetadata } from '~/services/search';
6+
import { logger } from '~/utils/logger';
7+
import { getCheerioDoc, linkedDataScript, metaTagContent } from '~/utils/scraper';
8+
9+
/** Two-letter type prefixes that Pandora puts at the start of its IDs. */
enum PandoraMetadataType {
  // Music pages
  Artist = 'AR',
  Album = 'AL',
  Song = 'TR',

  // Podcast pages
  Show = 'PC',
  Podcast = 'PE',
}
16+
17+
/** Translates a Pandora ID prefix into the application's `MetadataType`. */
const PANDORA_METADATA_TO_METADATA_TYPE = {
  // Music pages
  [PandoraMetadataType.Artist]: MetadataType.Artist,
  [PandoraMetadataType.Album]: MetadataType.Album,
  [PandoraMetadataType.Song]: MetadataType.Song,

  // Podcast pages
  [PandoraMetadataType.Show]: MetadataType.Show,
  [PandoraMetadataType.Podcast]: MetadataType.Podcast,
};
24+
25+
/**
 * Scrapes the Pandora page at `link` and builds the search metadata for it.
 *
 * Pandora IDs are prefixed with a two-letter type. For podcasts and shows the
 * ID found in the URL is the real one; for albums, tracks and artists the URL
 * carries a hashed form (e.g. `ALcdVpX6J57q54q` in the URL vs `AL:49608296`),
 * so the real ID is read from the page's linked data instead.
 *
 * @param id - ID captured from the link; its first two characters select the
 *   page type.
 * @param link - Full Pandora URL to fetch.
 * @returns The cached or freshly scraped metadata.
 * @throws Error, prefixed with the function name and link, when the page
 *   cannot be fetched, the type is unknown, or no title/image is found.
 */
export const getPandoraMetadata = async (id: string, link: string) => {
  const cached = await getCachedSearchMetadata(id, Parser.Pandora);
  if (cached) {
    logger.info(`[Pandora] (${id}) metadata cache hit`);
    return cached;
  }

  try {
    const type = id.slice(0, 2);

    const html = await fetchMetadata(link);

    const doc = getCheerioDoc(html);

    // ID stored on the metadata. Starts as the URL id and is replaced by the
    // linked-data id on music pages.
    let canonicalId = id;
    let title, description, image;

    if (['AL', 'AR', 'TR'].includes(type)) {
      // === Music Page ===

      // The JSON linked-data script at the top of the page is far more
      // straightforward to read than regexing the og tags.
      const atts = linkedDataScript(doc);

      // NOTE(review): what linkedDataScript returns when the node is missing
      // is not visible here; fail with a clear message instead of a TypeError.
      if (!atts) {
        throw new Error('Pandora metadata not found');
      }

      // Free up a handful of bytes of memory
      delete atts.potentialAction;

      // Rather than working out whether the URL id is hashed, take the
      // known-good one from the linked data; keep the URL id if it is absent.
      canonicalId = atts['@id'] || id;

      title = atts.name;
      image = atts.image;

      // There is no `og:description` tag and `twitter:description` is
      // inconsistently available, so `description` is used to bake the artist
      // into the eventual query.
      const artistName = atts.byArtist?.name;
      description =
        typeof artistName === 'string' && artistName ? [title, artistName].join(' ') : title;
    } else if (['PC', 'PE'].includes(type)) {
      // === Podcast Page ===

      // The linked-data node is present but empty for podcast links.

      // `og:title` is the podcast name on the show page, and is still the
      // podcast name (not the episode name) on an episode page.
      title = metaTagContent(doc, 'og:title', 'property');
      image = metaTagContent(doc, 'og:image', 'property');

      // Podcast pages expose even fewer meta tags, so synthesise a description.
      description = `Listen to the ${title} podcast on Pandora.`;

      // The episode title is not available in any tag or script in the HEAD
      // (only in the layout: `[data-qa="header_static_text_title"]`), so use
      // the slugified version from the URL.
      if (type === 'PE') {
        // Group 2 is optional in the regex, hence the second `?.`.
        const episodeTitle = link
          .match(PANDORA_LINK_REGEX)?.[2]
          ?.replace(/[^\w]/g, ' ')
          .trim();

        title = [episodeTitle, title].filter(Boolean).join(' ');
      }
    } else {
      throw new Error('Unknown Pandora type (or malformed ID).');
    }

    if (!title || !image) {
      throw new Error('Pandora metadata not found');
    }

    const metadata = {
      id: canonicalId,
      title: title.trim(),
      description,
      type: PANDORA_METADATA_TO_METADATA_TYPE[type as PandoraMetadataType],
      image,
    } as SearchMetadata;

    await cacheSearchMetadata(canonicalId, Parser.Pandora, metadata);

    // The lookup at the top uses the URL id. When the canonical id differs,
    // also store the entry under the URL id so the same link hits the cache
    // next time.
    if (canonicalId !== id) {
      await cacheSearchMetadata(id, Parser.Pandora, metadata);
    }

    return metadata;
  } catch (err) {
    throw new Error(`[${getPandoraMetadata.name}] (${link}) ${err}`);
  }
};
123+
124+
/**
 * Picks the text to search other services with.
 *
 * Albums and songs use `description`; every other type uses `title`.
 */
export const getPandoraQueryFromMetadata = (metadata: SearchMetadata) => {
  const isAlbumOrSong =
    metadata.type === MetadataType.Album || metadata.type === MetadataType.Song;

  return isAlbumOrSong ? metadata.description : metadata.title;
};

src/schemas/web.schema.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export const searchRouteSchema = z.object({
1212
body: z.object({
1313
link: z.string().regex(new RegExp(ALLOWED_LINKS_REGEX), {
1414
message:
15-
'Invalid link, please try with Spotify, YouTube, Apple Music, Deezer, SoundCloud, Tidal, Qobuz, Bandcamp, or Google Music Share links.',
15+
'Invalid link, please try with Spotify, YouTube, Apple Music, Deezer, SoundCloud, Tidal, Qobuz, Bandcamp, Pandora, or Google Music Share links.',
1616
}),
1717
}),
1818
});

0 commit comments

Comments
 (0)