Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion backend/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "mediafusion-api"
version = "6.0.0-beta.12"
version = "6.0.0-beta.13"
edition = "2021"

[lib]
Expand Down Expand Up @@ -64,6 +64,7 @@ sha2 = "0.11"

# Compression
flate2 = "1"
tempfile = "3"

# Random bytes for AES IV generation + sports poster selection
rand_core = "0.10"
Expand Down
12 changes: 12 additions & 0 deletions backend/migrations/0011_imdb_dataset_import.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Rollback for 0011_imdb_dataset_import: removes the scheduled job row, then drops
-- every table the up migration created.
DELETE FROM cron_jobs WHERE name = 'imdb_dataset_import';

-- Staging tables plus the import-state table. IF EXISTS lets the rollback be re-run
-- without failing on tables that are already gone.
DROP TABLE IF EXISTS imdb_stage_names;
DROP TABLE IF EXISTS imdb_stage_principals;
DROP TABLE IF EXISTS imdb_stage_episode;
DROP TABLE IF EXISTS imdb_stage_crew;
DROP TABLE IF EXISTS imdb_stage_akas;
DROP TABLE IF EXISTS imdb_stage_ratings;
DROP TABLE IF EXISTS imdb_stage_basics;
DROP TABLE IF EXISTS imdb_import_state;

-- Provider seed rows are intentionally left in place (shared with Python runtime).
108 changes: 108 additions & 0 deletions backend/migrations/0011_imdb_dataset_import.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
-- IMDb bulk import infrastructure (additive only — no ALTER on existing tables).

-- ── Provider seeds (idempotent) ─────────────────────────────────────────────
-- ON CONFLICT (name) DO NOTHING leaves any pre-existing provider row untouched, so
-- running this against a database that already has these providers does not
-- overwrite their current priority or activation values.

INSERT INTO metadata_provider (name, display_name, is_external, is_active, priority, default_priority, created_at)
VALUES
('imdb', 'IMDb', true, true, 10, 10, now()),
('tvdb', 'TVDB', true, true, 15, 15, now()),
('tmdb', 'TMDB', true, true, 20, 20, now())
ON CONFLICT (name) DO NOTHING;

-- IMDb ratings are seeded as a 0-10 scale (max_rating = 10, is_percentage = false).
INSERT INTO rating_provider (name, display_name, max_rating, is_percentage, is_active, display_order)
VALUES ('imdb', 'IMDb', 10, false, true, 10)
ON CONFLICT (name) DO NOTHING;

-- ── Import state (conditional GET + observability) ──────────────────────────
-- One row per dataset (dataset is the primary key).

CREATE TABLE IF NOT EXISTS imdb_import_state (
dataset text PRIMARY KEY,
-- NOTE(review): etag / last_modified presumably hold the HTTP validators from the
-- previous download so the next run can issue a conditional GET — confirm in the importer.
etag text,
last_modified text,
last_run_at timestamptz,
rows_loaded bigint
);

-- ── UNLOGGED staging tables (all text; column order matches IMDb TSV headers) ─
-- Every column is plain text with no constraints or defaults, so rows are accepted
-- as-is; any typing/validation has to happen when the data is read back out.
-- UNLOGGED tables are not written to PostgreSQL's write-ahead log: they are
-- truncated after a crash and are not replicated.
-- NOTE(review): the table suffixes appear to line up with the importer's dataset keys
-- (basics, ratings, akas, crew, episode, principals, names) — confirm against the handler.

CREATE UNLOGGED TABLE IF NOT EXISTS imdb_stage_basics (
tconst text,
title_type text,
primary_title text,
original_title text,
is_adult text,
start_year text,
end_year text,
runtime_minutes text,
genres text
);

CREATE UNLOGGED TABLE IF NOT EXISTS imdb_stage_ratings (
tconst text,
average_rating text,
num_votes text
);

CREATE UNLOGGED TABLE IF NOT EXISTS imdb_stage_akas (
title_id text,
ordering text,
title text,
region text,
language text,
types text,
attributes text,
is_original_title text
);

CREATE UNLOGGED TABLE IF NOT EXISTS imdb_stage_crew (
tconst text,
directors text,
writers text
);

CREATE UNLOGGED TABLE IF NOT EXISTS imdb_stage_episode (
tconst text,
parent_tconst text,
season_number text,
episode_number text
);

CREATE UNLOGGED TABLE IF NOT EXISTS imdb_stage_principals (
tconst text,
ordering text,
nconst text,
category text,
job text,
characters text
);

CREATE UNLOGGED TABLE IF NOT EXISTS imdb_stage_names (
nconst text,
primary_name text,
birth_year text,
death_year text,
primary_profession text,
known_for_titles text
);

-- Single-column indexes on the staging tables' identifier columns.
-- Note that imdb_stage_episode is indexed on parent_tconst (not tconst), and
-- imdb_stage_principals gets two indexes: one on tconst and one on nconst.
CREATE INDEX IF NOT EXISTS imdb_stage_basics_tconst_idx
ON imdb_stage_basics (tconst);
CREATE INDEX IF NOT EXISTS imdb_stage_ratings_tconst_idx
ON imdb_stage_ratings (tconst);
CREATE INDEX IF NOT EXISTS imdb_stage_akas_title_id_idx
ON imdb_stage_akas (title_id);
CREATE INDEX IF NOT EXISTS imdb_stage_crew_tconst_idx
ON imdb_stage_crew (tconst);
CREATE INDEX IF NOT EXISTS imdb_stage_episode_parent_idx
ON imdb_stage_episode (parent_tconst);
CREATE INDEX IF NOT EXISTS imdb_stage_principals_tconst_idx
ON imdb_stage_principals (tconst);
CREATE INDEX IF NOT EXISTS imdb_stage_principals_nconst_idx
ON imdb_stage_principals (nconst);
CREATE INDEX IF NOT EXISTS imdb_stage_names_nconst_idx
ON imdb_stage_names (nconst);

-- Weekly refresh (disabled by default — enable via admin UI or UPDATE cron_jobs)
-- Assuming standard five-field cron syntax, '0 4 * * 0' is 04:00 every Sunday; the
-- timezone is whatever the scheduler uses (not visible here).
-- The job is seeded with an empty JSON payload and enabled = false, and an existing
-- row with the same name is left untouched.
INSERT INTO cron_jobs (name, schedule, queue, payload, enabled)
VALUES ('imdb_dataset_import', '0 4 * * 0', 'imdb_dataset_import', '{}', false)
ON CONFLICT (name) DO NOTHING;
3 changes: 3 additions & 0 deletions backend/migrations/0012_imdb_scheduler_config.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Rollback for 0012_imdb_scheduler_config: reset the IMDb import job's payload to an
-- empty JSON object. The reset is unconditional — there is no check on the current
-- payload, so any edits made to it after the up migration are discarded as well.
UPDATE cron_jobs
SET payload = '{}'::jsonb
WHERE name = 'imdb_dataset_import';
8 changes: 8 additions & 0 deletions backend/migrations/0012_imdb_scheduler_config.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Store default IMDb import options in cron_jobs.payload for admin UI management.
-- The `payload = '{}'::jsonb` guard means only a job row whose payload is still the
-- empty object is filled in; a payload that already has content is left alone.
-- Defaults written: all seven dataset keys, with adult titles excluded.
UPDATE cron_jobs
SET payload = '{
"datasets": ["basics", "names", "ratings", "akas", "episode", "crew", "principals"],
"include_adult": false
}'::jsonb
WHERE name = 'imdb_dataset_import'
AND payload = '{}'::jsonb;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Irreversible cleanup: deleted internal mediafusion external_id rows are not restored.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Drop internal MediaFusion identifiers from media_external_id.
--
-- media.id is the sole internal primary key. Stremio-facing aliases like mf:123
-- are computed at read time and must not be persisted as external provider IDs.
--
-- Preserves real provider rows (imdb, tmdb, tvdb, kitsu, …) and user-scoped
-- identifiers (mf:user:…) used for IPTV/M3U imports.

-- 1) Rows explicitly attributed to the internal provider.
DELETE FROM media_external_id
WHERE provider = 'mediafusion';

-- 2) Rows stored under another provider whose id has one of the internal shapes.
--    In LIKE a bare `_` matches ANY single character, so every literal underscore
--    is escaped (including the one inside `mf_tv_`); otherwise the pattern would
--    also delete ids such as `mfXtv_…` that merely resemble the prefix.
--    The two regexes require digits only after the prefix, so `mf:user:…` ids
--    are not matched.
DELETE FROM media_external_id
WHERE external_id LIKE 'mfm\_%' ESCAPE '\'
   OR external_id LIKE 'mfs\_%' ESCAPE '\'
   OR external_id LIKE 'mf\_tv\_%' ESCAPE '\'
   OR external_id ~ '^mf:[0-9]+$'
   OR external_id ~ '^mf[0-9]+$';
2 changes: 2 additions & 0 deletions backend/src/bin/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use mediafusion_api::{
cleanup::Cleanup,
discover_prewarm::DiscoverPrewarm,
dmm_hashlist::DmmHashlistScraper,
imdb_dataset_import::ImdbDatasetImport,
integration_syncs::IntegrationSyncs,
jackett_feed::JackettFeedScraper,
m3u_import::M3uImport,
Expand Down Expand Up @@ -182,6 +183,7 @@ async fn main() {
reg.register(Arc::new(UpdateSeeders));
reg.register(Arc::new(UpdateTvPosters));
reg.register(Arc::new(DiscoverPrewarm));
reg.register(Arc::new(ImdbDatasetImport));
reg.register(Arc::new(Cleanup));
reg.register(Arc::new(IntegrationSyncs));
reg.register(Arc::new(M3uImport));
Expand Down
55 changes: 55 additions & 0 deletions backend/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,19 @@ pub struct AppConfig {
pub is_scrap_from_mediafusion: bool,
pub is_scrap_from_dmm_hashlist: bool,
pub disable_dmm_hashlist_scraper: bool,
/// GitHub repo owner for DMM hashlist ingestion.
pub dmm_hashlist_repo_owner: String,
/// GitHub repo name for DMM hashlist ingestion.
pub dmm_hashlist_repo_name: String,
/// Git branch to read DMM hashlist commits from.
pub dmm_hashlist_branch: String,
/// Max new commits to process per incremental DMM hashlist run.
pub dmm_hashlist_commits_per_run: usize,
/// Max backfill commits to walk per DMM hashlist run.
pub dmm_hashlist_backfill_commits_per_run: usize,
/// Optional GitHub token for DMM hashlist fetches (`DMM_HASHLIST_GITHUB_TOKEN`, else `GITHUB_TOKEN`).
/// Unauthenticated requests still work against the public GitHub API with lower rate limits.
pub dmm_hashlist_github_token: Option<String>,
pub is_scrap_from_public_indexers: bool,
pub is_scrap_from_public_usenet_indexers: bool,
pub is_scrap_from_jackett: bool,
Expand Down Expand Up @@ -233,6 +246,12 @@ pub struct AppConfig {
pub tvdb_api_key: Option<String>,
/// When false, do not call v3-cinemeta.strem.io (mirrors Python `imdb_cinemeta_fallback_enabled`).
pub imdb_cinemeta_fallback_enabled: bool,
/// Base URL for IMDb non-commercial dataset files.
pub imdb_datasets_base_url: String,
/// Include adult titles when importing IMDb basics (default: false).
pub imdb_import_include_adult: bool,
/// Optional allowlist of dataset keys to import (empty = all).
pub imdb_import_datasets: Vec<String>,
/// Primary metadata source for scrapers (`imdb` or `tmdb`, Python `metadata_primary_source`).
pub metadata_primary_source: String,
/// Ordered anime provider chain for search/fetch (`kitsu`, `anilist`).
Expand Down Expand Up @@ -425,6 +444,24 @@ impl AppConfig {
.ok().and_then(|v| v.parse().ok()).unwrap_or(false),
disable_dmm_hashlist_scraper: env("DISABLE_DMM_HASHLIST_SCRAPER")
.ok().and_then(|v| v.parse().ok()).unwrap_or(false),
dmm_hashlist_repo_owner: env("DMM_HASHLIST_REPO_OWNER")
.unwrap_or_else(|_| "debridmediamanager".into()),
dmm_hashlist_repo_name: env("DMM_HASHLIST_REPO_NAME")
.unwrap_or_else(|_| "hashlists".into()),
dmm_hashlist_branch: env("DMM_HASHLIST_BRANCH")
.unwrap_or_else(|_| "main".into()),
dmm_hashlist_commits_per_run: env("DMM_HASHLIST_COMMITS_PER_RUN")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(20),
dmm_hashlist_backfill_commits_per_run: env("DMM_HASHLIST_BACKFILL_COMMITS_PER_RUN")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(20),
dmm_hashlist_github_token: env("DMM_HASHLIST_GITHUB_TOKEN")
.ok()
.filter(|s| !s.is_empty())
.or_else(|| env("GITHUB_TOKEN").ok().filter(|s| !s.is_empty())),
is_scrap_from_public_indexers: env("IS_SCRAP_FROM_PUBLIC_INDEXERS")
.ok().and_then(|v| v.parse().ok()).unwrap_or(true),
is_scrap_from_public_usenet_indexers: env("IS_SCRAP_FROM_PUBLIC_USENET_INDEXERS")
Expand Down Expand Up @@ -650,6 +687,24 @@ impl AppConfig {
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(true),
imdb_datasets_base_url: env("IMDB_DATASETS_BASE_URL")
.unwrap_or_else(|_| "https://datasets.imdbws.com".into())
.trim_end_matches('/')
.to_string(),
imdb_import_include_adult: env("IMDB_IMPORT_INCLUDE_ADULT")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(false),
imdb_import_datasets: env("IMDB_IMPORT_DATASETS")
.ok()
.filter(|s| !s.is_empty())
.map(|s| {
s.split(',')
.map(|p| p.trim().to_ascii_lowercase())
.filter(|p| !p.is_empty())
.collect()
})
.unwrap_or_default(),
metadata_primary_source: env("METADATA_PRIMARY_SOURCE")
.unwrap_or_else(|_| "imdb".into())
.to_lowercase(),
Expand Down
27 changes: 24 additions & 3 deletions backend/src/db/media.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub struct MediaCandidate {
pub media_id: MediaId,
pub title: String,
pub year: Option<i32>,
pub end_year: Option<i32>,
pub imdb_id: Option<String>,
pub tmdb_id: Option<String>,
pub tvdb_id: Option<String>,
Expand All @@ -33,6 +34,7 @@ pub async fn search_media_candidates(
m.id AS media_id,
m.title,
m.year,
EXTRACT(YEAR FROM m.end_date)::int AS end_year,
MAX(CASE WHEN mei.provider = 'imdb' THEN mei.external_id END) AS imdb_id,
MAX(CASE WHEN mei.provider = 'tmdb' THEN mei.external_id END) AS tmdb_id,
MAX(CASE WHEN mei.provider = 'tvdb' THEN mei.external_id END) AS tvdb_id
Expand All @@ -41,9 +43,16 @@ pub async fn search_media_candidates(
ON mei.media_id = m.id
AND mei.provider IN ('imdb', 'tmdb', 'tvdb')
WHERE m.type = $1
AND m.title_tsv @@ plainto_tsquery('simple', $2)
GROUP BY m.id, m.title, m.year
ORDER BY m.popularity DESC NULLS LAST
AND (
m.title_tsv @@ plainto_tsquery('simple', $2)
OR EXISTS (
SELECT 1 FROM aka_title at
WHERE at.media_id = m.id
AND at.title_tsv @@ plainto_tsquery('simple', $2)
)
)
GROUP BY m.id, m.title, m.year, m.end_date
ORDER BY m.popularity DESC NULLS LAST, m.id
LIMIT 12
"#,
)
Expand All @@ -57,6 +66,18 @@ pub async fn search_media_candidates(
})
}

/// Fetch every alternate title recorded in `aka_title` for `media_id`
/// (used for metadata matching, e.g. IMDB AKAs).
///
/// Best-effort: if the query fails, the error is logged at `warn` level and an
/// empty list is returned instead of propagating the failure.
pub async fn load_aka_titles(pool: &PgPool, media_id: MediaId) -> Vec<String> {
    let outcome = sqlx::query_scalar("SELECT title FROM aka_title WHERE media_id = $1")
        .bind(media_id)
        .fetch_all(pool)
        .await;

    match outcome {
        Ok(titles) => titles,
        Err(e) => {
            warn!("load_aka_titles media_id={media_id}: {e}");
            Vec::new()
        }
    }
}

/// Fetch title, year, and imdb_id for a known media_id.
/// Returns None if the row doesn't exist.
pub async fn get_media_meta(
Expand Down
3 changes: 2 additions & 1 deletion backend/src/db/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ pub mod torznab;
pub mod watch_history;

pub use media::{
get_media_id_by_external_id, get_media_meta, resolve_media_ids, search_media_candidates,
get_media_id_by_external_id, get_media_meta, load_aka_titles, resolve_media_ids,
search_media_candidates,
};
pub use streams::{
fetch_acestream_streams_bulk, fetch_http_streams_bulk, fetch_stream_playback_info,
Expand Down
7 changes: 5 additions & 2 deletions backend/src/jobs/handlers/background_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use serde::Deserialize;
use tracing::{debug, warn};

use crate::{
db::MediaType,
jobs::{
error::JobError,
handler::{JobCtx, JobHandler},
Expand Down Expand Up @@ -44,9 +45,10 @@ struct MediaRow {
async fn lookup_movie(pool: &sqlx::PgPool, id: &str) -> Option<MediaRow> {
let parsed_id: i32 = id.parse().ok()?;
sqlx::query_as::<_, (i32, String, Option<i32>)>(
"SELECT id, title, year FROM media WHERE id = $1 AND type = 'movie'",
"SELECT id, title, year FROM media WHERE id = $1 AND type = $2",
)
.bind(parsed_id)
.bind(MediaType::Movie)
.fetch_optional(pool)
.await
.ok()?
Expand All @@ -59,9 +61,10 @@ async fn lookup_movie(pool: &sqlx::PgPool, id: &str) -> Option<MediaRow> {

async fn lookup_series_media(pool: &sqlx::PgPool, media_id: i32) -> Option<MediaRow> {
sqlx::query_as::<_, (i32, String, Option<i32>)>(
"SELECT id, title, year FROM media WHERE id = $1 AND type = 'series'",
"SELECT id, title, year FROM media WHERE id = $1 AND type = $2",
)
.bind(media_id)
.bind(MediaType::Series)
.fetch_optional(pool)
.await
.ok()?
Expand Down
Loading
Loading