Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
5108f53
Add PGS to SRT OCR conversion feature
mikeSGman Oct 15, 2025
1c6c486
Address PR review feedback
mikeSGman Oct 19, 2025
f5ddccc
Fix OCR conversion for files with special characters in path
mikeSGman Oct 19, 2025
5dd7627
Add pgsrip metadata to PyInstaller builds
mikeSGman Oct 20, 2025
c1d63d1
Include babelfish data files in PyInstaller builds
mikeSGman Oct 20, 2025
39b1a5f
Include cleanit metadata in PyInstaller builds
mikeSGman Oct 20, 2025
964ce3c
Include cleanit data files in PyInstaller builds
mikeSGman Oct 20, 2025
4f8e347
Include trakit metadata in PyInstaller builds
mikeSGman Oct 20, 2025
aacb011
Add OCR dependencies to pyproject.toml
mikeSGman Oct 30, 2025
9bd98ea
Add babelfish converter submodules as hidden imports
mikeSGman Oct 30, 2025
fdee985
Add MKVToolNix directory to PATH for pgsrip
mikeSGman Oct 30, 2025
c7fcaa1
Run pgsrip from video directory to avoid Windows path issues
mikeSGman Oct 30, 2025
61e9735
Add test script and use POSIX paths for pgsrip
mikeSGman Oct 30, 2025
d967c82
Update test script with tesseract/mkvextract paths
mikeSGman Oct 30, 2025
5060f88
Fix tesseract path for Subtitle Edit installation
mikeSGman Oct 30, 2025
613c64f
Use Tesseract 5.5.0 for testing
mikeSGman Oct 30, 2025
d7e7a49
Detect Tesseract from Subtitle Edit and prioritize newest version
mikeSGman Oct 30, 2025
54376c9
Add detection test script
mikeSGman Oct 30, 2025
90a64ba
Add debug logging for pgsrip
mikeSGman Oct 30, 2025
532d855
Set OCR tool paths at app startup for PyInstaller compatibility
mikeSGman Oct 30, 2025
4560654
Enable keep_temp_files for debugging PyInstaller temp folder issue
mikeSGman Oct 30, 2025
211b08b
Ensure TEMP/TMP env vars are set for PyInstaller
mikeSGman Oct 30, 2025
8ecb3ad
Remove invalid keep_temp_files parameter
mikeSGman Oct 30, 2025
ef3dcf9
Monkey-patch pgsrip for PyInstaller temp folder compatibility
mikeSGman Oct 30, 2025
953c967
Fix pgsrip monkey-patch to apply before Mkv import
mikeSGman Oct 30, 2025
f8893f6
Move pgsrip monkey-patch to app startup in __main__.py
mikeSGman Oct 30, 2025
b7884e4
Apply pgsrip patch after environment setup
mikeSGman Oct 30, 2025
1ba9941
Add debug output to verify pgsrip patch is applied
mikeSGman Oct 30, 2025
ddaae55
Revert to simpler pgsrip usage - works from source
mikeSGman Oct 30, 2025
835607e
Document known PyInstaller limitation for PGS OCR
mikeSGman Oct 30, 2025
2f89be5
Add PGS to SRT OCR subtitle extraction
mikeSGman Oct 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions FastFlix_Windows_Installer.spec
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- mode: python ; coding: utf-8 -*-
from PyInstaller.utils.hooks import collect_submodules
from PyInstaller.utils.hooks import collect_submodules, copy_metadata, collect_data_files
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not know about those functions, handy!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Just testing some final changes. I had to deal with detection for Subtitle Edit's tesseract installations. It works locally, testing a build now.

import toml

block_cipher = None
Expand All @@ -24,9 +24,12 @@ all_imports.remove("python-box")
all_imports.append("box")
all_imports.append("iso639")

# Hidden imports needed by the PGS -> SRT OCR feature (pgsrip and the packages it relies on).
# The babelfish converter submodules are listed one by one so each is bundled explicitly.
all_imports.extend(
    [
        "pgsrip",
        "pytesseract",
        "cv2",
        "numpy",
        "pysrt",
        "babelfish",
        "babelfish.converters",
        "babelfish.converters.alpha2",
        "babelfish.converters.alpha3b",
        "babelfish.converters.alpha3t",
        "babelfish.converters.name",
        "babelfish.converters.opensubtitles",
        "cleanit",
    ]
)

a = Analysis(['fastflix\\__main__.py'],
binaries=[],
datas=[('CHANGES', 'fastflix\\.'), ('docs\\build-licenses.txt', 'docs')] + all_fastflix_files,
datas=[('CHANGES', 'fastflix\\.'), ('docs\\build-licenses.txt', 'docs')] + all_fastflix_files + copy_metadata('pgsrip') + copy_metadata('pytesseract') + copy_metadata('babelfish') + copy_metadata('cleanit') + copy_metadata('trakit') + collect_data_files('babelfish') + collect_data_files('cleanit'),
hiddenimports=all_imports,
hookspath=[],
runtime_hooks=[],
Expand Down
7 changes: 5 additions & 2 deletions FastFlix_Windows_OneFile.spec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import toml

from PyInstaller.utils.hooks import collect_submodules
from PyInstaller.utils.hooks import collect_submodules, copy_metadata, collect_data_files

block_cipher = None

Expand All @@ -27,13 +27,16 @@ all_imports.remove("python-box")
all_imports.append("box")
all_imports.append("iso639")

# Hidden imports needed by the PGS -> SRT OCR feature (pgsrip and the packages it relies on).
# The babelfish converter submodules are listed one by one so each is bundled explicitly.
all_imports.extend(
    [
        "pgsrip",
        "pytesseract",
        "cv2",
        "numpy",
        "pysrt",
        "babelfish",
        "babelfish.converters",
        "babelfish.converters.alpha2",
        "babelfish.converters.alpha3b",
        "babelfish.converters.alpha3t",
        "babelfish.converters.name",
        "babelfish.converters.opensubtitles",
        "cleanit",
    ]
)

portable_file = "fastflix\\portable.py"
with open(portable_file, "w") as portable:
portable.write(" ")

a = Analysis(['fastflix\\__main__.py'],
binaries=[],
datas=[('CHANGES', 'fastflix\\.'), ('docs\\build-licenses.txt', 'docs')] + all_fastflix_files,
datas=[('CHANGES', 'fastflix\\.'), ('docs\\build-licenses.txt', 'docs')] + all_fastflix_files + copy_metadata('pgsrip') + copy_metadata('pytesseract') + copy_metadata('babelfish') + copy_metadata('cleanit') + copy_metadata('trakit') + collect_data_files('babelfish') + collect_data_files('cleanit'),
hiddenimports=all_imports,
hookspath=[],
runtime_hooks=[],
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,17 @@ Windows: Go into FastFlix's settings and select the corresponding EXE file for e

Linux: Install the rpm or deb and restart FastFlix

# Subtitle Extraction

FastFlix can extract subtitle tracks from video files. Supported subtitle formats are SRT, ASS, SSA and PGS. For PGS (Presentation Graphic Stream) subtitles, FastFlix can also run OCR to convert them to SRT.

## PGS to SRT OCR

**Requirements**:
- Tesseract OCR 4.x or higher (auto-detected from PATH, Subtitle Edit installations, or standard install locations)
- MKVToolNix, which provides `mkvextract` and `mkvmerge` (auto-detected from PATH or standard install locations)
- pgsrip Python library (included in FastFlix)

# HDR

On any 10-bit or higher video output, FastFlix will copy the input HDR colorspace (bt2020). Which is [different than HDR10 or HDR10+](https://codecalamity.com/hdr-hdr10-hdr10-hlg-and-dolby-vision/).
Expand Down
133 changes: 133 additions & 0 deletions WINDOWS_BUILD.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Building FastFlix on Windows

This guide explains how to build FastFlix executables on Windows.

## Prerequisites

1. **Python 3.12 or higher**
- Download from [python.org](https://www.python.org/downloads/)
- Make sure to check "Add Python to PATH" during installation

2. **Git** (to clone/update the repository)
- Download from [git-scm.com](https://git-scm.com/download/win)

## Build Steps

### 1. Open Command Prompt or PowerShell

Navigate to the directory where you want to clone the FastFlix repository:

```bash
cd C:\path\to\your\projects
git clone https://github.com/cdgriffith/FastFlix.git
cd FastFlix
```

Or if you already have it:

```bash
cd C:\path\to\FastFlix
```

### 2. Create and Activate Virtual Environment

```bash
python -m venv venv
venv\Scripts\activate
```

You should see `(venv)` in your command prompt.

### 3. Install Dependencies

```bash
pip install --upgrade pip
pip install -e ".[dev]"
```

This installs FastFlix in editable mode with all development dependencies including PyInstaller.

### 4. Build the Executable

You have two options:

#### Option A: Single Executable (Recommended for distribution)

```bash
pyinstaller FastFlix_Windows_OneFile.spec
```

The executable will be in: `dist\FastFlix.exe`

#### Option B: Directory with Multiple Files (Faster startup)

```bash
pyinstaller FastFlix_Windows_Installer.spec
```

The executable will be in: `dist\FastFlix\FastFlix.exe`

### 5. Test the Build

```bash
cd dist
FastFlix.exe
```

Or for the installer version:

```bash
cd dist\FastFlix
FastFlix.exe
```

## Running Without Building (For Testing)

If you just want to test changes without building an executable:

```bash
python -m fastflix
```

## Troubleshooting

### Missing Dependencies

If you get import errors, try reinstalling:

```bash
pip install --upgrade --force-reinstall -e ".[dev]"
```

### Build Errors

1. Make sure you're in the FastFlix root directory
2. Ensure the virtual environment is activated (you see `(venv)`)
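3. Try deleting the `build` and `dist` folders and rebuilding. The command below uses Command Prompt syntax; in PowerShell, use `Remove-Item -Recurse -Force build, dist` instead of `rmdir /s /q`: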

```bash
rmdir /s /q build dist
pyinstaller FastFlix_Windows_OneFile.spec
```

### FFmpeg Not Found

The FastFlix executable doesn't include FFmpeg. You need to:

1. Download FFmpeg from [ffmpeg.org](https://ffmpeg.org/download.html#build-windows)
2. Extract it somewhere
3. Add the `bin` folder to your PATH, or configure it in FastFlix settings

## Known Limitations

### PGS to SRT OCR (PyInstaller builds)

Due to an upstream issue in pgsrip v0.1.12, PGS to SRT OCR conversion does not work in PyInstaller-built executables. The feature works perfectly when running from source (`python -m fastflix`).

If you need PGS OCR functionality, please run FastFlix from source instead of using the compiled executable.

## Notes

- The build process creates a `portable.py` file temporarily (it's removed after)
- The `.spec` files automatically collect all dependencies from `pyproject.toml`
- The icon is located at `fastflix\data\icon.ico`
27 changes: 27 additions & 0 deletions fastflix/__main__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,35 @@
# -*- coding: utf-8 -*-
import os
import sys
import traceback
from multiprocessing import freeze_support
from pathlib import Path

from fastflix.entry import main


def setup_ocr_environment():
    """Expose the detected OCR tools to child processes through environment variables.

    Each tool located by ``find_ocr_tool`` gets its containing directory prepended
    to ``PATH``. For Tesseract the full executable path is additionally stored in
    ``TESSERACT_CMD``. A tool that cannot be found leaves the environment untouched.

    This is done at startup because, in PyInstaller frozen executables, changes made
    to ``os.environ`` later on were observed not to reach subprocesses.
    """
    from fastflix.models.config import find_ocr_tool

    def prepend_to_path(executable):
        # Put the executable's folder in front of whatever PATH currently holds
        folder = str(Path(executable).parent)
        os.environ["PATH"] = os.pathsep.join((folder, os.environ.get("PATH", "")))

    # Tesseract: directory on PATH plus an explicit pointer to the executable
    tesseract_path = find_ocr_tool("tesseract")
    if tesseract_path:
        prepend_to_path(tesseract_path)
        os.environ["TESSERACT_CMD"] = str(tesseract_path)

    # MKVToolNix: only the directory holding mkvmerge is added to PATH
    mkvmerge_path = find_ocr_tool("mkvmerge")
    if mkvmerge_path:
        prepend_to_path(mkvmerge_path)


def start_fastflix():
exit_code = 2
portable_mode = True
Expand All @@ -17,6 +41,9 @@ def start_fastflix():
if portable_mode:
print("PORTABLE MODE DETECTED: now using local config file and workspace in same directory as the executable")

# Set up OCR environment variables early for PyInstaller compatibility
setup_ocr_environment()

try:
exit_code = main(portable_mode)
except Exception:
Expand Down
115 changes: 115 additions & 0 deletions fastflix/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,115 @@ def where(filename: str, portable_mode=False) -> Path | None:
return None


def find_ocr_tool(name):
"""Find OCR tools (tesseract, mkvmerge, pgsrip) similar to how we find FFmpeg"""
# Check environment variable
if ocr_location := os.getenv(f"FF_{name.upper()}"):
return Path(ocr_location).absolute()

# Check system PATH
if (ocr_location := shutil.which(name)) is not None:
return Path(ocr_location).absolute()

# Special handling for tesseract on Windows (not in PATH by default)
if name == "tesseract" and win_based:
# Check common install locations using environment variables
localappdata = os.getenv("LOCALAPPDATA")
appdata = os.getenv("APPDATA")
program_files = os.getenv("PROGRAMFILES")
program_files_x86 = os.getenv("PROGRAMFILES(X86)")

# Check for Subtitle Edit's Tesseract installations and find the newest version
subtitle_edit_versions = []
if appdata:
subtitle_edit_dir = Path(appdata) / "Subtitle Edit"
if subtitle_edit_dir.exists():
# Find all Tesseract* directories
for tesseract_dir in subtitle_edit_dir.glob("Tesseract*"):
tesseract_exe = tesseract_dir / "tesseract.exe"
if tesseract_exe.exists():
# Extract version number from directory name (e.g., Tesseract550 -> 550)
version_str = tesseract_dir.name.replace("Tesseract", "")
try:
version = int(version_str)
subtitle_edit_versions.append((version, tesseract_exe))
except ValueError:
# If we can't parse version, still add it with version 0
subtitle_edit_versions.append((0, tesseract_exe))

# If we found Subtitle Edit versions, return the newest one
if subtitle_edit_versions:
subtitle_edit_versions.sort(reverse=True) # Sort by version descending
return subtitle_edit_versions[0][1]

common_paths = []
# Check user-local installation first
if localappdata:
common_paths.append(Path(localappdata) / "Programs" / "Tesseract-OCR" / "tesseract.exe")
# Check system-wide installations
if program_files:
common_paths.append(Path(program_files) / "Tesseract-OCR" / "tesseract.exe")
if program_files_x86:
common_paths.append(Path(program_files_x86) / "Tesseract-OCR" / "tesseract.exe")

for path in common_paths:
if path.exists():
return path

# Check Windows registry for Tesseract install location
try:
import winreg

# Try HKEY_LOCAL_MACHINE first (system-wide install)
for root_key in [winreg.HKEY_LOCAL_MACHINE, winreg.HKEY_CURRENT_USER]:
try:
key = winreg.OpenKey(root_key, r"SOFTWARE\Tesseract-OCR")
install_path = winreg.QueryValueEx(key, "InstallDir")[0]
winreg.CloseKey(key)
tesseract_exe = Path(install_path) / "tesseract.exe"
if tesseract_exe.exists():
return tesseract_exe
except (FileNotFoundError, OSError):
pass
except ImportError:
pass

# Special handling for mkvmerge on Windows
if name == "mkvmerge" and win_based:
# Check common install locations using environment variables
localappdata = os.getenv("LOCALAPPDATA")
program_files = os.getenv("PROGRAMFILES")
program_files_x86 = os.getenv("PROGRAMFILES(X86)")

common_paths = []
# Check user-local installation first
if localappdata:
common_paths.append(Path(localappdata) / "Programs" / "MKVToolNix" / "mkvmerge.exe")
# Check system-wide installations
if program_files:
common_paths.append(Path(program_files) / "MKVToolNix" / "mkvmerge.exe")
if program_files_x86:
common_paths.append(Path(program_files_x86) / "MKVToolNix" / "mkvmerge.exe")

for path in common_paths:
if path.exists():
return path

# Check in FastFlix OCR tools folder
ocr_folder = Path(user_data_dir("FastFlix_OCR", appauthor=False, roaming=True))
if ocr_folder.exists():
for file in ocr_folder.iterdir():
if file.is_file() and file.name.lower() in (name, f"{name}.exe"):
return file
# Check bin subfolder
if (ocr_folder / "bin").exists():
for file in (ocr_folder / "bin").iterdir():
if file.is_file() and file.name.lower() in (name, f"{name}.exe"):
return file

return None


class Config(BaseModel):
version: str = __version__
config_path: Path = Field(default_factory=get_config)
Expand Down Expand Up @@ -168,6 +277,12 @@ class Config(BaseModel):

disable_cover_extraction: bool = False

# PGS to SRT OCR Settings
enable_pgs_ocr: bool = False
tesseract_path: Path | None = Field(default_factory=lambda: find_ocr_tool("tesseract"))
mkvmerge_path: Path | None = Field(default_factory=lambda: find_ocr_tool("mkvmerge"))
pgs_ocr_language: str = "eng"

def encoder_opt(self, profile_name, profile_option_name):
encoder_settings = getattr(self.profiles[self.selected_profile], profile_name)
if encoder_settings:
Expand Down
Loading