|
| 1 | +# Calibre-Web Automated – fork of Calibre-Web |
| 2 | +# Copyright (C) 2018-2025 Calibre-Web contributors |
| 3 | +# Copyright (C) 2024-2025 Calibre-Web Automated contributors |
| 4 | +# SPDX-License-Identifier: GPL-3.0-or-later |
| 5 | +# See CONTRIBUTORS for full list of authors. |
| 6 | + |
| 7 | +from flask import Blueprint |
| 8 | +from flask_babel import gettext as _ |
| 9 | +from sqlalchemy import func, and_ |
| 10 | + |
| 11 | +from . import db, calibre_db, logger |
| 12 | +from .admin import admin_required |
| 13 | +from .usermanagement import login_required_if_no_ano |
| 14 | +from .render_template import render_title_template |
| 15 | +from .cw_login import current_user |
| 16 | + |
| 17 | +duplicates = Blueprint('duplicates', __name__) |
| 18 | +log = logger.create() |
| 19 | + |
| 20 | + |
@duplicates.route("/duplicates")
@login_required_if_no_ano
@admin_required
def show_duplicates():
    """Render the page listing books that share a title and primary author.

    The duplicate groups come from ``find_duplicate_books()``. If anything
    raises while collecting the groups or rendering the template, the error
    is logged together with its traceback and the page is rendered with an
    empty group list instead of propagating the exception.

    Returns:
        The response produced by ``render_title_template`` for
        ``duplicates.html``.
    """
    print("[cwa-duplicates] Loading duplicates page...", flush=True)
    log.info("[cwa-duplicates] Loading duplicates page for user: %s", current_user.name)

    try:
        duplicate_groups = find_duplicate_books()

        print(f"[cwa-duplicates] Found {len(duplicate_groups)} duplicate groups total", flush=True)
        log.info("[cwa-duplicates] Found %s duplicate groups total", len(duplicate_groups))

        return render_title_template('duplicates.html',
                                     duplicate_groups=duplicate_groups,
                                     title=_("Duplicate Books"),
                                     page="duplicates")

    except Exception as e:
        # Deliberate best-effort: the admin page should still load when
        # duplicate detection fails. Because the exception is swallowed here,
        # log it with the traceback so the failure can still be diagnosed.
        # NOTE(review): assumes `log` is a standard `logging.Logger`
        # (provides `.exception`) — confirm against `logger.create()`.
        print(f"[cwa-duplicates] Critical error loading duplicates page: {e}", flush=True)
        log.exception("[cwa-duplicates] Critical error loading duplicates page: %s", e)
        return render_title_template('duplicates.html',
                                     duplicate_groups=[],
                                     title=_("Duplicate Books"),
                                     page="duplicates")
| 49 | + |
| 50 | + |
def find_duplicate_books():
    """Find books that share the same title and primary author.

    Every book matching ``calibre_db.common_filters()`` is loaded and then
    grouped in Python (not in SQL) by a case-insensitive, whitespace-stripped
    ``(title, primary author)`` key. Books with no authors are skipped.

    Each book that ends up in a duplicate group is annotated in place with
    display attributes: ``ordered_authors``, ``author_names`` (comma-joined,
    with ``'|'`` in a name shown as ``','``) and ``cover_url``.

    Returns:
        list[dict]: One dict per duplicate group, sorted case-insensitively by
        title and then author. Keys: ``title``, ``author`` (primary author as
        displayed), ``count`` and ``books`` (sorted newest timestamp first).
    """
    books_query = (calibre_db.session.query(db.Books)
                   .filter(calibre_db.common_filters())  # Respect user permissions and library filtering
                   .order_by(db.Books.title, db.Books.timestamp.desc()))

    all_books = books_query.all()
    print(f"[cwa-duplicates] Retrieved {len(all_books)} books with user filtering applied", flush=True)

    # Group books by (title, primary author), compared case-insensitively.
    title_author_groups = {}
    for book in all_books:
        # A book without authors has no primary author to compare on.
        if not book.authors:
            continue

        book.ordered_authors = calibre_db.order_authors([book])
        primary_author = book.ordered_authors[0].name if book.ordered_authors else "Unknown"

        key = (book.title.lower().strip(), primary_author.lower().strip())
        title_author_groups.setdefault(key, []).append(book)

    print(f"[cwa-duplicates] Grouped books into {len(title_author_groups)} unique title+author combinations", flush=True)

    # Keep only keys shared by more than one book and attach display data.
    duplicate_groups = []
    for books in title_author_groups.values():
        if len(books) < 2:
            continue

        # Newest first within a group.
        books.sort(key=lambda x: x.timestamp, reverse=True)

        for book in books:
            book.author_names = ', '.join(
                author.name.replace('|', ',') for author in book.ordered_authors
            )
            if book.has_cover:
                book.cover_url = f"/cover/{book.id}"
            else:
                book.cover_url = "/static/generic_cover.jpg"

        newest = books[0]
        # Take the primary author from the ordered author list rather than
        # splitting `author_names` on ',': a displayed name can itself contain
        # a comma (each '|' is shown as ','), and splitting would truncate it
        # (e.g. "Doe, John" -> "Doe").
        if newest.ordered_authors:
            group_author = newest.ordered_authors[0].name.replace('|', ',').strip()
        else:
            group_author = ''

        duplicate_groups.append({
            'title': newest.title,
            'author': group_author,  # Primary author
            'count': len(books),
            'books': books
        })

        book_ids = [book.id for book in books]
        print(f"[cwa-duplicates] Found duplicate group: '{newest.title}' by {group_author} ({len(books)} copies) - IDs: {book_ids}", flush=True)
        log.info("[cwa-duplicates] Found duplicate group: '%s' by %s (%s copies) - IDs: %s",
                 newest.title, group_author, len(books), book_ids)

    # Sort by title, then author, for a consistent display order.
    duplicate_groups.sort(key=lambda x: (x['title'].lower(), x['author'].lower()))

    print(f"[cwa-duplicates] Found {len(duplicate_groups)} duplicate groups total", flush=True)

    return duplicate_groups
0 commit comments