diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..e71de4fa --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,18 @@ +name: SIR Tests + +on: + push: + branches: [ master ] + pull_request: + branches: [ '*' ] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: ./test.sh diff --git a/Dockerfile b/Dockerfile index 0c80ca50..7ad3390e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ -ARG PYTHON_VERSION=2.7 -ARG BASE_IMAGE_DATE=20220421 +ARG PYTHON_VERSION=3.13 +ARG BASE_IMAGE_DATE=20250313 FROM metabrainz/python:$PYTHON_VERSION-$BASE_IMAGE_DATE ARG SIR_VERSION @@ -26,6 +26,7 @@ RUN apt-get update && \ curl \ git \ gnupg \ + libz-dev \ libpq-dev \ libffi-dev \ libssl-dev \ @@ -39,9 +40,10 @@ RUN mkdir -p /usr/local/share/keyrings && \ gpg --no-default-keyring --keyring /tmp/postgres-keyring.gpg --import /tmp/postgres-key.asc && \ gpg --no-default-keyring --keyring /tmp/postgres-keyring.gpg --export --output /usr/local/share/keyrings/apt.postgresql.org.gpg && \ rm -f /tmp/postgres-key.asc /tmp/postgres-keyring.gpg -ENV PG_MAJOR 12 -RUN echo 'deb [signed-by=/usr/local/share/keyrings/apt.postgresql.org.gpg] http://apt.postgresql.org/pub/repos/apt/ focal-pgdg main' $PG_MAJOR > /etc/apt/sources.list.d/pgdg.list -RUN apt-get update && \ +ENV PG_MAJOR 17 +RUN . /etc/os-release && \ + echo "deb [signed-by=/usr/local/share/keyrings/apt.postgresql.org.gpg] https://apt.postgresql.org/pub/repos/apt/ ${VERSION_CODENAME}-pgdg main ${PG_MAJOR}" > /etc/apt/sources.list.d/pgdg.list && \ + apt-get update && \ apt-get install -y postgresql-client-$PG_MAJOR && \ rm -rf /var/lib/apt/lists/* # Specifying password so that client doesn't ask scripts for it... 
diff --git a/RELEASING.md b/RELEASING.md new file mode 100644 index 00000000..a9f44ffd --- /dev/null +++ b/RELEASING.md @@ -0,0 +1,124 @@ +# Search Index Rebuilder (SIR) release process + +> Preamble: +> This document covers steps for releasing new versions of Search Index +> Rebuilder which is performed by maintainers only. +> It includes discussion about private servers, repositories and tools which +> other contributors don’t have access to. +> It is made public for transparency and to allow for improvement suggestions. + +## Table of contents + + + +- [Prerequisites](#prerequisites) +- [Interdependencies](#interdependencies) +- [Update SQL trigger files](#update-sql-trigger-files) +- [Prepare Jira tickets](#prepare-jira-tickets) +- [Prepare GitHub release notes](#prepare-github-release-notes) +- [Add Git tag](#add-git-tag) +- [Build Docker image](#build-docker-image) +- [Deploy to production](#deploy-to-production) +- [Push Git tag](#push-git-tag) +- [Publish GitHub release notes](#publish-github-release-notes) +- [Update Jira tickets](#update-jira-tickets) + + + +## Prerequisites + +* Docker +* Git + +## Interdependencies + +Both repositories [`mb-solr`](https://github.com/metabrainz/mb-solr) +and [`mbsssss`](https://github.com/metabrainz/mbsssss) +may have to be updated at the same time. +Make sure to keep those working with SIR in any case. + +The repository [`musicbrainz-docker`](https://github.com/metabrainz/musicbrainz-docker) +can be used to test everything together locally. +The website [`test.mb.o`](https://test.musicbrainz.org/) +can be used to get community feedback as well if needed. 
+ +## Update SQL trigger files + +Assuming that you followed development setup for +[local development of Search Index Rebuilder](https://github.com/metabrainz/musicbrainz-docker#local-development-of-search-index-rebuilder), +run the following commands in the `indexer` service: + +```sh +python -m sir triggers -bid 2 +./GenerateDropSql.pl +``` + +## Prepare Jira tickets + +* Make sure that `sir-next` is an unreleased version of “Indexer” component in + [SEARCH component versions](https://tickets.metabrainz.org/projects/SEARCH?selectedItem=net.brokenbuild.subcomponents:component-versions-organizer); + otherwise add it with “next release” as description. +* Make sure that noticeable changes are covered by appropriate +[tickets in the “Indexer” component of the “SEARCH” project marked as “In Development Branch”](https://tickets.metabrainz.org/issues/?jql=project%20%3D%20SEARCH%20AND%20component%20%3D%20Indexer%20AND%20status%20%3D%20%22In%20Development%20Branch%22); + otherwise create/split/update tickets as needed. +* Set their _Fix Version_ field to `sir-next`. + +## Prepare GitHub release notes + +* Draft a new release at <https://github.com/metabrainz/sir/releases/new>; +* Set [semantic version](https://semver.org/) number `M.m.p` for release title. 
+ A new major version `M` is required when SIR cannot be updated independently of other search components (See below); +* Copy the formatted list of resolved tickets from [unreleased SEARCH versions](https://tickets.metabrainz.org/projects/SEARCH?selectedItem=com.atlassian.jira.jira-projects-plugin%3Arelease-page&status=unreleased) (by clicking on `sir-next`, then on “Release Notes”) to the description; +* Add an introductory section “New Requirements” if needed, + especially if a new version is required for any of the following: + - MusicBrainz database schema (`musicbrainz-server` [schema-change code](https://github.com/metabrainz/musicbrainz-server/tree/master/admin/sql/updates/schema-change) and MBDB schema [documentation](https://musicbrainz.org/doc/MusicBrainz_Database/Schema)), + - MusicBrainz XML metadata schema (`mmd-schema` [releases](https://github.com/metabrainz/mmd-schema/releases)), + - its associated Python bindings (`mb-rngpy` [tags](https://github.com/metabrainz/mb-rngpy/tags)), + - MusicBrainz Solr search schema (`mbsssss` [releases](https://github.com/metabrainz/mbsssss/releases)), + - and its associated MusicBrainz Solr query response writer (`mb-solr` [releases](https://github.com/metabrainz/mb-solr/releases)); +* Add update instructions if needed (to reinstall triggers or rebuild any search index); +* Add task list items to cover other (supposedly unnoticeable) changes. + +## Add Git tag + +For version `M.m.p`: + +```sh +git status +# Please verify that the clone is on branch master without any local change +git pull --ff-only +git tag 'vM.m.p' -m 'One-line summary' +``` + +## Build Docker image + +```sh +docker/push.sh +``` + +Please verify that a new image tag (_M.m.p_`-git2consul`) is available +from the Docker image registry. + +## Deploy to production + +Point deployment configuration to the new image and follow update instructions if any. 
+ +## Push Git tag + +```sh +git push origin 'vM.m.p' +``` + +## Publish GitHub release notes + +Choose the above pushed tag for the above drafted release and publish it. + +## Update Jira tickets + +1. Edit `sir-next` from [SEARCH component versions](https://tickets.metabrainz.org/projects/SEARCH?selectedItem=net.brokenbuild.subcomponents:component-versions-organizer) (in “Indexer” component) as follows: + - Change name to `sir-`_M.m.p_ + - Set release date + - Replace description with the GitHub release URL; +2. Close tickets for this version from [SEARCH releases](https://tickets.metabrainz.org/projects/SEARCH?selectedItem=com.atlassian.jira.jira-projects-plugin:release-page&status=released-unreleased); +3. Mark it as released; +4. Archive the previous `sir-`_*_ version. diff --git a/config.ini.ctmpl b/config.ini.ctmpl index 86c1dcac..64ba8114 100644 --- a/config.ini.ctmpl +++ b/config.ini.ctmpl @@ -29,9 +29,12 @@ user = {{template "KEY" "db/user"}} [solr] uri = {{template "KEY" "solr/uri"}} batch_size = {{template "KEY" "solr/batch_size"}} +retries = {{template "KEY" "solr/retries"}} +backoff_factor = {{template "KEY" "solr/backoff_factor"}} {{- with $rabbitmq_service_name := or (env "RABBITMQ_SERVICE") "rabbitmq"}} {{- if service $rabbitmq_service_name}} + [rabbitmq] {{- with index (service $rabbitmq_service_name) 0}} host = {{.Address}}:{{.Port}} diff --git a/config.ini.example b/config.ini.example index 9f4ee4ab..e0fa01a7 100644 --- a/config.ini.example +++ b/config.ini.example @@ -8,6 +8,8 @@ user = musicbrainz [solr] uri = http://127.0.0.1:8983/solr batch_size = 60 +retries = 3 +backoff_factor = 1 [sir] import_threads = 2 diff --git a/config.test.ini b/config.test.ini new file mode 100644 index 00000000..715d27bb --- /dev/null +++ b/config.test.ini @@ -0,0 +1,27 @@ +[database] +dbname = musicbrainz_test +host = musicbrainz_db +password = ${PGPASSWORD} +port = 5432 +user = musicbrainz + +[solr] +uri = ${SKIP} +batch_size = 60 +retries = 3 +backoff_factor = 
1 + +[sir] +import_threads = 2 +query_batch_size = 20000 +wscompat = on + +[rabbitmq] +host = SKIP +user = SKIP +password = SKIP +vhost = SKIP +prefetch_count = 350 + +[sentry] +dsn = SKIP diff --git a/docker/Dockerfile.test b/docker/Dockerfile.test index 93933d6e..e0723e44 100644 --- a/docker/Dockerfile.test +++ b/docker/Dockerfile.test @@ -1,8 +1,12 @@ -FROM metabrainz/python:2.7-20220421 +FROM metabrainz/python:3.13-20250313 RUN mkdir /code WORKDIR /code +ENV DOCKERIZE_VERSION v0.6.1 +RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \ + && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz + # Python dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -10,6 +14,7 @@ RUN apt-get update && \ ca-certificates \ cron \ git \ + libz-dev \ libpq-dev \ libffi-dev \ libssl-dev \ @@ -23,8 +28,10 @@ RUN pip install -r requirements.txt RUN pip install -r requirements_dev.txt COPY . /code/ +RUN cp config.test.ini config.ini -CMD py.test --junitxml=/data/test_report.xml \ +CMD dockerize -wait tcp://musicbrainz_db:5432 -timeout 600s \ + bash -c "pytest --junitxml=/data/test_report.xml \ --cov=sir \ --cov-report xml:/data/coverage.xml \ - --cov-report html:/data/coverage-html + --cov-report html:/data/coverage-html" diff --git a/docker/docker-compose.test.yml b/docker/docker-compose.test.yml index 4ce507d9..563b8a2c 100644 --- a/docker/docker-compose.test.yml +++ b/docker/docker-compose.test.yml @@ -1,8 +1,14 @@ -# Docker Compose file for testing -version: "2" services: test: build: context: .. 
+ dockerfile: ./docker/Dockerfile.test + depends_on: + - musicbrainz_db + + musicbrainz_db: + image: metabrainz/musicbrainz-test-database:master + environment: + POSTGRES_HOST_AUTH_METHOD: trust + PGDATA: /var/lib/postgresql-musicbrainz/data diff --git a/docs/source/conf.py b/docs/source/conf.py index fde09a0c..af26f8e5 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -33,8 +33,8 @@ autodoc_default_flags = ["members", "undoc-members", "show-inheritance"] autoclass_content = "both" -intersphinx_mapping = {'python': ('https://docs.python.org/2.7', None), - 'sqla': ('http://docs.sqlalchemy.org/en/rel_1_0/', None), +intersphinx_mapping = {'python': ('https://docs.python.org/3.13', None), + 'sqla': ('https://docs.sqlalchemy.org/en/20/', None), 'solr': ('https://pythonhosted.org//solrpy/', None), 'amqp': ('https://amqp.readthedocs.org/en/latest', None)} diff --git a/docs/source/index.rst b/docs/source/index.rst index b850f7e3..f82e5dc2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,6 +10,7 @@ Contents: usage import queues + service/index api Indices and tables diff --git a/docs/source/service/index.rst b/docs/source/service/index.rst new file mode 100644 index 00000000..3e964762 --- /dev/null +++ b/docs/source/service/index.rst @@ -0,0 +1,6 @@ +.. _service: + +Service maintenance +=================== + +.. include:: rabbitmq.rst diff --git a/docs/source/service/rabbitmq.rst b/docs/source/service/rabbitmq.rst new file mode 100644 index 00000000..9bf8a223 --- /dev/null +++ b/docs/source/service/rabbitmq.rst @@ -0,0 +1,105 @@ +.. _rabbitmq: + +RabbitMQ +-------- + +Maintenance +~~~~~~~~~~~ + +Requirements +++++++++++++ + +* Tolerance to connectivity issues: + When running in watch mode, losing connection to RabbitMQ can make the indexer + stall indefinitely. + To recover, the container running the indexer has to be manually restarted. + See the ticket `SEARCH-678 <https://tickets.metabrainz.org/browse/SEARCH-678>`_ + for follow-up on improving tolerance. 
+* Maintenance mode: + It doesn’t exist. + To perform maintenance operations, it requires switching to another instance + of RabbitMQ to prevent any data loss, even for a short period of time. +* Data importance: + The RabbitMQ instance is conveying notification messages about changes that + must be made to the search indexes. + If any message is lost, all search indexes would have to be rebuilt, + which currently takes hours and implies a downtime for searches. + See the ticket `SEARCH-674 <https://tickets.metabrainz.org/browse/SEARCH-674>`_ + for follow-up on rebuilding with zero-downtime. +* Data persistence: + Messages are expected to be processed within seconds (or minutes during + activity peaks), so there is no need for persistent volumes. + Losing these messages isn’t critical either as search indexes can be + rebuilt in hours, so there is no need for backups either. + +Procedures +++++++++++ + + +* Start service: + + See :ref:`amqp` + +* Reload service configuration: + + After: + + * Check the indexer logs to ensure that it did not stall and that it continues + to process new messages. + +* Stop service: + + Before: + + * Uninstall search triggers + * Stop the live indexer + + It implies that search indexes will be outdated for good. + Updating search indexes requires rebuilding them and takes hours of downtime. + +* Restart service: + + It implies that search indexes will be likely missing some updates. + Updating search indexes requires rebuilding them and takes hours of downtime. + +* Move service: + + * Create vhost, user, permissions, queues in the new instance + * Declare exchanges and queues as described in :ref:`amqp` + * Update broker in PostgreSQL to point to the new instance + * Once the queues in the old instance are empty, + switch the live indexer to the new instance + + Neither data loss nor downtime will occur. + +* Remove service: + + Before: + + * Uninstall search triggers + * Stop the live indexer + + It implies that search indexes will be outdated for good. 
+ Updating search indexes requires to rebuild these and takes hours of downtime. + +Implementation details +~~~~~~~~~~~~~~~~~~~~~~ + +* Connectivity issues are reported through both Docker logs and Sentry. +* Producer and consumer are separate as follows: + + * Producer is `pg_amqp` used by triggers in Postgres database. + + * ack mode: transactional + * heartbeat timeout: (not using 0.8 version) + * message protocol version: 0.8 + + * Consumer is `sir` running in watch mode for live indexing. + + * ack mode: basic/manual + * heartbeat timeout: (not configured/server’s default) + * message protocol version: 0.9.1 + +* There are known issues related to queues declaration; See :ref:`amqp` +* Connections are not named properly (just using proxy interface IP and port) + diff --git a/docs/source/setup/amqp.rst b/docs/source/setup/amqp.rst index e1a552eb..e093361b 100644 --- a/docs/source/setup/amqp.rst +++ b/docs/source/setup/amqp.rst @@ -23,6 +23,9 @@ Database Sir requires that you both install an extension into your MusicBrainz database and add triggers to it. +It also requires to have built the materialized (or denormalized) tables +for the MusicBrainz database. + AMQP Extension ++++++++++++++ diff --git a/docs/source/setup/install.rst b/docs/source/setup/install.rst index ce327fa0..0b212941 100644 --- a/docs/source/setup/install.rst +++ b/docs/source/setup/install.rst @@ -14,11 +14,11 @@ You can easily clone the code with git:: Now you can install it system-wide:: - python2 setup.py install + python setup.py install or start hacking on the code. To do that, you'll need to run at least:: - python2 setup version + python setup version once to generate the file ``sir/version.py`` which the code needs. This file does not have to be added into the git repository because it only contains the @@ -29,7 +29,7 @@ Setup The easiest way to run sir at the moment is to use a `virtual environment `_. 
Once you have virtualenv for Python -2.7 installed, use the following to create the environment:: +3.13 installed, use the following to create the environment:: virtualenv venv source venv/bin/activate diff --git a/docs/source/setup/mbdbschema.rst b/docs/source/setup/mbdbschema.rst index 6ab3a43a..49cf7350 100644 --- a/docs/source/setup/mbdbschema.rst +++ b/docs/source/setup/mbdbschema.rst @@ -2,11 +2,11 @@ MusicBrainz Database Schema ---- Of course you'll need a MusicBrainz database somewhere to read the data from. -The active database schema sequence must be `27` (or any future schema version +The active database schema sequence must be `30` (or any future schema version if still compatible). Follow `announcements`_ from the MetaBrainz blog. -Only Sir `3.y.z` is able to read from database of schema sequence `27` +Only Sir `4.y.z` is able to read from database of schema sequence `30` (or any future schema if still compatible, but it reads and sends the -data made available from schema sequence `27` only). +data made available from schema sequence `30` only). .. _announcements: https://blog.metabrainz.org/category/schema-change-release/ diff --git a/docs/source/setup/solr.rst b/docs/source/setup/solr.rst index e95f0c14..bb8e5391 100644 --- a/docs/source/setup/solr.rst +++ b/docs/source/setup/solr.rst @@ -5,4 +5,19 @@ Of course you'll need a Solr server somewhere to send the data to. The `mbsssss`_ repository contains instructions on how to add the MusicBrainz schemas to a Solr server. 
+Also check values for the following keys in the file ``config.ini``: + +===================== =========== +Keys Description +===================== =========== +[solr] uri The URI to Solr top-level V1 API (ending with ``/solr``) +[solr] batch_size The number of Solr documents to submit at once +[solr] retries Optional, the number of retries for connecting to Solr + (default is ``3``) +[solr] backoff_factor Optional, the `backoff factor`_ of the waiting + time between two retries for connecting to Solr + (default is ``1``) +===================== =========== + +.. _backoff factor: https://urllib3.readthedocs.io/en/2.4.0/reference/urllib3.util.html#urllib3.util.Retry .. _mbsssss: https://github.com/metabrainz/mbsssss diff --git a/requirements.txt b/requirements.txt index a7d3ecaf..50f4ecfe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,12 @@ -amqp==2.5.2 -backports.functools_lru_cache==1.0.1 -enum34==1.1.6 -git+https://github.com/amCap1712/mbdata.git@v27.0.dev2#egg=mbdata -git+https://github.com/metabrainz/mb-rngpy.git@v-2.20201112.0#egg=mb-rngpy -psycopg2==2.8.4 +amqp==5.1.1 +mbdata==30.0.1 +mb-rngpy==2.20250423.0 +psycopg2-binary==2.9.10 retrying==1.3.3 -pysolr==3.8.1 -sqlalchemy==1.0.19 -requests==2.22.0 +pysolr==3.10.0 +sqlalchemy==2.0.38 +requests==2.32.3 ujson==1.35 -sentry-sdk==1.3.1 -typing==3.10.0.0 +sentry-sdk==2.23.1 +sqlalchemy-utils==0.41.1 +urllib3==2.4.0 diff --git a/requirements_dev.txt b/requirements_dev.txt index c4c3b21b..38aea4ba 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,4 +1,2 @@ -pytest==4.6.9 -pytest-cov==2.8.1 -mock==3.0.5 -pysqlite==2.8.3 +pytest==8.3.5 +pytest-cov==6.0.0 diff --git a/setup.py b/setup.py index f2730ed2..8f5ee602 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python2 -from __future__ import print_function +#!/usr/bin/env python3 + import os, subprocess, re from distutils.core import setup, Command from distutils.command.sdist import sdist as _sdist @@ -32,7 +32,7 
@@ def update_version_py(): if p.returncode != 0: print("unable to run git, leaving sir/version.py alone") return - ver = stdout.strip() + ver = str(stdout.strip(), encoding="utf-8") f = open("sir/version.py", "w") f.write(VERSION_PY % ver) f.close() @@ -90,6 +90,7 @@ def run(self): self.distribution.metadata.version = get_version() return _build.run(self) + # Here ends the code taken from Brian Warner setup(name="sir", @@ -102,14 +103,14 @@ def run(self): "sir.trigger_generation", "sir.wscompat"], package_dir={"sir": "sir"}, - download_url=["https://github.com/metabrainz/sir/tarball/master"], - url=["http://github.com/metabrainz/sir"], + download_url="https://github.com/metabrainz/sir/tarball/master", + url="https://github.com/metabrainz/sir", license="MIT", classifiers=["Development Status :: 4 - Beta", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", - "Programming Language :: Python :: 2.7"], + "Programming Language :: Python :: 3.13"], cmdclass={"version": Version, "sdist": sdist, "build": build}, description="Search Index Rabbit", long_description=open("README.rst").read() diff --git a/sir/__main__.py b/sir/__main__.py index 8719abb4..049b689f 100644 --- a/sir/__main__.py +++ b/sir/__main__.py @@ -1,10 +1,9 @@ -from __future__ import absolute_import # Copyright (c) 2014, 2015, 2019 Wieland Hoffmann, MetaBrainz Foundation # License: MIT, see LICENSE for details import argparse import logging import multiprocessing -import ConfigParser +import configparser from . import config, init_sentry_sdk from .amqp.extension_generation import generate_extension @@ -123,7 +122,7 @@ def after_cursor_execute(conn, cursor, statement, config.read_config() try: init_sentry_sdk(config.CFG.get("sentry", "dsn")) - except ConfigParser.Error as e: + except configparser.Error as e: logger.info("Skipping sentry initialization. 
Configuration issue: %s", e) func = args.func args = vars(args) diff --git a/sir/amqp/handler.py b/sir/amqp/handler.py index a997ac67..7e54929a 100644 --- a/sir/amqp/handler.py +++ b/sir/amqp/handler.py @@ -29,11 +29,10 @@ from socket import error as socket_error from sqlalchemy.orm import class_mapper from sys import exit -from urllib2 import URLError -from ConfigParser import NoOptionError +from urllib.error import URLError +from configparser import NoOptionError from collections import defaultdict -from traceback import format_exc - +from traceback import format_exc, format_exception __all__ = ["callback_wrapper", "watch", "Handler"] @@ -83,9 +82,9 @@ def wrapper(self, msg, *args, **kwargs): try: logger.debug('Performing %s on %s', f.__name__, vars(msg)) return f(self, msg, *args, **kwargs) - except Exception as exc: + except Exception: logger.error('Unable to perform action %s on message %s. Exception encountered: %s', - f.__name__, vars(msg), format_exc(exc)) + f.__name__, vars(msg), format_exc()) return wrapper @@ -127,7 +126,7 @@ def wrapper(self, msg, queue): self.reject_message(msg) self.requeue_message(msg, exc, fail=True) except Exception as exc: - logger.error(exc, extra={"data": {"message": vars(msg)}}) + logger.error(exc, extra={"data": {"message": vars(msg)}}, exc_info=True) self.reject_message(msg) self.requeue_message(msg, exc) else: @@ -216,7 +215,7 @@ def requeue_message(self, msg, exc, fail=False): msg.properties['application_headers'] = {} retries_remaining = msg.application_headers.get("mb-retries", _DEFAULT_MB_RETRIES) routing_key = msg.delivery_info["routing_key"] - msg.application_headers["mb-exception"] = format_exc(exc) + msg.application_headers["mb-exception"] = format_exception(exc) if retries_remaining and not fail: msg.application_headers["mb-retries"] = retries_remaining - 1 self.channel.basic_publish(msg, exchange="search.retry", routing_key=routing_key) @@ -285,20 +284,20 @@ def delete_callback(self, parsed_message): :param 
sir.amqp.message.Message parsed_message: Message parsed by the `callback_wrapper`. """ - column_name = "gid" + table_name = parsed_message.table_name + + if "gid" in parsed_message.columns: + doc_id = parsed_message.columns["gid"] + elif "id" in parsed_message.columns and table_name in _ID_DELETE_TABLE_NAMES: + doc_id = str(parsed_message.columns["id"]) + else: + raise ValueError("`gid` column missing from delete message") - if "gid" not in parsed_message.columns: - if "id" in parsed_message.columns and parsed_message.table_name in _ID_DELETE_TABLE_NAMES: - column_name = "id" - else: - raise ValueError("`gid` column missing from delete message") - logger.debug("Deleting {entity_type}: {id}".format( - entity_type=parsed_message.table_name, - id=parsed_message.columns[column_name])) + logger.debug(f"Deleting {table_name}: {doc_id}") - core_name = core_map[parsed_message.table_name] + core_name = core_map[table_name] if core_name in self.cores: - self.cores[core_name].delete(parsed_message.columns[column_name]) + self.cores[core_name].delete(doc_id) self._index_by_fk(parsed_message) def process_messages(self): @@ -378,7 +377,7 @@ def _index_by_fk(self, parsed_message): # to update the related entities. For 'one to many' relationships, the related # entity would have had an update trigger firing off to unlink the `index_entity` # before `index_entity` itself is deleted, so we can ignore those. 
- relevant_rels = dict((r.table.name, (list(r.local_columns)[0].name, list(r.remote_side)[0])) + relevant_rels = dict((r.mapper.persist_selectable.name, (list(r.local_columns)[0].name, list(r.remote_side)[0])) for r in class_mapper(index_model).mapper.relationships if r.direction.name == 'MANYTOONE') for core_name, path in update_map[parsed_message.table_name]: @@ -398,7 +397,7 @@ def _index_by_fk(self, parsed_message): related_model, new_path = second_last_model_in_path(entity.model, path) related_table_name = "" if related_model: - related_table_name = class_mapper(related_model).mapped_table.name + related_table_name = class_mapper(related_model).persist_selectable.name if related_table_name in relevant_rels: with db_session_ctx(self.db_session) as session: select_query = None diff --git a/sir/config.py b/sir/config.py index 70bec95b..4c63b22b 100644 --- a/sir/config.py +++ b/sir/config.py @@ -1,17 +1,16 @@ # Copyright (c) 2014 Wieland Hoffmann # License: MIT, see LICENSE for details -import ConfigParser +from configparser import ConfigParser, ExtendedInterpolation import os.path -#: A :class:`SafeExpandingConfigParser` instance holding the configuration -#: data. -CFG = None # type: SafeExpandingConfigParser +#: A :class:`ConfigParser` instance holding the configuration data. +CFG = None # type: ConfigParser -class SafeExpandingConfigParser(ConfigParser.SafeConfigParser, object): - def _interpolate(self, section, option, rawval, vars): - return os.path.expandvars(super(SafeExpandingConfigParser, - self)._interpolate(section, option, rawval, vars)) +class EnvironmentInterpolation(ExtendedInterpolation): + def before_read(self, parser, section, option, value): + value = super().before_read(parser, section, option, value) + return os.path.expandvars(value) class ConfigError(Exception): @@ -24,7 +23,7 @@ def read_config(): :const:`sir.config.CFG` to a :class:`SafeExpandingConfigParser` instance. 
""" - config = SafeExpandingConfigParser() + config = ConfigParser(interpolation=EnvironmentInterpolation()) read_files = config.read([os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "config.ini" diff --git a/sir/indexing.py b/sir/indexing.py index 8c7d64bb..889e8896 100644 --- a/sir/indexing.py +++ b/sir/indexing.py @@ -7,11 +7,11 @@ from . import config, querying, util from .schema import SCHEMA -from ConfigParser import NoOptionError +from configparser import NoOptionError from functools import partial from logging import getLogger, DEBUG, INFO -from pysolr import SolrError from sqlalchemy import and_ +from sqlalchemy.orm import Session from .util import SIR_EXIT from ctypes import c_bool @@ -122,11 +122,10 @@ def _multiprocessed_import(entity_names, live=False, entities=None): manager = multiprocessing.Manager() entity_data_queue = manager.Queue() - solr_connection = util.solr_connection(e) process_function = partial(queue_to_solr, entity_data_queue, solr_batch_size, - solr_connection) + e) solr_processes = [] for i in range(max_solr_processes): p = multiprocessing.Process(target=process_function, name="Solr-" + str(i)) @@ -141,7 +140,7 @@ def _multiprocessed_import(entity_names, live=False, entities=None): entity_data_queue)) else: with util.db_session_ctx(db_session) as session: - for bounds in querying.iter_bounds(session, SCHEMA[e].model.id, + for bounds in querying.iter_bounds(session, SCHEMA[e].model, query_batch_size, importlimit): args = (e, bounds, entity_data_queue) index_function_args.append(args) @@ -187,10 +186,13 @@ def _index_entity_process_wrapper(args, live=False): # its workers signal.signal(signal.SIGTERM, signal.SIG_DFL) + config.read_config() + try: + session = Session(util.engine()) if live: - return live_index_entity(*args) - return index_entity(*args) + return live_index_entity(session, *args) + return index_entity(session, *args) except Exception as exc: logger.error("Failed to import %s with id in bounds %s", args[0], 
@@ -199,12 +201,13 @@ def _index_entity_process_wrapper(args, live=False): raise -def index_entity(entity_name, bounds, data_queue): +def index_entity(session, entity_name, bounds, data_queue): """ Retrieve rows for a single entity type identified by ``entity_name``, convert them to a dict with :func:`sir.indexing.query_result_to_dict` and put the dicts into ``queue``. + :param sqlalchemy.orm.Session session: :param str entity_name: :param bounds: :type bounds: (int, int) @@ -217,15 +220,16 @@ def index_entity(entity_name, bounds, data_queue): condition = and_(model.id >= lower_bound, model.id < upper_bound) else: condition = model.id >= lower_bound - _query_database(entity_name, condition, data_queue) + _query_database(session, entity_name, condition, data_queue) -def live_index_entity(entity_name, ids, data_queue): +def live_index_entity(session, entity_name, ids, data_queue): """ Retrieve rows for a single entity type identified by ``entity_name``, convert them to a dict with :func:`sir.indexing.query_result_to_dict` and put the dicts into ``queue``. 
+ :param sqlalchemy.orm.Session session: :param str entity_name: :param [int] ids: :param Queue.Queue data_queue: @@ -234,10 +238,10 @@ def live_index_entity(entity_name, ids, data_queue): return condition = and_(SCHEMA[entity_name].model.id.in_(ids)) logger.debug("Importing %s new rows for entity %s", len(ids), entity_name) - _query_database(entity_name, condition, data_queue) + _query_database(session, entity_name, condition, data_queue) -def _query_database(entity_name, condition, data_queue): +def _query_database(session, entity_name, condition, data_queue): """ Retrieve rows for a single entity type identified by ``entity_name``, convert them to a dict with :func:`sir.indexing.query_result_to_dict` and @@ -254,7 +258,8 @@ def _query_database(entity_name, condition, data_queue): search_entity = SCHEMA[entity_name] model = search_entity.model row_converter = search_entity.query_result_to_dict - with util.db_session_ctx(util.db_session()) as session: + + with session: query = search_entity.query.filter(condition).with_session(session) total_records = 0 for row in query: @@ -280,20 +285,23 @@ def _query_database(entity_name, condition, data_queue): logger.debug("Retrieved %s records in %s", total_records, model) -def queue_to_solr(queue, batch_size, solr_connection): +def queue_to_solr(queue, batch_size, entity_name): """ Read :class:`dict` objects from ``queue`` and send them to the Solr server behind ``solr_connection`` in batches of ``batch_size``. :param multiprocessing.Queue queue: :param int batch_size: - :param solr.Solr solr_connection: + :param str entity_name: """ # Restoring the default SIGTERM handler so the Solr process can actually # be terminated on calling terminate. 
signal.signal(signal.SIGTERM, signal.SIG_DFL) + config.read_config() + solr_connection = util.solr_connection(entity_name) + data = [] count = 0 while True: @@ -324,12 +332,13 @@ def send_data_to_solr(solr_connection, data): :param [dict] data: :raises: :class:`solr:solr.SolrException` """ - with sentry_sdk.push_scope() as scope: + with sentry_sdk.new_scope() as scope: scope.set_extra("data", data) try: solr_connection.add(data) logger.debug("Done sending data to Solr") - except SolrError as e: + except Exception as e: + logger.error("Error while submitting data to Solr:", exc_info=True) sentry_sdk.capture_exception(e) FAILED.value = True else: diff --git a/sir/querying.py b/sir/querying.py index 52c4dafd..5c8e35f5 100644 --- a/sir/querying.py +++ b/sir/querying.py @@ -3,11 +3,12 @@ import logging -from sqlalchemy import func +from sqlalchemy import func, select from sqlalchemy.orm.attributes import InstrumentedAttribute from sqlalchemy.orm.interfaces import ONETOMANY, MANYTOONE from sqlalchemy.orm.properties import RelationshipProperty + logger = logging.getLogger("sir") @@ -29,6 +30,10 @@ def iterate_path_values(path, obj): returned by the :func:`getattr` call will be returned and added to the list of values for this field. + .. warning:: + + Hybrid attributes like @hybrid_property are currently not supported. + To give an example, lets presume the object we're starting with is an instance of :class:`~mbdata.models.Artist` and the path is "begin_area.name". The first :func:`getattr` call will be:: @@ -83,7 +88,7 @@ def iterate_path_values(path, obj): yield getattr(obj, pathelem) -def iter_bounds(db_session, column, batch_size, importlimit): +def iter_bounds(db_session, model, batch_size, importlimit): """ Return a list of (lower bound, upper bound) tuples which contain row ids to iterate through a table in batches of ``batch_size``. If ``importlimit`` is @@ -93,26 +98,22 @@ def iter_bounds(db_session, column, batch_size, importlimit): ``batch_size`` rows. 
:param sqlalchemy.orm.session.Session db_session: - :param sqlalchemy.Column column: + :param model: A :ref:`declarative ` class. :param int batch_size: :param int importlimit: :rtype: [(int, int)] """ - q = db_session.query( - column, - func.row_number(). - over(order_by=column). - label('rownum') - ).\ - from_self(column) - + subq = select( + model.id, + func.row_number().over(order_by=model.id).label('rownum') + ).subquery() + q = select(subq.c.id) if batch_size > 1: - q = q.filter("rownum %% %d=1" % batch_size) - + q = q.filter(subq.c.rownum % batch_size == 1) if importlimit: - q = q.filter("rownum <= %d" % (importlimit)) + q = q.filter(subq.c.rownum <= importlimit) - intervals = [id for id in q] + intervals = list(db_session.execute(q).all()) bounds = [] while intervals: diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 6b6b02b1..e432a2f8 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -86,12 +86,14 @@ "aliases.sort_name", "aliases.type.gid", "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", - "area_links.area0.name", - "area_links.area0.gid", - "area_links.area0.begin_date", - "area_links.area0.end_date", - "area_links.area0.type.id", - "area_links.area0.type.gid", + "area_links.entity0.name", + "area_links.entity0.gid", + "area_links.entity0.begin_date", + "area_links.entity0.end_date", + "area_links.entity0.ended", + "area_links.entity0.type.id", + "area_links.entity0.type.gid", + "area_links.entity0.type.name", "area_links.link.link_type.name", "area_links.link.link_type.gid", "area_links.link.attributes.attribute_type.name", @@ -111,7 +113,6 @@ F("begin", "begin_date", transformfunc=tfs.index_partialdate_to_string), F("end", "end_date", transformfunc=tfs.index_partialdate_to_string), F("ended", "ended", transformfunc=tfs.ended_to_string), - F("area", ["area.name", "area.aliases.name"]), F("beginarea", ["begin_area.name", "begin_area.aliases.name"]), F("country", 
"area.iso_3166_1_codes.code"), @@ -133,7 +134,13 @@ "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", "begin_area.gid", "area.gid", "end_area.gid", - "gender.gid", + "area.begin_date", "area.end_date", "area.ended", + "begin_area.begin_date", "begin_area.end_date", + "begin_area.ended", "end_area.begin_date", + "end_area.end_date", "end_area.ended", + "gender.gid", "area.type.gid", "area.type.name", + "begin_area.type.gid", "begin_area.type.name", + "end_area.type.gid", "end_area.type.name", "type.gid"] ) @@ -144,7 +151,7 @@ F("artist", "artist"), F("comment", "comment"), F("barcode", "barcode"), - F("added", "added"), + F("added", "added", transformfunc=tfs.datetime_to_timestamp), F("tracks", "discids.track_count"), F("discid", "discids.discid") ], @@ -166,7 +173,7 @@ F("alias", "aliases.name"), F("aid", "area_links.entity0.gid"), F("area", "area_links.entity0.name"), - F("arid", "artist_links.entity0.gid"), + F("arid", "artist_links.entity0.gid", transformfunc=tfs.uuid_set_to_str_set), F("artist", "artist_links.entity0.name"), F("pid", "place_links.entity1.gid"), F("place", "place_links.entity1.name"), @@ -188,21 +195,22 @@ "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", - "area_links.area.name", - "area_links.area.gid", + "area_links.entity0.name", + "area_links.entity0.gid", "area_links.link.link_type.name", "area_links.link.link_type.gid", "area_links.link.attributes.attribute_type.name", "area_links.link.attributes.attribute_type.gid", - "artist_links.artist.gid", - "artist_links.artist.name", - "artist_links.artist.comment", + "artist_links.entity0.gid", + "artist_links.entity0.name", + "artist_links.entity0.comment", + "artist_links.entity0.sort_name", "artist_links.link.link_type.name", "artist_links.link.link_type.gid", "artist_links.link.attributes.attribute_type.name", "artist_links.link.attributes.attribute_type.gid", - "place_links.place.gid", - "place_links.place.name", + 
"place_links.entity1.gid", + "place_links.entity1.name", "place_links.link.link_type.name", "place_links.link.link_type.gid", "place_links.link.attributes.attribute_type.name", @@ -255,6 +263,7 @@ "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", "area.gid", "area.type.name", "area.type.gid", + "area.begin_date", "area.end_date", "area.ended", "tags.count", "type.gid" ] ) @@ -280,13 +289,15 @@ "aliases.type.gid", "aliases.sort_name", "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", - "area.gid", "type.gid"] + "area.gid", "area.type.gid", "area.type.name", + "area.begin_date", "area.end_date", "area.ended", + "type.gid"] ) SearchRecording = E(modelext.CustomRecording, [ F("alias", "aliases.name"), - F("arid", "artist_credit.artists.artist.gid"), + F("arid", "artist_credit.artists.artist.gid", transformfunc=tfs.uuid_set_to_str_set), F("artist", "artist_credit.name"), F("artistname", "artist_credit.artists.artist.name"), F("comment", "comment"), @@ -337,10 +348,12 @@ "artist_credit.artists.artist.sort_name", "artist_credit.artists.join_phrase", "artist_credit.artists.name", + "artist_credit.gid", "artist_credit.name", "tags.count", "tags.tag.name", "tracks.length", + "tracks.medium.gid", "tracks.medium.cdtocs.id", "tracks.medium.release.artist_credit.artists.artist.comment", "tracks.medium.release.artist_credit.artists.artist.gid", @@ -348,6 +361,7 @@ "tracks.medium.release.artist_credit.artists.artist.sort_name", "tracks.medium.release.artist_credit.artists.join_phrase", "tracks.medium.release.artist_credit.artists.name", + "tracks.medium.release.artist_credit.gid", "tracks.medium.release.artist_credit.name", "tracks.medium.release.comment", "tracks.medium.release.country_dates.country.area.gid", @@ -370,7 +384,7 @@ F("mbid", "gid"), F("release", "name"), F("alias", "aliases.name"), - F("arid", "artist_credit.artists.artist.gid"), + F("arid", "artist_credit.artists.artist.gid", 
transformfunc=tfs.uuid_set_to_str_set), F("artist", "artist_credit.name"), F("artistname", "artist_credit.artists.artist.name"), F("asin", "asin.amazon_asin"), @@ -386,6 +400,7 @@ F("label", "labels.label.name"), F("lang", "language.iso_code_3"), F("mediums", "medium_count", transformfunc=tfs.integer_sum, trigger=False), + F("mediumid", "mediums.gid"), F("packaging", "packaging.name"), F("primarytype", "release_group.type.name"), F("quality", "quality"), @@ -412,6 +427,8 @@ "artist_credit.artists.artist.aliases.type.gid", "artist_credit.artists.artist.gid", "artist_credit.artists.artist.sort_name", + "artist_credit.artists.artist.comment", + "artist_credit.gid", "country_dates.country.area.gid", "country_dates.country.area.name", "country_dates.country.area.iso_3166_1_codes.code", @@ -424,6 +441,7 @@ "release_group.type.gid", "release_group.secondary_types.secondary_type.gid", "status.gid", + "packaging.gid", "language.iso_code_3", "tags.count"] ) @@ -433,7 +451,7 @@ F("mbid", "gid"), F("releasegroup", "name"), F("alias", "aliases.name"), - F("arid", "artist_credit.artists.artist.gid"), + F("arid", "artist_credit.artists.artist.gid", transformfunc=tfs.uuid_set_to_str_set), F("artist", "artist_credit.name"), F("artistname", "artist_credit.artists.artist.name"), F("creditname", "artist_credit.artists.name"), @@ -458,10 +476,12 @@ "artist_credit.artists.artist.aliases.primary_for_locale", "artist_credit.artists.artist.aliases.sort_name", "artist_credit.artists.artist.aliases.type.id", + "artist_credit.artists.artist.aliases.type.gid", "artist_credit.artists.artist.aliases.type.name", "artist_credit.artists.artist.gid", "artist_credit.artists.artist.sort_name", "artist_credit.artists.artist.comment", + "artist_credit.gid", "tags.count", "type.gid", "releases.status.gid", "secondary_types.secondary_type.gid" @@ -501,25 +521,25 @@ F("url", "url"), F("relationtype", ["artist_links.link.link_type.name", "release_links.link.link_type.name"]), - F("targetid", 
["artist_links.artist.gid", - "release_links.release.gid"]), + F("targetid", ["artist_links.entity0.gid", + "release_links.entity0.gid"]), F("targettype", ["artist_links.__tablename__", "release_links.__tablename__"], transformfunc=tfs.url_type), ], 1.5, convert.convert_url, - extrapaths=["artist_links.artist.gid", - "artist_links.artist.name", - "artist_links.artist.comment", - "artist_links.artist.sort_name", + extrapaths=["artist_links.entity0.gid", + "artist_links.entity0.name", + "artist_links.entity0.comment", + "artist_links.entity0.sort_name", "artist_links.link.link_type.name", "artist_links.link.link_type.gid", "artist_links.link.attributes.attribute_type.name", "artist_links.link.attributes.attribute_type.gid", - "release_links.release.gid", - "release_links.release.name", - "release_links.release.comment", + "release_links.entity0.gid", + "release_links.entity0.name", + "release_links.entity0.comment", "release_links.link.link_type.name", "release_links.link.link_type.gid", "release_links.link.attributes.attribute_type.name", @@ -532,14 +552,14 @@ F("mbid", "gid"), F("work", "name"), F("alias", "aliases.name"), - F("arid", "artist_links.artist.gid"), - F("artist", "artist_links.artist.name"), + F("arid", "artist_links.entity0.gid", transformfunc=tfs.uuid_set_to_str_set), + F("artist", "artist_links.entity0.name"), F("comment", "comment"), F("iswc", "iswcs.iswc"), F("lang", "languages.language.iso_code_3"), - F("recording", "recording_links.recording.name"), - F("recording_count", "recording_count", transformfunc=tfs.integer_sum, trigger=False), - F("rid", "recording_links.recording.gid"), + F("recording", "recording_links.entity0.name"), + F("recording_count", "recording_links.entity0.gid", transformfunc=tfs.integer_count_all, trigger=False), + F("rid", "recording_links.entity0.gid"), F("tag", "tags.tag.name"), F("type", "type.name") ], @@ -550,6 +570,8 @@ "aliases.sort_name", "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", 
"aliases.end_date", + "artist_links.entity0.sort_name", + "artist_links.entity0.comment", "artist_links.link.link_type.name", "artist_links.link.link_type.gid", "artist_links.link.attributes.attribute_type.name", @@ -558,7 +580,7 @@ "recording_links.link.link_type.gid", "recording_links.link.attributes.attribute_type.name", "recording_links.link.attributes.attribute_type.gid", - "recording_links.recording.video", + "recording_links.entity0.video", "tags.count", "type.gid"] ) @@ -611,16 +633,16 @@ def generate_update_map(): for core_name, entity in SCHEMA.items(): # Entity itself: # TODO(roman): See if the line below is necessary, if there is a better way to implement this. - mapped_table = class_mapper(entity.model).mapped_table.name - core_map[mapped_table] = core_name - paths[mapped_table].add((core_name, None)) - models[mapped_table] = entity.model + table_name = class_mapper(entity.model).persist_selectable.name + core_map[table_name] = core_name + paths[table_name].add((core_name, None)) + models[table_name] = entity.model # Related tables: for path in unique_split_paths([path for field in entity.fields for path in field.paths if field.trigger] + [path for path in entity.extrapaths or []]): model = last_model_in_path(entity.model, path) if model is not None: - name = class_mapper(model).mapped_table.name + name = class_mapper(model).persist_selectable.name paths[name].add((core_name, path)) if name not in models: models[name] = model diff --git a/sir/schema/modelext.py b/sir/schema/modelext.py index 69a91d53..416b627b 100644 --- a/sir/schema/modelext.py +++ b/sir/schema/modelext.py @@ -5,41 +5,55 @@ that are used in SIR. 
""" from mbdata.models import (Annotation, Area, Artist, ArtistAlias, Event, - Instrument, Label, LinkAttribute, LinkAttributeType, + Instrument, Label, LinkAttribute, LinkRecordingWork, Medium, MediumCDTOC, Place, Recording, Release, ReleaseGroup, ReleaseLabel, ReleaseRaw, ReleaseTag, Series, Work, URL) -from sqlalchemy import exc as sa_exc, func, select +from sqlalchemy import func, select from sqlalchemy.orm import relationship, column_property from sqlalchemy.sql.expression import and_ -from warnings import simplefilter - -# Ignore SQLAlchemy's warnings that we're overriding some attributes -simplefilter(action="ignore", category=sa_exc.SAWarning) class CustomAnnotation(Annotation): - areas = relationship("AreaAnnotation") - artists = relationship("ArtistAnnotation") - events = relationship("EventAnnotation") - instruments = relationship("InstrumentAnnotation") - labels = relationship("LabelAnnotation") - places = relationship("PlaceAnnotation") - recordings = relationship("RecordingAnnotation") - releases = relationship("ReleaseAnnotation") - release_groups = relationship("ReleaseGroupAnnotation") - series = relationship("SeriesAnnotation") - works = relationship("WorkAnnotation") + areas = relationship("AreaAnnotation", viewonly=True) + artists = relationship("ArtistAnnotation", viewonly=True) + events = relationship("EventAnnotation", viewonly=True) + instruments = relationship("InstrumentAnnotation", viewonly=True) + labels = relationship("LabelAnnotation", viewonly=True) + places = relationship("PlaceAnnotation", viewonly=True) + recordings = relationship("RecordingAnnotation", viewonly=True) + releases = relationship("ReleaseAnnotation", viewonly=True) + release_groups = relationship("ReleaseGroupAnnotation", viewonly=True) + series = relationship("SeriesAnnotation", viewonly=True) + works = relationship("WorkAnnotation", viewonly=True) class CustomArea(Area): - aliases = relationship("AreaAlias") - area_links = relationship("LinkAreaArea", - 
primaryjoin="Area.id == LinkAreaArea.entity1_id") - tags = relationship("AreaTag") - place_count = column_property(select([func.count(Place.id)]).where(Place.area_id == Area.id)) - label_count = column_property(select([func.count(Label.id)]).where(Label.area_id == Area.id)) - artist_count = column_property(select([func.count(Artist.id)]).where(Artist.area_id == Area.id)) + aliases = relationship("AreaAlias", viewonly=True) + area_links = relationship( + "LinkAreaArea", + primaryjoin="Area.id == LinkAreaArea.entity1_id", + viewonly=True + ) + tags = relationship("AreaTag", viewonly=True) + place_count = column_property( + select(func.count(Place.id)). + where(Place.area_id == Area.id). + correlate_except(Place). + scalar_subquery() + ) + label_count = column_property( + select(func.count(Label.id)). + where(Label.area_id == Area.id). + correlate_except(Label). + scalar_subquery() + ) + artist_count = column_property( + select(func.count(Artist.id)). + where(Artist.area_id == Area.id). + correlate_except(Artist). + scalar_subquery() + ) class CustomArtist(Artist): @@ -47,99 +61,119 @@ class CustomArtist(Artist): begin_area = relationship('CustomArea', foreign_keys=[Artist.begin_area_id]) end_area = relationship('CustomArea', foreign_keys=[Artist.end_area_id]) - tags = relationship('ArtistTag') - artist_credit_names = relationship("ArtistCreditName", innerjoin=True) - primary_aliases = column_property(select( - [func.array_agg(ArtistAlias.name)]).where( - and_(ArtistAlias.artist_id == Artist.id, - ArtistAlias.primary_for_locale == True))) + tags = relationship('ArtistTag', viewonly=True) + artist_credit_names = relationship("ArtistCreditName", innerjoin=True, + viewonly=True) + primary_aliases = column_property( + select(func.array_agg(ArtistAlias.name)). + where(and_( + ArtistAlias.artist_id == Artist.id, + ArtistAlias.primary_for_locale == True + )). + correlate_except(ArtistAlias). 
+ scalar_subquery() + ) class CustomArtistAlias(ArtistAlias): artist = relationship('Artist', foreign_keys=[ArtistAlias.artist_id], - innerjoin=True, backref="aliases") + innerjoin=True, backref="aliases", viewonly=True) class CustomEvent(Event): # still need to allow searching with place/area/artist aliases - aliases = relationship("EventAlias") - place_links = relationship("LinkEventPlace") - area_links = relationship("LinkAreaEvent") - artist_links = relationship("LinkArtistEvent") - tags = relationship("EventTag") + aliases = relationship("EventAlias", viewonly=True) + place_links = relationship("LinkEventPlace", viewonly=True) + area_links = relationship("LinkAreaEvent", viewonly=True) + artist_links = relationship("LinkArtistEvent", viewonly=True) + tags = relationship("EventTag", viewonly=True) class CustomInstrument(Instrument): - aliases = relationship("InstrumentAlias") - tags = relationship("InstrumentTag") + aliases = relationship("InstrumentAlias", viewonly=True) + tags = relationship("InstrumentTag", viewonly=True) class CustomLabel(Label): - aliases = relationship("LabelAlias") + aliases = relationship("LabelAlias", viewonly=True) area = relationship("CustomArea", foreign_keys=[Label.area_id]) - tags = relationship("LabelTag") - release_count = column_property(select([func.count(ReleaseLabel.id)]).where(ReleaseLabel.label_id == Label.id)) + tags = relationship("LabelTag", viewonly=True) + release_count = column_property( + select(func.count(ReleaseLabel.id)). + where(ReleaseLabel.label_id == Label.id). + correlate_except(ReleaseLabel). 
+ scalar_subquery() + ) class CustomMediumCDToc(MediumCDTOC): medium = relationship('Medium', foreign_keys=[MediumCDTOC.medium_id], - innerjoin=True, backref="cdtocs") + innerjoin=True, backref="cdtocs", viewonly=True) class CustomPlace(Place): area = relationship("CustomArea", foreign_keys=[Place.area_id]) - aliases = relationship("PlaceAlias") + aliases = relationship("PlaceAlias", viewonly=True) class CustomRecording(Recording): - aliases = relationship("RecordingAlias") - first_release_date = relationship("RecordingFirstReleaseDate") - tags = relationship("RecordingTag") + aliases = relationship("RecordingAlias", viewonly=True) + first_release_date = relationship("RecordingFirstReleaseDate", viewonly=True) + tags = relationship("RecordingTag", viewonly=True) class CustomReleaseGroup(ReleaseGroup): - aliases = relationship("ReleaseGroupAlias") - first_release_date = relationship("ReleaseGroupMeta") - releases = relationship("Release") - tags = relationship("ReleaseGroupTag") - release_count = column_property(select([func.count(Release.id)]).where(Release.release_group_id == ReleaseGroup.id)) + aliases = relationship("ReleaseGroupAlias", viewonly=True) + first_release_date = relationship("ReleaseGroupMeta", viewonly=True) + releases = relationship("Release", viewonly=True) + tags = relationship("ReleaseGroupTag", viewonly=True) + release_count = column_property( + select(func.count(Release.id)). + where(Release.release_group_id == ReleaseGroup.id). + correlate_except(Release). + scalar_subquery() + ) class CustomRelease(Release): - aliases = relationship("ReleaseAlias") - asin = relationship("ReleaseMeta") - medium_count = column_property(select([func.count(Medium.id)]).where(Medium.release_id == Release.id)) + aliases = relationship("ReleaseAlias", viewonly=True) + asin = relationship("ReleaseMeta", viewonly=True) + medium_count = column_property( + select(func.count(Medium.id)). + where(Medium.release_id == Release.id). + correlate_except(Medium). 
+ scalar_subquery() + ) class CustomReleaseRaw(ReleaseRaw): - discids = relationship("CDTOCRaw") + discids = relationship("CDTOCRaw", viewonly=True) class CustomReleaseTag(ReleaseTag): release = relationship('Release', foreign_keys=[ReleaseTag.release_id], - innerjoin=True, backref="tags") + innerjoin=True, backref="tags", viewonly=True) class CustomSeries(Series): - aliases = relationship("SeriesAlias") - tags = relationship("SeriesTag") + aliases = relationship("SeriesAlias", viewonly=True) + tags = relationship("SeriesTag", viewonly=True) class CustomWork(Work): - aliases = relationship("WorkAlias") - artist_links = relationship("LinkArtistWork") - tags = relationship("WorkTag") - languages = relationship("WorkLanguage") - recording_links = relationship("LinkRecordingWork") - recording_count = column_property(select([func.count(LinkRecordingWork.id)]).where(LinkRecordingWork.work_id == Work.id)) + aliases = relationship("WorkAlias", viewonly=True) + artist_links = relationship("LinkArtistWork", viewonly=True) + tags = relationship("WorkTag", viewonly=True) + languages = relationship("WorkLanguage", viewonly=True) + recording_links = relationship("LinkRecordingWork", viewonly=True) class CustomURL(URL): - artist_links = relationship("LinkArtistURL") - release_links = relationship("LinkReleaseURL") + artist_links = relationship("LinkArtistURL", viewonly=True) + release_links = relationship("LinkReleaseURL", viewonly=True) class CustomLinkAttribute(LinkAttribute): link = relationship('Link', foreign_keys=[LinkAttribute.link_id], innerjoin=True, - backref="attributes") + backref="attributes", viewonly=True) diff --git a/sir/schema/queryext.py b/sir/schema/queryext.py index db6b8471..eedeae1e 100644 --- a/sir/schema/queryext.py +++ b/sir/schema/queryext.py @@ -25,7 +25,8 @@ def filter_valid_annotations(query): - # TODO: Document this. What's going on in this filter? 
+ # Skip all annotations for an entity except the last + # since all others are no longer current queries = [Query(func.max(getattr(m, "annotation_id"))). group_by( getattr(m, diff --git a/sir/schema/searchentities.py b/sir/schema/searchentities.py index 6b97d5b9..47d7d1ae 100644 --- a/sir/schema/searchentities.py +++ b/sir/schema/searchentities.py @@ -1,5 +1,7 @@ # Copyright (c) 2014, 2015 Lukas Lalinsky, Wieland Hoffmann # License: MIT, see LICENSE for details +from uuid import UUID + from sir import config from sir.querying import iterate_path_values from collections import defaultdict @@ -9,11 +11,11 @@ from xml.etree.cElementTree import tostring except ImportError: from xml.etree.ElementTree import tostring -from sqlalchemy.orm import class_mapper, Load +from sqlalchemy.orm import class_mapper, Load, raiseload, defer from sqlalchemy.orm.attributes import InstrumentedAttribute from sqlalchemy.orm.descriptor_props import CompositeProperty from sqlalchemy.orm.interfaces import ONETOMANY, MANYTOONE -from sqlalchemy.orm.properties import RelationshipProperty +from sqlalchemy.orm.properties import RelationshipProperty, ColumnProperty from sqlalchemy.orm.query import Query @@ -67,17 +69,29 @@ def merge_paths(field_paths): def defer_everything_but(mapper, load, *columns): primary_keys = [c.name for c in mapper.primary_key] + columns_to_keep = set(columns) + defers = [] for prop in mapper.iterate_properties: - if hasattr(prop, "columns"): - key = prop.key - if (key not in columns and key[:-3] not in columns and - key[-3:] != "_id" and key != "position" and - key not in primary_keys): - # We need the _id columns for subqueries and joins - # Position is needed because sqla automatically orders by - # artist_credit_name.position - logger.debug("Deferring %s on %s", key, mapper) - load.defer(key) + if isinstance(prop, CompositeProperty): + continue + key = prop.key + if ( + key not in columns_to_keep + and key[:-3] not in columns_to_keep + and key[-3:] != "_id" + and key 
!= "position" and + key not in primary_keys + ): + # We need the _id columns for subqueries and joins + # Position is needed because sqla automatically orders by + # artist_credit_name.position + logger.debug("Deferring %s on %s", key, mapper) + if isinstance(prop, ColumnProperty): + defers.append(defer(prop, raiseload=True)) + else: + defers.append(raiseload(prop)) + if defers: + load = load.options(*defers) return load @@ -193,12 +207,12 @@ def build_entity_query(self): if isinstance(prop, RelationshipProperty): pk = column.mapper.primary_key[0].name if prop.direction == ONETOMANY: - load = load.subqueryload(pathelem) + load = load.subqueryload(column) elif prop.direction == MANYTOONE: - load = load.joinedload(pathelem) + load = load.joinedload(column) else: - load = load.defaultload(pathelem) - required_columns = current_merged_path.keys() + load = load.defaultload(column) + required_columns = list(current_merged_path.keys()) required_columns.append(pk) # Get the mapper class of the current element of the @@ -208,20 +222,19 @@ def build_entity_query(self): # For composite properties, load the columns they # consist of because eagerly loading a composite # property doesn't load automatically load them. - composite_columns = filter( + composite_columns = list(filter( partial(is_composite_column, model), - required_columns) + required_columns)) for composite_column in composite_columns: - composite_parts = (c.name for c in - getattr(model, - composite_column). 
- property.columns) + composite_parts = getattr(model, composite_column)\ + .property.columns logger.debug("Loading %s instead of %s on %s", composite_parts, composite_column, model) required_columns.remove(composite_column) - required_columns.extend(composite_parts) + for column in composite_parts: + required_columns.append(column.name) logger.debug("Loading only %s on %s", required_columns, @@ -255,13 +268,23 @@ def query_result_to_dict(self, obj): tempvals.add(value) if field.transformfunc is not None: tempvals = field.transformfunc(tempvals) - if isinstance(tempvals, set) and len(tempvals) == 1: + if (isinstance(tempvals, set) or isinstance(tempvals, list)) and len(tempvals) == 1: tempvals = tempvals.pop() if tempvals is not None and tempvals: - data[fieldname] = tempvals + if isinstance(tempvals, UUID): + new_tempvals = str(tempvals) + elif isinstance(tempvals, set) or isinstance(tempvals, list): + new_tempvals = list() + for tempval in tempvals: + if isinstance(tempval, UUID): + tempval = str(tempval) + new_tempvals.append(tempval) + else: + new_tempvals = tempvals + data[fieldname] = new_tempvals if (config.CFG.getboolean("sir", "wscompat") and self.compatconverter is not None): - data["_store"] = tostring(self.compatconverter(obj).to_etree()) + data["_store"] = str(tostring(self.compatconverter(obj).to_etree(), encoding='us-ascii'), encoding='us-ascii') return data diff --git a/sir/schema/transformfuncs.py b/sir/schema/transformfuncs.py index 2e9ee6e9..e74bd89f 100644 --- a/sir/schema/transformfuncs.py +++ b/sir/schema/transformfuncs.py @@ -1,5 +1,7 @@ # Copyright (c) 2014, 2015 Wieland Hoffmann # License: MIT, see LICENSE for details +from datetime import datetime + from sir.wscompat.convert import partialdate_to_string @@ -43,6 +45,10 @@ def fill_none(values): return values +def integer_count_all(records): + return int(len(records)) + + def integer_sum(values): return int(sum(values)) @@ -101,3 +107,12 @@ def boolean(values): def url_type(values): types = 
set(URL_LINK_TABLE_TO_ENTITYTYPE[value] for value in values) return types + + +def uuid_set_to_str_set(values): + return {str(x) for x in values} + + +def datetime_to_timestamp(values): + if values: + return int(values.pop().timestamp()) diff --git a/sir/sir.py b/sir/sir.py index 7d3d3e1b..ec1fe35b 100644 --- a/sir/sir.py +++ b/sir/sir.py @@ -1,2 +1,2 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # coding: utf-8 diff --git a/sir/trigger_generation/__init__.py b/sir/trigger_generation/__init__.py index 209c9da8..ab428c41 100644 --- a/sir/trigger_generation/__init__.py +++ b/sir/trigger_generation/__init__.py @@ -18,7 +18,7 @@ def generate_func(args): trigger_filename=args["trigger_file"], function_filename=args["function_file"], broker_id=args["broker_id"], - entities = args["entity_type"] or SCHEMA.keys() + entities=args["entity_type"] or SCHEMA.keys() ) @@ -76,7 +76,7 @@ def get_trigger_tables(entities): for entity in [SCHEMA[name] for name in entities]: # Entity table itself mapped_class = class_mapper(entity.model) - tables[mapped_class.mapped_table.name] = { + tables[mapped_class.persist_selectable.name] = { "model": entity.model, "is_direct": True, "has_gid": mapped_class.has_property('gid'), @@ -87,7 +87,7 @@ def get_trigger_tables(entities): for path in field.paths if field.trigger]): model = last_model_in_path(entity.model, path) if model is not None: - table_name = class_mapper(model).mapped_table.name + table_name = class_mapper(model).persist_selectable.name if table_name not in tables: tables[table_name] = { "model": model, @@ -106,7 +106,7 @@ def write_triggers(trigger_file, function_file, model, is_direct, has_gid, **gen """ # Mapper defines correlation of model class attributes to database table columns mapper = class_mapper(model) - table_name = mapper.mapped_table.name + table_name = mapper.persist_selectable.name fk_columns = [list(r.local_columns)[0].name for r in mapper.relationships if r.direction.name == 'MANYTOONE'] if is_direct: @@ 
-160,7 +160,7 @@ def write_header(f): :param file f: File to write the header into. """ f.write("-- Automatically generated, do not edit!\n") - f.write("\set ON_ERROR_STOP 1\n") + f.write("\\set ON_ERROR_STOP 1\n") f.write("BEGIN;\n\n") diff --git a/sir/trigger_generation/paths.py b/sir/trigger_generation/paths.py index 832625da..dacea08d 100644 --- a/sir/trigger_generation/paths.py +++ b/sir/trigger_generation/paths.py @@ -1,10 +1,11 @@ # Copyright (c) 2015, 2017 Wieland Hoffmann, MetaBrainz Foundation # License: MIT, see LICENSE for details +import mbdata from sqlalchemy.orm import class_mapper, aliased from sqlalchemy.orm.query import Query -from sqlalchemy.orm.attributes import InstrumentedAttribute from sqlalchemy.orm.properties import ColumnProperty, RelationshipProperty from sqlalchemy.orm.descriptor_props import CompositeProperty +from sqlalchemy_utils import get_mapper def generate_query(model, path, filters=None): @@ -16,13 +17,11 @@ def generate_query(model, path, filters=None): :param [sqlalchemy.sql.expression.BinaryExpression] filters: :rtype: A :ref:`sqlalchemy.orm.query.Query` object """ - - # We start with the query selecting the ids of the models we want to return. - query = Query(model.id) if path: + curr_model = aliased(model) # In case path is not blank, we need to alias the model id while joining # to prevent referencing the same table again. - query = (Query(aliased(model).id)) + query = Query(curr_model.id) # The below is a fix in case the same table is joined # multiple times. In that case, we alias everything except # the last path and then filter on the last path. @@ -30,10 +29,20 @@ def generate_query(model, path, filters=None): last_path = path_list[-1] path_list = path_list[:-1] if path_list: - query = query.join(*path_list, aliased=True) - # The last path is purposfully left out from being aliased to make it easier - # to contrunct filter conditions. 
- query = query.join(last_path, from_joinpoint=True) + for elem in path_list: + curr_elem = getattr(curr_model, elem) + curr_alias = aliased(curr_elem.mapper.class_) + query = query.join(curr_elem.of_type(curr_alias)) + curr_model = get_mapper(curr_alias).class_ + + # The last path is purposefully left out from being aliased to make it + # easier to construct filter conditions. + last_elem = getattr(curr_model, last_path) + query = query.join(last_elem) + else: + # We start with the query selecting the ids of the models we want to + # return. + query = Query(model.id) if filters is not None: if isinstance(filters, list): query = query.filter(*filters) @@ -77,7 +86,7 @@ def unique_split_paths(paths): for path in paths: splits = path.split(".") split_length = len(splits) - for i in xrange(1, split_length + 1): + for i in range(1, split_length + 1): join = ".".join(splits[:i]) if join not in seen_paths: seen_paths.add(join) @@ -99,7 +108,7 @@ def last_model_in_path(model, path): # If this is not a column managed by SQLAlchemy, ignore it # TODO(roman): Document when this might happen - if not isinstance(column, InstrumentedAttribute): + if isinstance(column, (str, mbdata.models.Base)): # Let's assume some other path also covers this table return None @@ -134,10 +143,10 @@ def second_last_model_in_path(model, path): :param str path: The path itself. 
""" if path is None: - return (None, None) + return None, None current_model = model new_path = ".".join(path.split(".")[:-1]) if new_path == "": - return (current_model, "") + return current_model, "" else: - return (last_model_in_path(model, new_path), new_path) + return last_model_in_path(model, new_path), new_path diff --git a/sir/util.py b/sir/util.py index f2f6b40d..957c4a33 100644 --- a/sir/util.py +++ b/sir/util.py @@ -1,11 +1,15 @@ # Copyright (c) 2014 Wieland Hoffmann # License: MIT, see LICENSE for details -from __future__ import absolute_import + import amqp import logging import pysolr -import urllib2 +import urllib.request, urllib.error, urllib.parse + +import requests +from requests.adapters import HTTPAdapter +from urllib3 import Retry from . import config from .schema import SCHEMA @@ -36,20 +40,30 @@ def __str__(self): self.actual) -def db_session(): +def engine(): """ - Creates a new :class:`sqla:sqlalchemy.orm.session.sessionmaker`. + Create a new :class:`sqla:sqlalchemy.engine.Engine`. - :rtype: :class:`sqla:sqlalchemy.orm.session.sessionmaker` + :rtype: :class:`sqla:sqlalchemy.engine.Engine` """ cget = partial(config.CFG.get, "database") cdict = {"username": cget("user")} for key in ["password", "host", "port"]: cdict[key] = cget(key) cdict["database"] = cget("dbname") - e = create_engine(URL("postgresql", **cdict), server_side_cursors=False) - S = sessionmaker(bind=e) - return S + return create_engine( + URL.create("postgresql", **cdict), + server_side_cursors=False + ) + + +def db_session(): + """ + Creates a new :class:`sqla:sqlalchemy.orm.session.sessionmaker`. + + :rtype: :class:`sqla:sqlalchemy.orm.session.sessionmaker` + """ + return sessionmaker(bind=engine()) @contextmanager @@ -70,6 +84,21 @@ def db_session_ctx(Session): session.close() +def get_requests_session(): + """ Configure a requests session for enforcing common retry strategy. 
""" + retry_strategy = Retry( + total=config.CFG.getint("solr", "retries", fallback=3), + status_forcelist=[429, 500, 502, 503, 504], + allowed_methods=["HEAD", "GET", "OPTIONS"], + backoff_factor=config.CFG.getint("solr", "backoff_factor", fallback=1), + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + http = requests.Session() + http.mount("https://", adapter) + http.mount("http://", adapter) + return http + + def solr_connection(core): """ Creates a :class:`solr:solr.Solr` connection for the core ``core``. @@ -83,12 +112,15 @@ def solr_connection(core): core_uri = solr_uri + "/" + core ping_uri = core_uri + "/admin/ping" + session = requests.Session() + logger.debug("Setting up a connection to %s", solr_uri) logger.debug("Pinging %s", ping_uri) - urllib2.urlopen(ping_uri) + response = session.get(ping_uri) + response.raise_for_status() logger.debug("Connection to the Solr core at %s", core_uri) - return pysolr.Solr(core_uri) + return pysolr.Solr(core_uri, session=session) def solr_version_check(core): @@ -103,7 +135,7 @@ def solr_version_check(core): """ expected_version = SCHEMA[core].version solr_uri = config.CFG.get("solr", "uri") - u = urllib2.urlopen("%s/%s/schema/version" % (solr_uri, core)) + u = urllib.request.urlopen("%s/%s/schema/version" % (solr_uri, core)) content = loads(u.read()) seen_version = content["version"] if not seen_version == expected_version: @@ -120,7 +152,7 @@ def check_solr_cores_version(cores): :raises sir.util.VersionMismatchException: If the version in Solr is different from the supported one """ - map(solr_version_check, cores) + list(map(solr_version_check, cores)) def create_amqp_connection(): diff --git a/sir/wscompat/convert.py b/sir/wscompat/convert.py index 884776a2..0ac8a281 100644 --- a/sir/wscompat/convert.py +++ b/sir/wscompat/convert.py @@ -1,11 +1,7 @@ # Copyright (c) Wieland Hoffmann # License: MIT, see LICENSE for details from sir.wscompat.modelfix import fix -try: - # Python 3 - from functools import 
lru_cache -except ImportError: - from backports.functools_lru_cache import lru_cache +from functools import lru_cache from mbrng import models fix() @@ -44,6 +40,10 @@ def partialdate_to_string(obj): return formatstring % tuple(args) +def partialdate_to_def_incomplete_date(obj): + return models.def_incomplete_date(partialdate_to_string(obj)) + + def datetime_to_string(obj): """ :type obj: :class:`datetime.time` @@ -83,10 +83,12 @@ def calculate_type(primary_type, secondary_types): def convert_relation(obj, direction="backward", **kwargs): - relation = models.relation(direction=direction, - type_id=obj.link.link_type.gid, - type_=obj.link.link_type.name, - **kwargs) + relation = models.relation( + direction=models.def_direction(direction), + type_id=str(obj.link.link_type.gid), + type_=obj.link.link_type.name, + **kwargs + ) if len(obj.link.attributes) > 0: attribute_list = models.attribute_listType() @@ -101,27 +103,27 @@ def convert_iso_3166_1_code_list(obj): """ :type obj: :class:`[mbdata.models.ISO31661]` """ - l = models.iso_3166_1_code_list() - [l.add_iso_3166_1_code(c.code) for c in obj] - return l + return models.iso_3166_1_code_list( + [models.def_iso_3166_1_code(c.code) for c in obj] + ) def convert_iso_3166_2_code_list(obj): """ :type obj: :class:`[mbdata.models.ISO31662]` """ - l = models.iso_3166_2_code_list() - [l.add_iso_3166_2_code(c.code) for c in obj] - return l + return models.iso_3166_2_code_list( + [models.def_iso_3166_2_code(c.code) for c in obj] + ) def convert_iso_3166_3_code_list(obj): """ :type obj: :class:`[mbdata.models.ISO31663]` """ - l = models.iso_3166_3_code_list() - [l.add_iso_3166_3_code(c.code) for c in obj] - return l + return models.iso_3166_3_code_list( + [models.def_iso_3166_3_code(c.code) for c in obj] + ) @lru_cache() @@ -129,12 +131,12 @@ def convert_area_inner(obj): """ :type obj: :class:`mbdata.models.Area` """ - area = models.def_area_element_inner(id=obj.gid, name=obj.name, + area = 
models.def_area_element_inner(id=str(obj.gid), name=obj.name, sort_name=obj.name) if obj.type is not None: area.set_type(obj.type.name) - area.set_type_id(obj.type.gid) + area.set_type_id(str(obj.type.gid)) lifespan = convert_life_span(obj.begin_date, obj.end_date, obj.ended) area.set_life_span(lifespan) @@ -146,7 +148,7 @@ def convert_area_simple(obj): """ :type obj: :class:`mbdata.models.Area` """ - area = models.def_area_element_inner(id=obj.gid, name=obj.name) + area = models.def_area_element_inner(id=str(obj.gid), name=obj.name) return area @@ -155,7 +157,7 @@ def convert_area_for_release_event(obj): """ :type obj: :class:`mbdata.models.Area` """ - area = models.def_area_element_inner(id=obj.gid, name=obj.name, + area = models.def_area_element_inner(id=str(obj.gid), name=obj.name, sort_name=obj.name, iso_3166_1_code_list=convert_iso_3166_1_code_list(obj.iso_3166_1_codes)) # noqa return area @@ -166,7 +168,7 @@ def convert_area_relation(obj): :type obj: :class:`mbdata.models.LinkAreaArea` """ relation = convert_relation(obj, - target=models.target(valueOf_=obj.area0.gid), + target=models.target(valueOf_=str(obj.area0.gid)), area=convert_area_inner(obj.area0)) return relation @@ -217,7 +219,7 @@ def convert_artist_credit(obj, include_aliases=True): """ :type obj: :class:`mbdata.models.ArtistCredit` """ - ac = models.artist_credit() + ac = models.artist_credit(id=str(obj.gid)) [ac.add_name_credit(convert_name_credit(nc, include_aliases)) for nc in obj.artists] return ac @@ -233,7 +235,7 @@ def convert_alias(obj): alias.set_valueOf_(obj.name) if obj.type is not None: alias.set_type(obj.type.name) - alias.set_type_id(obj.type.gid) + alias.set_type_id(str(obj.type.gid)) if obj.primary_for_locale: alias.set_primary("primary") if obj.begin_date_year is not None: @@ -278,7 +280,7 @@ def convert_attribute(obj): """ attribute = models.attributeType() attribute.set_valueOf_(obj.attribute_type.name) - attribute.set_type_id(obj.attribute_type.gid) + 
attribute.set_type_id(str(obj.attribute_type.gid)) return attribute @@ -287,7 +289,7 @@ def convert_artist_simple(obj, include_aliases=True): """ :type obj: :class:`sir.schema.modelext.CustomArtist` """ - artist = models.artist(id=obj.gid, name=obj.name) + artist = models.artist(id=str(obj.gid), name=obj.name) if obj.comment: artist.set_disambiguation(obj.comment) if obj.sort_name is not None: @@ -323,7 +325,7 @@ def convert_recording_simple(obj): """ :type obj: :class:`sir.schema.modelext.CustomRecording` """ - recording = models.recording(id=obj.gid, title=obj.name) + recording = models.recording(id=str(obj.gid), title=obj.name) if obj.video: recording.set_video("true") return recording @@ -353,9 +355,7 @@ def convert_ipi_list(obj): :type obj: :class:`[mbdata.models.ArtistIPI]` or :class:`[mbdata.models.LabelIPI]` """ - ipi_list = models.ipi_list() - [ipi_list.add_ipi(i.ipi) for i in obj] - return ipi_list + return models.ipi_list([models.def_ipi(i.ipi) for i in obj]) def convert_isni_list(obj): @@ -363,9 +363,7 @@ def convert_isni_list(obj): :type obj: :class:`[mbdata.models.ArtistISNI]` or :class:`[mbdata.models.LabelISNI]` """ - isni_list = models.isni_list() - [isni_list.add_isni(i.isni) for i in obj] - return isni_list + return models.isni_list([models.def_isni(i.isni) for i in obj]) def convert_isrc(obj): @@ -405,7 +403,7 @@ def convert_label_info(obj): li.set_catalog_number(obj.catalog_number) if obj.label is not None: label = models.label() - label.set_id(obj.label.gid) + label.set_id(str(obj.label.gid)) label.set_name(obj.label.name) li.set_label(label) return li @@ -443,10 +441,10 @@ def convert_life_span(begin_date, end_date, ended): lifespan = models.life_span() if begin_date.year is not None: - lifespan.set_begin(partialdate_to_string(begin_date)) + lifespan.set_begin(partialdate_to_def_incomplete_date(begin_date)) if end_date.year is not None: - lifespan.set_end(partialdate_to_string(end_date)) + 
lifespan.set_end(partialdate_to_def_incomplete_date(end_date)) if ended: lifespan.set_ended("true") @@ -460,7 +458,7 @@ def convert_medium(obj, disc_list=True): """ :type obj: :class:`mbdata.models.Medium` """ - m = models.medium() + m = models.medium(id=str(obj.gid)) if obj.format is not None: m.set_format(convert_format(obj.format)) @@ -469,7 +467,7 @@ def convert_medium(obj, disc_list=True): dl = models.disc_list(count=len(obj.cdtocs)) m.set_disc_list(dl) - tl = models.track_listType6(count=obj.track_count) + tl = models.data_track_list(count=obj.track_count) m.set_track_list(tl) return m @@ -484,7 +482,7 @@ def convert_medium_from_track(obj): m.set_position(medium.position) - track = models.def_track_data(id=obj.gid, length=obj.length, + track = models.def_track_data(id=str(obj.gid), length=obj.length, number=obj.number, title=obj.name) tl = m.track_list @@ -530,7 +528,7 @@ def convert_place(obj): """ :type obj: :class:`mbdata.models.Place` """ - place = models.place(id=obj.gid, name=obj.name) + place = models.place(id=str(obj.gid), name=obj.name) if obj.address: place.set_address(obj.address) @@ -552,7 +550,7 @@ def convert_place(obj): if obj.type is not None: place.set_type(obj.type.name) - place.set_type_id(obj.type.gid) + place.set_type_id(str(obj.type.gid)) return place @@ -561,7 +559,7 @@ def convert_place_simple(obj): """ :type obj: :class:`mbdata.models.Place` """ - place = models.place(id=obj.gid, name=obj.name) + place = models.place(id=str(obj.gid), name=obj.name) return place @@ -588,8 +586,10 @@ def convert_release_event(obj): """ :type obj: :class:`mbdata.models.ReleaseCountry` """ - re = models.release_event(area=convert_area_for_release_event(obj.country.area), # noqa - date=partialdate_to_string(obj.date)) + re = models.release_event( + area=convert_area_for_release_event(obj.country.area), # noqa + date=partialdate_to_def_incomplete_date(obj.date) + ) return re @@ -608,7 +608,7 @@ def convert_release_from_track(obj): """ medium = obj.medium 
rel = medium.release - release = models.release(id=rel.gid, title=rel.name) + release = models.release(id=str(rel.gid), title=rel.name) # The lucene search server skips this if the release artist credit is the # same as the recording artist credit, but we've already built it so just @@ -648,13 +648,13 @@ def convert_release_group_for_release(obj): """ :type obj: :class:`mbdata.models.ReleaseGroup` """ - rg = models.release_group(id=obj.gid, title=obj.name) + rg = models.release_group(id=str(obj.gid), title=obj.name) if obj.type is not None: rg.set_primary_type(convert_release_group_primary_type(obj.type)) type_ = calculate_type(obj.type, obj.secondary_types) rg.set_type(type_.name) - rg.set_type_id(type_.gid) + rg.set_type_id(str(type_.gid)) if len(obj.secondary_types) > 0: rg.set_secondary_type_list( @@ -670,13 +670,13 @@ def convert_release_group_simple(obj): """ :type obj: :class:`mbdata.models.ReleaseGroup` """ - rg = models.release_group(id=obj.gid, title=obj.name) + rg = models.release_group(id=str(obj.gid), title=obj.name) if obj.type is not None: rg.set_primary_type(convert_release_group_primary_type(obj.type)) type_ = calculate_type(obj.type, obj.secondary_types) rg.set_type(type_.name) - rg.set_type_id(type_.gid) + rg.set_type_id(str(type_.gid)) if len(obj.secondary_types) > 0: rg.set_secondary_type_list( @@ -706,7 +706,7 @@ def convert_release_list_for_release_groups(obj): release_list = models.release_list(count=len(obj)) for r in obj: release = models.release() - release.set_id(r.gid) + release.set_id(str(r.gid)) release.set_title(r.name) if r.status is not None: release.set_status(convert_release_status(r.status)) @@ -720,7 +720,7 @@ def convert_secondary_type(obj): :type obj: :class:`mbdata.models.ReleaseGroupSecondaryTypeJoin` """ secondary_type = models.secondary_type(valueOf_=obj.secondary_type.name) - secondary_type.set_id(obj.secondary_type.gid) + secondary_type.set_id(str(obj.secondary_type.gid)) return secondary_type @@ -746,7 +746,7 @@ def 
convert_one_annotation(obj, type_, entity): """ :type obj: :class:`mbdata.models.Annotation` """ - return models.annotation(type_, entity.gid, entity.name, obj.text) + return models.annotation(type_, str(entity.gid), entity.name, obj.text) def convert_annotation(obj): @@ -792,7 +792,7 @@ def convert_area(obj): :type obj: :class:`mbdata.models.Area` """ arealist = models.area_list() - area = models.def_area_element_inner(id=obj.gid, name=obj.name, + area = models.def_area_element_inner(id=str(obj.gid), name=obj.name, sort_name=obj.name) if len(obj.aliases) > 0: @@ -803,7 +803,7 @@ def convert_area(obj): if obj.type is not None: area.set_type(obj.type.name) - area.set_type_id(obj.type.gid) + area.set_type_id(str(obj.type.gid)) lifespan = convert_life_span(obj.begin_date, obj.end_date, obj.ended) area.set_life_span(lifespan) @@ -831,7 +831,7 @@ def convert_artist(obj): """ :type obj: :class:`sir.schema.modelext.CustomArtist` """ - artist = models.artist(id=obj.gid, name=obj.name, + artist = models.artist(id=str(obj.gid), name=obj.name, sort_name=obj.sort_name) if obj.comment: @@ -842,7 +842,7 @@ def convert_artist(obj): if obj.type is not None: artist.set_type(obj.type.name) - artist.set_type_id(obj.type.gid) + artist.set_type_id(str(obj.type.gid)) if obj.begin_area is not None: artist.set_begin_area(convert_area_inner(obj.begin_area)) @@ -850,7 +850,9 @@ def convert_artist(obj): if obj.area is not None: artist.set_area(convert_area_inner(obj.area)) if len(obj.area.iso_3166_1_codes) > 0: - artist.set_country(obj.area.iso_3166_1_codes[0].code) + artist.set_country( + models.def_iso_3166_1_code(obj.area.iso_3166_1_codes[0].code) + ) if obj.end_area is not None: artist.set_end_area(convert_area_inner(obj.end_area)) @@ -917,21 +919,21 @@ def convert_event(obj): """ :type obj: :class:`sir.schema.modelext.CustomEvent` """ - event = models.event(id=obj.gid, name=obj.name) + event = models.event(id=str(obj.gid), name=obj.name) if obj.comment: 
event.set_disambiguation(obj.comment) if obj.type is not None: event.set_type(obj.type.name) - event.set_type_id(obj.type.gid) + event.set_type_id(str(obj.type.gid)) lifespan = convert_life_span(obj.begin_date, obj.end_date, obj.ended) if lifespan.get_begin() is not None or lifespan.get_end() is not None: event.set_life_span(lifespan) if obj.time is not None: - event.set_time(datetime_to_string(obj.time)) + event.set_time(models.def_time(datetime_to_string(obj.time))) if obj.area_links: event.add_relation_list(convert_event_area_relation_list(obj.area_links)) @@ -955,7 +957,7 @@ def convert_instrument(obj): """ :type obj: :class:`sir.schema.modelext.CustomInstrument` """ - instrument = models.instrument(id=obj.gid, name=obj.name) + instrument = models.instrument(id=str(obj.gid), name=obj.name) if obj.comment: instrument.set_disambiguation(obj.comment) @@ -965,7 +967,7 @@ def convert_instrument(obj): if obj.type is not None: instrument.set_type(obj.type.name) - instrument.set_type_id(obj.type.gid) + instrument.set_type_id(str(obj.type.gid)) if len(obj.aliases) > 0: instrument.set_alias_list(convert_alias_list(obj.aliases)) @@ -980,18 +982,20 @@ def convert_label(obj): """ :type obj: :class:`sir.schema.modelext.CustomLabel` """ - label = models.label(id=obj.gid, name=obj.name, sort_name=obj.name) + label = models.label(id=str(obj.gid), name=obj.name, sort_name=obj.name) if obj.type is not None: label.set_type(obj.type.name) - label.set_type_id(obj.type.gid) + label.set_type_id(str(obj.type.gid)) if obj.area is not None: label.set_area(convert_area_inner(obj.area)) if len(obj.area.iso_3166_1_codes) > 0: - label.set_country(obj.area.iso_3166_1_codes[0].code) + label.set_country( + models.def_iso_3166_1_code(obj.area.iso_3166_1_codes[0].code) + ) - if obj.label_code > 0: + if obj.label_code is not None and obj.label_code > 0: label.set_label_code(obj.label_code) if len(obj.aliases) > 0: @@ -1020,14 +1024,17 @@ def convert_recording(obj): """ :type obj: 
:class:`sir.schema.modelext.CustomRecording` """ - recording = models.recording(id=obj.gid, title=obj.name, + recording = models.recording(id=str(obj.gid), title=obj.name, artist_credit=convert_artist_credit(obj.artist_credit)) # noqa if obj.comment: recording.set_disambiguation(obj.comment) - if obj.first_release is not None and obj.first_release.date is not None: - recording.set_first_release_date(partialdate_to_string(obj.first_release.date)) + if obj.first_release_date and len(obj.first_release_date) > 0\ + and obj.first_release_date[0].date: + recording.set_first_release_date( + partialdate_to_def_incomplete_date(obj.first_release_date[0].date) + ) recording.set_length(obj.length) @@ -1054,7 +1061,7 @@ def convert_release(obj): """ :type obj: :class:`mbdata.models.Release` """ - release = models.release(id=obj.gid, title=obj.name, + release = models.release(id=str(obj.gid), title=obj.name, artist_credit=convert_artist_credit(obj.artist_credit, include_aliases=False)) @@ -1104,13 +1111,13 @@ def convert_release(obj): if obj.script is not None: if tr is None: tr = models.text_representation() - tr.set_script(obj.script.iso_code) + tr.set_script(models.def_iso_15924(obj.script.iso_code)) if tr is not None: release.set_text_representation(tr) - if obj.meta.amazon_asin is not None: - release.set_asin(obj.meta.amazon_asin) + if obj.asin and len(obj.asin) > 0 and obj.asin[0].amazon_asin: + release.set_asin(obj.asin[0].amazon_asin) return release @@ -1121,18 +1128,23 @@ def convert_release_group(obj): """ rg = models.release_group(artist_credit=convert_artist_credit(obj.artist_credit), # noqa release_list=convert_release_list_for_release_groups(obj.releases), # noqa - id=obj.gid, title=obj.name) + id=str(obj.gid), title=obj.name) if obj.comment: rg.set_disambiguation(obj.comment) - if obj.meta.first_release_date: - rg.set_first_release_date(partialdate_to_string(obj.meta.first_release_date)) + if obj.first_release_date and len(obj.first_release_date) > 0\ + and 
obj.first_release_date[0].first_release_date: + rg.set_first_release_date( + partialdate_to_def_incomplete_date( + obj.first_release_date[0].first_release_date + ) + ) if obj.type is not None: rg.set_primary_type(convert_release_group_primary_type(obj.type)) type_ = calculate_type(obj.type, obj.secondary_types) rg.set_type(type_.name) - rg.set_type_id(type_.gid) + rg.set_type_id(str(type_.gid)) if len(obj.secondary_types) > 0: rg.set_secondary_type_list( @@ -1147,7 +1159,7 @@ def convert_release_group(obj): def convert_release_relation(obj): relation = convert_relation(obj) release_obj = obj.release - release = models.release(id=release_obj.gid, title=release_obj.name) + release = models.release(id=str(release_obj.gid), title=release_obj.name) if release_obj.comment: release.set_disambiguation(release_obj.comment) relation.set_release(release) @@ -1164,7 +1176,7 @@ def convert_series(obj): """ :param obj: :class:`mbdata.models.Series """ - series = models.series(id=obj.gid, name=obj.name) + series = models.series(id=str(obj.gid), name=obj.name) if obj.comment: series.set_disambiguation(obj.comment) @@ -1177,7 +1189,7 @@ def convert_series(obj): if obj.type: series.set_type(obj.type.name) - series.set_type_id(obj.type.gid) + series.set_type_id(str(obj.type.gid)) return series @@ -1197,7 +1209,7 @@ def convert_url(obj): """ :type obj: :class`mbdata_models.URL` """ - url = models.url(id=obj.gid, resource=obj.url) + url = models.url(id=str(obj.gid), resource=obj.url) if obj.artist_links: url.add_relation_list(convert_artist_relation_list(obj.artist_links)) if obj.release_links: @@ -1209,7 +1221,7 @@ def convert_work(obj): """ :type obj: :class:`sir.schema.modelext.CustomWork` """ - work = models.work(id=obj.gid, title=obj.name) + work = models.work(id=str(obj.gid), title=obj.name) if len(obj.aliases) > 0: work.set_alias_list(convert_alias_list(obj.aliases)) if len(obj.artist_links) > 0: @@ -1228,7 +1240,7 @@ def convert_work(obj): work.set_language('mul') if obj.type: 
work.set_type(obj.type.name) - work.set_type_id(obj.type.gid) + work.set_type_id(str(obj.type.gid)) if obj.iswcs: work.set_iswc_list(convert_iswc_list(obj.iswcs)) @@ -1239,28 +1251,28 @@ def convert_release_group_primary_type(obj): """ :type obj: :class:`mbdata.models.ReleaseGroupPrimaryType` """ - return models.primary_type(valueOf_=obj.name, id=obj.gid) + return models.primary_type(valueOf_=obj.name, id=str(obj.gid)) def convert_release_packaging(obj): """ :type obj: :class:`mbdata.models.ReleasePackaging` """ - return models.packaging(valueOf_=obj.name, id=obj.gid) + return models.packaging(valueOf_=obj.name, id=str(obj.gid)) def convert_release_status(obj): """ :type obj: :class:`mbdata.models.ReleaseStatus` """ - return models.status(valueOf_=obj.name, id=obj.gid) + return models.status(valueOf_=obj.name, id=str(obj.gid)) def convert_gender(obj): """ :type obj: :class:`mbdata.models.Gender` """ - return models.gender(valueOf_=obj.name.lower(), id=obj.gid) + return models.gender(valueOf_=obj.name.lower(), id=str(obj.gid)) def convert_format(obj): diff --git a/sql/CreateTriggers.sql b/sql/CreateTriggers.sql index 95d93939..1c72d766 100644 --- a/sql/CreateTriggers.sql +++ b/sql/CreateTriggers.sql @@ -621,9 +621,9 @@ CREATE TRIGGER search_recording_alias_delete BEFORE DELETE ON musicbrainz.record CREATE TRIGGER search_artist_credit_insert AFTER INSERT ON musicbrainz.artist_credit FOR EACH ROW EXECUTE PROCEDURE search_artist_credit_insert(); -CREATE TRIGGER search_artist_credit_update AFTER UPDATE OF name ON musicbrainz.artist_credit +CREATE TRIGGER search_artist_credit_update AFTER UPDATE OF gid, name ON musicbrainz.artist_credit FOR EACH ROW - WHEN ((OLD.name) IS DISTINCT FROM (NEW.name)) + WHEN ((OLD.gid, OLD.name) IS DISTINCT FROM (NEW.gid, NEW.name)) EXECUTE PROCEDURE search_artist_credit_update(); CREATE TRIGGER search_artist_credit_delete BEFORE DELETE ON musicbrainz.artist_credit @@ -654,9 +654,9 @@ CREATE TRIGGER search_track_delete BEFORE DELETE ON 
musicbrainz.track CREATE TRIGGER search_medium_insert AFTER INSERT ON musicbrainz.medium FOR EACH ROW EXECUTE PROCEDURE search_medium_insert(); -CREATE TRIGGER search_medium_update AFTER UPDATE OF format, position, release, track_count ON musicbrainz.medium +CREATE TRIGGER search_medium_update AFTER UPDATE OF format, gid, position, release, track_count ON musicbrainz.medium FOR EACH ROW - WHEN ((OLD.format, OLD.position, OLD.release, OLD.track_count) IS DISTINCT FROM (NEW.format, NEW.position, NEW.release, NEW.track_count)) + WHEN ((OLD.format, OLD.gid, OLD.position, OLD.release, OLD.track_count) IS DISTINCT FROM (NEW.format, NEW.gid, NEW.position, NEW.release, NEW.track_count)) EXECUTE PROCEDURE search_medium_update(); CREATE TRIGGER search_medium_delete BEFORE DELETE ON musicbrainz.medium @@ -819,9 +819,9 @@ CREATE TRIGGER search_language_delete BEFORE DELETE ON musicbrainz.language CREATE TRIGGER search_release_packaging_insert AFTER INSERT ON musicbrainz.release_packaging FOR EACH ROW EXECUTE PROCEDURE search_release_packaging_insert(); -CREATE TRIGGER search_release_packaging_update AFTER UPDATE OF name ON musicbrainz.release_packaging +CREATE TRIGGER search_release_packaging_update AFTER UPDATE OF gid, name ON musicbrainz.release_packaging FOR EACH ROW - WHEN ((OLD.name) IS DISTINCT FROM (NEW.name)) + WHEN ((OLD.gid, OLD.name) IS DISTINCT FROM (NEW.gid, NEW.name)) EXECUTE PROCEDURE search_release_packaging_update(); CREATE TRIGGER search_release_packaging_delete BEFORE DELETE ON musicbrainz.release_packaging diff --git a/test.sh b/test.sh new file mode 100755 index 00000000..913b8806 --- /dev/null +++ b/test.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# +# Run tests and return 0 if these passed successfully. 
+# +# Usage: +# ./test.sh +# Or: +# DOCKER_CMD='sudo podman' ./test.sh +# Or: +# DOCKER_COMPOSE_CMD='sudo docker compose' ./test.sh + +set -o errexit -o nounset + +cd "$(dirname "${BASH_SOURCE[0]}")/" + +SCRIPT_NAME=$(basename "$0") + +# Set Docker/Compose commands + +if [ -z ${DOCKER_CMD:+smt} ] +then +case "$OSTYPE" in + darwin*) # Mac OS X + DOCKER_CMD='docker' + ;; + linux*) + if groups | grep -Eqw 'docker|root' + then + DOCKER_CMD='docker' + elif groups | grep -Eqw 'sudo|wheel' + then + DOCKER_CMD='sudo docker' + else + echo >&2 "$SCRIPT_NAME: cannot set docker command: please either" + echo >&2 " * add the user '$USER' to the group 'docker' or 'sudo'" + echo >&2 " * or set the variable \$DOCKER_CMD" + exit 77 # EX_NOPERM + fi + ;; + *) + echo >&2 "$SCRIPT_NAME: cannot detect platform to set docker command" + echo >&2 "Try setting the variable \$DOCKER_CMD appropriately" + exit 71 # EX_OSERR + ;; + esac +fi + +DOCKER_COMPOSE_CMD=${DOCKER_COMPOSE_CMD:-${DOCKER_CMD} compose} + +# Run tests + +$DOCKER_COMPOSE_CMD -f docker/docker-compose.test.yml -p sir-test up -d musicbrainz_db +$DOCKER_COMPOSE_CMD -f docker/docker-compose.test.yml -p sir-test build +set +o errexit +$DOCKER_COMPOSE_CMD -f docker/docker-compose.test.yml -p sir-test run test \ + dockerize -wait tcp://musicbrainz_db:5432 -timeout 600s \ + bash -c "pytest --junitxml=/data/test_report.xml \ + --cov=sir \ + --cov-report xml:/data/coverage.xml \ + --cov-report html:/data/coverage-html \ + $*" +RET=$? 
+$DOCKER_COMPOSE_CMD -f docker/docker-compose.test.yml -p sir-test down +exit $RET diff --git a/test/helpers.py b/test/helpers.py deleted file mode 100644 index 76b23311..00000000 --- a/test/helpers.py +++ /dev/null @@ -1,2 +0,0 @@ -class Object(object): - pass \ No newline at end of file diff --git a/test/models.py b/test/models.py index bcc6e316..a1767ef8 100644 --- a/test/models.py +++ b/test/models.py @@ -1,7 +1,6 @@ from collections import namedtuple -from sqlalchemy import Column, ForeignKey, Integer -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import composite, relationship +from sqlalchemy import Column, ForeignKey, Integer, String +from sqlalchemy.orm import composite, relationship, declarative_base Base = declarative_base() @@ -19,7 +18,6 @@ class B(Base): foo = Column(Integer) c_id = Column('c', Integer, ForeignKey("table_c.id")) composite_column = composite(Comp, foo, c_id) - c = relationship("C") class C(Base): @@ -30,4 +28,16 @@ class C(Base): __tablename__ = "table_c" id = Column(Integer, primary_key=True) bar = Column(Integer) - bs = relationship("B") + bs = relationship("B", backref="c") + + +class D(Base): + __tablename__ = "table_d" + + pk1 = Column(Integer, primary_key=True) + pk2 = Column(Integer, primary_key=True) + foo = Column(String) + key = Column(String) + key2 = Column(String) + foo_id = Column(Integer) + position = Column(Integer) diff --git a/test/sql/annotation.sql b/test/sql/annotation.sql new file mode 100644 index 00000000..b7c4f04b --- /dev/null +++ b/test/sql/annotation.sql @@ -0,0 +1,37 @@ +INSERT INTO editor (id, name, password, privs, email, website, bio, member_since, + email_confirm_date, last_login_date, ha1) + VALUES (1, 'new_editor', '{CLEARTEXT}password', 1+8+32+512, 'test@email.com', 'http://test.website', + 'biography', '1989-07-23', '2005-10-20', '2013-04-05', 'aa550c5b01407ef1f3f0d16daf9ec3c8'); + +INSERT INTO area (id, gid, name, type) VALUES + (221, 
'8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1), + (222, '489ce91b-6658-3307-9877-795b68554c98', 'United States', 1); +INSERT INTO country_area (area) VALUES (221), (222); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'), (222, 'US'); + +INSERT INTO artist + (id, gid, name, sort_name, type, gender, area, + begin_area, end_area, + begin_date_year, begin_date_month, begin_date_day, + end_date_year, end_date_month, end_date_day, comment, + last_updated) + VALUES + (3, '745c079d-374e-4436-9448-da92dedef3ce', 'Test Artist', 'Artist, Test', 1, 1, 221, 221, 221, + 2008, 01, 02, 2009, 03, 04, 'Yet Another Test Artist', + '2009-07-09'); + +INSERT INTO artist (id, gid, name, sort_name) VALUES + (4, '945c079d-374e-4436-9448-da92dedef3cf', 'Minimal Artist', 'Minimal Artist'), + (5, 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958', 'Annotated Artist A', 'Annotated Artist A'), + (6, 'ca4c2228-227c-4904-932a-dff442c091ea', 'Annotated Artist B', 'Annotated Artist B'); + +UPDATE artist_meta SET rating=70, rating_count=4 WHERE id=3; + +INSERT INTO annotation (id, editor, text) VALUES (1, 1, 'Test annotation 1'); +INSERT INTO annotation (id, editor, text) VALUES (2, 1, 'Test annotation 2'); +INSERT INTO annotation (id, editor, text) VALUES (3, 1, 'Duplicate annotation'); +INSERT INTO annotation (id, editor, text) VALUES (4, 1, 'Duplicate annotation'); + +INSERT INTO artist_annotation (artist, annotation) VALUES (3, 1), (4, 2), (5, 3), (6, 4); + +INSERT INTO artist_gid_redirect VALUES ('a4ef1d08-962e-4dd6-ae14-e42a6a97fc11', 3); diff --git a/test/sql/area.sql b/test/sql/area.sql new file mode 100644 index 00000000..5419e27e --- /dev/null +++ b/test/sql/area.sql @@ -0,0 +1,16 @@ +INSERT INTO area (id, gid, name, type) VALUES + ( 13, '106e0bec-b638-3b37-b731-f53d507dc00e', 'Australia', 1), + ( 81, '85752fda-13c4-31a3-bee5-0e5cb1f51dad', 'Germany', 1), + (107, '2db42837-c832-3c27-b4a3-08198f75693c', 'Japan', 1), + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 
1), + (222, '489ce91b-6658-3307-9877-795b68554c98', 'United States', 1), + (241, '89a675c2-3e37-3518-b83c-418bad59a85a', 'Europe', 1), + (5126, '3f179da4-83c6-4a28-a627-e46b4a8ff1ed', 'Sydney', 3); +INSERT INTO country_area (area) VALUES ( 13), ( 81), (107), (221), (222), (241); +INSERT INTO iso_3166_1 (area, code) VALUES ( 13, 'AU'), ( 81, 'DE'), (107, 'JP'), (221, 'GB'), (222, 'US'), (241, 'XE'); + +INSERT INTO area_alias (id, name, sort_name, area, edits_pending) + VALUES (1, 'オーストラリア', 'オーストラリア', 13, 0); + +INSERT INTO link VALUES (118734, 356, NULL, NULL, NULL, NULL, NULL, NULL, 0, '2013-05-17 20:05:50.534145+00', FALSE); +INSERT INTO l_area_area VALUES (4892, 118734, 13, 5126, 0, '2013-05-24 20:32:44.702487+00', 0, '', ''); diff --git a/test/sql/artist.sql b/test/sql/artist.sql new file mode 100644 index 00000000..b7c4f04b --- /dev/null +++ b/test/sql/artist.sql @@ -0,0 +1,37 @@ +INSERT INTO editor (id, name, password, privs, email, website, bio, member_since, + email_confirm_date, last_login_date, ha1) + VALUES (1, 'new_editor', '{CLEARTEXT}password', 1+8+32+512, 'test@email.com', 'http://test.website', + 'biography', '1989-07-23', '2005-10-20', '2013-04-05', 'aa550c5b01407ef1f3f0d16daf9ec3c8'); + +INSERT INTO area (id, gid, name, type) VALUES + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1), + (222, '489ce91b-6658-3307-9877-795b68554c98', 'United States', 1); +INSERT INTO country_area (area) VALUES (221), (222); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'), (222, 'US'); + +INSERT INTO artist + (id, gid, name, sort_name, type, gender, area, + begin_area, end_area, + begin_date_year, begin_date_month, begin_date_day, + end_date_year, end_date_month, end_date_day, comment, + last_updated) + VALUES + (3, '745c079d-374e-4436-9448-da92dedef3ce', 'Test Artist', 'Artist, Test', 1, 1, 221, 221, 221, + 2008, 01, 02, 2009, 03, 04, 'Yet Another Test Artist', + '2009-07-09'); + +INSERT INTO artist (id, gid, name, sort_name) VALUES + (4, 
'945c079d-374e-4436-9448-da92dedef3cf', 'Minimal Artist', 'Minimal Artist'), + (5, 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958', 'Annotated Artist A', 'Annotated Artist A'), + (6, 'ca4c2228-227c-4904-932a-dff442c091ea', 'Annotated Artist B', 'Annotated Artist B'); + +UPDATE artist_meta SET rating=70, rating_count=4 WHERE id=3; + +INSERT INTO annotation (id, editor, text) VALUES (1, 1, 'Test annotation 1'); +INSERT INTO annotation (id, editor, text) VALUES (2, 1, 'Test annotation 2'); +INSERT INTO annotation (id, editor, text) VALUES (3, 1, 'Duplicate annotation'); +INSERT INTO annotation (id, editor, text) VALUES (4, 1, 'Duplicate annotation'); + +INSERT INTO artist_annotation (artist, annotation) VALUES (3, 1), (4, 2), (5, 3), (6, 4); + +INSERT INTO artist_gid_redirect VALUES ('a4ef1d08-962e-4dd6-ae14-e42a6a97fc11', 3); diff --git a/test/sql/cdstub.sql b/test/sql/cdstub.sql new file mode 100644 index 00000000..9a3f5029 --- /dev/null +++ b/test/sql/cdstub.sql @@ -0,0 +1,10 @@ +INSERT INTO release_raw (id, title, artist, added, last_modified, lookup_count, modify_count, source, barcode, comment) + VALUES (1, 'Test Stub', 'Test Artist', '2000-01-01 0:00', '2001-01-01 0:00', 10, 1, 0, '837101029192', 'this is a comment'); + +INSERT INTO track_raw (release, title, artist, sequence) + VALUES (1, 'Track title 1', '', 0); +INSERT INTO track_raw (release, title, artist, sequence) + VALUES (1, 'Track title 2', '', 1); + +INSERT INTO cdtoc_raw (release, discid, track_count, leadout_offset, track_offset) + VALUES (1, 'YfSgiOEayqN77Irs.VNV.UNJ0Zs-', 2, 20000, '{150,10000}'); diff --git a/test/sql/editor.sql b/test/sql/editor.sql new file mode 100644 index 00000000..9049b486 --- /dev/null +++ b/test/sql/editor.sql @@ -0,0 +1,47 @@ +INSERT INTO editor (id, name, password, privs, email, website, bio, member_since, + email_confirm_date, last_login_date, ha1) + VALUES (1, 'new_editor', '{CLEARTEXT}password', 1+8+32+512, 'test@email.com', 'http://test.website', + 'biography', 
'1989-07-23', '2005-10-20', '2013-04-05', 'aa550c5b01407ef1f3f0d16daf9ec3c8'), + (2, 'Alice', '{CLEARTEXT}secret1', 0, 'alice@example.com', 'http://example.com', + 'second biography', '2007-07-23', '2007-10-20', now(), 'e7f46e4f25ae38fcc952ef2b7edf0de9'), + (3, 'kuno', '{CLEARTEXT}byld', 0, 'kuno@example.com', 'http://frob.nl', + 'donation check test user', '2010-03-25', '2010-03-25', now(), '00863261763ed5029ea051f87c4bbec3'); + +INSERT INTO editor_preference (editor, name, value) + VALUES (1, 'datetime_format', '%m/%d/%Y %H:%M:%S'), + (1, 'timezone', 'UTC'), + (2, 'datetime_format', '%m/%d/%Y %H:%M:%S'), + (2, 'timezone', 'UTC'), + (2, 'public_ratings', '0'), + (2, 'public_tags', '0'); + +INSERT INTO artist (id, gid, name, sort_name) + VALUES (1, 'a9d99e40-72d7-11de-8a39-0800200c9a66', 'Name', 1); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Name', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) + VALUES (1, 1, 'Name', 0, ''); + +INSERT INTO release_group (id, gid, name, artist_credit) + VALUES (1, '3b4faa80-72d9-11de-8a39-0800200c9a66', 'Arrival', 1); + +INSERT INTO release (id, gid, name, artist_credit, release_group) + VALUES (1, 'f34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1), + (2, 'a34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1), + (3, 'b34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1), + (4, 'c34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1); + +INSERT INTO editor_subscribe_editor (editor, subscribed_editor, last_edit_sent) + VALUES (2, 1, 3); + +INSERT INTO editor_collection (id, gid, editor, name, public, type) + VALUES (1, 'f34c079d-374e-4436-9448-da92dedef3ce', 2, 'kunos collection', FALSE, 1), + (2, 'd34c079d-374e-4436-9448-da92dedef3ce', 1, 'new_collection', TRUE, 1); + +INSERT INTO editor_collection_release (collection, release) + VALUES (1, 1), (1, 2); + +INSERT INTO annotation (editor) VALUES (2); -- so Alice is 
not fully deleted + +INSERT INTO old_editor_name (name) VALUES ('im_gone'); diff --git a/test/sql/event.sql b/test/sql/event.sql new file mode 100644 index 00000000..49579c96 --- /dev/null +++ b/test/sql/event.sql @@ -0,0 +1,23 @@ +INSERT INTO area (id, gid, name, type) +VALUES (3983, 'b9576171-3434-4d1b-8883-165ed6e65d2f', 'Kensington and Chelsea', 2) + , (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1) + , (38, '71bbafaa-e825-3e15-8ca9-017dcad1748b', 'Canada', 1); + +INSERT INTO country_area (area) VALUES ( 38), (221); +INSERT INTO iso_3166_1 (area, code) VALUES ( 38, 'CA'), (221, 'GB'); + +INSERT INTO place (id, gid, name, type, address, area, coordinates, begin_date_year) + VALUES (729, '4352063b-a833-421b-a420-e7fb295dece0', 'Royal Albert Hall', 2, 'Kensington Gore, London SW7 2AP', 3983, '(51.50105,-0.17748)', 1871); + +INSERT INTO event (id, gid, name, begin_date_year, begin_date_month, begin_date_day, end_date_year, end_date_month, end_date_day, time, type, cancelled, setlist, comment, ended) + VALUES (59357, 'ca1d24c1-1999-46fd-8a95-3d4108df5cb2', 'BBC Open Music Prom', 2022, 9, 1, 2022, 9, 1, '19:30:00', 1, 'f', NULL, '2022, Prom 60', 't'); + +INSERT INTO artist (id, gid, name, sort_name, begin_date_year, begin_date_month, type, area, gender) + VALUES (1294951, 'f72a5b32-449f-4090-9a2a-ebbdd8d3c2e5', 'Kwamé Ryan', 'Ryan, Kwamé', 1970, NULL, 1, 38, 1) + , (831634, 'dfeba5ea-c967-4ad2-9cdd-3cffb4320143', 'BBC Concert Orchestra', 'BBC Concert Orchestr', 1952, 1, 5, 221, NULL); + +INSERT INTO link (id, link_type) VALUES (199471, 794), (199854, 807), (199871, 806); + +INSERT INTO l_event_place (id, link, entity0, entity1) VALUES (51345, 199471, 59357, 729); + +INSERT INTO l_artist_event (id, link, entity0, entity1) VALUES (160762, 199854, 831634, 59357), (160763, 199871, 1294951, 59357); diff --git a/test/sql/instrument.sql b/test/sql/instrument.sql new file mode 100644 index 00000000..4de8b19f --- /dev/null +++ b/test/sql/instrument.sql @@ 
-0,0 +1,41 @@ +INSERT INTO editor (id, name, password, privs, email, website, bio, member_since, + email_confirm_date, last_login_date, ha1) + VALUES (1, 'new_editor', '{CLEARTEXT}password', 1+8+32+512, 'test@email.com', 'http://test.website', + 'biography', '1989-07-23', '2005-10-20', '2013-04-05', 'aa550c5b01407ef1f3f0d16daf9ec3c8'); + +INSERT INTO instrument + (id, gid, name, type, comment, + description, last_updated) + VALUES + (3, '745c079d-374e-4436-9448-da92dedef3ce', 'Test Instrument', 2, + 'Yet Another Test Instrument', 'This is a description!', '2009-07-09'); + +INSERT INTO instrument (id, gid, name) + VALUES (4, '945c079d-374e-4436-9448-da92dedef3cf', 'Minimal Instrument'), + (5, 'a56d18ae-485f-5547-a559-eba3efef04d0', 'Minimal Instrument 2'); + +INSERT INTO artist (id, gid, name, sort_name) VALUES + (1, '5441c29d-3602-4898-b1a1-b77fa23b8e50', 'David Bowie', 'David Bowie'); + + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'David Bowie', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, position, artist, name) VALUES (1, 0, 1, 'David Bowie'); + +INSERT INTO recording (id, gid, name, artist_credit, length) VALUES + (1, '722190f8-f718-482f-a8bc-a8d479426a30', '“Heroes”', 1, 370000); + +INSERT INTO annotation (id, editor, text) VALUES (1, 1, 'Test annotation 1'); +INSERT INTO annotation (id, editor, text) VALUES (2, 1, 'Test annotation 2'); + +INSERT INTO instrument_annotation (instrument, annotation) VALUES (3, 1); +INSERT INTO instrument_annotation (instrument, annotation) VALUES (4, 2); + +INSERT INTO instrument_gid_redirect VALUES ('a4ef1d08-962e-4dd6-ae14-e42a6a97fc11', 3); + +INSERT INTO link (id, link_type, attribute_count) VALUES (1, 148, 2); +INSERT INTO link_attribute (link, attribute_type) VALUES (1, (SELECT id FROM link_attribute_type WHERE gid = '945c079d-374e-4436-9448-da92dedef3cf')); +INSERT INTO link_attribute_credit (link, attribute_type, credited_as) VALUES (1, (SELECT id 
FROM link_attribute_type WHERE gid = '945c079d-374e-4436-9448-da92dedef3cf'), 'blah instrument'); +INSERT INTO link_attribute (link, attribute_type) VALUES (1, (SELECT id FROM link_attribute_type WHERE gid = 'a56d18ae-485f-5547-a559-eba3efef04d0')); +INSERT INTO link_attribute_credit (link, attribute_type, credited_as) VALUES (1, (SELECT id FROM link_attribute_type WHERE gid = 'a56d18ae-485f-5547-a559-eba3efef04d0'), 'stupid instrument'); +INSERT INTO l_artist_recording (id, link, entity0, entity1) VALUES (4, 1, 1, 1); diff --git a/test/sql/label.sql b/test/sql/label.sql new file mode 100644 index 00000000..ba0dbf83 --- /dev/null +++ b/test/sql/label.sql @@ -0,0 +1,20 @@ +INSERT INTO area (id, gid, name, type, begin_date_year, end_date_year, ended) VALUES + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1, NULL, NULL, 'f'), + (243, '32f90933-b4b4-3248-b98c-e573d5329f57', 'Soviet Union', 1, 1922, 1991, 't'); +INSERT INTO country_area (area) VALUES (221), (243); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'), (243, 'SU'); + +INSERT INTO label (id, gid, name, type, area, label_code, + begin_date_year, begin_date_month, begin_date_day, + end_date_year, end_date_month, end_date_day, comment) + VALUES (3, '46f0f4cd-8aab-4b33-b698-f459faf64190', 'Warp Records', 3, 221, 2070, 1989, 02, 03, 2008, 05, 19, 'Sheffield based electronica label'), + (135155, '449ddb7e-4e92-41eb-a683-5bbcc7fd7d4a', 'U.S.S.R. 
Ministry of Culture', NULL, 243, NULL, 1953, 3, 15, 1991, 11, 27, ''); + +INSERT INTO label (id, gid, name) + VALUES (2, 'f2a9a3c0-72e3-11de-8a39-0800200c9a66', 'To Merge'); + +INSERT INTO editor (id, name, password, ha1) VALUES (1, 'editor', '{CLEARTEXT}pass', '3f3edade87115ce351d63f42d92a1834'); +INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 1, 'Label Annotation', 'Changes'); +INSERT INTO label_annotation (label, annotation) VALUES (3, 1); + +INSERT INTO label_gid_redirect (gid, new_id) VALUES ('efdf3fe9-c293-4acd-b4b2-8d2a7d4f9592', 3); diff --git a/test/sql/place.sql b/test/sql/place.sql new file mode 100644 index 00000000..52a074d3 --- /dev/null +++ b/test/sql/place.sql @@ -0,0 +1,11 @@ +INSERT INTO area (id, gid, name, type) VALUES + (241, '89a675c2-3e37-3518-b83c-418bad59a85a', 'Europe', 1), + (222, '489ce91b-6658-3307-9877-795b68554c98', 'United States', 1); + +INSERT INTO country_area (area) VALUES (222), (241); +INSERT INTO iso_3166_1 (area, code) VALUES (222, 'US'), (241, 'XE'); + +INSERT INTO place (id, gid, name, type, address, area, coordinates, comment, edits_pending, last_updated, begin_date_year, begin_date_month, begin_date_day, end_date_year, end_date_month, end_date_day, ended) VALUES (1, 'df9269dd-0470-4ea2-97e8-c11e46080edd', 'A Test Place', 2, 'An Address', 241, '(0.323,1.234)', 'A PLACE!', 0, '2013-09-07 14:40:22.041309+00', 2013, NULL, NULL, NULL, NULL, NULL, '0'); + +INSERT INTO place_alias (id, name, sort_name, place, edits_pending) + VALUES (1, 'A Test Alias', 'A Test Alias', 1, 0); diff --git a/test/sql/recording.sql b/test/sql/recording.sql new file mode 100644 index 00000000..dfb478c3 --- /dev/null +++ b/test/sql/recording.sql @@ -0,0 +1,44 @@ +INSERT INTO artist (id, gid, name, sort_name) + VALUES (1, '945c079d-374e-4436-9448-da92dedef3cf', 'Artist', 'Artist'); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Artist', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name 
(artist_credit, position, artist, name, join_phrase) + VALUES (1, 0, 1, 'Artist', ''); + +INSERT INTO recording (id, gid, name, artist_credit, length) + VALUES (1, '54b9d183-7dab-42ba-94a3-7388a66604b8', 'King of the Mountain', 1, 293720), + (2, '659f405b-b4ee-4033-868a-0daa27784b89', 'π', 1, 369680), + (3, 'ae674299-2824-4500-9516-653ac1bc6f80', 'Bertie', 1, 258839), + (4, 'b1d58a57-a0f3-4db8-aa94-868cdc7bc3bb', 'Mrs. Bartolozzi', 1, 358960), + (5, '44f52946-0c98-47ba-ba60-964774db56f0', 'How to Be Invisible', 1, 332613), + (6, '07614140-8bb8-4db9-9dcc-0917c3a8471b', 'Joanni', 1, 296160); + +INSERT INTO release_group (id, gid, name, artist_credit, type) VALUES (1, '7c3218d7-75e0-4e8c-971f-f097b6c308c5', 'Aerial', 1, 1); + +INSERT INTO release (id, gid, name, artist_credit, release_group) + VALUES (1, 'f205627f-b70a-409d-adbe-66289b614e80', 'Aerial', 1, 1), + (2, '9b3d9383-3d2a-417f-bfbb-56f7c15f075b', 'Aerial', 1, 1), + (3, 'ab3d9383-3d2a-417f-bfbb-56f7c15f075b', 'Aerial', 1, 1); + +INSERT INTO release_unknown_country (release, date_year) +VALUES (1, 2007), (2, 2008); + +INSERT INTO medium_format (id, gid, name, has_discids) VALUES (123465, '52014420-cae8-11de-8a39-0800200c9a26', 'Format', TRUE); +INSERT INTO medium (id, gid, release, position, format, name) VALUES (1, '6e8ede88-4145-4412-8951-9e5ba757ea29', 1, 1, 123465, 'A Sea of Honey'); +INSERT INTO medium (id, gid, release, position, format, name) VALUES (2, '3c9ba218-abdd-40ea-a140-c4c9a50f456d', 1, 2, 123465, 'A Sky of Honey'); + +INSERT INTO track (id, gid, medium, position, number, recording, name, artist_credit, length) + VALUES (1, '66c2ebff-86a8-4e12-a9a2-1650fb97d9d8', 1, 1, 1, 1, 'King of the Mountain', 1, NULL), + (2, 'b0caa7d1-0d1e-483e-b22b-ec6ab7fada06', 1, 2, 2, 2, 'π', 1, 369680), + (3, 'f891acda-39d6-4a7f-a9d1-dd87b7c46a0a', 1, 3, 3, 3, 'Bertie', 1, 258839); + +INSERT INTO track (id, gid, medium, position, number, recording, name, artist_credit, length) + VALUES (4, 
'6c04d03c-4995-43be-8530-215ca911dcbf', 1, 4, 4, 4, 'Mrs. Bartolozzi', 1, 358960), + (5, '849dc232-c33a-4611-a6a5-5a0969d63422', 1, 5, 5, 5, 'How to Be Invisible', 1, 332613); + +INSERT INTO link (id, link_type, attribute_count, begin_date_year, begin_date_month, begin_date_day, end_date_year, end_date_month, end_date_day, ended) + VALUES (1, 151, 0, 1971, 2, NULL, 1972, 2, NULL, true); + +INSERT INTO l_artist_recording (id, link, entity0, entity1) VALUES (1, 1, 1, 1); +INSERT INTO l_artist_recording (id, link, entity0, entity1) VALUES (2, 1, 1, 2); diff --git a/test/sql/release-group.sql b/test/sql/release-group.sql new file mode 100644 index 00000000..d03833f6 --- /dev/null +++ b/test/sql/release-group.sql @@ -0,0 +1,39 @@ +INSERT INTO artist (id, gid, name, sort_name) + VALUES (1, 'a9d99e40-72d7-11de-8a39-0800200c9a66', 'Name', 1); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Name', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) + VALUES (1, 1, 'Name', 0, ''); + +INSERT INTO release_group (id, gid, name, artist_credit, type, comment, edits_pending) + VALUES (1, '7b5d22d0-72d7-11de-8a39-0800200c9a66', 'Release Group', 1, 1, 'Comment', 2); + +INSERT INTO release_group (id, gid, name, artist_credit, type, comment, edits_pending) + VALUES (2, '3b4faa80-72d9-11de-8a39-0800200c9a66', 'Release Name', 1, 1, 'Comment', 2); + +INSERT INTO release (id, gid, name, artist_credit, release_group) + VALUES (1, '4c767e70-72d8-11de-8a39-0800200c9a66', 'Release Name', 1, 1); + +INSERT INTO editor (id, name, password, ha1) VALUES (1, 'editor', '{CLEARTEXT}pass', '3f3edade87115ce351d63f42d92a1834'); +INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 1, 'Annotation', 'change'); +INSERT INTO release_group_annotation (release_group, annotation) VALUES (1, 1); + +INSERT INTO release_group_gid_redirect (gid, new_id) VALUES ('77637e8c-be66-46ea-87b3-73addc722fc9', 
1); + +INSERT INTO artist (id, gid, name, sort_name) + VALUES (2, '7a906020-72db-11de-8a39-0800200c9a66', 'Various Artists', 'Various Artists'); +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (2, 'Various Artists', 1, 'c44109ce-57d7-3691-84c8-37926e3d41d2'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) VALUES (2, 2, 'Various Artists', 1, ''); + +INSERT INTO release_group (id, gid, name, artist_credit) + VALUES (3, '25b6fe30-ff5b-11de-8a39-0800200c9a66', 'Various Release', 2); +INSERT INTO release (id, gid, name, artist_credit, release_group) + VALUES (3, '25b6fe30-ff5b-11de-8a39-0800200c9a66', 'Various Release', 2, 3); + +INSERT INTO medium (id, gid, track_count, release, position) VALUES (1, '6e26951a-e5dd-44d0-ab70-7bb14321be1e', 0, 3, 1); +INSERT INTO recording (id, artist_credit, name, gid) + VALUES (1, 2, 'Track on recording', 'b43eb990-ff5b-11de-8a39-0800200c9a66'); +INSERT INTO track (id, gid, name, artist_credit, medium, position, number, recording) + VALUES (1, '899aaf2a-a18d-4ed5-9c18-03485df72793', 'Track on recording', 1, 1, 1, 1, 1); diff --git a/test/sql/release.sql b/test/sql/release.sql new file mode 100644 index 00000000..7bb7ad97 --- /dev/null +++ b/test/sql/release.sql @@ -0,0 +1,61 @@ +INSERT INTO artist (id, gid, name, sort_name) + VALUES (1, 'a9d99e40-72d7-11de-8a39-0800200c9a66', 'Name', 'Name'); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Name', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) + VALUES (1, 1, 'Name', 0, ''); + +INSERT INTO area (id, gid, name, type) VALUES + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1); +INSERT INTO country_area (area) VALUES (221); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'); + +INSERT INTO release_group (id, gid, name, artist_credit, type, comment, edits_pending) + VALUES (1, 
'3b4faa80-72d9-11de-8a39-0800200c9a66', 'Arrival', 1, 1, 'Comment', 2); + +INSERT INTO release (id, gid, name, artist_credit, release_group, status, packaging, language, script, barcode, comment, edits_pending) VALUES (1, 'f34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1, 1, 1, 145, 3, '731453398122', 'Comment', 2); +INSERT INTO release_country (release, country, date_year, date_month, date_day) VALUES (1, 221, 2009, 5, 8); +; + +INSERT INTO release (id, gid, name, artist_credit, release_group) VALUES (2, '7a906020-72db-11de-8a39-0800200c9a66', 'Release #2', 1, 1); +; + +INSERT INTO label (id, gid, name) VALUES (1, '00a23bd0-72db-11de-8a39-0800200c9a66', 'Label'); + +INSERT INTO release_label (id, release, label, catalog_number) + VALUES (1, 1, 1, 'ABC-123'), (2, 1, 1, 'ABC-123-X'); + +INSERT INTO editor (id, name, password, privs, email, website, bio, email_confirm_date, member_since, last_login_date, ha1) VALUES (1, 'editor', '{CLEARTEXT}pass', 0, 'test@editor.org', 'http://musicbrainz.org', 'biography', '2005-10-20', '1989-07-23', now(), '3f3edade87115ce351d63f42d92a1834'); +INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 1, 'Annotation', 'change'); +INSERT INTO release_annotation (release, annotation) VALUES (1, 1); + +INSERT INTO release_gid_redirect (gid, new_id) VALUES ('71dc55d8-0fc6-41c1-94e0-85ff2404997d', 1); + +INSERT INTO artist (id, gid, name, sort_name, comment) + VALUES (2, '7a906020-72db-11de-8a39-0800200c9a66', 'Various Artists', 'Various Artists', ''), + (3, '1a906020-72db-11de-8a39-0800200c9a66', 'Various Artists', 'Various Artists', 'Various Artists 2'); +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (2, 'Various Artists', 1, 'c44109ce-57d7-3691-84c8-37926e3d41d2'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) VALUES (2, 2, 'Various Artists', 1, ''); + +INSERT INTO release_group (id, gid, name, artist_credit) + VALUES (2, '25b6fe30-ff5b-11de-8a39-0800200c9a66', 
'Various Release', 2); +INSERT INTO release (id, gid, name, artist_credit, release_group) VALUES (3, '25b6fe30-ff5b-11de-8a39-0800200c9a66', 'Various Release', 2, 2); +; + +INSERT INTO medium (id, gid, track_count, release, position) VALUES (1, 'c517968f-afd0-48e6-ab4b-dfdae888ad9d', 1, 3, 1); +INSERT INTO recording (id, artist_credit, name, gid) + VALUES (1, 2, 'Track on recording', 'b43eb990-ff5b-11de-8a39-0800200c9a66'); +INSERT INTO track (id, gid, name, artist_credit, medium, position, number, recording) + VALUES (1, '30f0fccd-602d-4fab-8d44-06536e596966', 'Track on recording', 1, 1, 1, 1, 1), + (100, 'f9864eea-5455-4a8e-ad29-e0652cfe1452', 'Track on recording', 1, 1, 2, 2, 1); + +INSERT INTO release_group (id, gid, name, artist_credit) + VALUES (4, '329fb554-2a81-3d8a-8e22-ec2c66810019', 'Blonde on Blonde', 2); +INSERT INTO release (id, gid, name, artist_credit, release_group) VALUES (5, '538aff00-a009-4515-a064-11a6d5a502ee', 'Blonde on Blonde', 2, 4); +; + +-- release_meta +UPDATE release_meta SET cover_art_presence = 'present' WHERE id IN (7, 8); +UPDATE release_meta SET cover_art_presence = 'darkened' WHERE id = 9; diff --git a/test/sql/series.sql b/test/sql/series.sql new file mode 100644 index 00000000..01284c36 --- /dev/null +++ b/test/sql/series.sql @@ -0,0 +1,51 @@ +INSERT INTO series (id, gid, name, comment, type, ordering_type) + VALUES (1, 'a8749d0c-4a5a-4403-97c5-f6cd018f8e6d', 'Test Recording Series', 'test comment 1', 3, 1), + (2, '2e8872b9-2745-4807-a84e-094d425ec267', 'Test Work Series', 'test comment 2', 4, 2), + (3, 'dbb23c50-d4e4-11e3-9c1a-0800200c9a66', 'Dumb Recording Series', '', 3, 1); + +INSERT INTO series_alias (id, series, name, type, sort_name) VALUES + (1, 1, 'Test Recording Series Alias', 2, 'Test Recording Series Alias'); + +INSERT INTO link (id, link_type, attribute_count) VALUES + (1, 740, 1), (2, 740, 1), (3, 740, 1), (4, 740, 1), + (5, 743, 1), (6, 743, 1), (7, 743, 1), (8, 743, 1); + +INSERT INTO link_attribute (link, 
attribute_type) VALUES + (1, 788), (2, 788), (3, 788), (4, 788), (5, 788), (6, 788), (7, 788), (8, 788); + +INSERT INTO link_attribute_text_value (link, attribute_type, text_value) + VALUES (1, 788, 'A1'), + (2, 788, 'A11'), + (3, 788, 'A10'), + (4, 788, 'A100'), + (5, 788, 'WTF 87'), + (6, 788, 'WTF 21'), + (7, 788, 'WTF 99'), + (8, 788, 'WTF 12'); + +INSERT INTO artist (id, gid, name, sort_name) VALUES + (77, 'ac3a3195-ba87-4154-a937-bbc06aac4038', 'Some Artist', 'Some Artist'); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Shared Name', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); + +INSERT INTO artist_credit_name (artist_credit, position, artist, name) VALUES + (1, 0, 77, 'Shared Name'); + +INSERT INTO recording (id, gid, name, artist_credit, length) VALUES + (1, '123c079d-374e-4436-9448-da92dedef3ce', 'Dancing Queen', 1, 123456), + (2, '54b9d183-7dab-42ba-94a3-7388a66604b8', 'King of the Mountain', 1, 293720), + (3, '659f405b-b4ee-4033-868a-0daa27784b89', 'π', 1, 369680), + (4, 'ae674299-2824-4500-9516-653ac1bc6f80', 'Bertie', 1, 258839); + +INSERT INTO work (id, gid, name, type) VALUES + (1, '7e0e3ea0-d674-11e3-9c1a-0800200c9a66', 'Wōrk1', 1), + (2, 'f89a8de8-f0e3-453c-9516-5bc3edd2fd88', 'Wōrk2', 1), + (3, '8234f641-4231-4b2f-a14f-c469b9b8de11', 'Wōrk3', 1), + (4, 'efe72c7d-652d-4243-b01b-152997bb730e', 'Wōrk4', 1); + +INSERT INTO l_recording_series (id, link, entity0, entity1, link_order) VALUES + (1, 1, 1, 1, 1), (2, 2, 2, 1, 2), (3, 3, 3, 3, 1), (4, 4, 4, 3, 2); + +INSERT INTO l_series_work (id, link, entity0, entity1, link_order) VALUES + (1, 5, 2, 1, 1), (2, 6, 2, 2, 2), (3, 7, 2, 3, 3), (4, 8, 2, 4, 4); diff --git a/test/sql/tag.sql b/test/sql/tag.sql new file mode 100644 index 00000000..6bf7f358 --- /dev/null +++ b/test/sql/tag.sql @@ -0,0 +1,33 @@ +INSERT INTO artist (id, gid, name, sort_name) + VALUES (3, 'e2a083a9-9942-4d6e-b4d2-8397320b95f7', 'Artist 1', 'Artist 1'), + (4, '2fed031c-0e89-406e-b9f0-3d192637907a', 'Artist 
2', 'Artist 2'); + +INSERT INTO tag (id, name) + VALUES (1, 'musical'), + (2, 'rock'), + (3, 'jazz'), + (4, 'world music'); + +INSERT INTO editor (id, name, password, ha1) + VALUES (11, 'editor1', '{CLEARTEXT}password', '0e5b1cce99adc89b535a3c6523c5410a'), + (12, 'editor2', '{CLEARTEXT}password', '9ab932d00c88daf4a3ccf3a25e00f977'), + (13, 'editor3', '{CLEARTEXT}password', '8226c71cd2dd007dc924910793b8ca83'), + (14, 'editor4', '{CLEARTEXT}password', 'f0ab22e1a22cb1e60fea481f812450cb'), + (15, 'editor5', '{CLEARTEXT}password', '3df132c9df92678048a6b25c5ad751ef'); + +INSERT INTO artist_tag_raw (tag, artist, editor) + VALUES (1, 3, 11), + (2, 3, 12), + (2, 3, 13), + (2, 3, 14), + (1, 4, 11), + (1, 4, 12), + (1, 4, 13), + (1, 4, 14), + (1, 4, 15), + (2, 4, 11), + (2, 4, 12), + (2, 4, 13), + (3, 4, 14), + (3, 4, 15), + (4, 4, 12); diff --git a/test/sql/url.sql b/test/sql/url.sql new file mode 100644 index 00000000..e1954c98 --- /dev/null +++ b/test/sql/url.sql @@ -0,0 +1,12 @@ +INSERT INTO url (id, gid, url, last_updated, edits_pending) + VALUES (1, '9201840b-d810-4e0f-bb75-c791205f5b24', 'http://musicbrainz.org/', '2011-01-18 16:23:38+00', 0), + (2, '9b3c5c67-572a-4822-82a3-bdd3f35cf152', 'http://microsoft.com', NOW(), 0), + (3, '25d6b63a-12dc-41c9-858a-2f42ae610a7d', 'http://zh-yue.wikipedia.org/wiki/%E7%8E%8B%E8%8F%B2', '2011-01-18 16:23:38+00', 0), + (4, '7bd45cc7-6189-4712-35e1-cdf3632cf1a9', 'https://www.allmusic.com/artist/faye-wong-mn0000515659', NOW(), 0), + (5, '9b3c5c67-572a-4822-82a3-bdd3f35cf153', 'http://microsoft.fr', '2011-01-18 16:23:38+00', 2); + +INSERT INTO artist (id, gid, name, sort_name) VALUES (100, 'acd58926-4243-40bb-a2e5-c7464b3ce577', 'Faye Wong', 'Faye Wong'); +INSERT INTO link (id, link_type) VALUES (1, 179); +INSERT INTO link (id, link_type) VALUES (2, 283); +INSERT INTO l_artist_url (id, link, entity0, entity1) VALUES (1, 1, 100, 3); +INSERT INTO l_artist_url (id, link, entity0, entity1) VALUES (2, 2, 100, 4); diff --git 
a/test/sql/work.sql b/test/sql/work.sql new file mode 100644 index 00000000..a0d9e02c --- /dev/null +++ b/test/sql/work.sql @@ -0,0 +1,45 @@ +INSERT INTO artist (id, gid, name, sort_name, comment) + VALUES (1, '5f9913b0-7219-11de-8a39-0800200c9a66', 'ABBA', 'ABBA', 'ABBA 1'), + (2, '5f9913b0-7219-11de-8a39-0800200c9a67', 'ABBA', 'ABBA', 'ABBA 2'); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'ABBA', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'), + (2, 'ABBA', 1, 'c44109ce-57d7-3691-84c8-37926e3d41d2'); +INSERT INTO artist_credit_name (artist_credit, position, artist, name, join_phrase) + VALUES (1, 0, 1, 'ABBA', ''), (2, 0, 2, 'ABBA', ''); + +INSERT INTO work (id, gid, name, type, edits_pending, comment) + VALUES (1, '745c079d-374e-4436-9448-da92dedef3ce', 'Dancing Queen', + 1, 0, 'Work'); +INSERT INTO iswc (id, work, iswc) VALUES (1, 1, 'T-000.000.001-0'); + +INSERT INTO work (id, gid, name, type, edits_pending, comment) + VALUES (5, '755c079d-374e-4436-9448-da92dedef3ce', 'Test', + 1, 0, 'Work'); +INSERT INTO iswc (id, work, iswc) VALUES (2, 5, 'T-500.000.001-0'), (3, 5, 'T-500.000.002-0'); + +INSERT INTO work (id, gid, name, type, edits_pending, comment) + VALUES (10, '105c079d-374e-4436-9448-da92dedef3ce', 'Test', + 1, 0, 'Work'); + +INSERT INTO work (id, gid, name) VALUES (2, '745c079d-374e-4436-9448-da92dedef3cf', 'Test'); +INSERT INTO iswc (id, work, iswc) VALUES (4, 2, 'T-000.000.002-0'); + +INSERT INTO work_gid_redirect VALUES ('28e73402-5666-4d74-80ab-c3734dc699ea', 1); + +INSERT INTO work (id, gid, name, type) VALUES (15, '640b17f5-4aa3-3fb1-8c6c-4792458e8a56', 'Blue Lines', 17); + +INSERT INTO artist (id, gid, name, sort_name) VALUES (4, '10adbe5e-a2c0-4bf3-8249-2b4cbf6e6ca8', 'Massive Attack', 'Massive Attack'); +INSERT INTO artist_credit (id, name, artist_count, gid) VALUES (4, 'Massive Attack', 1, '261f02c2-75a6-313f-9dd8-1716f73f3ce8'); +INSERT INTO artist_credit_name (artist_credit, position, artist, name) VALUES (4, 0, 4, 
'Massive Attack'); + +INSERT INTO recording (id, gid, name, artist_credit, length) +VALUES (15, 'bef81f8f-4bcf-4308-bd66-e57018169a94', 'Blue Lines', 4, 261533), + (754567, 'a2383c02-2430-4294-9177-ef799a6eca31', 'Blue Lines', 4, 265546); + +INSERT INTO link (id, link_type) VALUES (27124, 278); +INSERT INTO l_recording_work (id, link, entity0, entity1) VALUES (279733, 27124, 15, 15), (966013, 27124, 754567, 15); + +INSERT INTO editor (id, name, password, ha1) VALUES (100, 'annotation_editor', '{CLEARTEXT}password', '41bd7f7951ccec2448f74bed1b7bc6cb'); +INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 100, 'Annotation', 'change'); +INSERT INTO work_annotation (work, annotation) VALUES (1, 1); diff --git a/test/test_amqp_handler.py b/test/test_amqp_handler.py index d26e9f77..d5bbf1d6 100644 --- a/test/test_amqp_handler.py +++ b/test/test_amqp_handler.py @@ -2,8 +2,7 @@ # coding: utf-8 # Copyright (c) 2014 Wieland Hoffmann # License: MIT, see LICENSE for details -import mock -import unittest +from unittest import mock, TestCase from amqp.basic_message import Message as Amqp_Message from logging import basicConfig, CRITICAL @@ -17,7 +16,7 @@ basicConfig(level=CRITICAL) -class AmqpTestCase(unittest.TestCase): +class AmqpTestCase(TestCase): def setUp(self): self.maxDiff = None @@ -115,6 +114,11 @@ def setUp(self): self.handler.cores[self.entity_type] = mock.Mock() + for entity_type, entity in SCHEMA.items(): + patcher = mock.patch.object(entity, 'build_entity_query') + patcher.start() + self.addCleanup(patcher.stop) + def test_delete_callback(self): entity_gid = u"90d7709d-feba-47e6-a2d1-8770da3c3d9c" self.message = Amqp_Message( @@ -140,7 +144,7 @@ def test_index_by_fk_1(self): self.handler = handler.Handler(SCHEMA.keys()) for entity_type, entity in SCHEMA.items(): self.handler.cores[entity_type] = mock.Mock() - entity.build_entity_query = mock.MagicMock() + self.handler._index_by_fk(parsed_message) calls = self.handler.db_session().execute.call_args_list 
self.assertEqual(len(calls), 6) @@ -165,7 +169,7 @@ def test_index_by_fk_1(self): 'FROM musicbrainz.area \n' 'WHERE musicbrainz.area.id = :id_1'] - self.assertEqual(expected_queries, actual_queries) + self.assertCountEqual(expected_queries, actual_queries) def test_index_by_fk_2(self): columns = {'id': '1'} @@ -174,7 +178,7 @@ def test_index_by_fk_2(self): self.handler = handler.Handler(SCHEMA.keys()) for entity_type, entity in SCHEMA.items(): self.handler.cores[entity_type] = mock.Mock() - entity.build_entity_query = mock.MagicMock() + self.handler._index_by_fk(parsed_message) calls = self.handler.db_session().execute.call_args_list self.assertEqual(len(calls), 1) @@ -193,7 +197,7 @@ def test_index_by_fk_3(self): self.handler = handler.Handler(SCHEMA.keys()) for entity_type, entity in SCHEMA.items(): self.handler.cores[entity_type] = mock.Mock() - entity.build_entity_query = mock.MagicMock() + self.handler._index_by_fk(parsed_message) calls = self.handler.db_session().execute.call_args_list self.assertEqual(len(calls), 1) diff --git a/test/test_config.py b/test/test_config.py new file mode 100644 index 00000000..5be5fa3f --- /dev/null +++ b/test/test_config.py @@ -0,0 +1,15 @@ +import os +import unittest + +from sir import config + + +class ConfigParserTest(unittest.TestCase): + + def test_interpolation(self): + os.environ["SKIP"] = "foobar" + os.environ["PGPASSWORD"] = "dummy password" + config.read_config() + self.assertEqual(config.CFG["database"]["host"], "musicbrainz_db") + self.assertEqual(config.CFG["database"]["password"], "dummy password") + self.assertEqual(config.CFG["solr"]["uri"], "foobar") diff --git a/test/test_indexing.py b/test/test_indexing.py index 4d486ef6..94bf7b14 100644 --- a/test/test_indexing.py +++ b/test/test_indexing.py @@ -1,31 +1,39 @@ -import mock -import unittest - -import sir.indexing +from unittest import mock, TestCase from multiprocessing import Queue -from sir.indexing import queue_to_solr, send_data_to_solr, FAILED + +import 
pysolr +import requests from pysolr import SolrError -class QueueToSolrTest(unittest.TestCase): +import sir.indexing +from sir.indexing import queue_to_solr, send_data_to_solr, FAILED + + +class QueueToSolrTest(TestCase): def setUp(self): - self.solr_connection = mock.Mock() self.queue = Queue() self.queue.put({"foo": "bar"}) self.queue.put(None) - def test_normal_send(self): - queue_to_solr(self.queue, 1, self.solr_connection) - expected = [mock.call([{"foo": "bar"}]), mock.call([]), ] - calls = self.solr_connection.add.call_args_list - self.assertEqual(calls, expected) - - def test_queue_drained_send(self): - queue_to_solr(self.queue, 2, self.solr_connection) - self.solr_connection.add.assert_called_once_with([{"foo": "bar"}]) + @mock.patch.object(requests.Session, "get") + @mock.patch.object(pysolr.Solr, "commit") + @mock.patch.object(pysolr.Solr, "add") + def test_normal_send(self, mock_add, mock_commit, mock_get): + queue_to_solr(self.queue, 1, "test") + expected = [mock.call([{"foo": "bar"}]), mock.call([]),] + mock_add.assert_has_calls(expected) + mock_commit.assert_called() + @mock.patch.object(requests.Session, "get") + @mock.patch.object(pysolr.Solr, "commit") + @mock.patch.object(pysolr.Solr, "add") + def test_queue_drained_send(self, mock_add, mock_commit, mock_get): + queue_to_solr(self.queue, 2, "test") + mock_add.assert_called_once_with([{"foo": "bar"}]) + mock_commit.assert_called() -class SendDataToSolrTest(unittest.TestCase): +class SendDataToSolrTest(TestCase): def setUp(self): self.solr_connection = mock.MagicMock() self.solr_connection.add = mock.MagicMock() @@ -42,7 +50,7 @@ def test_fail_send(self): send_data_to_solr(self.solr_connection, [{"foo": "bar"}]) self.assertTrue(FAILED.value) -class LiveIndexFailTest(unittest.TestCase): +class LiveIndexFailTest(TestCase): def setUp(self): self.imp = sir.indexing._multiprocessed_import = mock.MagicMock() FAILED.value = False diff --git a/test/test_indexing_real_data.py 
b/test/test_indexing_real_data.py new file mode 100644 index 00000000..437cc6c2 --- /dev/null +++ b/test/test_indexing_real_data.py @@ -0,0 +1,728 @@ +import os +import unittest +from pprint import pprint +from queue import Queue +from datetime import datetime, timezone + +from sqlalchemy import text +from sqlalchemy.orm import Session + +from sir import querying, util, config +from sir.indexing import index_entity +from sir.schema import SCHEMA + + +class IndexingTestCase(unittest.TestCase): + TEST_SQL_FILES_DIR = os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'sql') + + @classmethod + def setUpClass(cls): + config.read_config() + + def setUp(self): + self.connection = util.engine().connect() + self.transaction = self.connection.begin() + self.session = Session(bind=self.connection) + self.maxDiff = None + + def tearDown(self): + self.session.close() + self.transaction.rollback() + self.connection.close() + + def _test_index_entity(self, entity, expected_messages, key="mbid"): + with open( + os.path.join(self.TEST_SQL_FILES_DIR, f"{entity}.sql"), + encoding="utf-8" + ) as f: + self.session.execute(text(f.read())) + + bounds = querying.iter_bounds( + self.session, SCHEMA[entity].model, 100, 0 + ) + + queue = Queue() + index_entity(self.session, entity, bounds[0], queue) + + received_messages = [] + while not queue.empty(): + received_messages.append(queue.get_nowait()) + pprint(received_messages, indent=4) + + self.assertEqual(len(expected_messages), len(received_messages)) + expected = {x[key]: x for x in expected_messages} + received = {x[key]: x for x in received_messages} + for expected_key, expected_val in expected.items(): + self.assertIn(expected_key, received) + received_val = received[expected_key] + self.assertCountEqual(expected_val.keys(), received_val.keys()) + for k, v in expected_val.items(): + if isinstance(v, list): + self.assertCountEqual(v, received_val[k]) + else: + self.assertEqual(v, received_val[k]) + + def 
test_index_area(self): + expected = [ + { + '_store': 'EuropeEuropeXEfalse', + 'area': u'Europe', + 'iso1': u'XE', + 'ended': 'false', + 'mbid': '89a675c2-3e37-3518-b83c-418bad59a85a', + 'type': u'Country' + }, + { + '_store': 'United StatesUnited StatesUSfalse', + 'area': u'United States', + 'iso1': u'US', + 'ended': 'false', + 'mbid': '489ce91b-6658-3307-9877-795b68554c98', + 'type': u'Country' + }, + { + '_store': 'United KingdomUnited KingdomGBfalse', + 'area': u'United Kingdom', + 'iso1': u'GB', + 'ended': 'false', + 'mbid': '8a754a16-0027-3a29-b6d7-2b40ea0481ed', + 'type': u'Country' + }, + { + '_store': 'JapanJapanJPfalse', + 'area': u'Japan', + 'iso1': u'JP', + 'ended': 'false', + 'mbid': '2db42837-c832-3c27-b4a3-08198f75693c', + 'type': u'Country' + }, + { + '_store': 'GermanyGermanyDEfalse', + 'area': u'Germany', + 'iso1': u'DE', + 'ended': 'false', + 'mbid': '85752fda-13c4-31a3-bee5-0e5cb1f51dad', + 'type': u'Country' + }, + { + '_store': 'AustraliaAustraliaAUfalseオーストラリア', + 'sortname': u'\u30aa\u30fc\u30b9\u30c8\u30e9\u30ea\u30a2', + 'ended': 'false', + 'area': u'Australia', + 'iso1': u'AU', + 'alias': u'\u30aa\u30fc\u30b9\u30c8\u30e9\u30ea\u30a2', + 'mbid': '106e0bec-b638-3b37-b731-f53d507dc00e', + 'type': u'Country' + }, + { + '_store': 'SydneySydneyfalse106e0bec-b638-3b37-b731-f53d507dc00ebackwardAustraliaAustraliafalse', + 'ended': 'false', + 'mbid': '3f179da4-83c6-4a28-a627-e46b4a8ff1ed', + 'type': u'City', + 'area': u'Sydney' + } + ] + self._test_index_entity("area", expected) + + def test_index_artist(self): + expected = [ + { + 'comment': u'Yet Another Test Artist', + 'begin': '2008-01-02', + 'endarea': u'United Kingdom', + 'end': '2009-03-04', + 'sortname': u'Artist, Test', + 'artist': u'Test Artist', + 'country': u'GB', + 'area': u'United Kingdom', + 'ended': 'true', + 'mbid': '745c079d-374e-4436-9448-da92dedef3ce', + 'gender': u'Male', + '_store': 'Test ArtistArtist, TestmaleGBUnited KingdomUnited KingdomfalseUnited KingdomUnited 
KingdomfalseUnited KingdomUnited KingdomfalseYet Another Test Artist2008-01-022009-03-04true', + 'type': u'Person', + 'beginarea': u'United Kingdom' + }, + { + 'ended': 'false', + 'mbid': 'ca4c2228-227c-4904-932a-dff442c091ea', + '_store': 'Annotated Artist BAnnotated Artist Bfalse', + 'sortname': u'Annotated Artist B', + 'artist': u'Annotated Artist B' + }, + { + 'ended': 'false', + 'mbid': 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958', + '_store': 'Annotated Artist AAnnotated Artist Afalse', + 'sortname': u'Annotated Artist A', + 'artist': u'Annotated Artist A' + }, + { + 'ended': 'false', + 'mbid': '945c079d-374e-4436-9448-da92dedef3cf', + '_store': 'Minimal ArtistMinimal Artistfalse', + 'sortname': u'Minimal Artist', + 'artist': u'Minimal Artist' + } + ] + self._test_index_entity("artist", expected) + + def test_index_editor(self): + expected = [ + { + 'bio': u'ModBot is a bot used by the MusicBrainz Server to perform a variety of automated functions. \\r+', + '_store': 'ModBotModBot is a bot used by the MusicBrainz Server to perform a variety of automated functions. 
\\r+', + 'id': 4, 'editor': u'ModBot' + }, + { + 'bio': u'biography', + '_store': 'new_editorbiography', + 'id': 1, + 'editor': u'new_editor' + }, + { + 'bio': u'second biography', + '_store': 'Alicesecond biography', + 'id': 2, + 'editor': u'Alice' + }, + { + 'bio': u'donation check test user', + '_store': 'kunodonation check test user', + 'id': 3, + 'editor': u'kuno' + } + ] + self._test_index_entity("editor", expected, key="id") + + def test_index_instrument(self): + # Klavier/piano is present in the test database by default so account for that + expected = [ + { + '_store': 'pianoKlavier', + 'alias': 'Klavier', + 'instrument': 'piano', + 'mbid': 'b3eac5f9-7859-4416-ac39-7154e2e8d348', + 'type': 'String instrument' + }, + { + 'comment': u'Yet Another Test Instrument', + '_store': 'Test InstrumentYet Another Test InstrumentThis is a description!', + 'description': u'This is a description!', + 'instrument': u'Test Instrument', + 'mbid': '745c079d-374e-4436-9448-da92dedef3ce', + 'type': u'String instrument' + }, + { + 'instrument': u'Minimal Instrument 2', + 'mbid': 'a56d18ae-485f-5547-a559-eba3efef04d0', + '_store': 'Minimal Instrument 2' + }, + { + 'instrument': u'Minimal Instrument', + 'mbid': '945c079d-374e-4436-9448-da92dedef3cf', + '_store': 'Minimal Instrument' + } + ] + self._test_index_entity("instrument", expected) + + def test_index_label(self): + expected = [ + { + 'comment': u'Sheffield based electronica label', + 'begin': '1989-02-03', + 'code': 2070, + 'end': '2008-05-19', + 'area': u'United Kingdom', + 'country': u'GB', + 'label': u'Warp Records', + 'ended': 'true', + 'mbid': '46f0f4cd-8aab-4b33-b698-f459faf64190', + '_store': 'Warp RecordsWarp Records2070Sheffield based electronica labelGBUnited KingdomUnited Kingdomfalse1989-02-032008-05-19true', + 'type': u'Production' + }, + { + 'ended': 'false', + 'mbid': 'f2a9a3c0-72e3-11de-8a39-0800200c9a66', + '_store': 'To MergeTo Mergefalse', + 'label': u'To Merge' + }, + { + 'begin': '1953-03-15', + 
'end': '1991-11-27', + 'area': u'Soviet Union', + 'country': u'SU', + 'label': u'U.S.S.R. Ministry of Culture', + 'ended': 'true', + 'mbid': '449ddb7e-4e92-41eb-a683-5bbcc7fd7d4a', + '_store': 'U.S.S.R. Ministry of CultureU.S.S.R. Ministry of CultureSUSoviet UnionSoviet Union19221991true1953-03-151991-11-27true' + } + ] + self._test_index_entity("label", expected) + + def test_index_place(self): + expected = [ + { + 'comment': u'A PLACE!', + 'begin': '2013', + '_store': 'A Test PlaceA PLACE!An Address0.3231.234EuropeEuropefalse2013falseA Test Alias', + 'area': u'Europe', + 'long': 1.234, + 'alias': u'A Test Alias', + 'mbid': 'df9269dd-0470-4ea2-97e8-c11e46080edd', + 'ended': 'false', + 'address': u'An Address', + 'lat': 0.323, + 'place': u'A Test Place', + 'type': u'Venue' + } + ] + self._test_index_entity("place", expected) + + def test_index_recording(self): + expected = [ + { + 'primarytype': u'Album', + 'firstreleasedate': '2007', + '_store': 'King of the MountainArtistArtistArtist2007AerialAerialAlbum51Format1King of the Mountain', + 'tracks': 5, + 'format': u'Format', + 'creditname': u'Artist', + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'artist': u'Artist', + 'mbid': '54b9d183-7dab-42ba-94a3-7388a66604b8', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'number': u'1', + 'recording': u'King of the Mountain', + 'tid': '66c2ebff-86a8-4e12-a9a2-1650fb97d9d8', + 'artistname': u'Artist', + 'video': 'f', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'tracksrelease': 5, + 'release': u'Aerial', + 'position': 1, + 'tnum': 1 + }, + { + '_store': 'Joanni296160ArtistArtistArtist', + 'qdur': 148, + 'artist': u'Artist', + 'creditname': u'Artist', + 'artistname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'recording': u'Joanni', + 'mbid': '07614140-8bb8-4db9-9dcc-0917c3a8471b', + 'video': 'f', + 'dur': 296160 + }, + { + 'tnum': 5, + 'primarytype': u'Album', + '_store': 'How to Be 
Invisible332613ArtistArtistArtist2007AerialAerialAlbum51Format5How to Be Invisible332613', + 'qdur': 166, + 'number': u'5', + 'video': 'f', + 'recording': u'How to Be Invisible', + 'creditname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'tracksrelease': 5, + 'tid': '849dc232-c33a-4611-a6a5-5a0969d63422', + 'dur': 332613, + 'firstreleasedate': '2007', + 'format': u'Format', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'artistname': u'Artist', 'artist': u'Artist', + 'tracks': 5, + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'mbid': '44f52946-0c98-47ba-ba60-964774db56f0', + 'release': u'Aerial', + 'position': 1 + }, + { + 'tnum': 4, + 'primarytype': u'Album', + '_store': 'Mrs. Bartolozzi358960ArtistArtistArtist2007AerialAerialAlbum51Format4Mrs. Bartolozzi358960', + 'qdur': 179, + 'number': u'4', + 'video': 'f', + 'recording': u'Mrs. Bartolozzi', + 'creditname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'tracksrelease': 5, + 'tid': '6c04d03c-4995-43be-8530-215ca911dcbf', + 'dur': 358960, + 'firstreleasedate': '2007', + 'format': u'Format', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'artistname': u'Artist', + 'artist': u'Artist', + 'tracks': 5, + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'mbid': 'b1d58a57-a0f3-4db8-aa94-868cdc7bc3bb', + 'release': u'Aerial', + 'position': 1 + }, + { + 'tnum': 3, + 'primarytype': u'Album', + '_store': 'Bertie258839ArtistArtistArtist2007AerialAerialAlbum51Format3Bertie258839', + 'qdur': 129, + 'number': u'3', + 'video': 'f', + 'recording': u'Bertie', + 'creditname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'tracksrelease': 5, + 'tid': 'f891acda-39d6-4a7f-a9d1-dd87b7c46a0a', + 'dur': 258839, + 'firstreleasedate': '2007', + 'format': u'Format', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'artistname': u'Artist', + 'artist': u'Artist', + 'tracks': 5, + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'mbid': 
'ae674299-2824-4500-9516-653ac1bc6f80', + 'release': u'Aerial', + 'position': 1 + }, + { + 'tnum': 2, + 'primarytype': u'Album', + '_store': 'π369680ArtistArtistArtist2007AerialAerialAlbum51Format2π369680', + 'qdur': 184, + 'number': u'2', + 'video': 'f', + 'recording': u'\u03c0', + 'creditname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'tracksrelease': 5, + 'tid': 'b0caa7d1-0d1e-483e-b22b-ec6ab7fada06', + 'dur': 369680, + 'firstreleasedate': '2007', + 'format': u'Format', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'artistname': u'Artist', + 'artist': u'Artist', + 'tracks': 5, + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'mbid': '659f405b-b4ee-4033-868a-0daa27784b89', + 'release': u'Aerial', + 'position': 1 + } + ] + self._test_index_entity("recording", expected) + + def test_index_release(self): + expected = [ + { + 'primarytype': u'Album', + '_store': 'Release #2NameNameNameArrivalCommentAlbum', + 'artist': u'Name', + 'creditname': u'Name', + 'artistname': u'Name', + 'arid': 'a9d99e40-72d7-11de-8a39-0800200c9a66', + 'mbid': '7a906020-72db-11de-8a39-0800200c9a66', + 'rgid': '3b4faa80-72d9-11de-8a39-0800200c9a66', + 'release': u'Release #2', + 'quality': -1 + }, + { + 'comment': u'Comment', + 'lang': u'deu', + 'script': u'Ugar', + '_store': 'ArrivalOfficialCommentJewel CasedeuUgarNameNameNameArrivalCommentAlbum2009-05-08GB2009-05-08United KingdomUnited KingdomGB731453398122ABC-123-XLabelABC-123Label', + 'artist': u'Name', + 'creditname': u'Name', + 'country': u'GB', + 'barcode': u'731453398122', + 'status': u'Official', + 'artistname': u'Name', + 'arid': 'a9d99e40-72d7-11de-8a39-0800200c9a66', + 'label': u'Label', + 'packaging': u'Jewel Case', + 'date': '2009-05-08', + 'mbid': 'f34c079d-374e-4436-9448-da92dedef3ce', + 'catno': [u'ABC-123', u'ABC-123-X'], + 'rgid': '3b4faa80-72d9-11de-8a39-0800200c9a66', + 'laid': '00a23bd0-72db-11de-8a39-0800200c9a66', + 'release': u'Arrival', + 'quality': -1, + 'primarytype': u'Album' + }, + { 
+ '_store': 'Blonde on BlondeVarious ArtistsVarious ArtistsVarious ArtistsBlonde on Blonde', + 'artist': u'Various Artists', + 'creditname': u'Various Artists', + 'artistname': u'Various Artists', + 'arid': '7a906020-72db-11de-8a39-0800200c9a66', + 'mbid': '538aff00-a009-4515-a064-11a6d5a502ee', + 'rgid': '329fb554-2a81-3d8a-8e22-ec2c66810019', + 'release': u'Blonde on Blonde', + 'quality': -1}, + { + '_store': 'Various ReleaseVarious ArtistsVarious ArtistsVarious ArtistsVarious Release3', + 'tracks': 3, 'artist': u'Various Artists', + 'creditname': u'Various Artists', + 'artistname': u'Various Artists', + 'arid': '7a906020-72db-11de-8a39-0800200c9a66', + 'tracksmedium': 3, + 'mbid': '25b6fe30-ff5b-11de-8a39-0800200c9a66', + 'mediumid': 'c517968f-afd0-48e6-ab4b-dfdae888ad9d', + 'rgid': '25b6fe30-ff5b-11de-8a39-0800200c9a66', + 'release': u'Various Release', + 'mediums': 1, + 'quality': -1 + } + ] + self._test_index_entity("release", expected) + + def test_index_release_group(self): + expected = [ + { + 'reid': '25b6fe30-ff5b-11de-8a39-0800200c9a66', + '_store': 'Various ReleaseVarious ArtistsVarious ArtistsVarious ArtistsVarious Release', + 'releases': 1, + 'artist': u'Various Artists', + 'creditname': u'Various Artists', + 'artistname': u'Various Artists', + 'arid': '7a906020-72db-11de-8a39-0800200c9a66', + 'releasegroup': u'Various Release', + 'mbid': '25b6fe30-ff5b-11de-8a39-0800200c9a66', + 'release': u'Various Release' + }, + { + 'comment': u'Comment', + 'reid': '4c767e70-72d8-11de-8a39-0800200c9a66', + '_store': 'Release GroupCommentAlbumNameName1Release Name', + 'releases': 1, + 'artist': u'Name', + 'creditname': u'Name', + 'primarytype': u'Album', + 'artistname': u'Name', + 'arid': 'a9d99e40-72d7-11de-8a39-0800200c9a66', + 'releasegroup': u'Release Group', + 'mbid': '7b5d22d0-72d7-11de-8a39-0800200c9a66', + 'release': u'Release Name' + }, + { + 'comment': u'Comment', + 'primarytype': u'Album', + '_store': 'Release NameCommentAlbumNameName1', + 'artist': 
u'Name', + 'creditname': u'Name', + 'artistname': u'Name', + 'arid': 'a9d99e40-72d7-11de-8a39-0800200c9a66', + 'releasegroup': u'Release Name', + 'mbid': '3b4faa80-72d9-11de-8a39-0800200c9a66' + } + ] + self._test_index_entity("release-group", expected) + + def test_index_series(self): + expected = [ + { + 'series': u'Dumb Recording Series', + 'mbid': 'dbb23c50-d4e4-11e3-9c1a-0800200c9a66', + 'type': u'Recording series', + '_store': 'Dumb Recording Series' + }, + { + 'comment': u'test comment 1', + '_store': 'Test Recording Seriestest comment 1Test Recording Series Alias', + 'series': u'Test Recording Series', + 'alias': u'Test Recording Series Alias', + 'mbid': 'a8749d0c-4a5a-4403-97c5-f6cd018f8e6d', + 'type': u'Recording series', + }, + { + 'comment': u'test comment 2', + 'series': u'Test Work Series', + 'mbid': '2e8872b9-2745-4807-a84e-094d425ec267', + 'type': u'Work series', + '_store': 'Test Work Seriestest comment 2' + } + ] + self._test_index_entity("series", expected) + + def test_index_tag(self): + expected = [ + { + 'tag': u'musical', + '_store': 'musical', + 'id': 1 + }, + { + 'tag': u'rock', + '_store': 'rock', + 'id': 2 + }, + { + 'tag': u'jazz', + '_store': 'jazz', + 'id': 3 + }, + { + 'tag': u'world music', + '_store': 'world music', + 'id': 4 + } + ] + self._test_index_entity("tag", expected, key="id") + + def test_index_url(self): + expected = [ + { + 'url': u'http://musicbrainz.org/', + 'mbid': '9201840b-d810-4e0f-bb75-c791205f5b24', + '_store': 'http://musicbrainz.org/' + }, + { + 'url': u'http://microsoft.com', + 'mbid': '9b3c5c67-572a-4822-82a3-bdd3f35cf152', + '_store': 'http://microsoft.com' + }, + { + 'targettype': 'artist', + '_store': 'http://zh-yue.wikipedia.org/wiki/%E7%8E%8B%E8%8F%B2backwardFaye WongFaye Wong', + 'url': u'http://zh-yue.wikipedia.org/wiki/%E7%8E%8B%E8%8F%B2', + 'targetid': 'acd58926-4243-40bb-a2e5-c7464b3ce577', + 'mbid': '25d6b63a-12dc-41c9-858a-2f42ae610a7d', + 'relationtype': u'wikipedia' + }, + { + 'targettype': 
'artist', + '_store': 'https://www.allmusic.com/artist/faye-wong-mn0000515659backwardFaye WongFaye Wong', + 'url': u'https://www.allmusic.com/artist/faye-wong-mn0000515659', + 'targetid': 'acd58926-4243-40bb-a2e5-c7464b3ce577', + 'mbid': '7bd45cc7-6189-4712-35e1-cdf3632cf1a9', + 'relationtype': u'allmusic' + }, + { + 'url': u'http://microsoft.fr', + 'mbid': '9b3c5c67-572a-4822-82a3-bdd3f35cf153', + '_store': 'http://microsoft.fr' + } + ] + self._test_index_entity("url", expected) + + def test_index_work(self): + expected = [ + { + 'comment': u'Work', + '_store': 'TestWork', + 'mbid': '105c079d-374e-4436-9448-da92dedef3ce', + 'work': u'Test', + 'type': u'Aria' + }, + { + 'comment': u'Work', + '_store': 'TestT-500.000.001-0T-500.000.002-0Work', + 'iswc': [u'T-500.000.002-0', u'T-500.000.001-0'], + 'work': u'Test', + 'mbid': '755c079d-374e-4436-9448-da92dedef3ce', + 'type': u'Aria' + }, + { + 'comment': u'Work', + '_store': 'Dancing QueenT-000.000.001-0Work', + 'iswc': u'T-000.000.001-0', 'work': u'Dancing Queen', + 'mbid': '745c079d-374e-4436-9448-da92dedef3ce', + 'type': u'Aria' + }, + { + 'mbid': '745c079d-374e-4436-9448-da92dedef3cf', + 'work': u'Test', + '_store': 'TestT-000.000.002-0', + 'iswc': u'T-000.000.002-0' + }, + { + '_store': 'Blue LinesbackwardBlue LinesbackwardBlue Lines', + 'work': u'Blue Lines', + 'recording_count': 2, + 'recording': u'Blue Lines', + 'mbid': '640b17f5-4aa3-3fb1-8c6c-4792458e8a56', + 'rid': ['bef81f8f-4bcf-4308-bd66-e57018169a94', 'a2383c02-2430-4294-9177-ef799a6eca31'], + 'type': u'Song' + } + ] + self._test_index_entity("work", expected) + + def test_index_cdstub(self): + expected_timestamp = int( + datetime(2000, 1, 1, tzinfo=timezone.utc) + .timestamp() + ) + expected = [ + { + 'comment': u'this is a comment', + 'added': expected_timestamp, + '_store': 'Test StubTest Artist837101029192this is a comment', + 'discid': u'YfSgiOEayqN77Irs.VNV.UNJ0Zs-', + 'artist': u'Test Artist', + 'barcode': u'837101029192', + 'tracks': 2, + 
'title': u'Test Stub', + 'id': 1 + } + ] + self._test_index_entity("cdstub", expected, key="id") + + def test_index_annotation(self): + expected = [ + { + '_store': '745c079d-374e-4436-9448-da92dedef3ceTest ArtistTest annotation 1', + 'name': u'Test Artist', + 'text': u'Test annotation 1', + 'entity': '745c079d-374e-4436-9448-da92dedef3ce', + 'type': 'artist', + 'id': 1 + }, + { + '_store': '945c079d-374e-4436-9448-da92dedef3cfMinimal ArtistTest annotation 2', + 'name': u'Minimal Artist', + 'text': u'Test annotation 2', + 'entity': '945c079d-374e-4436-9448-da92dedef3cf', + 'type': 'artist', + 'id': 2 + }, + { + '_store': 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958Annotated Artist ADuplicate annotation', + 'name': u'Annotated Artist A', + 'text': u'Duplicate annotation', + 'entity': 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958', + 'type': 'artist', + 'id': 3 + }, + { + '_store': 'ca4c2228-227c-4904-932a-dff442c091eaAnnotated Artist BDuplicate annotation', + 'name': u'Annotated Artist B', + 'text': u'Duplicate annotation', + 'entity': 'ca4c2228-227c-4904-932a-dff442c091ea', + 'type': 'artist', + 'id': 4 + } + ] + self._test_index_entity("annotation", expected, key="id") + + def test_index_event(self): + expected = [ + { + 'comment': u'2022, Prom 60', + 'begin': '2022-09-01', + 'end': '2022-09-01', + 'artist': [u'Kwam\xe9 Ryan', u'BBC Concert Orchestra'], + 'pid': '4352063b-a833-421b-a420-e7fb295dece0', + 'arid': [ + 'f72a5b32-449f-4090-9a2a-ebbdd8d3c2e5', + 'dfeba5ea-c967-4ad2-9cdd-3cffb4320143' + ], + 'ended': 'true', + 'mbid': 'ca1d24c1-1999-46fd-8a95-3d4108df5cb2', + 'place': u'Royal Albert Hall', + '_store': 'BBC Open Music Prom2022, Prom 602022-09-012022-09-01true19:30:00backwardBBC Concert OrchestraBBC Concert OrchestrbackwardKwamé RyanRyan, KwamébackwardRoyal Albert Hall', + 'type': u'Concert', + 'event': u'BBC Open Music Prom' + } + ] + self._test_index_entity("event", expected) diff --git a/test/test_querying.py b/test/test_querying.py index fddbd481..55ab42a9 100644 --- 
a/test/test_querying.py +++ b/test/test_querying.py @@ -1,8 +1,7 @@ import doctest -import mock -import unittest +from unittest import mock, TestCase -from test import helpers, models +from test import models from collections import defaultdict from sqlalchemy.orm.properties import RelationshipProperty from sir.querying import iterate_path_values @@ -11,56 +10,26 @@ from sir.trigger_generation.paths import second_last_model_in_path -class DeferEverythingButTest(unittest.TestCase): - def setUp(self): - mapper = helpers.Object() - mapper.iterate_properties = [] - pk1 = helpers.Object() - pk1.name = "pk1" - pk2 = helpers.Object() - pk2.name = "pk2" - mapper.primary_key = [pk1, pk2] - - self.mapper = mapper - - prop = helpers.Object() - prop.columns = "" - self.prop = prop - self.mapper.iterate_properties.append(prop) - - self.load = mock.Mock() - self.required_columns = ["key", "key2"] - - def test_plain_column_called(self): - self.prop.key = "foo" - load = defer_everything_but(self.mapper, self.load, *self.required_columns) - load.defer.assert_called_once_with("foo") - - def test_plain_column_not_called(self): - self.prop.key = "key" - load = defer_everything_but(self.mapper, self.load, *self.required_columns) - self.assertFalse(load.defer.called) - - def test_id_column(self): - self.prop.key = "foo_id" - load = defer_everything_but(self.mapper, self.load, - *self.required_columns) - self.assertFalse(load.defer.called) - - def test_position_column(self): - self.prop.key = "position" - load = defer_everything_but(self.mapper, self.load, - *self.required_columns) - self.assertFalse(load.defer.called) - - def test_primary_key_always_loaded(self): - self.prop.key = "pk1" - load = defer_everything_but(self.mapper, self.load, - *self.required_columns) - self.assertFalse(load.defer.called) - - -class IteratePathValuesTest(unittest.TestCase): +class DeferEverythingButTest(TestCase): + + @mock.patch("sir.schema.searchentities.defer") + def test_defer_everything_but(self, 
mock_defer): + mapper = models.D.__mapper__ + original_load = mock.Mock() + required_columns = ["key", "key2"] + defer_everything_but( + mapper, + original_load, + *required_columns + ) + original_load.options.assert_called() + mock_defer.assert_called_once_with( + mapper.get_property("foo"), + raiseload=True + ) + + +class IteratePathValuesTest(TestCase): @classmethod def setUpClass(cls): c = models.C(id=1) @@ -91,11 +60,11 @@ def test_non_sqlalchemy_paths(self): self.assertEqual(res, [models.C.__tablename__]) -class MergePathsTest(unittest.TestCase): +class MergePathsTest(TestCase): def test_dotless_path(self): paths = [["id"], ["name"]] expected = {"id": "", "name": ""} - self.assertEquals(merge_paths(paths), expected) + self.assertEqual(merge_paths(paths), expected) def test_dotted_path(self): paths = [["rel.id"], ["rel2.rel3.id"]] @@ -110,7 +79,7 @@ def test_dotted_path(self): self.assertEqual(dict(merge_paths(paths)), expected) -class DBTest(unittest.TestCase): +class DBTest(TestCase): def test_non_composite_fk(self): paths, _, models, _ = generate_update_map() for table_paths in paths.values(): diff --git a/test/test_searchentities.py b/test/test_searchentities.py index 670d3ccc..58fa8352 100644 --- a/test/test_searchentities.py +++ b/test/test_searchentities.py @@ -1,5 +1,4 @@ -import mock -import unittest +from unittest import mock, TestCase from test import models from xml.etree.ElementTree import Element, tostring @@ -9,7 +8,7 @@ from sqlalchemy.orm import sessionmaker -class QueryResultToDictTest(unittest.TestCase): +class QueryResultToDictTest(TestCase): def setUp(self): config_patcher = mock.patch("sir.config.CFG") self.addCleanup(config_patcher.stop) @@ -26,14 +25,17 @@ def setUp(self): self.expected = { "id": 1, "c_bar": "foo", - "c_bar_trans": {"foo", "yay"}, + "c_bar_trans": ["yay", "foo"], } c = models.C(id=2, bar="foo") self.val = models.B(id=1, c=c) def test_fields(self): res = self.entity.query_result_to_dict(self.val) - 
self.assertDictEqual(self.expected, res) + self.assertEqual(res.keys(), self.expected.keys()) + self.assertEqual(res["id"], self.expected["id"]) + self.assertEqual(res["c_bar"], self.expected["c_bar"]) + self.assertCountEqual(res["c_bar_trans"], self.expected["c_bar_trans"]) def test_conversion(self): elem = Element("testelem", text="text") @@ -43,12 +45,15 @@ def test_conversion(self): res = self.entity.query_result_to_dict(self.val) - self.expected["_store"] = tostring(elem) - self.assertDictEqual(self.expected, res) + self.expected["_store"] = str(tostring(elem, encoding="us-ascii"), encoding="us-ascii") + self.assertEqual(res.keys(), self.expected.keys()) + self.assertEqual(res["id"], self.expected["id"]) + self.assertEqual(res["c_bar"], self.expected["c_bar"]) + self.assertCountEqual(res["c_bar_trans"], self.expected["c_bar_trans"]) self.assertEqual(convmock.to_etree.call_count, 1) -class TestIsCompositeColumn(unittest.TestCase): +class TestIsCompositeColumn(TestCase): def test_composite_column(self): self.assertTrue(is_composite_column(models.B, "composite_column")) @@ -59,7 +64,7 @@ def test_sqla_column(self): self.assertFalse(is_composite_column(models.B, "c")) -class SearchEntityTest(unittest.TestCase): +class SearchEntityTest(TestCase): FILTER_MAX = 20 @staticmethod diff --git a/test/test_util.py b/test/test_util.py index d92aa19a..34e9023f 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,5 +1,4 @@ -import mock -import unittest +from unittest import mock, TestCase from test.models import B from json import dumps @@ -11,9 +10,9 @@ def noop(*args, **kwargs): pass -class VersionCheckerTest(unittest.TestCase): +class VersionCheckerTest(TestCase): def setUp(self): - urlopen = mock.patch("sir.util.urllib2.urlopen") + urlopen = mock.patch("sir.util.urllib.request.urlopen") urlopenmock = urlopen.start() self.addCleanup(urlopen.stop) @@ -41,14 +40,14 @@ def test_matching_version(self): def test_solr_version_too_large(self): self.read.return_value = 
dumps({"version": 1.2}) - self.assertRaisesRegexp(util.VersionMismatchException, + self.assertRaisesRegex(util.VersionMismatchException, "^testcore: Expected 1.1, got 1.2", util.solr_version_check, "testcore") def test_solr_version_too_small(self): self.read.return_value = dumps({"version": 1.0}) - self.assertRaisesRegexp(util.VersionMismatchException, + self.assertRaisesRegex(util.VersionMismatchException, "^testcore: Expected 1.1, got 1.0", util.solr_version_check, "testcore") diff --git a/tox.ini b/tox.ini index 24815971..8bc36521 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist=py27, flake8, docs +envlist=py313, flake8, docs [testenv] deps =