Skip to content
2 changes: 1 addition & 1 deletion materializationengine/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class BaseConfig:
AUTH_TOKEN = json.load(f)["token"]
else:
AUTH_TOKEN = ""

KEEP_CREATED_COLUMN = False
BEAT_SCHEDULES = [
{
"name": "Materialized Database Daily (2 Days)",
Expand Down
5 changes: 4 additions & 1 deletion materializationengine/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from sqlalchemy import Column, String, Integer, DateTime
from sqlalchemy import Column, String, Integer, DateTime, Boolean
from sqlalchemy.ext.declarative import declarative_base

MatBase = declarative_base()
Expand All @@ -11,3 +11,6 @@ class MaterializedMetadata(MatBase):
table_name = Column(String(100), nullable=False)
row_count = Column(Integer, nullable=False)
materialized_timestamp = Column(DateTime, nullable=False)
segmentation_source = Column(String(255), nullable=True)
is_merged = Column(Boolean, nullable=True)
has_created_ts = Column(Boolean, nullable=True)
1 change: 1 addition & 0 deletions materializationengine/shared_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def get_materialization_info(
"reference_table": reference_table,
"materialization_time_stamp": str(materialization_time_stamp),
"table_count": len(annotation_tables),
"keep_created_ts_col": md.get("keep_created_ts_col", False)
}
has_segmentation_table = db.schema.is_segmentation_table_required(schema)
if has_segmentation_table:
Expand Down
2 changes: 1 addition & 1 deletion materializationengine/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
AnalysisVersion,
VersionErrorTable,
AnnoMetadata,
MaterializedMetadata,
)
from dynamicannotationdb.schema import DynamicSchemaClient
from flask import (
Expand All @@ -32,6 +31,7 @@
from materializationengine.celery_init import celery
from materializationengine.blueprints.client.query import specific_query
from materializationengine.database import sqlalchemy_cache, dynamic_annotation_cache
from materializationengine.models import MaterializedMetadata
from materializationengine.info_client import (
get_datastack_info,
get_datastacks,
Expand Down
13 changes: 10 additions & 3 deletions materializationengine/workflows/create_frozen_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
AnalysisVersion,
AnnoMetadata,
Base,
MaterializedMetadata,
)
from emannotationschemas import get_schema
from emannotationschemas.flatten import create_flattened_schema
Expand All @@ -28,6 +27,7 @@
dynamic_annotation_cache,
sqlalchemy_cache,
)
from materializationengine.models import MaterializedMetadata
from materializationengine.errors import IndexMatchError
from materializationengine.index_manager import index_cache
from materializationengine.shared_tasks import (
Expand Down Expand Up @@ -403,7 +403,8 @@ def create_materialized_metadata(
valid_row_count = mat_metadata["row_count"]
segmentation_source = mat_metadata.get("segmentation_source")
merge_table = mat_metadata.get("merge_table")

has_created_ts = mat_metadata.get("keep_created_ts_col")

celery_logger.info(f"Row count {valid_row_count}")
if valid_row_count == 0:
continue
Expand All @@ -415,6 +416,7 @@ def create_materialized_metadata(
materialized_timestamp=materialization_time_stamp,
segmentation_source=segmentation_source,
is_merged=merge_table,
has_created_ts=has_created_ts
)
analysis_session.add(mat_metadata)
analysis_session.commit()
Expand Down Expand Up @@ -651,7 +653,12 @@ def merge_tables(self, mat_metadata: dict):
mat_metadata, with_crud_columns=True
)
SegmentationModel = create_segmentation_model(mat_metadata)
crud_columns = ["created", "deleted", "superceded_id"]

keep_created = mat_metadata.get("keep_created_ts_col")
crud_columns = ["created", "deleted", "superceded_id"] # crud cols to drop
if keep_created:
crud_columns.pop(0) # we want to keep the 'created' col

query_columns = {
col.name: col
for col in AnnotationModel.__table__.columns
Expand Down
Empty file added migration/__init__.py
Empty file.
102 changes: 102 additions & 0 deletions migration/alembic.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
script_location = .

# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python-dateutil library that can be
# installed by adding `alembic[tz]` to the pip requirements
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =

# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to migration/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:migration/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# sqlalchemy.url =


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
75 changes: 75 additions & 0 deletions migration/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from logging.config import fileConfig

from sqlalchemy import engine_from_config
from sqlalchemy import pool

from alembic import context

# This is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# config_file_name is None when Alembic is driven programmatically without
# an .ini file; fileConfig() cannot handle that, so only configure logging
# when a file is actually present.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Model MetaData object handed to context.configure() below.
from materializationengine.models import MatBase
target_metadata = MatBase.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The migration context is configured from the ``sqlalchemy.url`` main
    option only; no Engine is created here. The context is configured with
    ``literal_binds=True`` and the "named" paramstyle, then the migrations
    are run inside ``context.begin_transaction()``.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online():
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.``-prefixed options of the active
    ini section (using ``NullPool``), opens a connection, binds the migration
    context to it and runs the migrations inside a transaction.
    """
    ini_section = config.get_section(config.config_ini_section)
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()


# Module entry point: run the migration routine matching the current mode.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
24 changes: 24 additions & 0 deletions migration/script.py.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade():
${upgrades if upgrades else "pass"}


def downgrade():
${downgrades if downgrades else "pass"}
27 changes: 27 additions & 0 deletions migration/versions/4006a7c94f25_add_has_timestamp_col.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Add has timestamp col

Revision ID: 4006a7c94f25
Revises: 8ff84a0bb8f8
Create Date: 2023-01-03 10:53:44.569973

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it builds on (the "Base mat metadata" revision).
revision = "4006a7c94f25"
down_revision = "8ff84a0bb8f8"
# No branch labels and no dependencies on other revisions are declared.
branch_labels = None
depends_on = None


def upgrade():
    """Add the nullable boolean ``has_created_ts`` column and set it to False on all rows."""
    has_created_ts = sa.Column("has_created_ts", sa.Boolean(), nullable=True)
    op.add_column("materializedmetadata", has_created_ts)

    # Rows that exist before this migration get an explicit False instead of NULL.
    op.execute("UPDATE materializedmetadata SET has_created_ts = False")


def downgrade():
    """Remove the ``has_created_ts`` column from ``materializedmetadata``."""
    table_name = "materializedmetadata"
    column_name = "has_created_ts"
    op.drop_column(table_name, column_name)
46 changes: 46 additions & 0 deletions migration/versions/8ff84a0bb8f8_base_mat_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Base mat metadata

Revision ID: 8ff84a0bb8f8
Revises:
Create Date: 2023-01-03 10:50:35.866055

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.engine import reflection
from sqlalchemy import engine_from_config

# Revision identifiers, used by Alembic.
revision = "8ff84a0bb8f8"
# No parent revision: this migration does not revise any earlier one.
down_revision = None
# No branch labels and no dependencies on other revisions are declared.
branch_labels = None
depends_on = None


def get_tables():
    """Return the table names present in the database being migrated.

    A short-lived Engine is built from the ``sqlalchemy.``-prefixed options
    of the active Alembic config section, used to reflect the table names,
    and then disposed so its connection pool is not left open after the
    lookup.

    Returns:
        The list of table names reported by the SQLAlchemy inspector.
    """
    config = op.get_context().config
    engine = engine_from_config(
        config.get_section(config.config_ini_section), prefix="sqlalchemy."
    )
    try:
        # sa.inspect() is the supported replacement for
        # Inspector.from_engine(), which is deprecated since SQLAlchemy 1.4.
        return sa.inspect(engine).get_table_names()
    finally:
        # The engine exists only for this lookup; release its pooled connections.
        engine.dispose()


def upgrade():
    """Create the ``materializedmetadata`` table unless the database already has it."""
    if "materializedmetadata" in get_tables():
        # Table is already present; nothing to create.
        return

    metadata_columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("schema", sa.String(length=100), nullable=False),
        sa.Column("table_name", sa.String(length=100), nullable=False),
        sa.Column("row_count", sa.Integer(), nullable=False),
        sa.Column("materialized_timestamp", sa.DateTime(), nullable=False),
        sa.Column("segmentation_source", sa.String(length=255), nullable=True),
        sa.Column("is_merged", sa.Boolean(), nullable=True),
    ]
    op.create_table(
        "materializedmetadata",
        *metadata_columns,
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade():
    """Drop the ``materializedmetadata`` table."""
    table_name = "materializedmetadata"
    op.drop_table(table_name)
1 change: 1 addition & 0 deletions tests/test_shared_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def test_get_materialization_info(self):
"chunk_size": 2,
"table_count": 1,
"find_all_expired_roots": False,
'keep_created_ts_col': False,
"analysis_version": 1,
"analysis_database": "test_datastack__mat1",
"queue_length_limit": 10000,
Expand Down