Maybe One‐Off commands

Find base_studies with the same doi

same_dois = BaseStudy.query.group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1).with_entities(BaseStudy.doi, func.array_agg(BaseStudy.id).label('id_list')).all()

from sqlalchemy import or_

# Collect all IDs to batch query
all_ids = [id_ for doi in same_dois if len(doi[1]) == 2 for id_ in doi[1]]
id_to_study = {s.id: s for s in BaseStudy.query.filter(BaseStudy.id.in_(all_ids)).all()}

keep = []
delete = []

for doi in same_dois:
    if len(doi[1]) != 2:
        continue

    first_id, second_id = doi[1]
    first = id_to_study[first_id]
    second = id_to_study[second_id]

    # Determine priority based on number of identifiers
    first_id_count = sum(bool(getattr(first, attr)) for attr in ["doi", "pmid", "pmcid"])
    second_id_count = sum(bool(getattr(second, attr)) for attr in ["doi", "pmid", "pmcid"])

    if second_id_count > first_id_count:
        first, second = second, first  # Swap so that first is always the higher priority study

    # Merge identifiers (priority study retains its values unless missing)
    first.pmid = first.pmid or second.pmid
    first.pmcid = first.pmcid or second.pmcid
    first.name = first.name or second.name
    first.description = first.description or second.description
    first.year = first.year or second.year
    first.publication = first.publication or second.publication
    first.metadata_ = first.metadata_ or second.metadata_

    # Merge versions/studies
    first.versions.extend(second.versions)

    # Add to lists for keeping and deleting
    keep.append(first)
    delete.append(second)

    # Optional logging
    print(f"doi: {doi[0]}")
    print(f"name: {first.name}")
    print(f"pmid: {first.pmid}")
    print(f"pmcid: {first.pmcid}")
    print(f"studies: {len(first.versions)} versions merged")
    print(f"first source: {first.versions[0].source if first.versions else 'No source'}")

# Optional: Delete duplicates after processing
#for study in delete:
#    db.session.delete(study)

# Commit the changes
# db.session.commit()

Divide combined studies

dup_bs = []
for bs in BaseStudy.query.options(joinedload(BaseStudy.versions)):
    if len(bs.versions) < 1:
        continue
    pmid = bs.versions[0].pmid
    for s in bs.versions[1:]:
        if not pmid:
            pmid = s.pmid
        if s.pmid and s.pmid != pmid:
            if bs not in dup_bs:
                dup_bs.append(bs)
            print(bs.id)

new_bs = []
for bs in dup_bs:
    orig_pmid = bs.pmid
    groups = {}
    for v in bs.versions:
        if v.pmid == orig_pmid:
            continue
        if v.pmid in groups:
            groups[v.pmid].append(v)
        else:
            groups[v.pmid] = [v]
    for pmid, studies in groups.items():
        new_bs.append(BaseStudy(
            name=next((x.name for x in studies if x.name), None),
            pmid=pmid,
            doi=next((x.doi for x in studies if x.doi), None),
            authors=next((x.authors for x in studies if x.authors), None),
            year=next((x.year for x in studies if x.year), None),
            description=next((x.description for x in studies if x.description), None),
            publication=next((x.publication for x in studies if x.publication), None),
            metadata_=next((x.metadata for x in studies if x.metadata_), None),
            level="group",
            public=True,
            versions=studies,
       ))

db.session.add_all(new_bs)
db.session.commit()

Update provenance

from sqlalchemy.orm.attributes import flag_modified


to_commit = []
for p in Project.query:
    if not p.provenance.get("extractionMetadata"):
        continue
    for ss in p.provenance['extractionMetadata']['studyStatusList']: 
        if ss['status'] == "COMPLETE":
            ss['status'] = 'completed'
        elif ss['status'] == 'SAVEFORLATER':
            ss['status'] = 'savedforlater'
   
    flag_modified(p, "provenance")
    to_commit.append(p)

db.session.add_all(to_commit)
db.session.commit()

Add order to existing analysis objects

from sqlalchemy.orm import joinedload

def order_objects(objects):
    # Check if all objects have table_id defined
    if all(hasattr(obj, 'table_id') and obj.table_id is not None for obj in objects):
        # Check if all table_ids can be represented as integers
        if all(obj.table_id.isdigit() for obj in objects):
            # Convert table_ids to integers and sort based on value
            objects.sort(key=lambda obj: int(obj.table_id))
        else:
            # Sort based on string comparison of table_ids
            objects.sort(key=lambda obj: obj.table_id)
    else:
        # Check if all names can be represented as integers
        if all(obj.name.isdigit() for obj in objects):
            # Convert names to integers and sort based on value
            objects.sort(key=lambda obj: int(obj.name))
        else:
            # Sort based on string comparison of names
            objects.sort(key=lambda obj: obj.name)
    
    # Assign order attribute to each object
    for i, obj in enumerate(objects):
        obj.order = i

studies = Study.query.options(joinedload(Study.analyses)).all()

handle studies with doi, but no pmid

doi_no_pmid = BaseStudy.query.filter(and_(BaseStudy.doi != None, BaseStudy.doi != '')).filter(or_(BaseStudy.pmid == None, BaseStudy.doi == '')).all()

to_commit = []
to_delete = []
attribute_names = [column.name for column in BaseStudy.__table__.columns]
for bs in doi_no_pmid:
    pmids = doi_to_pmid(bs.doi)
    if len(pmids) == 1:
        pmid = pmids[0]
        other_bs = BaseStudy.query.filter_by(doi=bs.doi, pmid=pmid).one_or_none()
        if other_bs:
            for attr in ['name', 'description', 'metadata', 'publication', 'authors', 'year', 'level']:
                setattr(other_bs, attr, getattr(other_bs, attr, getattr(bs, attr, None)))
            other_bs.versions.extend(bs.versions)
            to_delete.append(bs)
            bs = other_bs
        else:
            bs.pmid = pmid
        to_commit.append(bs)
        for v in bs.versions:
            v.pmid = v.pmid or pmid
            to_commit.append(v)
    print(bs.name)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Maybe One‐Off commands

Find base_studies with the same doi

Divide combined studies

Update provenance

Add order to existing analysis objects

handle studies with doi, but no pmid

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Clone this wiki locally