Skip to content

Maybe One‐Off commands

James Kent edited this page Dec 12, 2023 · 18 revisions

Find base_studies that share the same DOI

# Merge pairs of BaseStudy rows that share a DOI: the second row's fields and
# versions are folded into the first, and both rows are recorded in
# `keep` / `delete`.
# NOTE: this snippet only stages the merge in memory. Nothing is added,
# deleted, or committed to the session here.
same_dois = (
    BaseStudy.query.group_by(BaseStudy.doi)
    .having(func.count(BaseStudy.doi) > 1)
    .with_entities(BaseStudy.doi, func.array_agg(BaseStudy.id).label('id_list'))
    .all()
)
keep = []
delete = []
for doi in same_dois:
    # Each row is (doi, id_list). Only exact pairs are handled; a DOI shared
    # by three or more base studies is skipped.
    if len(doi[1]) != 2:
        continue
    first_id, second_id = doi[1]
    first = BaseStudy.query.filter_by(id=first_id).one()
    second = BaseStudy.query.filter_by(id=second_id).one()

    # Prefer the first row's value and fall back to the second's when the
    # first is empty.
    pmid = first.pmid or second.pmid
    name = first.name or second.name
    studies = first.versions + second.versions
    first.name = name
    first.pmid = pmid
    first.versions = studies
    first.description = first.description or second.description
    first.year = first.year or second.year
    first.publication = first.publication or second.publication
    first.metadata_ = first.metadata_ or second.metadata_

    keep.append(first)
    delete.append(second)

    # Progress output so the merge can be reviewed by eye.
    print(f"doi: {doi[0]}")
    print(f"name: {name}")
    print(f"pmid: {pmid}")
    print(f"studies: {studies}")
    print(f"first source:{first.versions[0].source}")

Divide combined studies

# Pass 1: collect base studies whose versions carry more than one distinct
# (non-empty) PMID.
dup_bs = []
for bs in BaseStudy.query.options(joinedload(BaseStudy.versions)):
    if not bs.versions:
        continue
    pmid = bs.versions[0].pmid
    for s in bs.versions[1:]:
        # Use the first non-empty PMID seen as the reference value.
        if not pmid:
            pmid = s.pmid
        if s.pmid and s.pmid != pmid:
            if bs not in dup_bs:
                dup_bs.append(bs)
            print(bs.id)

# Pass 2: for each such base study, group the versions whose PMID differs
# from the base study's own PMID and build one new BaseStudy per group.
new_bs = []
for bs in dup_bs:
    orig_pmid = bs.pmid
    groups = {}
    for v in bs.versions:
        if v.pmid == orig_pmid:
            continue
        # NOTE(review): a version with no pmid (when the base study has one)
        # lands in the group keyed by None -- confirm that is intended.
        groups.setdefault(v.pmid, []).append(v)
    for pmid, studies in groups.items():
        # Each field takes the first non-empty value among the grouped versions.
        new_bs.append(BaseStudy(
            name=next((x.name for x in studies if x.name), None),
            pmid=pmid,
            doi=next((x.doi for x in studies if x.doi), None),
            authors=next((x.authors for x in studies if x.authors), None),
            year=next((x.year for x in studies if x.year), None),
            description=next((x.description for x in studies if x.description), None),
            publication=next((x.publication for x in studies if x.publication), None),
            # Read the `metadata_` column: on a declarative model the plain
            # `metadata` attribute is SQLAlchemy's MetaData object, not row data.
            metadata_=next((x.metadata_ for x in studies if x.metadata_), None),
            level="group",
            public=True,
            # Presumably re-parents these versions onto the new base study via
            # the relationship -- confirm against the model definition.
            versions=studies,
        ))

db.session.add_all(new_bs)
db.session.commit()

Update provenance

# Rename legacy study-status values stored in each project's provenance.
status_renames = {"COMPLETE": "completed", "saveforlater": "savedforlater"}

to_commit = []
for p in Project.query:
    # Skip projects whose provenance is missing or has no extraction metadata.
    if not p.provenance or not p.provenance.get("extractionMetadata"):
        continue
    for ss in p.provenance['extractionMetadata']['studyStatusList']:
        if ss['status'] in status_renames:
            ss['status'] = status_renames[ss['status']]

    # The nested dict is edited in place, so the attribute is flagged as
    # modified explicitly before it is queued for the commit.
    flag_modified(p, "provenance")
    to_commit.append(p)

# Add every touched project, not just the last loop variable.
db.session.add_all(to_commit)
db.session.commit()

Clone this wiki locally