Skip to content

Commit 4da3297

Browse files
Merge pull request #159 from databio/dev
Release v0.9.0
2 parents a6fcb8a + 1215618 commit 4da3297

34 files changed

+1387
-398
lines changed

bedhost/_version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.8.0"
1+
__version__ = "0.9.0"

bedhost/data_models.py

+4
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,7 @@ class BaseListResponse(BaseModel):
7777
limit: int
7878
offset: int
7979
results: list
80+
81+
82+
class CreateBEDsetRequest(BaseModel):
    # Request body model carrying a single required string field.
    # registry_path: registry path string supplied by the client
    # (presumably a PEPhub project path such as "namespace/name:tag" — TODO confirm format)
    registry_path: str

bedhost/routers/bed_api.py

+91-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
BEDFileNotFoundError,
1616
TokenizeFileNotExistError,
1717
)
18+
from bbconf.models.bed_models import BedClassification # BedPEPHub,
1819
from bbconf.models.bed_models import (
19-
BedClassification, # BedPEPHub,
2020
BedEmbeddingResult,
2121
BedFiles,
2222
BedListResult,
@@ -27,6 +27,8 @@
2727
BedStatsModel,
2828
TokenizedBedResponse,
2929
TokenizedPathResponse,
30+
QdrantSearchResult,
31+
RefGenValidReturnModel,
3032
)
3133
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
3234
from fastapi.responses import PlainTextResponse
@@ -193,6 +195,27 @@ async def get_bed_pephub(
193195
)
194196

195197

198+
@router.get(
    "/{bed_id}/neighbours",
    summary="Get nearest neighbours for a single BED record",
    response_model=BedListSearchResult,
    response_model_by_alias=False,
    description=f"Returns most similar BED files in the database. "
    f"Example\n bed_id: {EXAMPLE_BED}",
)
async def get_bed_neighbours(
    bed_id: str = BedDigest,
    limit: int = 10,
    offset: int = 0,
):
    """
    Return the BED records most similar to the given record.

    :param bed_id: identifier of the BED record whose neighbours are requested
    :param limit: maximum number of results, forwarded to the agent
    :param offset: pagination offset, forwarded to the agent
    :return: the result of ``bbagent.bed.get_neighbours``, serialized
        through ``BedListSearchResult``
    :raises HTTPException: 404 when the agent reports the BED record as missing
    """
    try:
        return bbagent.bed.get_neighbours(bed_id, limit=limit, offset=offset)
    except BEDFileNotFoundError as err:
        # Give the client an explicit reason instead of a bare 404.
        raise HTTPException(
            status_code=404,
            detail=f"Bed file {bed_id} not found",
        ) from err
217+
218+
196219
@router.get(
197220
"/{bed_id}/embedding",
198221
summary="Get embeddings for a single BED record",
@@ -335,7 +358,52 @@ async def text_to_bed_search(query, limit: int = 10, offset: int = 0):
335358
Example: query="cancer"
336359
"""
337360
_LOGGER.info(f"Searching for: {query}")
338-
results = bbagent.bed.text_to_bed_search(query, limit=limit, offset=offset)
361+
362+
# results_sql = bbagent.bed.sql_search(
363+
# query, limit=round(limit / 2, 0), offset=round(offset / 2, 0)
364+
# )
365+
#
366+
# if results_sql.count > results_sql.offset:
367+
# qdrant_offset = offset - results_sql.offset
368+
# else:
369+
# qdrant_offset = offset - results_sql.count
370+
#
371+
# results_qdr = bbagent.bed.text_to_bed_search(
372+
# query, limit=limit, offset=qdrant_offset - 1 if qdrant_offset > 0 else 0
373+
# )
374+
#
375+
# results = BedListSearchResult(
376+
# count=results_qdr.count,
377+
# limit=limit,
378+
# offset=offset,
379+
# results=(results_sql.results + results_qdr.results)[0:limit],
380+
# )
381+
spaceless_query = query.replace(" ", "")
382+
if len(spaceless_query) == 32 and spaceless_query == query:
383+
try:
384+
similar_results = bbagent.bed.get_neighbours(
385+
query, limit=limit, offset=offset
386+
)
387+
388+
if similar_results.results and offset == 0:
389+
390+
result = QdrantSearchResult(
391+
id=query,
392+
payload={},
393+
score=1.0,
394+
metadata=bbagent.bed.get(query),
395+
)
396+
397+
similar_results.results.insert(0, result)
398+
return similar_results
399+
except Exception as _:
400+
pass
401+
402+
results = bbagent.bed.text_to_bed_search(
403+
query,
404+
limit=limit,
405+
offset=offset,
406+
)
339407

340408
if results:
341409
return results
@@ -414,3 +482,24 @@ async def get_tokens(
414482
status_code=404,
415483
detail="Tokenized file not found",
416484
)
485+
486+
487+
@router.get(
488+
"/{bed_id}/genome-stats",
489+
summary="Get reference genome validation results",
490+
response_model=RefGenValidReturnModel,
491+
)
492+
async def get_ref_gen_results(
493+
bed_id: str,
494+
):
495+
"""
496+
Return reference genome validation results for a bed file
497+
Example: bed: 0dcdf8986a72a3d85805bbc9493a1302
498+
"""
499+
try:
500+
return bbagent.bed.get_reference_validation(bed_id)
501+
except BEDFileNotFoundError as _:
502+
raise HTTPException(
503+
status_code=404,
504+
detail=f"Bed file {bed_id} not found",
505+
)

bedhost/routers/bedset_api.py

+73-15
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
import logging
22

3-
from bbconf.exceptions import BedSetNotFoundError
3+
from bbconf.exceptions import BedSetNotFoundError, BedSetTrackHubLimitError
44
from bbconf.models.bedset_models import (
55
BedSetBedFiles,
66
BedSetListResult,
77
BedSetMetadata,
88
BedSetPlots,
99
BedSetStats,
1010
)
11+
from pephubclient.helpers import is_registry_path, unwrap_registry_path
1112
from fastapi import APIRouter, HTTPException, Request, Response
1213

1314
from ..const import EXAMPLE_BEDSET, PKG_NAME
1415
from ..main import bbagent
16+
from ..data_models import CreateBEDsetRequest
1517
from ..utils import zip_pep
1618

1719
router = APIRouter(prefix="/v1/bedset", tags=["bedset"])
@@ -165,22 +167,78 @@ async def get_trackDb_file_bedset(bedset_id: str):
165167
"""
166168
Generate trackDb file for the BED set track hub
167169
"""
170+
# Response should be this type:
171+
# trackDb_txt = (
172+
# trackDb_txt + f"track\t {metadata.name}\n"
173+
# "type\t bigBed\n"
174+
# f"bigDataUrl\t {metadata.files.bigbed_file.access_methods[0].access_url.url} \n"
175+
# f"shortLabel\t {metadata.name}\n"
176+
# f"longLabel\t {metadata.description}\n"
177+
# "visibility\t full\n\n"
178+
# )
179+
try:
180+
trackDb_txt = bbagent.bedset.get_track_hub_file(bedset_id)
181+
except BedSetTrackHubLimitError as _:
182+
raise HTTPException(
183+
status_code=400,
184+
detail="Track hub limit reached. Please try smaller BEDset.",
185+
)
186+
187+
return Response(trackDb_txt, media_type="text/plain")
168188

169-
hit = bbagent.bedset.get_bedset_bedfiles(bedset_id)
170189

171-
trackDb_txt = ""
172-
for bed in hit.results:
173-
metadata = bbagent.bed.get(bed.id, full=True)
190+
@router.post(
191+
"/create/",
192+
description="Create a new bedset by providing registry path to the PEPhub project",
193+
)
194+
async def create_bedset(bedset: CreateBEDsetRequest):
195+
"""
196+
Create a new bedset
197+
"""
198+
# Validate the PEPhub project string
199+
if not is_registry_path(bedset.registry_path):
200+
raise HTTPException(status_code=406, detail="Invalid registry path")
201+
202+
project_reg_path = unwrap_registry_path(bedset.registry_path)
174203

175-
if metadata.files.bigbed_file:
204+
if project_reg_path.namespace not in ["databio", "bedbase", "pepkit"]:
205+
raise HTTPException(status_code=403, detail="User is not in admin list")
176206

177-
trackDb_txt = (
178-
trackDb_txt + f"track\t {metadata.name}\n"
179-
"type\t bigBed\n"
180-
f"bigDataUrl\t {metadata.files.bigbed_file.access_methods[0].access_url.url} \n"
181-
f"shortLabel\t {metadata.name}\n"
182-
f"longLabel\t {metadata.description}\n"
183-
"visibility\t full\n\n"
184-
)
207+
try:
208+
project = bbagent.config.phc.load_project(bedset.registry_path)
209+
except Exception as _:
210+
raise HTTPException(
211+
status_code=404, detail=f"Project: '{bedset.registry_path}' not found"
212+
)
213+
214+
bedfiles_list = [
215+
bedfile_id.get("record_identifier") or bedfile_id.sample_name
216+
for bedfile_id in project.samples
217+
]
218+
219+
if bbagent.bedset.exists(identifier=project.name):
220+
raise HTTPException(
221+
status_code=409,
222+
detail=f"BEDset with identifier {project.name} already exists",
223+
)
185224

186-
return Response(trackDb_txt, media_type="text/plain")
225+
try:
226+
bbagent.bedset.create(
227+
identifier=project.name,
228+
name=project.name,
229+
bedid_list=bedfiles_list,
230+
statistics=True,
231+
description=project.description,
232+
annotation={
233+
"source": project.config.get("source", ""),
234+
"author": project.config.get("author", project_reg_path.namespace),
235+
},
236+
no_fail=False,
237+
overwrite=False,
238+
)
239+
except Exception as err:
240+
raise HTTPException(
241+
status_code=400, detail=f"Unable to create bedset. Error: {err}"
242+
)
243+
244+
return {"status": "success"}

interactive.py

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Scratch script for interactive experiments: builds a BedBaseAgent from the
# dev deployment config, re-initializes its search objects, and tries several
# sentence-embedding backends with the same model name.
import bbconf

bba = bbconf.BedBaseAgent("deployment/config/api-dev.bedbase.org.yaml")

# Call the private initializers by hand and store the results on the config.
bba.config._b2bsi = bba.config._init_b2bsi_object()
bba.config._r2v = bba.config._init_r2v_object()
bba.config._bivec = bba.config._init_bivec_object()


# Here's some code to test the BiVectorSearchInterface

from geniml.search.interfaces import BiVectorSearchInterface
from geniml.search.backends import BiVectorBackend

from geniml.search.query2vec import Text2Vec

# There is no `self` at module level, so the engines are read from the config
# object instead.
# NOTE(review): assumes `_qdrant_text_engine` / `_qdrant_engine` are attributes
# of `bba.config` — confirm against bbconf.
search_backend = BiVectorBackend(
    metadata_backend=bba.config._qdrant_text_engine,
    bed_backend=bba.config._qdrant_engine,
)

t2v = Text2Vec("sentence-transformers/all-MiniLM-L6-v2", v2v=None)

# NOTE(review): constructed without arguments; it presumably needs the backend
# and query2vec objects built above — verify against the geniml API.
bvsi = BiVectorSearchInterface()

import logging
from typing import Union

import numpy as np
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

from geniml.text2bednn import Vec2VecFNN
from geniml.search.query2vec.abstract import Query2Vec

# culprit:
te = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Testing the sentence transformers:


from sentence_transformers import SentenceTransformer

sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = model.encode(sentences)
print(embeddings)


from fastembed import TextEmbedding

# Same model name through fastembed; `embed` is wrapped in list() to
# materialize the results.
model = TextEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2", max_length=512
)
sentences = ["This is an example sentence", "Each sentence is converted"]
embeddings = list(model.embed(sentences))

requirements/requirements-all.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# bbconf @ git+https://github.com/databio/bbconf.git@dev#egg=bbconf
2-
bbconf>=0.9.0
2+
bbconf>=0.10.0
33
fastapi>=0.103.0
44
logmuse>=0.2.7
55
markdown
@@ -9,4 +9,4 @@ uvicorn
99
yacman>=0.9.2
1010
pephubclient>=0.4.1
1111
psycopg[binary,pool]
12-
python-multipart>=0.0.9
12+
python-multipart>=0.0.9

0 commit comments

Comments
 (0)