
Commit 8e25f33

SN1-331: Adding initial draft for endpoints
1 parent 62ae30c commit 8e25f33

File tree: 8 files changed (+157, -87 lines)

neurons/miners/epistula_miner/miner.py

Lines changed: 31 additions & 44 deletions
@@ -5,17 +5,14 @@
 settings = settings.settings

 import time
-import asyncio
-import json
 import httpx
 import netaddr
 import uvicorn
 import requests
 import traceback
 import bittensor as bt
-from starlette.responses import JSONResponse
 from loguru import logger
-from fastapi import APIRouter, Depends, FastAPI, Request, HTTPException
+from fastapi import APIRouter, FastAPI, Request, HTTPException
 from starlette.background import BackgroundTask
 from starlette.responses import StreamingResponse
 from bittensor.subtensor import serve_extrinsic
@@ -34,44 +31,41 @@
 SYSTEM_PROMPT = """You are a helpful agent that does it's best to answer all questions!"""


-class OpenAIMiner():
-
+class OpenAIMiner:
     def __init__(self):
         self.should_exit = False
         self.client = httpx.AsyncClient(
-                base_url="https://api.openai.com/v1",
-                headers={
-                    "Authorization": f"Bearer {settings.OPENAI_API_KEY}",
-                    "Content-Type": "application/json",
-                },
-            )
+            base_url="https://api.openai.com/v1",
+            headers={
+                "Authorization": f"Bearer {settings.OPENAI_API_KEY}",
+                "Content-Type": "application/json",
+            },
+        )
         print("OpenAI Key: ", settings.OPENAI_API_KEY)

     async def format_openai_query(self, request: Request):
         # Read the JSON data once
         data = await request.json()
-
+
         # Extract the required fields
         openai_request = {}
         for key in ["messages", "model", "stream"]:
             if key in data:
                 openai_request[key] = data[key]
         openai_request["model"] = MODEL_ID
-
+
         return openai_request
-
+
     async def create_chat_completion(self, request: Request):
         bt.logging.info(
             "\u2713",
             f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!",
         )
-        req = self.client.build_request(
-            "POST", "chat/completions", json = await self.format_openai_query(request)
-        )
+        logger.debug("Starting chat completion request...")
+        req = self.client.build_request("POST", "chat/completions", json=await self.format_openai_query(request))
         r = await self.client.send(req, stream=True)
-        return StreamingResponse(
-            r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers
-        )
+        logger.debug("Chat completion request returning...")
+        return StreamingResponse(r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers)

     # async def create_chat_completion(self, request: Request):
     #     bt.logging.info(
@@ -104,7 +98,7 @@ async def create_chat_completion(self, request: Request):
     #         "\u2713",
     #         f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!",
     #     )
-
+
     #     async def word_stream():
     #         words = "This is a test stream".split()
     #         for word in words:
@@ -133,30 +127,27 @@ async def create_chat_completion(self, request: Request):
     #             }
     #             yield f"data: {json.dumps(data)}\n\n"
     #         yield "data: [DONE]\n\n"
-
+
     #     return StreamingResponse(word_stream(), media_type='text/event-stream')

     async def check_availability(self, request: Request):
         print("Checking availability")
         # Parse the incoming JSON request
         data = await request.json()
-        task_availabilities = data.get('task_availabilities', {})
-        llm_model_availabilities = data.get('llm_model_availabilities', {})
-
+        task_availabilities = data.get("task_availabilities", {})
+        llm_model_availabilities = data.get("llm_model_availabilities", {})
+
         # Set all task availabilities to True
         task_response = {key: True for key in task_availabilities}
-
+
         # Set all model availabilities to False
         model_response = {key: False for key in llm_model_availabilities}
-
+
         # Construct the response dictionary
-        response = {
-            'task_availabilities': task_response,
-            'llm_model_availabilities': model_response
-        }
-
+        response = {"task_availabilities": task_response, "llm_model_availabilities": model_response}
+
         return response
-
+
     async def verify_request(
         self,
         request: Request,
@@ -170,18 +161,14 @@ async def verify_request(
         signed_by = request.headers.get("Epistula-Signed-By")
         signed_for = request.headers.get("Epistula-Signed-For")
         if signed_for != self.wallet.hotkey.ss58_address:
-            raise HTTPException(
-                status_code=400, detail="Bad Request, message is not intended for self"
-            )
+            raise HTTPException(status_code=400, detail="Bad Request, message is not intended for self")
         if signed_by not in self.metagraph.hotkeys:
             raise HTTPException(status_code=401, detail="Signer not in metagraph")

         uid = self.metagraph.hotkeys.index(signed_by)
         stake = self.metagraph.S[uid].item()
         if not self.config.no_force_validator_permit and stake < 10000:
-            bt.logging.warning(
-                f"Blacklisting request from {signed_by} [uid={uid}], not enough stake -- {stake}"
-            )
+            bt.logging.warning(f"Blacklisting request from {signed_by} [uid={uid}], not enough stake -- {stake}")
             raise HTTPException(status_code=401, detail="Stake below minimum: {stake}")

         # If anything is returned here, we can throw
@@ -200,8 +187,7 @@ async def verify_request(
             raise HTTPException(status_code=400, detail=err)

     def run(self):
-
-        external_ip = None #settings.EXTERNAL_IP
+        external_ip = None  # settings.EXTERNAL_IP
         if not external_ip or external_ip == "[::]":
             try:
                 external_ip = requests.get("https://checkip.amazonaws.com").text.strip()
@@ -232,7 +218,7 @@ def run(self):
         router.add_api_route(
             "/v1/chat/completions",
             self.create_chat_completion,
-            #dependencies=[Depends(self.verify_request)],
+            # dependencies=[Depends(self.verify_request)],
             methods=["POST"],
         )
         router.add_api_route(
@@ -244,7 +230,8 @@ def run(self):
         fast_config = uvicorn.Config(
             app,
             host="0.0.0.0",
-            port=settings.AXON_PORT,
+            # port=settings.AXON_PORT,
+            port=8008,
             log_level="info",
             loop="asyncio",
         )
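
Note: with verify_request commented out and the port pinned to 8008, the miner can be smoke-tested locally. A minimal sketch (not part of the commit), assuming the miner is running on localhost; the model name is a placeholder, since format_openai_query overwrites it with MODEL_ID:

import asyncio
import openai


async def main():
    # No Epistula headers are needed while verify_request stays disabled.
    client = openai.AsyncOpenAI(base_url="http://localhost:8008/v1", api_key="unused")
    stream = await client.chat.completions.create(
        model="placeholder",  # replaced server-side with MODEL_ID
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)


asyncio.run(main())

Because create_chat_completion streams the upstream bytes through unchanged (r.aiter_raw()), a standard OpenAI client can parse the chunks directly.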

neurons/validator.py

Lines changed: 11 additions & 3 deletions
@@ -25,6 +25,7 @@
 from prompting.weight_setting.weight_setter import weight_setter
 from prompting.llms.utils import GPUInfo
 from prompting.base.epistula import query_miners
+from prompting.api.api import start_api

 NEURON_SAMPLE_SIZE = 100

@@ -139,14 +140,18 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent |
             logger.warning("No available miners. This should already have been caught earlier.")
             return

-
-        body = {"seed": task.seed, "model": task.llm_model_id, "messages": [{'role': 'user', 'content': task.query},]}
+        body = {
+            "seed": task.seed,
+            "model": task.llm_model_id,
+            "messages": [
+                {"role": "user", "content": task.query},
+            ],
+        }
         body_bytes = json.dumps(body).encode("utf-8")
         stream_results = await query_miners(task.__class__.__name__, uids, body_bytes)

         log_stream_results(stream_results)

-
         response_event = DendriteResponseEvent(
             stream_results=stream_results, uids=uids, timeout=settings.NEURON_TIMEOUT
         )
@@ -202,6 +207,9 @@ def __exit__(self, exc_type, exc_value, traceback):


 async def main():
+    # start api
+    asyncio.create_task(start_api())
+
     GPUInfo.log_gpu_info()
     # start profiling
     asyncio.create_task(profiler.print_stats())
prompting/api/api.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+from fastapi import FastAPI
+import uvicorn
+from prompting.api.gpt_endpoints.api import router as gpt_router
+from prompting.api.miner_availabilities.api import router as miner_availabilities_router
+from loguru import logger
+
+app = FastAPI()
+
+app.include_router(gpt_router)
+app.include_router(miner_availabilities_router)
+
+
+async def start_api():
+    logger.info("Starting API")
+    uvicorn.run(app, host="0.0.0.0", port=8000)
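
Note: uvicorn.run() is blocking and starts its own event loop, so scheduling start_api() with asyncio.create_task() on the validator's already-running loop (as neurons/validator.py does in this commit) will likely fail at runtime. A minimal sketch of an awaitable alternative, reusing this module's app and logger:

async def start_api():
    logger.info("Starting API")
    # uvicorn.Server(...).serve() is a coroutine, so it composes with the
    # caller's existing event loop instead of trying to create a new one.
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()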

prompting/api/gpt_endpoints/api.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+from fastapi import APIRouter, Request
+import openai
+from prompting.settings import settings
+from httpx import Timeout
+from prompting.base.epistula import create_header_hook
+from fastapi.responses import StreamingResponse
+import json
+
+router = APIRouter()
+
+
+async def process_stream(stream):
+    async for chunk in stream:
+        if hasattr(chunk, "choices") and chunk.choices:
+            # Extract the delta content from the chunk
+            delta = chunk.choices[0].delta
+            if hasattr(delta, "content") and delta.content is not None:
+                # Format as SSE data
+                yield f"data: {json.dumps(chunk.model_dump())}\n\n"
+    yield "data: [DONE]\n\n"
+
+
+@router.post("/v1/chat/completions")
+async def proxy_chat_completions(request: Request):
+    # Get the request body
+    body = await request.json()
+
+    # Ensure streaming is enabled
+    body["stream"] = True
+
+    # TODO: Forward to actual miners
+    miner = openai.AsyncOpenAI(
+        base_url="http://localhost:8008/v1",
+        max_retries=0,
+        timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5),
+        http_client=openai.DefaultAsyncHttpxClient(
+            event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, None)]}
+        ),
+    )
+
+    # Create streaming request to OpenAI
+    response = await miner.chat.completions.create(**body)
+
+    # Return a streaming response with properly formatted chunks
+    return StreamingResponse(process_stream(response), media_type="text/event-stream")
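
Note: the proxy's output is plain server-sent events, so any HTTP client that can iterate lines can consume it. An illustrative sketch (not part of the commit), assuming the app from prompting/api/api.py is listening on localhost:8000; the model value is arbitrary because the endpoint forwards the body as-is:

import asyncio
import httpx


async def main():
    body = {"model": "placeholder", "messages": [{"role": "user", "content": "Say hello"}]}
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", "http://localhost:8000/v1/chat/completions", json=body) as response:
            async for line in response.aiter_lines():
                if line.startswith("data: "):
                    print(line[len("data: "):])  # one JSON chunk per line, then [DONE]


asyncio.run(main())

One behavioural detail worth noting: process_stream only re-emits chunks whose delta carries content, so role-only and finish_reason chunks are dropped from the proxied stream.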

prompting/api/gpt_endpoints/serialisers.py

Whitespace-only changes.
prompting/api/miner_availabilities/api.py

Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
+from fastapi import APIRouter
+from prompting.miner_availability.miner_availability import miner_availabilities
+from loguru import logger
+
+router = APIRouter()
+
+
+@router.post("/miner_availabilities")
+async def get_miner_availabilities(uids: list[int] | None = None):
+    if uids:
+        return {uid: miner_availabilities.miners.get(uid) for uid in uids}
+    logger.info(f"Returning all miner availabilities for {len(miner_availabilities.miners)} miners")
+    return miner_availabilities.miners
+
+
+@router.get("/get_available_miners")
+async def get_available_miners(task: str | None = None, model: str | None = None, k: int = 10):
+    logger.info(f"Getting {k} available miners for task {task} and model {model}")
+    return miner_availabilities.get_available_miners(task=task, model=model, k=k)
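
Note: both routes can be exercised with a plain HTTP client once the API is up. An illustrative sketch against localhost:8000 (the uids and task name are placeholders); the bare list[int] parameter on a POST route is read from the JSON body by FastAPI:

import httpx

# POST /miner_availabilities: pass an explicit uid list, or null/no body for all miners
print(httpx.post("http://localhost:8000/miner_availabilities", json=[1, 2, 3]).json())

# GET /get_available_miners: filter by task and/or model, capped at k results
print(httpx.get("http://localhost:8000/get_available_miners", params={"task": "SomeTask", "k": 5}).json())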
