8 changes: 7 additions & 1 deletion .gitignore
@@ -1,2 +1,8 @@
 __pycache__/
-scripts/notebooks/
+scripts/notebooks/
+matrix.egg-info/
+build/
+.pyre/
+.pyre_configuration
+.pyre_configuration.local
+.watchmanconfig
3 changes: 2 additions & 1 deletion matrix/app_server/app_api.py
@@ -32,6 +32,7 @@
 from matrix.client.endpoint_cache import EndpointCache
 from matrix.common.cluster_info import ClusterInfo, get_head_http_host
 from matrix.utils.basics import convert_to_json_compatible, sanitize_app_name
+from matrix.utils.logging import get_logger
 from matrix.utils.os import download_s3_dir, lock_file, run_and_stream, run_async
 from matrix.utils.ray import (
     ACTOR_NAME_SPACE,
@@ -42,7 +43,7 @@
     kill_matrix_actors,
 )
 
-logger = logging.getLogger("ray.serve")
+logger = get_logger("ray.serve")
 
 DEPLOYMENT_YAML = "deployment.yaml"
 DEPLOYMENT_SGLANG_YAML = "deployment_sglang.yaml"
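Review note: the recurring change in this PR swaps module-level `logging.getLogger(...)` calls for a shared `get_logger` helper imported from `matrix.utils.logging`. That module is not part of this diff view, so the sketch below is only an assumption of what such a helper commonly looks like (a named logger given a consistent handler, format, and level exactly once), not the PR's actual implementation:

```python
# Hypothetical sketch of matrix/utils/logging.py; the real module is not shown
# in this diff, so the handler, format, and level below are assumptions.
import logging


def get_logger(name: str) -> logging.Logger:
    """Return a named logger configured once with a shared format."""
    logger = logging.getLogger(name)
    if not logger.handlers:  # avoid stacking duplicate handlers on repeated calls
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        )
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger
```

Whatever the exact implementation, centralizing it means the deployments and client scripts below no longer each configure logging on their own.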
3 changes: 2 additions & 1 deletion matrix/app_server/code/code_execution_app.py
@@ -15,9 +15,10 @@
 from starlette.responses import JSONResponse
 
 from matrix.app_server.code.sandbox_runner import SandboxRunner
+from matrix.utils.logging import get_logger
 
 CODE_EXEC_TIMEOUT = 10
-logger = logging.getLogger("ray.serve")
+logger = get_logger("ray.serve")
 
 
 @serve.deployment(ray_actor_options={"num_cpus": 1, "num_gpus": 0})
6 changes: 4 additions & 2 deletions matrix/app_server/llm/azure_openai_proxy.py
@@ -23,7 +23,9 @@
     ErrorResponse,
 )
 
-logger = logging.getLogger("ray.serve")
+from matrix.utils.logging import get_logger
+
+logger = get_logger("ray.serve")
 
 app = FastAPI()
 
@@ -114,7 +116,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
             arg_strings.extend([f"--{key}"])
         else:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
 
     args = argparse.parse_args(args=arg_strings)
 
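The other repeated change, `logger.info(arg_strings)` becoming `logger.info(",".join(arg_strings))`, is cosmetic but worth noting: passing the list logs its Python repr, while joining first logs a flat comma-separated string. A standalone demo (argument values invented for illustration):

```python
import logging

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
log = logging.getLogger("demo")

arg_strings = ["--model-name", "example-model", "--tensor-parallel-size", "2"]
log.info(arg_strings)            # INFO ['--model-name', 'example-model', '--tensor-parallel-size', '2']
log.info(",".join(arg_strings))  # INFO --model-name,example-model,--tensor-parallel-size,2
```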
8 changes: 5 additions & 3 deletions matrix/app_server/llm/bedrock_proxy.py
@@ -19,7 +19,9 @@
 from starlette.requests import Request
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 
-logger = logging.getLogger("ray.serve")
+from matrix.utils.logging import get_logger
+
+logger = get_logger("ray.serve")
 logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.getLogger("openai._base_client").setLevel(logging.WARNING)
 
@@ -124,11 +126,11 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
             arg_strings.extend([f"--{key}"])
         else:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
 
     args = argparse.parse_args(args=arg_strings)
 
-    logger.log(logging.INFO, f"args: {args}")
+    logger.info(f"args: {args}")
     assert "claude" in args.model_name.lower(), "Only Claude model is supported"
 
     return BedrockDeployment.options( # type: ignore[attr-defined]
3 changes: 2 additions & 1 deletion matrix/app_server/llm/deploy_sglang_app.py
@@ -27,10 +27,11 @@
 
 from matrix.common.cluster_info import ClusterInfo
 from matrix.utils.http import fetch_url, post_url
+from matrix.utils.logging import get_logger
 from matrix.utils.os import run_and_stream, stop_process
 from matrix.utils.ray import ACTOR_NAME_SPACE, get_matrix_actors, get_ray_head_node
 
-logger = logging.getLogger("ray.sglang")
+logger = get_logger("ray.sglang")
 handler = logging.StreamHandler()
 formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s")
 handler.setFormatter(formatter)
6 changes: 4 additions & 2 deletions matrix/app_server/llm/gemini_proxy.py
@@ -17,7 +17,9 @@
 from starlette.requests import Request
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 
-logger = logging.getLogger("ray.serve")
+from matrix.utils.logging import get_logger
+
+logger = get_logger("ray.serve")
 
 app = FastAPI()
 
@@ -209,7 +211,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
             arg_strings.extend([f"--{key}"])
         else:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
 
     args = argparse.parse_args(args=arg_strings)
 
5 changes: 3 additions & 2 deletions matrix/app_server/llm/metagen_proxy.py
@@ -24,8 +24,9 @@
 )
 
 from matrix.utils.http import post_url
+from matrix.utils.logging import get_logger
 
-logger = logging.getLogger("ray.serve")
+logger = get_logger("ray.serve")
 
 app = FastAPI()
 
@@ -124,7 +125,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
             arg_strings.extend([f"--{key}"])
         else:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
 
     args = argparse.parse_args(args=arg_strings)
 
6 changes: 4 additions & 2 deletions matrix/app_server/llm/ray_serve_fastgen.py
@@ -31,7 +31,9 @@
 from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
 from torch.distributed.device_mesh import DeviceMesh, init_device_mesh
 
-logger = logging.getLogger("ray.serve")
+from matrix.utils.logging import get_logger
+
+logger = get_logger("ray.serve")
 
 app = FastAPI()
 
@@ -350,7 +352,7 @@ def parse_args(cli_args: Dict[str, str]):
             arg_strings.extend([f"--{key}"])
         else:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
     parsed_args = parser.parse_args(args=arg_strings)
     return parsed_args
 
5 changes: 3 additions & 2 deletions matrix/app_server/llm/ray_serve_vllm.py
@@ -41,6 +41,7 @@
 )
 
 from matrix.app_server.llm import openai_pb2
+from matrix.utils.logging import get_logger
 
 try:
     from vllm.entrypoints.openai.serving_engine import ( # type: ignore[attr-defined]
@@ -76,7 +77,7 @@
     "enable_tools",
 ]
 
-logger = logging.getLogger("ray.serve")
+logger = get_logger("ray.serve")
 
 app = FastAPI()
 
@@ -534,7 +535,7 @@ def parse_vllm_args(cli_args: Dict[str, str]):
             arg_strings.extend([f"--{key}"])
         else:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
     parsed_args = parser.parse_args(args=arg_strings)
     return parsed_args, deploy_args
 
8 changes: 5 additions & 3 deletions matrix/app_server/llm/sagemaker_proxy.py
@@ -18,7 +18,9 @@
 from starlette.requests import Request
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 
-logger = logging.getLogger("ray.serve")
+from matrix.utils.logging import get_logger
+
+logger = get_logger("ray.serve")
 logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.getLogger("openai._base_client").setLevel(logging.WARNING)
 
@@ -122,11 +124,11 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
             arg_strings.extend([f"--{key}"])
         else:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
 
     args = argparse.parse_args(args=arg_strings)
 
-    logger.log(logging.INFO, f"args: {args}")
+    logger.info(f"args: {args}")
 
     return SageMakerDeployment.options( # type: ignore[attr-defined]
         placement_group_bundles=pg_resources,
5 changes: 3 additions & 2 deletions matrix/app_server/vision/optical_flow.py
@@ -21,8 +21,9 @@
 from torch.utils.data import DataLoader, Dataset
 
 from matrix.app_server.vision.utils import SamplingMode, TorchCodecVideoDataset
+from matrix.utils.logging import get_logger
 
-logger = logging.getLogger("ray.serve")
+logger = get_logger("ray.serve")
 logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.getLogger("openai._base_client").setLevel(logging.WARNING)
 
@@ -207,7 +208,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
     for key, value in cli_args.items():
         if value is not None:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
 
     args = argparse.parse_args(arg_strings)
 
5 changes: 3 additions & 2 deletions matrix/app_server/vision/perception_encoder.py
@@ -29,8 +29,9 @@
     execute_with_retry,
     get_image_transform,
 )
+from matrix.utils.logging import get_logger
 
-logger = logging.getLogger("ray.serve")
+logger = get_logger("ray.serve")
 logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.getLogger("openai._base_client").setLevel(logging.WARNING)
 
@@ -289,7 +290,7 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
     for key, value in cli_args.items():
         if value is not None:
             arg_strings.extend([f"--{key}", str(value)])
-    logger.info(arg_strings)
+    logger.info(",".join(arg_strings))
 
     args = argparse.parse_args(arg_strings)
 
4 changes: 3 additions & 1 deletion matrix/app_server/vision/utils.py
@@ -20,7 +20,9 @@
 from torch.utils.data import Dataset
 from torchcodec.decoders import VideoDecoder
 
-logger = logging.getLogger("matrix.app_server.vision.utils")
+from matrix.utils.logging import get_logger
+
+logger = get_logger("matrix.app_server.vision.utils")
 logging.getLogger("httpx").setLevel(logging.WARNING)
 
 
3 changes: 2 additions & 1 deletion matrix/client/client_utils.py
@@ -13,8 +13,9 @@
 import typing as tp
 
 from matrix.client.endpoint_cache import EndpointCache
+from matrix.utils.logging import get_logger
 
-logger = logging.getLogger("matrix.client.utils")
+logger = get_logger("matrix.client.utils")
 logging.getLogger("httpx").setLevel(logging.WARNING)
 
 
3 changes: 2 additions & 1 deletion matrix/client/container_client.py
@@ -11,8 +11,9 @@
 import aiohttp
 
 from matrix.utils.http import post_url
+from matrix.utils.logging import get_logger
 
-logger = logging.getLogger(__name__)
+logger = get_logger(__name__)
 
 
 class ContainerClient:
3 changes: 2 additions & 1 deletion matrix/client/endpoint_cache.py
@@ -11,9 +11,10 @@
 
 from matrix.common.cluster_info import ClusterInfo, get_head_http_host
 from matrix.utils.http import fetch_url
+from matrix.utils.logging import get_logger
 from matrix.utils.ray import get_ray_dashboard_address
 
-logger = logging.getLogger("endpoint_cache")
+logger = get_logger("endpoint_cache")
 
 
 class EndpointCache:
3 changes: 2 additions & 1 deletion matrix/client/execute_code.py
@@ -19,12 +19,13 @@
 
 from matrix.client.client_utils import get_an_endpoint_url, save_to_jsonl
 from matrix.client.endpoint_cache import EndpointCache
+from matrix.utils.logging import get_logger
 
 # Configure logging for execute_code.py
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
-logger = logging.getLogger("execute_code")
+logger = get_logger("execute_code")
 # Optionally suppress noisy logs from imported modules if not already done
 logging.getLogger("httpx").setLevel(logging.WARNING)
 
3 changes: 2 additions & 1 deletion matrix/client/process_vision_data.py
@@ -23,11 +23,12 @@
     save_to_jsonl,
 )
 from matrix.client.endpoint_cache import EndpointCache
+from matrix.utils.logging import get_logger
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
-logger = logging.getLogger("process_vision_data")
+logger = get_logger("process_vision_data")
 logging.getLogger("httpx").setLevel(logging.WARNING)
 
 
3 changes: 2 additions & 1 deletion matrix/client/query_llm.py
@@ -32,13 +32,14 @@
 from matrix.app_server.llm import openai_pb2, openai_pb2_grpc
 from matrix.client.client_utils import get_an_endpoint_url, save_to_jsonl
 from matrix.client.endpoint_cache import EndpointCache
+from matrix.utils.logging import get_logger
 from matrix.utils.os import batch_requests_async, run_async
 
 CHAR_PER_TOKEN = 3.61
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
-logger = logging.getLogger("query_llm")
+logger = get_logger("query_llm")
 logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.getLogger("openai._base_client").setLevel(logging.WARNING)
 
4 changes: 3 additions & 1 deletion matrix/cluster/ray_dashboard_job.py
@@ -15,11 +15,13 @@
 
 import ray
 
+from matrix.utils.logging import get_logger
+
 # Configure logging
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
-logger = logging.getLogger("RayDashboardJob")
+logger = get_logger("RayDashboardJob")
 
 
 @ray.remote(max_restarts=10)
2 changes: 1 addition & 1 deletion matrix/data_pipeline/clustering/sample.py
@@ -131,7 +131,7 @@ def select_sample_ids_from_cluster(cluster_df):
     else:
         embeddings = np.vstack(cluster_df["embedding"])
         centroid = np.mean(embeddings, axis=0)
-        logger.info(embeddings.shape, centroid.shape)
+        logger.info(f"{embeddings.shape}, {centroid.shape}")
         distances_to_centroid = np.linalg.norm(embeddings - centroid, axis=1)
 
         # First pick the closest point to centroid
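Unlike the cosmetic `join` changes above, this one fixes a real bug: `logger.info(embeddings.shape, centroid.shape)` passes the second shape as a %-style formatting argument, and because the message contains no `%` placeholders the logging module reports an internal formatting error instead of printing both shapes. The f-string form logs both values. A standalone illustration (shapes invented for the demo):

```python
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("demo")

shape_a, shape_b = (100, 768), (768,)
log.info(shape_a, shape_b)         # prints "--- Logging error ---" and a traceback to stderr
log.info(f"{shape_a}, {shape_b}")  # INFO:demo:(100, 768), (768,)
```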
3 changes: 2 additions & 1 deletion matrix/data_pipeline/clustering/utils.py
@@ -20,12 +20,13 @@
 from sentence_transformers import SentenceTransformer
 
 from matrix.utils.basics import get_nested_value, get_user_message_from_llama3_prompt
+from matrix.utils.logging import get_logger
 
 # Basic Logging Setup
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
-logger = logging.getLogger(__name__)
+logger = get_logger(__name__)
 
 # --- Model Saving/Loading ---
 
4 changes: 3 additions & 1 deletion matrix/data_pipeline/generate/vllm_generate.py
@@ -15,7 +15,9 @@
 from fire import Fire
 from vllm import LLM, SamplingParams
 
-logger = logging.getLogger(__name__)
+from matrix.utils.logging import get_logger
+
+logger = get_logger(__name__)
 
 USER_MESSAGE = "<user_message>"
 