Skip to content

Commit cd39e59

Browse files
committed
[owl] Bug fix: String column should not have file extension validation (#869)
Backend - owl (API server): Bug fix: String columns now accept arbitrary strings; values starting with `s3://` or `file://` are no longer rejected by file-extension validation. CI: Change the concurrency limit to be at job level; mirror all images to GitHub Registry; increase Docker timeouts.
1 parent e1ed8af commit cd39e59

File tree

10 files changed

+255
-71
lines changed

10 files changed

+255
-71
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,6 @@ on:
1313
tags:
1414
- "v*"
1515

16-
# Cancel in-progress CI jobs if there is a new push
17-
# https://stackoverflow.com/a/72408109
18-
concurrency:
19-
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
20-
cancel-in-progress: true
21-
2216
jobs:
2317
check_changes:
2418
name: Check for changes
@@ -58,6 +52,9 @@ jobs:
5852
jamai-mode: ["oss"]
5953
test-group: [group1, group2, group3, group4]
6054
timeout-minutes: 90
55+
concurrency:
56+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ matrix.jamai-mode }}-${{ matrix.test-group }}
57+
cancel-in-progress: true
6158

6259
steps:
6360
- name: Checkout code
@@ -148,14 +145,22 @@ jobs:
148145
OWL_STRIPE_WEBHOOK_SECRET_TEST: ${{ secrets.OWL_STRIPE_WEBHOOK_SECRET_TEST }}
149146
OWL_STRIPE_PUBLISHABLE_KEY_TEST: ${{ secrets.OWL_STRIPE_PUBLISHABLE_KEY_TEST }}
150147

148+
- name: Configure Docker timeout
149+
run: |
150+
mkdir -p ~/.docker
151+
echo '{"max-concurrent-downloads": 3, "max-download-attempts": 5}' > ~/.docker/config.json
152+
151153
- name: Launch services
152154
id: launch_services
153155
timeout-minutes: 20
154156
if: always()
155-
run: docker compose -p jm -f docker/compose.ci.yml up --quiet-pull -d --wait
157+
run: |
158+
docker compose -p jm -f docker/compose.ci.yml up --quiet-pull -d --wait
156159
env:
157160
COMPOSE_DOCKER_CLI_BUILD: 1
158161
DOCKER_BUILDKIT: 1
162+
COMPOSE_HTTP_TIMEOUT: 30
163+
DOCKER_CLIENT_TIMEOUT: 30
159164

160165
- name: Inspect owl logs if failed to launch
161166
timeout-minutes: 1

clients/python/src/jamaibase/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3296,7 +3296,7 @@ async def export_table_data(
32963296
Args:
32973297
table_type (str): Table type.
32983298
table_id (str): ID or name of the table to be exported.
3299-
delimiter (str, optional): The delimiter of the file: can be "," or "\\t". Defaults to ",".
3299+
delimiter (str, optional): The delimiter of the content can be "," or "\\t". Defaults to ",".
33003300
columns (list[str], optional): A list of columns to be exported. Defaults to None (export all columns).
33013301
33023302
Returns:
@@ -6130,7 +6130,7 @@ def export_table_data(
61306130
Args:
61316131
table_type (str): Table type.
61326132
table_id (str): ID or name of the table to be exported.
6133-
delimiter (str, optional): The delimiter of the file: can be "," or "\\t". Defaults to ",".
6133+
delimiter (str, optional): The delimiter of the content can be "," or "\\t". Defaults to ",".
61346134
columns (list[str], optional): A list of columns to be exported. Defaults to None (export all columns).
61356135
61366136
Returns:

clients/python/src/jamaibase/types/gen_table.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ class TableDataImportRequest(BaseModel):
647647
# ] = None
648648
delimiter: Annotated[
649649
Literal[",", "\t"],
650-
Field(description='The delimiter of the file: can be "," or "\\t". Defaults to ",".'),
650+
Field(description='The delimiter of the content can be "," or "\\t". Defaults to ",".'),
651651
] = ","
652652

653653

docker/compose.base.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ services:
1919
- jamai
2020

2121
otel-collector:
22-
image: otel/opentelemetry-collector-contrib:0.113.0
22+
image: ghcr.io/embeddedllm/otel/opentelemetry-collector-contrib:0.113.0
2323
command: ["--config=/etc/otelcol/config.yaml"]
2424
volumes:
2525
- ./otel_configs/otel-collector-config.yaml:/etc/otelcol/config.yaml
2626
networks:
2727
- jamai
2828

2929
victoriametrics:
30-
image: victoriametrics/victoria-metrics:v1.124.0
30+
image: ghcr.io/embeddedllm/victoriametrics/victoria-metrics:v1.124.0
3131
command:
3232
- "--selfScrapeInterval=15s"
3333
- "--retentionPeriod=100y"
@@ -43,7 +43,7 @@ services:
4343
start_period: 30s
4444

4545
vmauth:
46-
image: victoriametrics/vmauth:v1.124.0
46+
image: ghcr.io/embeddedllm/victoriametrics/vmauth:v1.124.0
4747
command:
4848
- "--auth.config=/etc/config.yml"
4949
volumes:
@@ -58,7 +58,7 @@ services:
5858
start_period: 30s
5959

6060
victorialogs:
61-
image: victoriametrics/victoria-logs:v1.28.0
61+
image: ghcr.io/embeddedllm/victoriametrics/victoria-logs:v1.28.0
6262
command:
6363
- "--retentionPeriod=100y"
6464
volumes:
@@ -73,7 +73,7 @@ services:
7373
start_period: 30s
7474

7575
vmagent:
76-
image: victoriametrics/vmagent:v1.124.0
76+
image: ghcr.io/embeddedllm/victoriametrics/vmagent:v1.124.0
7777
depends_on:
7878
victoriametrics:
7979
condition: service_healthy
@@ -97,7 +97,7 @@ services:
9797
- jamai
9898

9999
clickhouse:
100-
image: clickhouse:24.10.2.80
100+
image: ghcr.io/embeddedllm/clickhouse:24.10.2.80
101101
volumes:
102102
- ../docker_data/ch_data:/var/lib/clickhouse/
103103
- ../docker_data/ch_logs:/var/log/clickhouse-server/
@@ -157,7 +157,7 @@ services:
157157
start_period: 30s
158158

159159
pgbouncer:
160-
image: edoburu/pgbouncer:v1.24.0-p0
160+
image: ghcr.io/embeddedllm/edoburu/pgbouncer:v1.24.0-p0
161161
depends_on:
162162
postgresql:
163163
condition: service_healthy
@@ -187,7 +187,7 @@ services:
187187
- jamai
188188

189189
minio:
190-
image: minio/minio:RELEASE.2025-05-24T17-08-30Z
190+
image: ghcr.io/embeddedllm/minio/minio:RELEASE.2025-05-24T17-08-30Z
191191
entrypoint: /bin/sh -c " minio server /data --console-address ':9001' & until (mc config host add myminio http://localhost:9000 $${MINIO_ROOT_USER} $${MINIO_ROOT_PASSWORD}) do echo '...waiting...' && sleep 1; done; mc mb myminio/file; wait "
192192
environment:
193193
MINIO_ROOT_USER: minioadmin

scripts/mirror_docker_image.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
"""
2+
Docker Image Mirror Tool
3+
Mirrors selected Docker images from source to destination registry using `skopeo`.
4+
"""
5+
6+
import json
7+
import subprocess
8+
import sys
9+
10+
11+
def run_command(cmd, capture_output=True):
    """Run a command (argument list, no shell) and return its result.

    Args:
        cmd: Argument list handed to ``subprocess.run``.
        capture_output: When true, stdout/stderr are captured and the stripped
            stdout text is returned. When false, the child writes straight to
            the terminal and the ``CompletedProcess`` is returned.

    Returns:
        Stripped stdout (``str``) or a ``CompletedProcess`` on success;
        ``None`` when the command exits with a non-zero status.

    Exits the process with status 1 when the executable cannot be found.
    """
    try:
        result = subprocess.run(cmd, capture_output=capture_output, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # e.stderr is None when output was not captured; fall back to the
        # exception text so the message is never "Error: None".
        detail = (e.stderr or "").strip() or str(e)
        print(f"Error: {detail}")
        return None
    except FileNotFoundError:
        executable = cmd if isinstance(cmd, str) else cmd[0]
        print(f"Error: '{executable}' not found. Please install it first.")
        sys.exit(1)
    return result.stdout.strip() if capture_output else result
25+
26+
27+
def check_skopeo_installed() -> bool:
    """Return True when ``skopeo --version`` can be run and exits successfully."""
    probe = ["skopeo", "--version"]
    try:
        subprocess.run(probe, capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Either the binary is missing or it reported a failure.
        return False
    return True
34+
35+
36+
def list_tags(image_path) -> list[str] | None:
    """List the tags of an image via ``skopeo list-tags``.

    Args:
        image_path: Image repository path without the ``docker://`` prefix.

    Returns:
        The tags whose name does not contain ``"sha256"``, or ``None`` when
        the command produced no output or its output is not valid JSON.
    """
    print(f"\nFetching tags from {image_path}...")

    output = run_command(["skopeo", "list-tags", f"docker://{image_path}"])
    if not output:
        return None

    # Keep the try body to the one call that can raise JSONDecodeError.
    try:
        data = json.loads(output)
    except json.JSONDecodeError as e:
        print(f"Error parsing tags: {e}")
        return None

    # "Tags" may be absent or JSON null, and the payload may not be an object;
    # treat all of these as "no tags" instead of raising.
    tags = data.get("Tags") if isinstance(data, dict) else None
    return [t for t in tags or [] if "sha256" not in t]
50+
51+
52+
def split_image_ref(image):
    """Split an image reference into ``(repository, tag)``.

    Only a colon in the last path component marks a tag, so a registry port
    such as ``localhost:5000/app`` stays part of the repository. ``tag`` is
    ``None`` when the reference carries no (or an empty) tag.
    """
    prefix, slash, name = image.rpartition("/")
    name, _, tag = name.partition(":")
    return f"{prefix}{slash}{name}", tag or None


def copy_image(source, destination, tag):
    """Copy a single image tag using skopeo.

    Args:
        source: Source image path; any tag it carries is ignored.
        destination: Destination image path; any tag it carries is ignored.
        tag: Tag to copy; used for both source and destination.

    Returns:
        True when ``run_command`` reports success (a non-``None`` result),
        False otherwise.
    """
    source_repo, _ = split_image_ref(source)
    dest_repo, _ = split_image_ref(destination)
    source_full = f"docker://{source_repo}:{tag}"
    dest_full = f"docker://{dest_repo}:{tag}"

    print(f"\nCopying {tag}...")
    print(f" From: {source_full}")
    print(f" To: {dest_full}")

    cmd = ["skopeo", "copy", source_full, dest_full]
    return run_command(cmd, capture_output=False) is not None


def main():
    """Interactively mirror one or more tags of an image with skopeo.

    Prompts for the source and destination image paths, determines the tags to
    copy (the tag given in the source, or a selection from the listed tags),
    asks for confirmation, then copies each tag and prints a summary.

    Exits with status 1 on invalid input or when any tag failed to copy, and
    with status 0 when the user declines the confirmation prompt.
    """
    print("=" * 60)
    print("Docker Image Mirror Tool")
    print("=" * 60)

    # Check if skopeo is installed
    if not check_skopeo_installed():
        print("\n❌ Error: skopeo is not installed.")
        print("\nInstallation instructions:")
        print(" Ubuntu/Debian: sudo apt-get install skopeo")
        print(" macOS: brew install skopeo")
        print(" Fedora/RHEL: sudo dnf install skopeo")
        sys.exit(1)

    print("✅ skopeo is installed")

    # Get source and destination
    source_image = input("\nEnter source image path (tag is optional): ").strip()
    if not source_image:
        print("Error: Source image cannot be empty")
        sys.exit(1)
    source_repo, src_tag = split_image_ref(source_image)

    destination_image = input("Enter destination image path (exclude tag): ").strip()
    if not destination_image:
        # Default to the tag-less repository; the tag is appended per copy.
        destination_image = f"ghcr.io/embeddedllm/{source_repo}"
        print(f'Defaulting to "{destination_image}"')
    dest_repo, dest_tag = split_image_ref(destination_image)

    # Determine candidate tags
    if src_tag is not None:
        if dest_tag is not None and dest_tag != src_tag:
            print("Error: When specifying tags in source and destination, they must match")
            sys.exit(1)
        tags = [src_tag]
    else:
        tags = list_tags(source_repo)
        if not tags:
            print("Error: No tags found")
            sys.exit(1)

    if len(tags) == 1:
        selected_tags = tags
    else:
        print(f"\nAvailable tags ({len(tags)} total):")
        for tag in tags:
            print(f" - {tag}")

        # Get user selection
        print("\nEnter tags to mirror (comma-separated):")
        print("Example: latest,v1.0.0,v1.1.0")
        selection = input("Tags: ").strip()

        # Drop empty entries (e.g. a trailing comma) and duplicates, keeping
        # the order in which the user typed them.
        selected_tags = list(
            dict.fromkeys(part.strip() for part in selection.split(",") if part.strip())
        )
        if not selected_tags:
            print("Error: No tags selected")
            sys.exit(1)

        # Validate selected tags
        known_tags = set(tags)
        invalid_tags = [tag for tag in selected_tags if tag not in known_tags]
        if invalid_tags:
            print(f"\nError: Invalid tags: {', '.join(invalid_tags)}")
            sys.exit(1)

    # Confirm
    print(f"\nWill mirror {len(selected_tags)} tag(s):")
    for tag in selected_tags:
        print(f" - {tag}")

    confirm = input("\nProceed? (Y/n): ").strip().lower()
    if confirm not in ("", "y", "yes"):
        print("Cancelled")
        sys.exit(0)

    # Copy images
    print("\nStarting mirror process...")
    success_count = 0
    failed_tags = []

    for idx, tag in enumerate(selected_tags, 1):
        print(f"\n[{idx}/{len(selected_tags)}]")
        if copy_image(source_repo, dest_repo, tag):
            success_count += 1
            print(f"✅ Success: {tag}")
        else:
            failed_tags.append(tag)
            print(f"❌ Failed: {tag}")

    # Summary
    print("\n" + "=" * 60)
    print(f"Summary: {success_count}/{len(selected_tags)} successful")
    if failed_tags:
        print(f"Failed tags: {', '.join(failed_tags)}")
    print("=" * 60)

    # Report failure through the exit status so wrappers can detect it.
    if failed_tags:
        sys.exit(1)
162+
163+
164+
if __name__ == "__main__":
    # Turn Ctrl-C into a short message and a non-zero exit status instead of
    # a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nCancelled by user")
        raise SystemExit(1)

services/api/src/owl/types/__init__.py

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from datetime import datetime
22
from enum import StrEnum
3-
from os.path import splitext
43
from typing import Annotated, Any, Generic, Literal, Self, Type, TypeVar
54

65
import pandas as pd
@@ -498,28 +497,11 @@ class ColumnRenameRequest(t.ColumnRenameRequest):
498497
)
499498

500499

501-
def check_data(value: Any) -> Any:
502-
if isinstance(value, str) and (value.startswith("s3://") or value.startswith("file://")):
503-
extension = splitext(value)[1].lower()
504-
if extension not in ALLOWED_FILE_EXTENSIONS:
505-
raise ValueError(
506-
"Unsupported file type. Make sure the file belongs to "
507-
"one of the following formats: \n"
508-
f"[Image File Types]: \n{IMAGE_FILE_EXTENSIONS} \n"
509-
f"[Audio File Types]: \n{AUDIO_FILE_EXTENSIONS} \n"
510-
f"[Document File Types]: \n{DOCUMENT_FILE_EXTENSIONS}"
511-
)
512-
return value
513-
514-
515-
CellValue = Annotated[Any, AfterValidator(check_data)]
516-
517-
518500
class RowAdd(BaseModel):
519501
table_id: str = Field(
520502
description="Table name or ID.",
521503
)
522-
data: dict[str, CellValue] = Field(
504+
data: dict[str, Any] = Field(
523505
description="Mapping of column names to its value.",
524506
)
525507
stream: bool = Field(
@@ -533,7 +515,7 @@ class RowAdd(BaseModel):
533515

534516

535517
class MultiRowAddRequest(t.MultiRowAddRequest):
536-
data: list[dict[str, CellValue]] = Field(
518+
data: list[dict[str, Any]] = Field(
537519
min_length=1,
538520
description=(
539521
"List of mapping of column names to its value. "
@@ -544,7 +526,7 @@ class MultiRowAddRequest(t.MultiRowAddRequest):
544526

545527

546528
class MultiRowAddRequestWithLimit(MultiRowAddRequest):
547-
data: list[dict[str, CellValue]] = Field(
529+
data: list[dict[str, Any]] = Field(
548530
min_length=1,
549531
max_length=100,
550532
description=(
@@ -556,22 +538,22 @@ class MultiRowAddRequestWithLimit(MultiRowAddRequest):
556538

557539

558540
class MultiRowUpdateRequest(t.MultiRowUpdateRequest):
559-
data: dict[str, dict[str, CellValue]] = Field(
541+
data: dict[str, dict[str, Any]] = Field(
560542
min_length=1,
561543
description="Mapping of row IDs to row data, where each row data is a mapping of column names to its value.",
562544
)
563545

564546

565547
class MultiRowUpdateRequestWithLimit(MultiRowUpdateRequest):
566-
data: dict[str, dict[str, CellValue]] = Field(
548+
data: dict[str, dict[str, Any]] = Field(
567549
min_length=1,
568550
max_length=100,
569551
description="Mapping of row IDs to row data, where each row data is a mapping of column names to its value.",
570552
)
571553

572554

573555
class RowUpdateRequest(t.RowUpdateRequest):
574-
data: dict[str, CellValue] = Field(
556+
data: dict[str, Any] = Field(
575557
description="Mapping of column names to its value.",
576558
)
577559

0 commit comments

Comments
 (0)