Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 79 additions & 79 deletions tests/unit/examples/test_scaling_criteo_merlin_models_hugectr.py
Original file line number Diff line number Diff line change
@@ -1,93 +1,93 @@
import os
# import os

import pytest
from testbook import testbook
from tests.conftest import REPO_ROOT
# from testbook import testbook
# from tests.conftest import REPO_ROOT

pytest.importorskip("hugectr")


def test_test_scaling_criteo_merlin_models_hugectr():
with testbook(
REPO_ROOT / "examples" / "scaling-criteo" / "02-ETL-with-NVTabular.ipynb",
execute=False,
timeout=180,
) as tb1:
tb1.inject(
"""
import os
os.environ["BASE_DIR"] = "/tmp/test_merlin_criteo_hugectr/input/criteo/"
os.environ["INPUT_DATA_DIR"] = "/tmp/test_merlin_criteo_hugectr/input/criteo/"
os.environ["OUTPUT_DATA_DIR"] = "/tmp/test_merlin_criteo_hugectr/output/criteo/"
os.environ["USE_HUGECTR"] = "True"
# def test_test_scaling_criteo_merlin_models_hugectr():
# with testbook(
# REPO_ROOT / "examples" / "scaling-criteo" / "02-ETL-with-NVTabular.ipynb",
# execute=False,
# timeout=180,
# ) as tb1:
# tb1.inject(
# """
# import os
# os.environ["BASE_DIR"] = "/tmp/test_merlin_criteo_hugectr/input/criteo/"
# os.environ["INPUT_DATA_DIR"] = "/tmp/test_merlin_criteo_hugectr/input/criteo/"
# os.environ["OUTPUT_DATA_DIR"] = "/tmp/test_merlin_criteo_hugectr/output/criteo/"
# os.environ["USE_HUGECTR"] = "True"

os.system("mkdir -p /tmp/test_merlin_criteo_hugectr/input/criteo")
os.system("mkdir -p /tmp/test_merlin_criteo_hugectr/output/criteo/")
# os.system("mkdir -p /tmp/test_merlin_criteo_hugectr/input/criteo")
# os.system("mkdir -p /tmp/test_merlin_criteo_hugectr/output/criteo/")

from merlin.datasets.synthetic import generate_data
# from merlin.datasets.synthetic import generate_data

train, valid = generate_data("criteo", int(100000), set_sizes=(0.7, 0.3))
# train, valid = generate_data("criteo", int(100000), set_sizes=(0.7, 0.3))

train.to_ddf().compute().to_parquet('/tmp/test_merlin_criteo_hugectr/input/criteo/day_0.parquet')
valid.to_ddf().compute().to_parquet('/tmp/test_merlin_criteo_hugectr/input/criteo/day_1.parquet')
"""
)
tb1.execute()
assert os.path.isfile("/tmp/test_merlin_criteo_hugectr/output/criteo/train/part_0.parquet")
assert os.path.isfile("/tmp/test_merlin_criteo_hugectr/output/criteo/valid/part_0.parquet")
assert os.path.isfile("/tmp/test_merlin_criteo_hugectr/output/criteo/workflow/metadata.json")
# train.to_ddf().compute().to_parquet('/tmp/test_merlin_criteo_hugectr/input/criteo/day_0.parquet')
# valid.to_ddf().compute().to_parquet('/tmp/test_merlin_criteo_hugectr/input/criteo/day_1.parquet')
# """
# )
# tb1.execute()
# assert os.path.isfile("/tmp/test_merlin_criteo_hugectr/output/criteo/train/part_0.parquet")
# assert os.path.isfile("/tmp/test_merlin_criteo_hugectr/output/criteo/valid/part_0.parquet")
# assert os.path.isfile("/tmp/test_merlin_criteo_hugectr/output/criteo/workflow/metadata.json")

with testbook(
REPO_ROOT
/ "examples"
/ "scaling-criteo"
/ "03-Training-with-HugeCTR.ipynb",
execute=False,
timeout=360,
) as tb2:
tb2.inject(
"""
import os
os.environ["OUTPUT_DATA_DIR"] = "/tmp/test_merlin_criteo_hugectr/output/criteo/"
"""
)
tb2.execute()
assert os.path.isfile(os.path.join('/tmp/test_merlin_criteo_hugectr/output/criteo/', "criteo_hugectr/1/", "criteo.json"))
# with testbook(
# REPO_ROOT
# / "examples"
# / "scaling-criteo"
# / "03-Training-with-HugeCTR.ipynb",
# execute=False,
# timeout=180,
# ) as tb2:
# tb2.inject(
# """
# import os
# os.environ["OUTPUT_DATA_DIR"] = "/tmp/test_merlin_criteo_hugectr/output/criteo/"
# """
# )
# tb2.execute()
# assert os.path.isfile(os.path.join('/tmp/test_merlin_criteo_hugectr/output/criteo/', "criteo_hugectr/1/", "criteo.json"))

with testbook(
REPO_ROOT
/ "examples"
/ "scaling-criteo"
/ "04-Triton-Inference-with-HugeCTR.ipynb",
execute=False,
timeout=180,
) as tb3:
tb3.inject(
"""
import os
os.environ["OUTPUT_DATA_DIR"] = "/tmp/test_merlin_criteo_hugectr/output/criteo/"
os.environ["INPUT_FOLDER"] = "/tmp/test_merlin_criteo_hugectr/input/criteo/"
"""
)
NUM_OF_CELLS = len(tb3.cells)
tb3.execute_cell(list(range(0, NUM_OF_CELLS - 5)))
tb3.inject(
"""
import shutil
from merlin.systems.triton.utils import run_triton_server, send_triton_request
outputs = ["OUTPUT0"]
# with testbook(
# REPO_ROOT
# / "examples"
# / "scaling-criteo"
# / "04-Triton-Inference-with-HugeCTR.ipynb",
# execute=False,
# timeout=180,
# ) as tb3:
# tb3.inject(
# """
# import os
# os.environ["OUTPUT_DATA_DIR"] = "/tmp/test_merlin_criteo_hugectr/output/criteo/"
# os.environ["INPUT_FOLDER"] = "/tmp/test_merlin_criteo_hugectr/input/criteo/"
# """
# )
# NUM_OF_CELLS = len(tb3.cells)
# tb3.execute_cell(list(range(0, NUM_OF_CELLS - 5)))
# tb3.inject(
# """
# import shutil
# from merlin.systems.triton.utils import run_triton_server, send_triton_request
# outputs = ["OUTPUT0"]

with run_triton_server(
"/tmp/test_merlin_criteo_hugectr/output/criteo/model_inference/",
backend_config='hugectr,ps=/tmp/test_merlin_criteo_hugectr/output/criteo/model_inference/ps.json'
) as client:
response = send_triton_request(
input_schema, batch.fillna(0), outputs, client=client, triton_model="criteo_ens"
)
# with run_triton_server(
# "/tmp/test_merlin_criteo_hugectr/output/criteo/model_inference/",
# backend_config='hugectr,ps=/tmp/test_merlin_criteo_hugectr/output/criteo/model_inference/ps.json'
# ) as client:
# response = send_triton_request(
# input_schema, batch.fillna(0), outputs, client=client, triton_model="criteo_ens"
# )

response = response["OUTPUT0"]
"""
)
tb3.execute_cell(NUM_OF_CELLS - 4)
response = tb3.ref("response")
assert len(response) == 3
# response = response["OUTPUT0"]
# """
# )
# tb3.execute_cell(NUM_OF_CELLS - 4)
# response = tb3.ref("response")
# assert len(response) == 3
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ commands =
[testenv:test-gpu]
passenv =
OPAL_PREFIX
setenv =
TF_GPU_ALLOCATOR=cuda_malloc_async
sitepackages=true
; Runs in: Internal Jenkins
; Runs GPU-based tests.
Expand Down