Skip to content
This repository was archived by the owner on Feb 5, 2025. It is now read-only.

Commit 74da3ea

Browse files
committed
Extremely basic e2e test to prove the file is valid
1 parent 9e85cbb commit 74da3ea

4 files changed

Lines changed: 41 additions & 0 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Image used by the Parquet end-to-end test. The base image is pinned to a
# dated tag so rebuilds stay reproducible; it is expected to supply the pandas
# installation that check_parquet_file.py imports.
FROM quay.io/jupyter/datascience-notebook:2023-11-17

# Add the checker script under /home/jovyan/work. With a trailing slash the
# destination is treated as a directory and the source file name is kept.
COPY check_parquet_file.py /home/jovyan/work/
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This is a really basic end-to-end test to confirm that the parquet file
2+
generated with `write_test_file.cc` is valid and can be read by Arrow (via
3+
pyarrow via pandas via python in a Docker container).
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env python
"""Check that a Parquet file can be loaded and holds the expected row count.

Usage: check_parquet_file.py PARQUET_FILE

Exit status:
    0  the file was read and contains EXPECTED_ROWS rows
    1  the file was read but the row count differs
    2  no file path was given on the command line
"""

import sys

import pandas as pd

# Number of rows the generated test file is expected to contain.
EXPECTED_ROWS = 4


def main(argv):
    """Validate the Parquet file named by ``argv[1]``.

    Args:
        argv: Command-line vector in ``sys.argv`` layout (program name first,
            then the path of the Parquet file). Extra trailing arguments are
            ignored.

    Returns:
        The process exit status described in the module docstring.

    Raises:
        Whatever ``pandas.read_parquet`` raises when the file is missing or
        cannot be parsed; the resulting traceback is the failure report.
    """
    if len(argv) < 2:
        # Report a usage error instead of dying with a bare IndexError.
        program = argv[0] if argv else "check_parquet_file.py"
        sys.stderr.write("Usage: %s PARQUET_FILE\n" % program)
        return 2

    df = pd.read_parquet(argv[1])
    if len(df) != EXPECTED_ROWS:
        # Diagnostics go to stderr so they are not mixed with regular output.
        sys.stderr.write(
            "Expected %d rows, got %d\n" % (EXPECTED_ROWS, len(df))
        )
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env bash
#
# End-to-end test: generate a Parquet file with the :write_test_file Bazel
# target, then read it back with check_parquet_file.py inside a Docker
# container built from the Dockerfile next to this script.
#
# Requires `docker` and `bazel` on PATH. Exits non-zero as soon as any step
# fails.

# -e: stop on the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails if any stage fails.
set -euo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
readonly SCRIPT_DIR
readonly IMAGE_TAG="parquet_logger_e2e_test"

# Scratch directory shared with the container. A private variable name is used
# on purpose: reassigning TMPDIR, when it is already exported, would change the
# temporary directory seen by every child process (bazel, docker).
# NOTE(review): mktemp -d creates the directory with owner-only permissions;
# confirm the container user can read it on hosts where the UIDs differ.
WORK_DIR="$(mktemp -d)"
readonly WORK_DIR

# Remove the scratch directory on every exit path, including failures.
cleanup() {
  rm -rf -- "${WORK_DIR}"
}
trap cleanup EXIT

# Build the Docker image with Pandas, using the test directory as context.
docker build "${SCRIPT_DIR}" -t "${IMAGE_TAG}"

# Generate the test file. The :write_test_file target is resolved from the
# directory above this script, so run bazel from there; the subshell keeps the
# directory change local.
(
  cd -- "${SCRIPT_DIR}/.."
  bazel run :write_test_file -- "${WORK_DIR}/test_file.parquet"
)

# Run the test in a Docker container, with the scratch directory mounted at
# /tmp so the checker can reach the generated file.
docker run \
  --rm \
  -v "${WORK_DIR}:/tmp" \
  "${IMAGE_TAG}" \
  /usr/bin/env python /home/jovyan/work/check_parquet_file.py /tmp/test_file.parquet

0 commit comments

Comments
 (0)