Commit 3269bd6

Merge pull request #654 from robertknight/fix-pytorch-half-pixel
Fix `pytorch_half_pixel` resize mode in Resize op
2 parents: 0286ca5 + 1b8663e

File tree

4 files changed (+175 −41 lines):

rten-examples/src/deeplab_reference.py
rten-examples/src/export-deeplab.py
src/ops/resize.rs
tools/compare-tensors.py

Diff for: rten-examples/src/deeplab_reference.py (new file, +86 lines)

# Reference inference for DeepLab example using ONNX Runtime.
#
# To use this, first export the DeepLab model then run inference:
#
# ```
# python export-deeplab.py
# python deeplab_reference.py deeplab.onnx path/to/test_image.jpeg
# ```
#
# This will produce an `out_reference.png` image containing the segmentation map.
from argparse import ArgumentParser

from PIL import Image
import numpy as np
import onnxruntime

IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD_DEV = [0.229, 0.224, 0.225]

# Labels and colors for the different categories of object that DeepLabv3 can
# detect.
#
# For the labels, see https://github.com/NVIDIA/DIGITS/blob/master/examples/semantic-segmentation/pascal-voc-classes.txt.
PASCAL_VOC_LABELS = [
    ("background", (0.0, 0.0, 0.0)),  # Black
    ("aeroplane", (0.0, 1.0, 0.0)),  # Green
    ("bicycle", (0.0, 0.0, 1.0)),  # Blue
    ("bird", (1.0, 1.0, 0.0)),  # Yellow
    ("boat", (1.0, 0.0, 1.0)),  # Magenta
    ("bottle", (0.0, 1.0, 1.0)),  # Cyan
    ("bus", (0.5, 0.0, 0.0)),  # Dark Red
    ("car", (0.0, 0.5, 0.0)),  # Dark Green
    ("cat", (0.0, 0.0, 0.5)),  # Dark Blue
    ("chair", (0.5, 0.5, 0.0)),  # Olive
    ("cow", (0.5, 0.0, 0.5)),  # Purple
    ("diningtable", (0.0, 0.5, 0.5)),  # Teal
    ("dog", (0.75, 0.75, 0.75)),  # Light Gray
    ("horse", (0.5, 0.5, 0.5)),  # Gray
    ("motorbike", (0.25, 0.25, 0.25)),  # Dark Gray
    ("person", (1.0, 0.5, 0.0)),  # Orange
    ("pottedplant", (0.5, 1.0, 0.5)),  # Pastel Green
    ("sheep", (0.5, 0.5, 1.0)),  # Pastel Blue
    ("sofa", (1.0, 0.75, 0.8)),  # Pink
    ("train", (0.64, 0.16, 0.16)),  # Brown
    ("tvmonitor", (1.0, 1.0, 1.0)),  # White
]

parser = ArgumentParser()
parser.add_argument("model", help="Path to DeepLab ONNX model")
parser.add_argument("image", help="Image to segment")
args = parser.parse_args()

session = onnxruntime.InferenceSession(args.model)

# Input image size expected by model
input_width = 693
input_height = 520

# Load image, normalize and convert to NCHW layout
image = Image.open(args.image)
image = image.resize([input_width, input_height])
image = np.asarray(image).astype("float32") / 255.0
image = np.transpose(image, (2, 0, 1))  # HWC => CHW

norm_mean = np.array(IMAGENET_MEAN, dtype="float32").reshape(-1, 1, 1)
norm_std_dev = np.array(IMAGENET_STD_DEV, dtype="float32").reshape(-1, 1, 1)
image = (image - norm_mean) / norm_std_dev
image = np.expand_dims(image, axis=0)  # Insert batch dim

# Segment image, producing an HW tensor containing the class index for each pixel.
seg_classes = session.run(["output"], {"input": image})[0]
seg_classes = np.transpose(seg_classes, (0, 2, 3, 1))  # (N,class,H,W) => (N,H,W,class)
seg_classes = np.argmax(seg_classes[0], axis=-1)

# Produce a segmentation map with pixels colored based on predicted class for
# each pixel.
out_height, out_width = seg_classes.shape
seg_map = np.zeros((out_height, out_width, 3), dtype="float32")
for cls_id, cls_info in enumerate(PASCAL_VOC_LABELS):
    cls_name, cls_color = cls_info
    cls_mask = seg_classes == cls_id
    for chan in range(3):
        seg_map[cls_mask, chan] = cls_color[chan]

out_im = Image.fromarray(np.uint8(seg_map * 255))
out_im.save("out_reference.png")
Diff for: rten-examples/src/export-deeplab.py (+30 −8)

@@ -1,3 +1,5 @@
+from argparse import ArgumentParser
+
 import torch
 from torchvision.models.segmentation import (
     deeplabv3_mobilenet_v3_large,
@@ -16,12 +18,32 @@
 img = torch.rand((3, 480, 640))
 batch = preprocess(img).unsqueeze(0)

-# Export to ONNX
-torch.onnx.export(
-    model,
-    args=(batch),
-    f="deeplab.onnx",
-    verbose=False,
-    input_names=["input"],
-    output_names=["output"],
+parser = ArgumentParser()
+parser.add_argument("-f", "--filename", default="deeplab.onnx")
+parser.add_argument(
+    "--dynamo", action="store_true", help="Use TorchDynamo-based exporter"
 )
+args = parser.parse_args()
+
+if args.dynamo:
+    print("Exporting model using TorchDynamo...")
+    onnx_prog = torch.onnx.export(
+        model,
+        args=(batch),
+        verbose=False,
+        input_names=["input"],
+        output_names=["output"],
+        dynamo=True,
+    )
+    onnx_prog.optimize()
+    onnx_prog.save(args.filename)
+else:
+    print("Exporting model using TorchScript...")
+    torch.onnx.export(
+        model,
+        args=(batch),
+        f=args.filename,
+        verbose=False,
+        input_names=["input"],
+        output_names=["output"],
+    )
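
After exporting with either path, a quick sanity check (a minimal sketch, not part of this PR) is to open the model with ONNX Runtime and confirm the graph exposes the `input`/`output` names that `deeplab_reference.py` relies on; `deeplab.onnx` is the script's default output filename:

# Illustrative check only: verify the exported graph I/O names.
import onnxruntime

session = onnxruntime.InferenceSession("deeplab.onnx")
print([inp.name for inp in session.get_inputs()])   # expected: ["input"]
print([out.name for out in session.get_outputs()])  # expected: ["output"]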

Diff for: src/ops/resize.rs (+37 −31)

@@ -31,7 +31,9 @@ pub enum ResizeTarget<'a> {
 /// - `length_resized` is the size of the axis in the output
 ///
 /// See https://github.com/onnx/onnx/blob/v1.15.0/docs/Operators.md#resize
-/// for the formulae for different transform modes.
+/// for the formulae for different transform modes. Note that `scale` here is
+/// the inverse of the `scale` used in the spec, in order to replace division
+/// with multiplication.
 ///
 /// The default is half pixel, and is is consistent with how OpenCV
 /// (`cv2.resize`) and PyTorch (`torch.nn.functional.interpolate`) work. See
@@ -58,7 +60,7 @@ fn input_coord(
     // PyTorch behavior). This implementation does however match
     // ONNX Runtime (https://github.com/microsoft/onnxruntime/blob/24620e70d9f14956a0dc84bb8a332dcd64c95a94/onnxruntime/core/providers/cpu/tensor/upsamplebase.h#L331)
     if length_resized > 1 {
-        (dest_coord as f32 + 0.5) / scale - 0.5
+        scale * (dest_coord as f32 + 0.5) - 0.5
     } else {
         0.
     }
@@ -692,29 +694,35 @@ mod tests {
                 image,
                 scales: vec![1., 1., 1.5, 1.5],
                 coord_transform_mode: None,
-                expected: Tensor::from_data(
-                    &[1, 1, 3, 3],
-                    vec![
-                        0.2, 0.45, 0.7, // Y=0
-                        0.25, 0.5, 0.75, // Y=1
-                        0.3, 0.55, 0.8, // Y=2
-                    ],
-                ),
+                expected: Tensor::from([[0.2, 0.45, 0.7], [0.25, 0.5, 0.75], [0.3, 0.55, 0.8]])
+                    .into_shape([1, 1, 3, 3].as_slice()),
             },
-            // Scale width and height by 2x
+            // Scale width and height by 2x, using `half_pixel`.
             Case {
                 image,
                 scales: vec![1., 1., 2., 2.],
                 coord_transform_mode: None,
-                expected: Tensor::from_data(
-                    &[1, 1, 4, 4],
-                    vec![
-                        0.2, 0.325, 0.575, 0.7, // Y=0
-                        0.225, 0.35, 0.6, 0.725, // Y=1
-                        0.275, 0.4, 0.65, 0.775, // Y=2
-                        0.3, 0.425, 0.675, 0.8, // Y=3
-                    ],
-                ),
+                expected: Tensor::from([
+                    [0.2, 0.325, 0.575, 0.7],
+                    [0.225, 0.35, 0.6, 0.725],
+                    [0.275, 0.4, 0.65, 0.775],
+                    [0.3, 0.425, 0.675, 0.8],
+                ])
+                .into_shape([1, 1, 4, 4].as_slice()),
+            },
+            // Scale width and height by 2x, using `pytorch_half_pixel`. This
+            // should give the same result as for `half_pixel`.
+            Case {
+                image,
+                scales: vec![1., 1., 2., 2.],
+                coord_transform_mode: Some(CoordTransformMode::PytorchHalfPixel),
+                expected: Tensor::from([
+                    [0.2, 0.325, 0.575, 0.7],
+                    [0.225, 0.35, 0.6, 0.725],
+                    [0.275, 0.4, 0.65, 0.775],
+                    [0.3, 0.425, 0.675, 0.8],
+                ])
+                .into_shape([1, 1, 4, 4].as_slice()),
             },
             // Scale width and height by 2x, align corners.
             Case {
@@ -737,17 +745,15 @@
                 image,
                 scales: vec![1., 1., 3., 3.],
                 coord_transform_mode: None,
-                expected: Tensor::from_data(
-                    &[1, 1, 6, 6],
-                    vec![
-                        0.2000, 0.2000, 0.3667, 0.5333, 0.7000, 0.7000, // Y=0
-                        0.2000, 0.2000, 0.3667, 0.5333, 0.7000, 0.7000, // Y=1
-                        0.2333, 0.2333, 0.4000, 0.5667, 0.7333, 0.7333, // Y=2
-                        0.2667, 0.2667, 0.4333, 0.6000, 0.7667, 0.7667, // Y=3
-                        0.3000, 0.3000, 0.4667, 0.6333, 0.8000, 0.8000, // Y=4
-                        0.3000, 0.3000, 0.4667, 0.6333, 0.8000, 0.8000, // Y=5
-                    ],
-                ),
+                expected: Tensor::from([
+                    [0.2000, 0.2000, 0.3667, 0.5333, 0.7000, 0.7000],
+                    [0.2000, 0.2000, 0.3667, 0.5333, 0.7000, 0.7000],
+                    [0.2333, 0.2333, 0.4000, 0.5667, 0.7333, 0.7333],
+                    [0.2667, 0.2667, 0.4333, 0.6000, 0.7667, 0.7667],
+                    [0.3000, 0.3000, 0.4667, 0.6333, 0.8000, 0.8000],
+                    [0.3000, 0.3000, 0.4667, 0.6333, 0.8000, 0.8000],
+                ])
+                .into_shape([1, 1, 6, 6].as_slice()),
             },
         ];
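
For reference, a minimal Python sketch (not part of the PR; the function names are illustrative) of the two coordinate transforms involved in this fix. Both map output pixel centers back to input coordinates and differ only when the output axis has length 1, which is why the new `PytorchHalfPixel` test case expects the same values as the `half_pixel` case:

# Illustrative sketch of the ONNX Resize coordinate transforms. Here `scale`
# is output_size / input_size as in the ONNX spec; the Rust code stores the
# inverse so it can multiply instead of divide.

def half_pixel(dest_coord: int, scale: float) -> float:
    return (dest_coord + 0.5) / scale - 0.5


def pytorch_half_pixel(dest_coord: int, scale: float, length_resized: int) -> float:
    # Same as half_pixel, except that a length-1 output axis maps to coordinate 0.
    if length_resized > 1:
        return (dest_coord + 0.5) / scale - 0.5
    return 0.0


# 2x upscale of an axis to 4 output pixels: the two modes agree.
print([half_pixel(d, 2.0) for d in range(4)])             # [-0.25, 0.25, 0.75, 1.25]
print([pytorch_half_pixel(d, 2.0, 4) for d in range(4)])  # [-0.25, 0.25, 0.75, 1.25]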

Diff for: tools/compare-tensors.py (+22 −2)

@@ -1,18 +1,38 @@
 from argparse import ArgumentParser
+import json
 import sys

 import numpy as np

 from debug_utils import read_tensor

+def read_json_tensor(path: str):
+    """
+    Load a tensor from a JSON file.
+
+    The JSON data format is `{ "data": [elements...], "shape": [dims...] }`.
+    This matches rten-tensor's serde serialization for the `Tensor` type.
+    """
+    with open(path) as tensor_fp:
+        tensor_json = json.load(tensor_fp)
+    return np.array(tensor_json["data"]).reshape(tensor_json["shape"])
+
+
 def main():
     parser = ArgumentParser(description="Compare two binary tensors")
     parser.add_argument('tensor_a', help="File containing first tensor")
     parser.add_argument('tensor_b', help="File containing second_tensor")
     args = parser.parse_args()

-    x = read_tensor(args.tensor_a)
-    y = read_tensor(args.tensor_b)
+    if args.tensor_a.endswith(".json"):
+        x = read_json_tensor(args.tensor_a)
+    else:
+        x = read_tensor(args.tensor_a)
+
+    if args.tensor_b.endswith(".json"):
+        y = read_json_tensor(args.tensor_b)
+    else:
+        y = read_tensor(args.tensor_b)

     print(f"X shape {x.shape} Y shape {y.shape}")
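
The JSON path makes it easy to diff a tensor dumped from Rust (via rten-tensor's serde output) against a NumPy result. A minimal sketch, using a hypothetical helper name, of producing a file in the layout `read_json_tensor` expects:

# Illustrative helper (not part of this PR): write a NumPy array in the
# `{ "data": [...], "shape": [...] }` layout that read_json_tensor() accepts.
import json

import numpy as np


def write_json_tensor(path: str, tensor: np.ndarray) -> None:
    with open(path, "w") as fp:
        json.dump({"data": tensor.flatten().tolist(), "shape": list(tensor.shape)}, fp)


write_json_tensor("expected.json", np.arange(6, dtype="float32").reshape(2, 3))
# Then, for example: python tools/compare-tensors.py expected.json other_tensor.json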
