Skip to content

Commit c7d0730

Browse files
committed
Improve speed of style transfer demo.
1 parent b8c3509 commit c7d0730

6 files changed

Lines changed: 20 additions & 13 deletions

File tree

bridge/lib/bridge.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,8 @@ extern "C" void debug_cpu_only_mode(bool_t mode) {
6767
} else {
6868
best_device = get_best_device();
6969
}
70+
std::cout << "Debug CPU only mode: " << (debug_cpu_only ? "ON" : "OFF") << std::endl;
71+
std::cout.flush();
7072
}
7173

7274
extern "C" bool_t accelerator_available() {
@@ -131,20 +133,19 @@ extern "C" void free_bridge_tensor(bridge_tensor_t bt) {
131133
at::Tensor bridge_to_torch(bridge_tensor_t &bt) {
132134
std::vector<int64_t> sizes_vec(bt.sizes, bt.sizes + bt.dim);
133135
auto shape = torch::IntArrayRef(sizes_vec);
134-
return torch::from_blob(bt.data, shape, torch::kFloat32);
136+
return torch::from_blob(bt.data, shape, torch::kFloat);
135137
}
136138

137139
at::Tensor bridge_to_torch(bridge_tensor_t &bt,torch::Device device, bool copy,torch::ScalarType dtype = torch::kFloat32) {
138140
std::vector<int64_t> sizes_vec(bt.sizes, bt.sizes + bt.dim);
139141
auto shape = torch::IntArrayRef(sizes_vec);
140-
auto t = torch::from_blob(bt.data, shape, torch::kFloat32);
142+
auto t = torch::from_blob(bt.data, shape, torch::kFloat);
141143
if (device != torch::kCPU)
142144
copy = true;
143145
if (copy)
144146
return t.to(device, dtype, /*non_blocking=*/false, /*copy=*/true);
145147
else
146148
return t.to(device, dtype, /*non_blocking=*/false, /*copy=*/false);
147-
148149
}
149150

150151
extern "C" float32_t* unsafe(const float32_t* arr) {

demos/video/chapel-webcam/main.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -130,10 +130,10 @@ int main(int argc, char* argv[]) {
130130

131131
int code = mirror();
132132

133-
std::size_t start = cv::getTickCount();
134-
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
135-
std::size_t end = cv::getTickCount();
136-
std::cout << "Total time taken: " << (end - start) / cv::getTickFrequency() << " seconds" << std::endl;
133+
// std::size_t start = cv::getTickCount();
134+
// std::this_thread::sleep_for(std::chrono::milliseconds(2000));
135+
// std::size_t end = cv::getTickCount();
136+
// std::cout << "Total time taken: " << (end - start) / cv::getTickFrequency() << " seconds" << std::endl;
137137

138138

139139
chpl_library_finalize();

demos/video/chapel-webcam/smol.chpl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,25 +12,25 @@ const acceleratorScaleFactor = accelScale;
1212

1313

1414
export proc getScaledFrameWidth(width: int): int do
15-
if Bridge.acceleratorAvailable() then
15+
if Env.acceleratorAvailable() then
1616
return (width:real * acceleratorScaleFactor):int;
1717
else
1818
return (width:real * cpuScaleFactor):int;
1919

2020
export proc getScaledFrameHeight(height: int): int do
21-
if Bridge.acceleratorAvailable() then
21+
if Env.acceleratorAvailable() then
2222
return (height:real * acceleratorScaleFactor):int;
2323
else
2424
return (height:real * cpuScaleFactor):int;
2525

2626

2727
// if debugCPUOnly then
2828
// writeln("Debugging CPU only!");
29-
// Bridge.debugCpuOnlyMode(debugCPUOnly);
29+
// Env.debugCpuOnlyMode(debugCPUOnly);
3030

3131
writeln("CPU Scale Factor: ", cpuScaleFactor);
3232
writeln("Accelerator Scale Factor: ", acceleratorScaleFactor);
33-
writeln("Accelerator Available: ", Bridge.acceleratorAvailable());
33+
writeln("Accelerator Available: ", Env.acceleratorAvailable());
3434

3535

3636
use Time;

lib/Env.chpl

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,5 +15,9 @@ module Env {
1515
// Maximum rank of dynamicTensor
1616
config param maxRank = if developmentAndTesting then minRankNeeded else maxRankNeeded;
1717

18-
import Bridge.acceleratorAvailable;
18+
private import Bridge;
19+
20+
inline proc acceleratorAvailable(): bool do
21+
return Bridge.acceleratorAvailable();
22+
1923
}

lib/Layer.chpl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ module Layer {
44
private use Env;
55
private import Utilities as util;
66
private use OrderedDict;
7+
private import Bridge;
78

89
class ReLU : Module(?) {
910

lib/Tensor.chpl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ module Tensor {
99
public import Utilities as util;
1010
public import Utilities.Standard;
1111
public import Testing;
12-
public import Bridge;
12+
// public import Bridge;
13+
public import Env;
1314
}

0 commit comments

Comments
 (0)