diff --git a/bridge/include/bridge.h b/bridge/include/bridge.h
index 39f4b9b91..d747c1e26 100644
--- a/bridge/include/bridge.h
+++ b/bridge/include/bridge.h
@@ -104,6 +104,11 @@ proto_bridge_simple(softsign);
 proto_bridge_simple(tanhshrink);
+void split_loop(int64_t idx, int64_t n);
+void split_loop_filler(int64_t n,int64_t* ret);
+
+void show_webcam(void);
+
 // bridge_tensor_t conv2d(
 // bridge_tensor_t input,
diff --git a/bridge/include/bridge.h.pch b/bridge/include/bridge.h.pch
new file mode 100644
index 000000000..338831a66
Binary files /dev/null and b/bridge/include/bridge.h.pch differ
diff --git a/bridge/lib/bridge.cpp b/bridge/lib/bridge.cpp
index 9356b8ab4..b34db7554 100644
--- a/bridge/lib/bridge.cpp
+++ b/bridge/lib/bridge.cpp
@@ -13,6 +13,11 @@
 #include
 #include
 #include
+#include
+#include
+
+#include
+
 #define def_bridge_simple(Name) \
 extern "C" bridge_tensor_t Name(bridge_tensor_t input) { \
@@ -381,3 +386,69 @@ extern "C" float sumArray(float* arr, int* sizes, int dim) {
 // auto t = torch::from_blob(arr, shape, torch::kFloat);
 // return t.sum().item();
 }
+
+
+// Prints "idx(idx,n) = i" to stdout for every i in [0, n).
+extern "C" void split_loop(int64_t idx, int64_t n) {
+    // 64-bit counter: an `int` counter overflows before it can reach an n above INT_MAX.
+    for (int64_t i = 0; i < n; ++i) {
+        // std::endl already flushes, so no separate flush() call is needed.
+        std::cout << "idx(" << idx << "," << n << ") = " << i << std::endl;
+    }
+}
+
+// Stores each i in [0, n) into *ret in turn, sleeping for zero seconds after
+// each store. Returns without writing when ret is null.
+extern "C" void split_loop_filler(int64_t n,int64_t* ret) {
+    if (ret == nullptr) {
+        return;
+    }
+    for (int64_t i = 0; i < n; ++i) {
+        *ret = i;
+        std::this_thread::sleep_for(std::chrono::seconds(0));
+    }
+}
+
+
+
+// Opens camera cam_index with the AVFoundation backend, then requests a buffer
+// size of 1 and 60 FPS. On failure, logs to stderr and returns an unopened capture.
+cv::VideoCapture open_camera(int cam_index) {
+    cv::VideoCapture cap(cam_index, cv::CAP_AVFOUNDATION);
+    if (!cap.isOpened()) {
+        std::cerr << "Could not open camera index " << cam_index << std::endl;
+        return cv::VideoCapture();
+    }
+    cap.set(cv::CAP_PROP_BUFFERSIZE, 1); // minimal internal buffering
+    cap.set(cv::CAP_PROP_FPS, 60); // request higher FPS if possible
+    return cap;
+}
+
+
+// Shows frames from camera 0 in a window named "webcam" until ESC is pressed
+// or a frame cannot be read.
+extern "C" void show_webcam(void) {
+    cv::VideoCapture cap = open_camera(0);
+    if (!cap.isOpened()) {
+        // open_camera has already reported the failure; do not follow it with a misleading "Empty frame" warning.
+        return;
+    }
+
+    cv::Mat frame_bgr;
+
+    while (true) {
+        if (!cap.read(frame_bgr) || frame_bgr.empty()) {
+            std::cerr << "[WARN] Empty frame, exiting" << std::endl;
+            break;
+        }
+
+        cv::imshow("webcam", frame_bgr);
+
+        if (cv::waitKey(1) == 27) { // ESC key
+            break;
+        }
+    }
+
+    cap.release();
+    cv::destroyAllWindows();
+}
\ No newline at end of file
diff --git a/demos/video/CMakeLists.txt b/demos/video/CMakeLists.txt
index 4a883bfd7..c0b762c78 100644
--- a/demos/video/CMakeLists.txt
+++ b/demos/video/CMakeLists.txt
@@ -9,22 +9,19 @@ find_library(METAL Metal REQUIRED)
 find_library(FOUNDATION Foundation REQUIRED)
+
 add_executable(VidStreamer
-    ${CMAKE_CURRENT_SOURCE_DIR}/webcam_infer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/cvtool.hpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/imageops.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/webcam-capture/webcam_infer.cpp
 )
 target_include_directories(VidStreamer
     PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
     ${LIBTORCH_DIR}/include
     ${LIBTORCH_DIR}/include/torch/csrc/api/include
 )
-target_link_directories(VidStreamer
-    PRIVATE
-    ${LIBTORCH_DIR}/lib
-)
+target_link_directories(VidStreamer PRIVATE ${LIBTORCH_DIR}/lib)
 target_link_libraries(VidStreamer
     PRIVATE
@@ -43,10 +40,58 @@ set_target_properties(VidStreamer PROPERTIES
     RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
 )
-
 if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
     target_compile_options(VidStreamer PRIVATE -Ofast -flto -ffast-math)
     target_link_options(VidStreamer PRIVATE -flto)
 endif()
+
+
+
+
+
+add_executable(StyleTransfer
+    ${CMAKE_CURRENT_SOURCE_DIR}/style-transfer/style_transfer.cpp
+)
+
+target_include_directories(StyleTransfer
+    PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+    ${LIBTORCH_DIR}/include
+    ${LIBTORCH_DIR}/include/torch/csrc/api/include
+)
+
+target_link_directories(StyleTransfer PRIVATE ${LIBTORCH_DIR}/lib)
+
+target_link_libraries(StyleTransfer
+    PRIVATE
+    -ltorch
+    -ltorch_cpu
+    -lc10
+    -ltorch_global_deps
+    ${OpenCV_LIBS}
+    # ${TORCH_LIBRARIES}
+    ${ACCELERATE}
+    ${METAL}
+    ${FOUNDATION}
+)
+
+set_target_properties(StyleTransfer PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}
+)
+
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    target_compile_options(StyleTransfer PRIVATE -Ofast -flto -ffast-math)
+    target_link_options(StyleTransfer PRIVATE -flto)
+endif()
+
+
+add_custom_command(
+    TARGET StyleTransfer
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_directory
+        "${CMAKE_CURRENT_SOURCE_DIR}/style-transfer/models"
+        "$<TARGET_FILE_DIR:StyleTransfer>/style-transfer/models"
+    COMMENT "Copying style-transfer/models next to the StyleTransfer executable"
+)
\ No newline at end of file
diff --git a/demos/video/cvtool.hpp b/demos/video/include/cvtool.hpp
similarity index 50%
rename from demos/video/cvtool.hpp
rename to demos/video/include/cvtool.hpp
index fed25eb3b..ba3e63d40 100644
--- a/demos/video/cvtool.hpp
+++ b/demos/video/include/cvtool.hpp
@@ -8,6 +8,51 @@
 #include
+namespace cvtool {
+    static torch::Device default_device(torch::kCPU);
+    // True once set_default_device() has been called.
+    static bool default_device_set = false;
+    // True once get_default_device() has auto-detected a device, so the MPS
+    // probe and its log line run only on the first call.
+    static bool default_device_detected = false;
+    // Overrides the default device and returns the new value.
+    static torch::Device set_default_device(torch::Device device) {
+        default_device = device;
+        default_device_set = true;
+        return default_device;
+    }
+    // Returns the device chosen by set_default_device(); otherwise MPS when
+    // available, else CPU (detected once, then cached).
+    torch::Device get_default_device() {
+        if (!default_device_set && !default_device_detected) {
+            if (torch::mps::is_available()) {
+                std::cout << "[INFO] Running on MPS" << std::endl;
+                default_device = torch::Device(torch::kMPS);
+            } else {
+                std::cout << "[INFO] MPS not available, falling back to CPU" << std::endl;
+                default_device = torch::Device(torch::kCPU);
+            }
+            default_device_detected = true;
+        }
+        return default_device;
+    }
+
+    // True when a device was set explicitly or MPS is unavailable.
+    bool can_get_default_device() {
+        return default_device_set || !torch::mps::is_available();
+    }
+
+    // Returns the CPU device.
+    torch::Device get_host_device() {
+        return torch::Device(torch::kCPU);
+    }
+}
+
+// enum CVToColorPermutation {
+// RGB_TO_BGR = cv::COLOR_RGB2BGR,
+// BGR_TO_RGB = cv::COLOR_BGR2RGB,
+// };
+
 static torch::Device default_device(torch::kCPU);
 torch::Device get_default_device();
@@ -69,13 +114,87 @@ std::shared_ptr create_frame_buffer_tensor(int height,int width,torc
     return create_buffer_tensor(sizes, torch::kFloat32);
 }
-at::Tensor to_tensor(cv::Mat &img) {
-    auto t = torch::from_blob(img.data, {1, img.rows, img.cols, 3}, torch::kUInt8).clone();
-    t = t.to(default_device);
-    t = t.to(torch::kFloat32).permute({0, 3, 1, 2}) / 255.0;
-    return t;//.to(default_device,true);
+// Converts an 8-bit, 3-channel OpenCV frame (H×W×3) into a 1×3×H×W float16
+// tensor on `device`, with every value divided by 255.
+at::Tensor to_tensor(cv::Mat &frame, torch::Device device = default_device) {
+    // from_blob below reads rows*cols*3 bytes, so any other pixel layout would
+    // be misread or overrun the buffer.
+    TORCH_CHECK(frame.empty() || frame.type() == CV_8UC3,
+                "to_tensor expects a CV_8UC3 frame");
+    // from_blob also assumes densely packed rows; copy ROI or padded frames first.
+    cv::Mat packed = frame.isContinuous() ? frame : frame.clone();
+
+    auto t = at::from_blob(packed.data, {1, packed.rows, packed.cols, 3}, torch::kUInt8).permute({0, 3, 1, 2}).clone();
+    auto options = at::TensorOptions()
+        .dtype(torch::kFloat16)
+        .device(device)
+        .requires_grad(false);
+    return t.to(options,true).contiguous().div_(255.0);
+
+    // t = t.to(default_device,);
+    // t = t.to(torch::kFloat32).permute({0, 3, 1, 2}).contiguous() / 255.0;
+
+    // return t;//.to(default_device,true);
+}
+
+// at::Tensor to_tensor(cv::Mat &img, cv::ColorConversionCodes color_conversion = cv::COLOR_BGR2RGB) {
+// auto t = torch::from_blob(img.data, {1, img.rows, img.cols, 3}, torch::kUInt8).clone();
+// t = t.to(default_device);
+// t = t.to(torch::kFloat32).permute({0, 3, 1, 2}) / 255.0;
+// return t;//.to(default_device,true);
+// }
+
+// at::Tensor to_tensor(cv::Mat &img, cv::ColorConversionCodes color_conversion = cv::COLOR_BGR2RGB, device = ) {
+// auto t = torch::from_blob(img.data, {1, img.rows, img.cols, 3}, torch::kUInt8).clone();
+// t = t.to(default_device);
+// t = t.to(torch::kFloat32).permute({0, 3, 1, 2}) / 255.0;
+// return t;//.to(default_device,true);
+// }
+
+// at::Tensor to_tensor(cv::Mat &img, torch::Device device = cvtool::get_default_device()) {
+// auto img_t = torch::from_blob(img.data, {1, img.rows, img.cols, 3}, torch::kUInt8);
+// auto t = img_t.clone().to(device);
+// t = t.to(torch::kFloat32).permute({0, 3, 1, 2}) / 255.0;
+// return t;//.to(default_device,true);
+// }
+
+//--------------------------------------------------------------------
+// Converts an OpenCV image into a C×H×W float16 tensor on `device`.
+// • img : any H×W×C OpenCV matrix (CV_8U, CV_32F, CV_16F …, planar or packed);
+//         CV_8U pixels are rescaled to [0,1], other depths keep their values
+// • device : target torch device (default = get_default_device())
+//--------------------------------------------------------------------
+at::Tensor to_tensor_(const cv::Mat& img, torch::Device device = get_default_device())
+{
+    // 1. Make sure the source data are contiguous
+    cv::Mat contiguous = img.isContinuous() ? img : img.clone();
+
+    // 2. Convert pixel type to 32-bit float so we keep enough head-room for
+    //    the later FP16 cast. (OpenCV has only limited native FP16 support,
+    //    so converting to CV_32F first is usually safer and portable.)
+    //    Only 8-bit input is rescaled to [0,1]; dividing float input by 255
+    //    as well would shrink values that are not on the 0..255 scale.
+    const double scale = (contiguous.depth() == CV_8U) ? 1.0 / 255.0 : 1.0;
+    cv::Mat float32;
+    contiguous.convertTo(float32, CV_32F, scale);
+
+    // 3. Wrap the OpenCV buffer with a *view* tensor (no copy yet).
+    auto tmp = torch::from_blob(
+        float32.data, // raw pointer
+        {float32.rows, float32.cols, float32.channels()},
+        torch::TensorOptions().dtype(torch::kFloat32));
+
+    // 4. Re-arrange to CHW, move to the wanted device and cast to FP16.
+    //    copy=true makes the result own its storage. The copy is blocking
+    //    because `float32` is released when this function returns.
+    auto t = tmp.permute({2, 0, 1}) // HWC → CHW
+                 .to(device, /*dtype=*/torch::kFloat16,
+                     /*non_blocking=*/false, /*copy=*/true);
+
+    return t; // C×H×W, float16, on `device`
 }
+
+
 cv::Mat to_mat(at::Tensor &tensor) {
     // Ensure the tensor is on the CPU and not on the GPU
     // at::Tensor cpu_tensor = tensor.to(torch::kCPU);
@@ -87,17 +206,31 @@ cv::Mat to_mat(at::Tensor &tensor) {
     int height = tensor.size(2);
     int width = tensor.size(3);
     auto t = tensor
-        .mul(255)
-        .squeeze()
         .detach()
+        .squeeze()
+        .contiguous()
+        .mul(255.0)
+        .clamp(0, 255)
         .permute({1, 2, 0})
         .contiguous()
         .to(torch::kUInt8)
-        // .clamp(0, 255)
         .clone()
-        .to(torch::kCPU);
+        .to(at::kCPU); // blocking copy: the host buffer is read right after this chain
+
+
+    // auto t = tensor
+    // .mul(255)
+    // .squeeze()
+    // .detach()
+    // .permute({1, 2, 0})
+    // .contiguous()
+    // .to(torch::kUInt8)
+    // // .clamp(0, 255)
+    // .clone()
+    // // .to(cvtool::get_default_device(), /*non_blocking=*/true, /*copy=*/true)
+    //
.to(torch::kCPU);
     cv::Mat mat = cv::Mat(height, width, CV_8UC3, t.data_ptr());
-    return mat;
+    return mat.clone();
@@ -112,10 +224,38 @@ cv::Mat to_mat(at::Tensor &tensor) {
     // return mat.clone();
 }
+
+// Converts an image tensor to an 8-bit, 3-channel cv::Mat (values scaled by
+// 255 and clamped to [0, 255]) and applies `color_conversion` via cv::cvtColor.
+// NOTE(review): reads height/width from dims 2 and 3 and squeezes the rest, so
+// this presumably expects a 1×3×H×W tensor — confirm against callers.
+cv::Mat to_mat(at::Tensor &tensor, cv::ColorConversionCodes color_conversion) {
+
+    int height = tensor.size(2);
+    int width = tensor.size(3);
+    auto t = tensor
+        // .to(torch::kFloat32)
+        .mul(255.0)
+        .clamp(0.0, 255.0)
+        .to(torch::kUInt8)
+        .squeeze()
+        .detach()
+        .permute({1, 2, 0})
+        .contiguous()
+        .clone()
+        .to(torch::kCPU);
+    // `mat` only borrows t's storage; cvtColor writes into freshly allocated
+    // memory owned by mat2, so mat2 can be returned without another copy.
+    cv::Mat mat = cv::Mat(height, width, CV_8UC3, t.data_ptr());
+    cv::Mat mat2;
+    cv::cvtColor(mat, mat2, color_conversion);
+    return mat2;
+}
+
 torch::Device get_default_device() {
     if (torch::mps::is_available()) {
-        // default_device = torch::Device(torch::kMPS);
         std::cout << "[INFO] Running on MPS" << std::endl;
+        default_device = torch::Device(torch::kMPS);
     } else {
         std::cout << "[INFO] MPS not available, falling back to CPU" << std::endl;
     }
@@ -233,3 +373,44 @@ at::Tensor capture_webcam(int cam_index) {
     auto tensor = to_tensor(frame);
     return tensor;
 }
+
+
+// Convolves `input` with a kernel built from the Sobel dx and dy matrices
+// (stride 1, padding 1). The kernel is created in input's dtype and moved to
+// input's device. `device` is currently unused.
+// NOTE(review): cat(..., 0) plus two unsqueeze(0) calls yield a [1, 1, 6, 3]
+// kernel, which only accepts single-channel input and is not a pair of 3×3
+// filters — confirm the intended kernel layout.
+torch::Tensor sobel_edge_detection(torch::Tensor& input,torch::Device device = cvtool::get_default_device()) {
+    // // // Sobel edge detection
+    // auto sobel_x = torch::tensor({{-1, 0, 1}, {-2, 0, 2}, {-1, 0, 1}}, input.dtype()).view({1, 1, 3, 3});
+    // auto sobel_y = torch::tensor({{1, 2, 1}, {0, 0, 0}, {-1, -2, -1}}, input.dtype()).view({1, 1, 3, 3});
+    // sobel_x.to(input.device());
+    // sobel_y.to(input.device());
+
+    // auto edges_x = torch::nn::functional::conv2d(input.unsqueeze(0), sobel_x);
+    // auto edges_y = torch::nn::functional::conv2d(input.unsqueeze(0), sobel_y);
+
+    // return (edges_x + edges_y).squeeze(0);
+
+
+    torch::Tensor sobel_dx = torch::tensor({{-1, 0, 1},
+                                            {-2, 0, 2},
+                                            {-1, 0, 1}}).to(input.dtype());
+    torch::Tensor sobel_dy = torch::tensor({{-1, -2, -1},
+                                            {0, 0, 0},
+                                            {1, 2, 1}}).to(input.dtype());
+    // Tensor::to returns a new tensor, so the result has to be assigned.
+    sobel_dx = sobel_dx.to(input.device());
+    sobel_dy = sobel_dy.to(input.device());
+
+
+    // Both halves already live on input's device, so the concatenated kernel does too.
+    torch::Tensor sobel_kernel = torch::cat({sobel_dx, sobel_dy}, 0).unsqueeze(0).unsqueeze(0);
+
+    return torch::conv2d(input, sobel_kernel, {}, 1, 1);
+}
+
+
+
+
diff --git a/demos/video/imageops.hpp b/demos/video/include/imageops.hpp
similarity index 100%
rename from demos/video/imageops.hpp
rename to demos/video/include/imageops.hpp
diff --git a/demos/video/style-transfer/.gitignore b/demos/video/style-transfer/.gitignore
new file mode 100644
index 000000000..39d36c725
--- /dev/null
+++ b/demos/video/style-transfer/.gitignore
@@ -0,0 +1 @@
+train2014.zip
diff --git a/demos/video/style-transfer/README.md b/demos/video/style-transfer/README.md
new file mode 100644
index 000000000..7921f9050
--- /dev/null
+++ b/demos/video/style-transfer/README.md
@@ -0,0 +1,9 @@
+
+
+
+python3 neural_style.py export --model saved_models/udnie.pth --accel
+python3 neural_style.py export --model saved_models/candy.pth --accel
+python3 neural_style.py export --model saved_models/mosaic.pth --accel
+
+python3 style_transfer_test.py --model-file=models/exports/cpu/mosaic_float16.pt --input-video-file=videos/deer.mp4 --output-video-file=videos/mosaic_deer.mp4 --show-output
+
diff --git a/demos/video/style-transfer/WORKING.sh b/demos/video/style-transfer/WORKING.sh
new file mode 100644
index 000000000..be987cbf5
--- /dev/null
+++ b/demos/video/style-transfer/WORKING.sh
@@ -0,0 +1 @@
+/usr/bin/clang++ -std=c++20 style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps
\ No newline at end of file
diff --git a/demos/video/style-transfer/build.sh b/demos/video/style-transfer/build.sh
new file mode 100644
index 000000000..b8865e8ad
--- /dev/null
+++ b/demos/video/style-transfer/build.sh
@@ -0,0 +1,3 @@
+/usr/bin/clang++ -std=c++20 -c -fPIC
mirror.cpp -o mirror.o -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps
+
+/usr/bin/clang++ -shared -o libmirror.dylib mirror.o -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps
\ No newline at end of file
diff --git a/demos/video/style-transfer/download_saved_models.py b/demos/video/style-transfer/download_saved_models.py
new file mode 100644
index 000000000..569aee6ad
--- /dev/null
+++ b/demos/video/style-transfer/download_saved_models.py
@@ -0,0 +1,30 @@
+import os
+import zipfile
+
+# PyTorch 1.1 moved _download_url_to_file
+# from torch.utils.model_zoo to torch.hub.
+# PyTorch 1.0 has a different _download_url_to_file
+# that takes 2 arguments.
+# TODO: Once support for PyTorch 1.0 and older is dropped,
+# remove the torch.utils.model_zoo import.
+# Ref. PyTorch #18758
+# https://github.com/pytorch/pytorch/pull/18758/commits
+try:
+    from torch.utils.model_zoo import _download_url_to_file
+except ImportError:
+    try:
+        from torch.hub import download_url_to_file as _download_url_to_file
+    except ImportError:
+        from torch.hub import _download_url_to_file
+
+
+def unzip(source_filename, dest_dir):
+    with zipfile.ZipFile(source_filename) as zf:
+        zf.extractall(path=dest_dir)
+
+
+if __name__ == '__main__':
+    _download_url_to_file('https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1', 'saved_models.zip', None, True)
+    unzip('saved_models.zip', '.')
+    _download_url_to_file('http://images.cocodataset.org/zips/train2014.zip', 'train2014.zip', None, True)
+    unzip('train2014.zip', '.')
diff --git a/demos/video/style-transfer/export_and_run_model.sh b/demos/video/style-transfer/export_and_run_model.sh
new file mode 100644
index 000000000..d301eec94
--- /dev/null
+++ b/demos/video/style-transfer/export_and_run_model.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Exports the named style model and runs it on videos/deer.mp4.
+# Usage: export_and_run_model.sh MODEL_NAME (checkpoint file name without extension)
+
+# Stop on the first failure so the test run never starts after a failed export.
+set -euo pipefail
+
+# ${1:?...} aborts with the usage message when no model name is given.
+MODEL_NAME="${1:?usage: export_and_run_model.sh MODEL_NAME}"
+
+# MODEL_NAME="nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000"
+# MODEL_NAME="nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000"
+
+# Try the .model checkpoint first and fall back to the .pth one.
+python3 neural_style.py export --model "saved_models/${MODEL_NAME}.model" --accel \
+    || python3 neural_style.py export --model "saved_models/${MODEL_NAME}.pth" --accel
+
+
+python3 style_transfer_test.py --model-file="models/exports/cpu/${MODEL_NAME}_float16.pt" --input-video-file=videos/deer.mp4 --show-output
\ No newline at end of file
diff --git a/demos/video/style-transfer/export_model.sh b/demos/video/style-transfer/export_model.sh
new file mode 100644
index 000000000..810321cf6
--- /dev/null
+++ b/demos/video/style-transfer/export_model.sh
@@ -0,0 +1,4 @@
+python3 neural_style.py export --model saved_models/ckpt_epoch_0_batch_id_18000.pth --accel
+# python3 style_transfer_test.py --model-file=models/exports/cpu/ckpt_epoch_0_batch_id_18000_float16.pt --use-webcam --show-output
+
+python3
style_transfer_test.py --model-file=models/exports/cpu/ckpt_epoch_0_batch_id_18000_float16.pt --input-video-file=videos/deer.mp4 --show-output \ No newline at end of file diff --git a/demos/video/style-transfer/helpme.txt b/demos/video/style-transfer/helpme.txt new file mode 100644 index 000000000..926c95704 --- /dev/null +++ b/demos/video/style-transfer/helpme.txt @@ -0,0 +1,51 @@ + 5378 clang++ mirror.cpp + 5379 clang++ mirror.cpp -o mirror -I/usr/local/include/opencv4 -L/usr/local/lib -lopencv_core -lopencv_imgcodecs -lopencv_imgproc -lopencv_highgui + 5380 clang++ mirror.cpp -o mirror $(pkg-config --cflags --libs opencv) + 5381 clang++ mirror.cpp -o mirror $(pkg-config --cflags --libs opencv4) + 5382 ls + 5383 ./mirror + 5384 clang++ mirror.cpp -o mirror -I $(pkg-config --cflags --libs opencv4) + 5385 ls + 5386 clang++ mirror.cpp -o mirror $(pkg-config --cflags --libs opencv4) + 5387 pwd + 5388 clang++ mirror.cpp -o mirror $(pkg-config --cflags --libs opencv4) + 5389 ls ../../ + 5390 ls ../../../ + 5391 clang++ mirror.cpp -o mirror $(pkg-config --cflags --libs opencv4) -I ../../../libtorch/include -I ../../../include/torch/csrc/api/include + 5392 clang++ style_transfer.cpp -o styletransfer $(pkg-config --cflags --libs opencv4) -I ../../../libtorch/include -I ../../../include/torch/csrc/api/include + 5393 clang++ style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) + 5394 pwd + 5395 ls ../../ + 5396 ls ../../.. 
+ 5397 ls ../../../libtorch + 5398 ls ../../../libtorch/include + 5399 clang++ style_transfer.cpp -o styletransfer -I../../../libtorch/include -I../../../include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) + 5400 clang style_transfer.cpp -o styletransfer -I../../../libtorch/include -I../../../include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) + 5401 clang style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) + 5402 clang style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) + 5403 /usr/bin/clang style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) + 5404 clang style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) + 5405 g++ style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) + 5406 clang -std=c++20 style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) + 5407 clang -std=c++17 style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) + 5408 /usr/bin/clang -std=c++17 style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) + 5409 /usr/bin/clang -std=c++17 style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I 
../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps + 5410 /usr/bin/clang -std=c++20 style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps + 5411 /usr/bin/clang++ -std=c++20 style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps + 5412 pwd + 5413 git add --all + 5414 git commit -m "Futile compilation attempt working. see new file. " + 5415 git push + 5416 /usr/bin/clang++ -std=c++20 style_transfer.cpp -o styletransfer -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include -I ../include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps + 5417 ls + 5418 otool -L styletransfer + 5419 /usr/bin/clang++ -std=c++20 mirror.cpp -o mirror -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps + 5420 /usr/bin/clang++ -std=c++20 -c -fPIC mirror.cpp -o mirror -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps + 5421 /usr/bin/clang++ -std=c++20 -c -fPIC mirror.cpp -o mirror.o -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps + 5422 ls + 5423 otool -L mirror.o + 5424 /usr/bin/clang++ -std=c++20 -c -fPIC 
mirror.cpp -o mirror.o -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps
+ 5425 /usr/bin/clang++ -shared -o libmirror.dylib mirror.o
+ 5426 /usr/bin/clang++ -shared -o libmirror.dylib mirror.o -I ../../../libtorch/include -I ../../../libtorch/include/torch/csrc/api/include $(pkg-config --cflags --libs opencv4) -L ../../../libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps
+ 5427 ls
+ 5428 otool
\ No newline at end of file
diff --git a/demos/video/style-transfer/libmirror.dylib b/demos/video/style-transfer/libmirror.dylib
new file mode 100755
index 000000000..163bb2120
Binary files /dev/null and b/demos/video/style-transfer/libmirror.dylib differ
diff --git a/demos/video/style-transfer/mirror.chpl b/demos/video/style-transfer/mirror.chpl
new file mode 100644
index 000000000..9793ef6d5
--- /dev/null
+++ b/demos/video/style-transfer/mirror.chpl
@@ -0,0 +1,13 @@
+
+use CTypes;
+
+require "mirror.h", "-lmirror";
+
+extern proc run_mirror(): void;
+
+
+proc main(args: [] string) {
+    writeln("Hello, world!");
+
+    run_mirror();
+}
diff --git a/demos/video/style-transfer/mirror.cpp b/demos/video/style-transfer/mirror.cpp
new file mode 100644
index 000000000..52c0f8522
--- /dev/null
+++ b/demos/video/style-transfer/mirror.cpp
@@ -0,0 +1,30 @@
+#include
+#include
+#include "mirror.h"
+
+// Shows frames from the default camera in a window named "Webcam" until a key
+// is pressed or a frame cannot be captured.
+extern "C" void run_mirror() {
+    cv::VideoCapture cap(0); // Open the default camera (0)
+    if (!cap.isOpened()) {
+        std::cerr << "Error: Could not open camera." << std::endl;
+        // Nothing to capture from; return instead of also reporting a failed frame.
+        return;
+    }
+
+    cv::Mat frame;
+    while (true) {
+        cap >> frame; // Capture a new frame
+        if (frame.empty()) {
+            std::cerr << "Error: Could not capture frame." << std::endl;
+            break;
+        }
+
+        cv::imshow("Webcam", frame); // Display the captured frame
+        if (cv::waitKey(30) >= 0) break; // Exit on any key press
+    }
+
+    cap.release(); // Release the camera
+    cv::destroyAllWindows(); // Close all OpenCV windows
+}
+
diff --git a/demos/video/style-transfer/mirror.h b/demos/video/style-transfer/mirror.h
new file mode 100644
index 000000000..95271acc9
--- /dev/null
+++ b/demos/video/style-transfer/mirror.h
@@ -0,0 +1,17 @@
+
+#ifndef MIRROR_H
+#define MIRROR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Runs the webcam mirror loop (defined in mirror.cpp). Declared with (void)
+   because an empty parameter list leaves the parameters unspecified in C. */
+void run_mirror(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MIRROR_H
\ No newline at end of file
diff --git a/demos/video/style-transfer/mirror.o b/demos/video/style-transfer/mirror.o
new file mode 100644
index 000000000..ea4bed776
Binary files /dev/null and b/demos/video/style-transfer/mirror.o differ
diff --git a/demos/video/style-transfer/model.ipynb b/demos/video/style-transfer/model.ipynb
new file mode 100644
index 000000000..acb96fccc
--- /dev/null
+++ b/demos/video/style-transfer/model.ipynb
@@ -0,0 +1,300 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6e4d2e04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "ec74c8a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MyModule(torch.nn.Module):\n",
+    " def __init__(self, N, M):\n",
+    " super(MyModule, self).__init__()\n",
+    " self.linear = torch.nn.Linear(N, M)\n",
+    "\n",
+    " def forward(self, input):\n",
+    " return self.linear(input)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "180e54ac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "my_module = MyModule(10,20)\n",
+    "# sm = torch.jit.script(my_module)\n",
+    "sm = torch.jit.script(my_module)\n",
+    "sm.save(\"models/my_module.pt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5e377e0",
+   "metadata": {},
+   "outputs": [],
+
"source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89e90304", + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Parent directory models does not exist.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodels/my_module.pt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.venv/lib/python3.12/site-packages/torch/jit/_script.py:754\u001b[0m, in \u001b[0;36mRecursiveScriptModule.save\u001b[0;34m(self, f, **kwargs)\u001b[0m\n\u001b[1;32m 745\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21msave\u001b[39m(\u001b[38;5;28mself\u001b[39m, f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 746\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Save with a file-like object.\u001b[39;00m\n\u001b[1;32m 747\u001b[0m \n\u001b[1;32m 748\u001b[0m \u001b[38;5;124;03m save(f, _extra_files={})\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 752\u001b[0m \u001b[38;5;124;03m DO NOT confuse these two functions when it comes to the 'f' parameter functionality.\u001b[39;00m\n\u001b[1;32m 753\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 754\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_c\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Parent directory models does not exist." + ] + } + ], + "source": [ + "# sm.save(\"models/my_module.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d85b6e83", + "metadata": {}, + "outputs": [], + "source": [ + "m = torch.jit.load(\"models/my_module.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7d6255fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RecursiveScriptModule(\n", + " original_name=MyModule\n", + " (linear): RecursiveScriptModule(original_name=Linear)\n", + ")" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "0d8ff397", + "metadata": {}, + "outputs": [], + "source": [ + "x = torch.randn(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "ffe62563", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([-0.5386, 0.6545, 0.4650, -0.3320, 0.2735, 0.2796, -0.4549, 0.2646,\n", + " -0.9322, -0.3031, -0.3441, -0.3761, 0.6457, 0.6456, -0.2478, -0.2270,\n", + " 0.8485, 0.9710, -0.0596, 0.6110], grad_fn=)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "51739d61", + "metadata": {}, + "outputs": [], + "source": [ + "# sm = torch.jit.script(style_model.to(torch.float32))\n", + "# 
sm.save(f\"models/{model_name}_float32.pt\")\n", + "\n", + "# sm = torch.jit.script(style_model.to(torch.float16))\n", + "# sm.save(f\"models/{model_name}_float16.pt\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0173e2e", + "metadata": {}, + "outputs": [], + "source": [ + "# torch::Tensor sobel_dx = torch::tensor({{-1, 0, 1},\n", + "# {-2, 0, 2},\n", + "# {-1, 0, 1}}).to(torch::kFloat32);\n", + "# torch::Tensor sobel_dy = torch::tensor({{-1, -2, -1},\n", + "# {0, 0, 0},\n", + "# {1, 2, 1}}).to(torch::kFloat32);\n", + "\n", + "# torch::Tensor sobel_kernel = torch::cat({sobel_dx, sobel_dy}, 0).unsqueeze(0).unsqueeze(0);\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "c09f3a28", + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Given groups=1, weight of size [1, 1, 6, 3], expected input[1, 3, 1428, 1904] to have 1 channels, but got 3 channels instead", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[32], line 20\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msobel_cnn(x)\n\u001b[1;32m 19\u001b[0m sobel \u001b[38;5;241m=\u001b[39m Sobel()\u001b[38;5;241m.\u001b[39mto(torch\u001b[38;5;241m.\u001b[39mfloat16)\n\u001b[0;32m---> 20\u001b[0m \u001b[43msobel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1428\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m1904\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat16\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 21\u001b[0m sm \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mjit\u001b[38;5;241m.\u001b[39mscript(sobel)\n\u001b[1;32m 22\u001b[0m sm\u001b[38;5;241m.\u001b[39msave(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodels/sobel.pt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "Cell \u001b[0;32mIn[32], line 17\u001b[0m, in \u001b[0;36mSobel.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msobel_cnn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[0;32m~/.venv/lib/python3.12/site-packages/torch/nn/modules/conv.py:554\u001b[0m, in \u001b[0;36mConv2d.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 553\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[0;32m--> 554\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_conv_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.venv/lib/python3.12/site-packages/torch/nn/modules/conv.py:549\u001b[0m, in \u001b[0;36mConv2d._conv_forward\u001b[0;34m(self, input, weight, bias)\u001b[0m\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mzeros\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m F\u001b[38;5;241m.\u001b[39mconv2d(\n\u001b[1;32m 539\u001b[0m 
F\u001b[38;5;241m.\u001b[39mpad(\n\u001b[1;32m 540\u001b[0m \u001b[38;5;28minput\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reversed_padding_repeated_twice, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 547\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgroups,\n\u001b[1;32m 548\u001b[0m )\n\u001b[0;32m--> 549\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv2d\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 550\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroups\u001b[49m\n\u001b[1;32m 551\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Given groups=1, weight of size [1, 1, 6, 3], expected input[1, 3, 1428, 1904] to have 1 channels, but got 3 channels instead" + ] + } + ], + "source": [ + "class Sobel(torch.nn.Module):\n", + " def __init__(self):\n", + " super(Sobel, self).__init__()\n", + " sobel_dx = torch.tensor([[-1, 0, 1],\n", + " [-2, 0, 2],\n", + " [-1, 0, 1]]).to(torch.float16)\n", + " sobel_dy = torch.tensor([[-1, -2, -1],\n", + " [0, 0, 0],\n", + " [1, 2, 1]]).to(torch.float16)\n", + " sobel_kernel = torch.cat((sobel_dx, sobel_dy), 
0).unsqueeze(0).unsqueeze(0)\n", + " sobel_kernel = sobel_kernel.to(torch.float16)\n", + " self.sobel_kernel = torch.nn.Parameter(sobel_kernel, requires_grad=False)\n", + " self.sobel_cnn = torch.nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1, bias=False)\n", + " self.sobel_cnn.weight = torch.nn.Parameter(sobel_kernel, requires_grad=False)\n", + "\n", + " def forward(self, x):\n", + " return self.sobel_cnn(x)\n", + "\n", + "sobel = Sobel().to(torch.float16)\n", + "sobel(torch.randn(3, 1428, 1904).to(torch.float16))\n", + "sm = torch.jit.script(sobel)\n", + "sm.save(\"models/sobel.pt\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "3507a1fb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([3, 3])\n", + "torch.Size([3, 3])\n", + "torch.Size([2, 3, 3])\n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "expected stride to be a single integer value or a list of 1 values to match the convolution dimensions, but got stride=[1, 1]", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[57], line 18\u001b[0m\n\u001b[1;32m 11\u001b[0m X \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m1428\u001b[39m, \u001b[38;5;241m1904\u001b[39m)\u001b[38;5;241m.\u001b[39mto(torch\u001b[38;5;241m.\u001b[39mfloat16)\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# sobel_cnn = torch.nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1, bias=False)\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# sobel_cnn.weight = torch.nn.Parameter(sobel_kernel, requires_grad=False)\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# sobel_cnn = sobel_cnn.to(torch.float16)\u001b[39;00m\n\u001b[1;32m 16\u001b[0m 
\u001b[38;5;66;03m# sobel_cnn(X)\u001b[39;00m\n\u001b[0;32m---> 18\u001b[0m Y \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunctional\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv2d\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msobel_kernel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstride\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpadding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mRuntimeError\u001b[0m: expected stride to be a single integer value or a list of 1 values to match the convolution dimensions, but got stride=[1, 1]" + ] + } + ], + "source": [ + "sobel_dx = torch.tensor([[-1, 0, 1],\n", + " [-2, 0, 2],\n", + " [-1, 0, 1]]).to(torch.float16)\n", + "print(sobel_dx.shape)\n", + "sobel_dy = torch.tensor([[-1, -2, -1],\n", + " [0, 0, 0],\n", + " [1, 2, 1]]).to(torch.float16)\n", + "print(sobel_dy.shape)\n", + "sobel_kernel = torch.cat([sobel_dx.unsqueeze(0), sobel_dy.unsqueeze(0)], 0)\n", + "print(sobel_kernel.shape)\n", + "X = torch.randn(1,3, 1428, 1904).to(torch.float16)\n", + "\n", + "# sobel_cnn = torch.nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1, bias=False)\n", + "# sobel_cnn.weight = torch.nn.Parameter(sobel_kernel, requires_grad=False)\n", + "# sobel_cnn = sobel_cnn.to(torch.float16)\n", + "# sobel_cnn(X)\n", + "\n", + "Y = torch.nn.functional.conv2d(X, sobel_kernel, stride=1, padding=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59d85573", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", 
+ "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/demos/video/style-transfer/models/exports/cpu/candy_float16.pt b/demos/video/style-transfer/models/exports/cpu/candy_float16.pt new file mode 100644 index 000000000..b9ea6701f Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/candy_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/candy_float32.pt b/demos/video/style-transfer/models/exports/cpu/candy_float32.pt new file mode 100644 index 000000000..3905901fc Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/candy_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_18000_float16.pt b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_18000_float16.pt new file mode 100644 index 000000000..33ab0aa00 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_18000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_18000_float32.pt b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_18000_float32.pt new file mode 100644 index 000000000..7aa93300d Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_18000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_4000_float16.pt b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_4000_float16.pt new file mode 100644 index 000000000..a590c3536 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_4000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_4000_float32.pt 
b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_4000_float32.pt new file mode 100644 index 000000000..2ded75fc3 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_4000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_6000_float16.pt b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_6000_float16.pt new file mode 100644 index 000000000..ebc9bcb06 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_6000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_6000_float32.pt b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_6000_float32.pt new file mode 100644 index 000000000..80abc6605 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_0_batch_id_6000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_1_batch_id_20000_float16.pt b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_1_batch_id_20000_float16.pt new file mode 100644 index 000000000..dcf4feb9f Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_1_batch_id_20000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_1_batch_id_20000_float32.pt b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_1_batch_id_20000_float32.pt new file mode 100644 index 000000000..9289e4479 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/ckpt_epoch_1_batch_id_20000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/mosaic_float16.pt b/demos/video/style-transfer/models/exports/cpu/mosaic_float16.pt new file mode 100644 index 000000000..4baab793c Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/mosaic_float16.pt differ diff --git 
a/demos/video/style-transfer/models/exports/cpu/mosaic_float32.pt b/demos/video/style-transfer/models/exports/cpu/mosaic_float32.pt new file mode 100644 index 000000000..1d394c029 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/mosaic_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt new file mode 100644 index 000000000..1a968da33 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt new file mode 100644 index 000000000..5db556232 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt new file mode 100644 index 000000000..b310caa58 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt new 
file mode 100644 index 000000000..63473ff23 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt new file mode 100644 index 000000000..0a39be9c4 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt new file mode 100644 index 000000000..5d3d5c44f Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float16.pt new file mode 100644 index 000000000..eea465323 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float32.pt new file mode 100644 index 000000000..c007ce65a Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float32.pt differ diff --git 
a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt new file mode 100644 index 000000000..6eeb9f112 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt new file mode 100644 index 000000000..c191f84f4 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt new file mode 100644 index 000000000..ae46b430e Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt new file mode 100644 index 000000000..5aa506f4a Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt differ diff --git 
a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float16.pt new file mode 100644 index 000000000..030f2c19a Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float32.pt new file mode 100644 index 000000000..e55af10b4 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float16.pt new file mode 100644 index 000000000..18d624f32 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float32.pt new file mode 100644 index 000000000..3cc1bd399 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float16.pt 
b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float16.pt new file mode 100644 index 000000000..759bda665 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float32.pt new file mode 100644 index 000000000..2d337e11e Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt new file mode 100644 index 000000000..05484f9e8 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt new file mode 100644 index 000000000..5c67b9ab3 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/rain_princess_float16.pt b/demos/video/style-transfer/models/exports/cpu/rain_princess_float16.pt new file mode 100644 index 000000000..1fb0fc569 Binary files 
/dev/null and b/demos/video/style-transfer/models/exports/cpu/rain_princess_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/rain_princess_float32.pt b/demos/video/style-transfer/models/exports/cpu/rain_princess_float32.pt new file mode 100644 index 000000000..159ff9091 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/rain_princess_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float16.pt b/demos/video/style-transfer/models/exports/cpu/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float16.pt new file mode 100644 index 000000000..5c8fc88eb Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float32.pt b/demos/video/style-transfer/models/exports/cpu/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float32.pt new file mode 100644 index 000000000..0d1d193bf Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float16.pt b/demos/video/style-transfer/models/exports/cpu/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float16.pt new file mode 100644 index 000000000..4663b0888 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float32.pt b/demos/video/style-transfer/models/exports/cpu/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float32.pt new file mode 100644 index 000000000..e6b332ddc Binary files /dev/null and 
b/demos/video/style-transfer/models/exports/cpu/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/starry_v_bt4_1e10_ep2_float16.pt b/demos/video/style-transfer/models/exports/cpu/starry_v_bt4_1e10_ep2_float16.pt new file mode 100644 index 000000000..5ba8cddba Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/starry_v_bt4_1e10_ep2_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/starry_v_bt4_1e10_ep2_float32.pt b/demos/video/style-transfer/models/exports/cpu/starry_v_bt4_1e10_ep2_float32.pt new file mode 100644 index 000000000..cf4e461d7 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/starry_v_bt4_1e10_ep2_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/udnie_float16.pt b/demos/video/style-transfer/models/exports/cpu/udnie_float16.pt new file mode 100644 index 000000000..a10ed7fd6 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/udnie_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/cpu/udnie_float32.pt b/demos/video/style-transfer/models/exports/cpu/udnie_float32.pt new file mode 100644 index 000000000..c41b66671 Binary files /dev/null and b/demos/video/style-transfer/models/exports/cpu/udnie_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/candy_float16.pt b/demos/video/style-transfer/models/exports/mps/candy_float16.pt new file mode 100644 index 000000000..af32cac44 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/candy_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/candy_float32.pt b/demos/video/style-transfer/models/exports/mps/candy_float32.pt new file mode 100644 index 000000000..2c7d26d00 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/candy_float32.pt differ diff --git 
a/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_18000_float16.pt b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_18000_float16.pt new file mode 100644 index 000000000..466fd8c33 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_18000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_18000_float32.pt b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_18000_float32.pt new file mode 100644 index 000000000..9718f4fdb Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_18000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_4000_float16.pt b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_4000_float16.pt new file mode 100644 index 000000000..21d89acdc Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_4000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_4000_float32.pt b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_4000_float32.pt new file mode 100644 index 000000000..2ba3cde18 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_4000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_6000_float16.pt b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_6000_float16.pt new file mode 100644 index 000000000..d1e59ff22 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_6000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_6000_float32.pt b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_6000_float32.pt new file mode 100644 index 000000000..e3b90e9e1 Binary files 
/dev/null and b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_0_batch_id_6000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/ckpt_epoch_1_batch_id_20000_float16.pt b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_1_batch_id_20000_float16.pt new file mode 100644 index 000000000..6186acb07 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_1_batch_id_20000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/ckpt_epoch_1_batch_id_20000_float32.pt b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_1_batch_id_20000_float32.pt new file mode 100644 index 000000000..44d1c8ce9 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/ckpt_epoch_1_batch_id_20000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/mosaic_float16.pt b/demos/video/style-transfer/models/exports/mps/mosaic_float16.pt new file mode 100644 index 000000000..5835d2715 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/mosaic_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/mosaic_float32.pt b/demos/video/style-transfer/models/exports/mps/mosaic_float32.pt new file mode 100644 index 000000000..1b7f290cc Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/mosaic_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt new file mode 100644 index 000000000..13e7b2e80 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt differ diff --git 
a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt new file mode 100644 index 000000000..fe05c3304 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt new file mode 100644 index 000000000..f510f34fb Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt new file mode 100644 index 000000000..51a466826 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt new file mode 100644 index 000000000..47d460519 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt differ diff --git 
a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt new file mode 100644 index 000000000..b449ced58 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float16.pt new file mode 100644 index 000000000..5be568f7a Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float32.pt new file mode 100644 index 000000000..8487764d8 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt new file mode 100644 index 000000000..d8e508a68 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt 
b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt new file mode 100644 index 000000000..2d1046684 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt new file mode 100644 index 000000000..048d7d469 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt new file mode 100644 index 000000000..335f647cc Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float16.pt new file mode 100644 index 000000000..36d9256ef Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float32.pt new file mode 100644 index 000000000..a8233ac96 Binary files /dev/null and 
b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float16.pt new file mode 100644 index 000000000..ed9a2dce8 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float32.pt new file mode 100644 index 000000000..b46115943 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float16.pt new file mode 100644 index 000000000..ed170257d Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float32.pt new file mode 100644 index 000000000..1bd333e32 Binary files /dev/null and 
b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt new file mode 100644 index 000000000..1f31548f6 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt new file mode 100644 index 000000000..41e3dc5fc Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/rain_princess_float16.pt b/demos/video/style-transfer/models/exports/mps/rain_princess_float16.pt new file mode 100644 index 000000000..27949620e Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/rain_princess_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/rain_princess_float32.pt b/demos/video/style-transfer/models/exports/mps/rain_princess_float32.pt new file mode 100644 index 000000000..ccf9370d0 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/rain_princess_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float16.pt b/demos/video/style-transfer/models/exports/mps/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float16.pt new file mode 100644 index 000000000..6b66a2d55 Binary files 
/dev/null and b/demos/video/style-transfer/models/exports/mps/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float32.pt b/demos/video/style-transfer/models/exports/mps/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float32.pt new file mode 100644 index 000000000..f2f5e3022 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float16.pt b/demos/video/style-transfer/models/exports/mps/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float16.pt new file mode 100644 index 000000000..46e1c0833 Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float32.pt b/demos/video/style-transfer/models/exports/mps/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float32.pt new file mode 100644 index 000000000..b73626b7e Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/starry_h_bt4_5e11_ep4_epoch_0_batch_id_18000_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/starry_v_bt4_1e10_ep2_float16.pt b/demos/video/style-transfer/models/exports/mps/starry_v_bt4_1e10_ep2_float16.pt new file mode 100644 index 000000000..be2d3d54e Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/starry_v_bt4_1e10_ep2_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/starry_v_bt4_1e10_ep2_float32.pt b/demos/video/style-transfer/models/exports/mps/starry_v_bt4_1e10_ep2_float32.pt new file mode 100644 index 000000000..4ddb06abd Binary files /dev/null and 
b/demos/video/style-transfer/models/exports/mps/starry_v_bt4_1e10_ep2_float32.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/udnie_float16.pt b/demos/video/style-transfer/models/exports/mps/udnie_float16.pt new file mode 100644 index 000000000..e75e9d4db Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/udnie_float16.pt differ diff --git a/demos/video/style-transfer/models/exports/mps/udnie_float32.pt b/demos/video/style-transfer/models/exports/mps/udnie_float32.pt new file mode 100644 index 000000000..7b8c2f48b Binary files /dev/null and b/demos/video/style-transfer/models/exports/mps/udnie_float32.pt differ diff --git a/demos/video/style-transfer/models/mosaic.pt b/demos/video/style-transfer/models/mosaic.pt new file mode 100644 index 000000000..d9504d793 Binary files /dev/null and b/demos/video/style-transfer/models/mosaic.pt differ diff --git a/demos/video/style-transfer/models/mosaic_float16.pt b/demos/video/style-transfer/models/mosaic_float16.pt new file mode 100644 index 000000000..9718339de Binary files /dev/null and b/demos/video/style-transfer/models/mosaic_float16.pt differ diff --git a/demos/video/style-transfer/models/mosaic_float32.pt b/demos/video/style-transfer/models/mosaic_float32.pt new file mode 100644 index 000000000..e03440a7d Binary files /dev/null and b/demos/video/style-transfer/models/mosaic_float32.pt differ diff --git a/demos/video/style-transfer/models/my_module.pt b/demos/video/style-transfer/models/my_module.pt new file mode 100644 index 000000000..9cda2b666 Binary files /dev/null and b/demos/video/style-transfer/models/my_module.pt differ diff --git a/demos/video/style-transfer/models/sobel.pt b/demos/video/style-transfer/models/sobel.pt new file mode 100644 index 000000000..b62cf687a Binary files /dev/null and b/demos/video/style-transfer/models/sobel.pt differ diff --git a/demos/video/style-transfer/models/sobel_edge_float32.pt b/demos/video/style-transfer/models/sobel_edge_float32.pt 
new file mode 100644 index 000000000..3c522c17c Binary files /dev/null and b/demos/video/style-transfer/models/sobel_edge_float32.pt differ diff --git a/demos/video/style-transfer/models/sobel_float16.pt b/demos/video/style-transfer/models/sobel_float16.pt new file mode 100644 index 000000000..3b5609fd5 Binary files /dev/null and b/demos/video/style-transfer/models/sobel_float16.pt differ diff --git a/demos/video/style-transfer/models/sobel_float32.pt b/demos/video/style-transfer/models/sobel_float32.pt new file mode 100644 index 000000000..a373c695e Binary files /dev/null and b/demos/video/style-transfer/models/sobel_float32.pt differ diff --git a/demos/video/style-transfer/neural_style.py b/demos/video/style-transfer/neural_style.py new file mode 100644 index 000000000..4692f25c2 --- /dev/null +++ b/demos/video/style-transfer/neural_style.py @@ -0,0 +1,338 @@ +import argparse +import os +import sys +import time +import re + +import numpy as np +import torch +from torch.optim import Adam +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision import transforms +import torch.onnx + +import utils +from transformer_net import TransformerNet +from vgg import Vgg16 + + +def available_accelerator(): + """ + Check if accelerator is available. + """ + return torch.cuda.is_available() or torch.backends.mps.is_available() + +def current_accelerator(args): + """ + Get the current accelerator. 
+ """ + if args.accel and available_accelerator(): + if torch.cuda.is_available(): + return torch.device("cuda") + elif torch.backends.mps.is_available(): + return torch.device("mps") + else: + raise RuntimeError("No accelerator available") + else: + return torch.device("cpu") + +def check_paths(args): + try: + if not os.path.exists(args.save_model_dir): + os.makedirs(args.save_model_dir) + if args.checkpoint_model_dir is not None and not (os.path.exists(args.checkpoint_model_dir)): + os.makedirs(args.checkpoint_model_dir) + except OSError as e: + print(e) + sys.exit(1) + + +def train(args): + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + np.random.seed(args.seed) + torch.manual_seed(args.seed) + + transform = transforms.Compose([ + transforms.Resize(args.image_size), + transforms.CenterCrop(args.image_size), + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + train_dataset = datasets.ImageFolder(args.dataset, transform) + train_loader = DataLoader(train_dataset, batch_size=args.batch_size) + + transformer = TransformerNet().to(device) + optimizer = Adam(transformer.parameters(), args.lr) + mse_loss = torch.nn.MSELoss() + + vgg = Vgg16(requires_grad=False).to(device) + style_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + style = utils.load_image(args.style_image, size=args.style_size) + style = style_transform(style) + style = style.repeat(args.batch_size, 1, 1, 1).to(device) + + features_style = vgg(utils.normalize_batch(style)) + gram_style = [utils.gram_matrix(y) for y in features_style] + + for e in range(args.epochs): + transformer.train() + agg_content_loss = 0. + agg_style_loss = 0. 
+ count = 0 + for batch_id, (x, _) in enumerate(train_loader): + n_batch = len(x) + count += n_batch + optimizer.zero_grad() + + x = x.to(device) + y = transformer(x) + + y = utils.normalize_batch(y) + x = utils.normalize_batch(x) + + features_y = vgg(y) + features_x = vgg(x) + + content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2) + + style_loss = 0. + for ft_y, gm_s in zip(features_y, gram_style): + gm_y = utils.gram_matrix(ft_y) + style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :]) + style_loss *= args.style_weight + + total_loss = content_loss + style_loss + total_loss.backward() + optimizer.step() + + agg_content_loss += content_loss.item() + agg_style_loss += style_loss.item() + + if (batch_id + 1) % args.log_interval == 0: + mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format( + time.ctime(), e + 1, count, len(train_dataset), + agg_content_loss / (batch_id + 1), + agg_style_loss / (batch_id + 1), + (agg_content_loss + agg_style_loss) / (batch_id + 1) + ) + print(mesg) + + if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0: + transformer.eval().cpu() + ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth" + ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename) + torch.save(transformer.state_dict(), ckpt_model_path) + transformer.to(device).train() + + # save model + transformer.eval().cpu() + save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str( + args.content_weight) + "_" + str(args.style_weight) + ".model" + save_model_path = os.path.join(args.save_model_dir, save_model_filename) + torch.save(transformer.state_dict(), save_model_path) + + print("\nDone, trained model saved at", save_model_path) + + +def export_model(args,device=None): + if device is None: + if args.accel: + device = current_accelerator(args) + else: + device = 
torch.device("cpu") + + print(f"Using device: {device}") + + with torch.no_grad(): + style_model = TransformerNet() + state_dict = torch.load(args.model) + # remove saved deprecated running_* keys in InstanceNorm from the checkpoint + for k in list(state_dict.keys()): + if re.search(r'in\d+\.running_(mean|var)$', k): + del state_dict[k] + style_model.load_state_dict(state_dict) + style_model.to(device) + style_model.eval() + + from pathlib import Path + model_name = Path(args.model).stem + + export_dir = Path(f'models/exports/{device.type}') + export_dir.mkdir(parents=True, exist_ok=True) + + sm = torch.jit.script(style_model.to(torch.float32)) + sm.save(f"models/exports/{device.type}/{model_name}_float32.pt") + + sm = torch.jit.script(style_model.to(torch.float16)) + sm.save(f"models/exports/{device.type}/{model_name}_float16.pt") + + +def stylize(args): + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + content_image = utils.load_image(args.content_image, scale=args.content_scale) + content_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + content_image = content_transform(content_image) + content_image = content_image.unsqueeze(0).to(device) + + if args.model.endswith(".onnx"): + output = stylize_onnx(content_image, args) + else: + with torch.no_grad(): + style_model = TransformerNet() + state_dict = torch.load(args.model) + # remove saved deprecated running_* keys in InstanceNorm from the checkpoint + for k in list(state_dict.keys()): + if re.search(r'in\d+\.running_(mean|var)$', k): + del state_dict[k] + style_model.load_state_dict(state_dict) + style_model.to(device) + style_model.eval() + if args.export_onnx: + assert args.export_onnx.endswith(".onnx"), "Export model file should end with .onnx" + output = torch.onnx._export( + style_model, content_image, args.export_onnx, opset_version=11, + ).cpu() + else: + 
print('Content image shape:', content_image.shape) + output = style_model(content_image).cpu() + + utils.save_image(args.output_image, output[0]) + from pathlib import Path + model_name = Path(args.model).stem + + sm = torch.jit.script(style_model.to(torch.float32)) + sm.save(f"models/used/{model_name}_float32.pt") + + sm = torch.jit.script(style_model.to(torch.float16)) + sm.save(f"models/used/{model_name}_float16.pt") + + utils.save_image(args.output_image, output[0]) + + +def stylize_onnx(content_image, args): + """ + Read ONNX model and run it using onnxruntime + """ + + assert not args.export_onnx + + import onnxruntime + + ort_session = onnxruntime.InferenceSession(args.model) + + def to_numpy(tensor): + return ( + tensor.detach().cpu().numpy() + if tensor.requires_grad + else tensor.cpu().numpy() + ) + + ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(content_image)} + ort_outs = ort_session.run(None, ort_inputs) + img_out_y = ort_outs[0] + + return torch.from_numpy(img_out_y) + + +def main(): + main_arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style") + subparsers = main_arg_parser.add_subparsers(title="subcommands", dest="subcommand") + + train_arg_parser = subparsers.add_parser("train", help="parser for training arguments") + train_arg_parser.add_argument("--epochs", type=int, default=2, + help="number of training epochs, default is 2") + train_arg_parser.add_argument("--batch-size", type=int, default=4, + help="batch size for training, default is 4") + train_arg_parser.add_argument("--dataset", type=str, required=True, + help="path to training dataset, the path should point to a folder " + "containing another folder with all the training images") + train_arg_parser.add_argument("--style-image", type=str, default="images/style-images/mosaic.jpg", + help="path to style-image") + train_arg_parser.add_argument("--save-model-dir", type=str, required=True, + help="path to folder where trained model will be saved.") + 
train_arg_parser.add_argument("--checkpoint-model-dir", type=str, default=None, + help="path to folder where checkpoints of trained models will be saved") + train_arg_parser.add_argument("--image-size", type=int, default=256, + help="size of training images, default is 256 X 256") + train_arg_parser.add_argument("--style-size", type=int, default=None, + help="size of style-image, default is the original size of style image") + train_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + train_arg_parser.add_argument("--seed", type=int, default=42, + help="random seed for training") + train_arg_parser.add_argument("--content-weight", type=float, default=1e5, + help="weight for content-loss, default is 1e5") + train_arg_parser.add_argument("--style-weight", type=float, default=1e10, + help="weight for style-loss, default is 1e10") + train_arg_parser.add_argument("--lr", type=float, default=1e-3, + help="learning rate, default is 1e-3") + train_arg_parser.add_argument("--log-interval", type=int, default=500, + help="number of images after which the training loss is logged, default is 500") + train_arg_parser.add_argument("--checkpoint-interval", type=int, default=2000, + help="number of batches after which a checkpoint of the trained model will be created") + + eval_arg_parser = subparsers.add_parser("eval", help="parser for evaluation/stylizing arguments") + eval_arg_parser.add_argument("--content-image", type=str, required=True, + help="path to content image you want to stylize") + eval_arg_parser.add_argument("--content-scale", type=float, default=None, + help="factor for scaling down the content image") + eval_arg_parser.add_argument("--output-image", type=str, required=True, + help="path for saving the output image") + eval_arg_parser.add_argument("--model", type=str, required=True, + help="saved model to be used for stylizing the image. 
If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") + eval_arg_parser.add_argument("--export_onnx", type=str, + help="export ONNX model to a given file") + eval_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + + + export_arg_parser = subparsers.add_parser("export", help="parser for exporting trained style transfer model") + # export_arg_parser.add_argument("--content-scale", type=float, default=None, + # help="factor for scaling down the content image") + export_arg_parser.add_argument("--model", type=str, required=True, + help="saved model to be used for stylizing the image. If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") + export_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + + args = main_arg_parser.parse_args() + + if args.subcommand is None: + print("ERROR: specify either train or eval") + sys.exit(1) + if args.accel and not available_accelerator(): + print("ERROR: accelerator is not available, try running on CPU") + sys.exit(1) + if not args.accel and available_accelerator(): + print("WARNING: accelerator is available, run with --accel to enable it") + + if args.subcommand == "train": + check_paths(args) + train(args) + elif args.subcommand == "eval": + stylize(args) + elif args.subcommand == "export": + export_model(args,device=torch.device('cpu')) + export_model(args) + + +if __name__ == "__main__": + main() diff --git a/demos/video/style-transfer/neural_style_dev_server_working.py b/demos/video/style-transfer/neural_style_dev_server_working.py new file mode 100644 index 000000000..d48e9dfd1 --- /dev/null +++ b/demos/video/style-transfer/neural_style_dev_server_working.py @@ -0,0 +1,368 @@ +print('importing') +import argparse +import os +import sys +import time +import re + +import numpy as np +import torch +from torch.optim import Adam +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision import 
transforms +import torch.onnx + +import utils +from transformer_net import TransformerNet +from vgg import Vgg16 + +torch.cuda.empty_cache() + + +def available_accelerator(): + """ + Check if accelerator is available. + """ + return True + # return torch.cuda.is_available() or torch.backends.mps.is_available() + +def current_accelerator(args): + """ + Get the current accelerator. + """ + print('curr accel called') + return torch.device("cuda") + if args.accel and available_accelerator(): + if torch.cuda.is_available(): + return torch.device("cuda") + elif torch.backends.mps.is_available(): + return torch.device("mps") + else: + raise RuntimeError("No accelerator available") + else: + return torch.device("cpu") + +def check_paths(args): + try: + if not os.path.exists(args.save_model_dir): + os.makedirs(args.save_model_dir) + if args.checkpoint_model_dir is not None and not (os.path.exists(args.checkpoint_model_dir)): + os.makedirs(args.checkpoint_model_dir) + except OSError as e: + print(e) + sys.exit(1) + + +def train(args): + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + # device = torch.device('cuda') + + print(f"Using device: {device}") + + args.seed = 0 + np.random.seed(args.seed) + torch.manual_seed(args.seed) + + transform = transforms.Compose([ + transforms.Resize(args.image_size), + transforms.CenterCrop(args.image_size), + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + print('constructed transform') + train_dataset = datasets.ImageFolder(args.dataset, transform) + train_loader = DataLoader(train_dataset, batch_size=args.batch_size) + print('constructed transform loaders') + + print('making transformer.') + transformer = TransformerNet().to(device) + print('constructed TransformerNet') + optimizer = Adam(transformer.parameters(), args.lr) + print('created adam optimizer') + mse_loss = torch.nn.MSELoss() + + vgg = Vgg16(requires_grad=False).to(device) + print('constructed vgg16') + 
style_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + print('constructed style_transform') + print('block 1') + style = utils.load_image(args.style_image, size=args.style_size) + print('block 2') + style = style_transform(style) + print('block 3') + style = style.repeat(args.batch_size, 1, 1, 1).to(device) + print('block 4') + + features_style = vgg(utils.normalize_batch(style)) + print('block 5') + gram_style = [utils.gram_matrix(y) for y in features_style] + print('block 6') + + for e in range(args.epochs): + print(f'epochs: {e}') + transformer.train() + agg_content_loss = 0. + agg_style_loss = 0. + count = 0 + for batch_id, (x, _) in enumerate(train_loader): + # print(f'batch {batch_id}') + n_batch = len(x) + count += n_batch + optimizer.zero_grad() + + x = x.to(device) + y = transformer(x) + + y = utils.normalize_batch(y) + x = utils.normalize_batch(x) + + features_y = vgg(y) + features_x = vgg(x) + + content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2) + + style_loss = 0. 
+ for ft_y, gm_s in zip(features_y, gram_style): + gm_y = utils.gram_matrix(ft_y) + style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :]) + style_loss *= args.style_weight + + total_loss = content_loss + style_loss + total_loss.backward() + optimizer.step() + + agg_content_loss += content_loss.item() + agg_style_loss += style_loss.item() + + if (batch_id + 1) % args.log_interval == 0: + mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format( + time.ctime(), e + 1, count, len(train_dataset), + agg_content_loss / (batch_id + 1), + agg_style_loss / (batch_id + 1), + (agg_content_loss + agg_style_loss) / (batch_id + 1) + ) + print(mesg) + + if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0: + transformer.eval().cpu() + ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth" + ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename) + torch.save(transformer.state_dict(), ckpt_model_path) + transformer.to(device).train() + + # save model + transformer.eval().cpu() + save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str( + args.content_weight) + "_" + str(args.style_weight) + ".model" + save_model_path = os.path.join(args.save_model_dir, save_model_filename) + torch.save(transformer.state_dict(), save_model_path) + + print("\nDone, trained model saved at", save_model_path) + + +def export_model(args,device=None): + if device is None: + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + with torch.no_grad(): + style_model = TransformerNet() + state_dict = torch.load(args.model) + # remove saved deprecated running_* keys in InstanceNorm from the checkpoint + for k in list(state_dict.keys()): + if re.search(r'in\d+\.running_(mean|var)$', k): + del state_dict[k] + style_model.load_state_dict(state_dict) + 
style_model.to(device) + style_model.eval() + + from pathlib import Path + model_name = Path(args.model).stem + + export_dir = Path(f'models/exports/{device.type}') + export_dir.mkdir(parents=True, exist_ok=True) + + sm = torch.jit.script(style_model.to(torch.float32)) + sm.save(f"models/exports/{device.type}/{model_name}_float32.pt") + + sm = torch.jit.script(style_model.to(torch.float16)) + sm.save(f"models/exports/{device.type}/{model_name}_float16.pt") + + +def stylize(args): + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + content_image = utils.load_image(args.content_image, scale=args.content_scale) + content_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + content_image = content_transform(content_image) + content_image = content_image.unsqueeze(0).to(device) + + if args.model.endswith(".onnx"): + output = stylize_onnx(content_image, args) + else: + with torch.no_grad(): + style_model = TransformerNet() + state_dict = torch.load(args.model) + # remove saved deprecated running_* keys in InstanceNorm from the checkpoint + for k in list(state_dict.keys()): + if re.search(r'in\d+\.running_(mean|var)$', k): + del state_dict[k] + style_model.load_state_dict(state_dict) + style_model.to(device) + style_model.eval() + if args.export_onnx: + assert args.export_onnx.endswith(".onnx"), "Export model file should end with .onnx" + output = torch.onnx._export( + style_model, content_image, args.export_onnx, opset_version=11, + ).cpu() + else: + print('Content image shape:', content_image.shape) + output = style_model(content_image).cpu() + + utils.save_image(args.output_image, output[0]) + from pathlib import Path + model_name = Path(args.model).stem + + sm = torch.jit.script(style_model.to(torch.float32)) + sm.save(f"models/used/{model_name}_float32.pt") + + sm = torch.jit.script(style_model.to(torch.float16)) + 
sm.save(f"models/used/{model_name}_float16.pt") + + utils.save_image(args.output_image, output[0]) + + +def stylize_onnx(content_image, args): + """ + Read ONNX model and run it using onnxruntime + """ + + assert not args.export_onnx + + import onnxruntime + + ort_session = onnxruntime.InferenceSession(args.model) + + def to_numpy(tensor): + return ( + tensor.detach().cpu().numpy() + if tensor.requires_grad + else tensor.cpu().numpy() + ) + + ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(content_image)} + ort_outs = ort_session.run(None, ort_inputs) + img_out_y = ort_outs[0] + + return torch.from_numpy(img_out_y) + + +def main(): + print('im in main') + main_arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style") + subparsers = main_arg_parser.add_subparsers(title="subcommands", dest="subcommand") + + train_arg_parser = subparsers.add_parser("train", help="parser for training arguments") + train_arg_parser.add_argument("--epochs", type=int, default=2, + help="number of training epochs, default is 2") + train_arg_parser.add_argument("--batch-size", type=int, default=4, + help="batch size for training, default is 4") + train_arg_parser.add_argument("--dataset", type=str, required=True, + help="path to training dataset, the path should point to a folder " + "containing another folder with all the training images") + train_arg_parser.add_argument("--style-image", type=str, default="images/style-images/mosaic.jpg", + help="path to style-image") + train_arg_parser.add_argument("--save-model-dir", type=str, required=True, + help="path to folder where trained model will be saved.") + train_arg_parser.add_argument("--checkpoint-model-dir", type=str, default=None, + help="path to folder where checkpoints of trained models will be saved") + train_arg_parser.add_argument("--image-size", type=int, default=256, + help="size of training images, default is 256 X 256") + train_arg_parser.add_argument("--style-size", type=int, default=None, + 
help="size of style-image, default is the original size of style image") + train_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + train_arg_parser.add_argument("--seed", type=int, default=42, + help="random seed for training") + train_arg_parser.add_argument("--content-weight", type=float, default=1e5, + help="weight for content-loss, default is 1e5") + train_arg_parser.add_argument("--style-weight", type=float, default=1e10, + help="weight for style-loss, default is 1e10") + train_arg_parser.add_argument("--lr", type=float, default=1e-3, + help="learning rate, default is 1e-3") + train_arg_parser.add_argument("--log-interval", type=int, default=500, + help="number of images after which the training loss is logged, default is 500") + train_arg_parser.add_argument("--checkpoint-interval", type=int, default=2000, + help="number of batches after which a checkpoint of the trained model will be created") + + eval_arg_parser = subparsers.add_parser("eval", help="parser for evaluation/stylizing arguments") + eval_arg_parser.add_argument("--content-image", type=str, required=True, + help="path to content image you want to stylize") + eval_arg_parser.add_argument("--content-scale", type=float, default=None, + help="factor for scaling down the content image") + eval_arg_parser.add_argument("--output-image", type=str, required=True, + help="path for saving the output image") + eval_arg_parser.add_argument("--model", type=str, required=True, + help="saved model to be used for stylizing the image. 
If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") + eval_arg_parser.add_argument("--export_onnx", type=str, + help="export ONNX model to a given file") + eval_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + + + export_arg_parser = subparsers.add_parser("export", help="parser for exporting trained style transfer model") + # export_arg_parser.add_argument("--content-scale", type=float, default=None, + # help="factor for scaling down the content image") + export_arg_parser.add_argument("--model", type=str, required=True, + help="saved model to be used for stylizing the image. If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") + export_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + + args = main_arg_parser.parse_args() + + print('args parsed') + print(vars(args)) + + + + if args.subcommand is None: + print("ERROR: specify either train or eval") + sys.exit(1) + # if args.accel and not available_accelerator(): + # print("ERROR: accelerator is not available, try running on CPU") + # sys.exit(1) + # if not args.accel and available_accelerator(): + # print("WARNING: accelerator is available, run with --accel to enable it") + + if args.subcommand == "train": + check_paths(args) + print("Training...") + train(args) + elif args.subcommand == "eval": + stylize(args) + elif args.subcommand == "export": + export_model(args,device=torch.device('cpu')) + export_model(args) + + +if __name__ == "__main__": + main() diff --git a/demos/video/style-transfer/neural_style_dev_server_working_2.py b/demos/video/style-transfer/neural_style_dev_server_working_2.py new file mode 100644 index 000000000..f0930597f --- /dev/null +++ b/demos/video/style-transfer/neural_style_dev_server_working_2.py @@ -0,0 +1,378 @@ +print('importing') +import argparse +import os +import sys +import time +import re + +import numpy as np +import torch +from torch.optim import Adam +from 
torch.utils.data import DataLoader +from torchvision import datasets +from torchvision import transforms +import torch.onnx + +import utils +from transformer_net import TransformerNet +from vgg import Vgg16 + +torch.cuda.empty_cache() + + +def available_accelerator(): + """ + Check if accelerator is available. + """ + return True + # return torch.cuda.is_available() or torch.backends.mps.is_available() + +def current_accelerator(args): + """ + Get the current accelerator. + """ + print('curr accel called') + return torch.device("cuda") + if args.accel and available_accelerator(): + if torch.cuda.is_available(): + return torch.device("cuda") + elif torch.backends.mps.is_available(): + return torch.device("mps") + else: + raise RuntimeError("No accelerator available") + else: + return torch.device("cpu") + +def check_paths(args): + try: + if not os.path.exists(args.save_model_dir): + os.makedirs(args.save_model_dir) + if args.checkpoint_model_dir is not None and not (os.path.exists(args.checkpoint_model_dir)): + os.makedirs(args.checkpoint_model_dir) + except OSError as e: + print(e) + sys.exit(1) + +import math +def to_sci_not(x): + exp = math.floor(math.log10(abs(x))) + multiplier = 10 ** exp + return (x / multiplier), exp + +def get_file_name(args): + base_name,_ = os.path.splitext(os.path.basename(args.style_image)) + return f'{base_name}_ep{args.epochs}_bt{args.batch_size}_sw{to_sci_not(args.style_weight)}_cw_{to_sci_not(args.content_weight)}' + +def train(args): + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + # device = torch.device('cuda') + + print(f"Using device: {device}") + + args.seed = 0 + np.random.seed(args.seed) + torch.manual_seed(args.seed) + + transform = transforms.Compose([ + transforms.Resize(args.image_size), + transforms.CenterCrop(args.image_size), + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + print('constructed transform') + train_dataset = 
datasets.ImageFolder(args.dataset, transform) + train_loader = DataLoader(train_dataset, batch_size=args.batch_size) + print('constructed transform loaders') + + print('making transformer.') + transformer = TransformerNet().to(device) + print('constructed TransformerNet') + optimizer = Adam(transformer.parameters(), args.lr) + print('created adam optimizer') + mse_loss = torch.nn.MSELoss() + + vgg = Vgg16(requires_grad=False).to(device) + print('constructed vgg16') + style_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + print('constructed style_transform') + print('block 1') + style = utils.load_image(args.style_image, size=args.style_size) + print('block 2') + style = style_transform(style) + print('block 3') + style = style.repeat(args.batch_size, 1, 1, 1).to(device) + print('block 4') + + features_style = vgg(utils.normalize_batch(style)) + print('block 5') + gram_style = [utils.gram_matrix(y) for y in features_style] + print('block 6') + + for e in range(args.epochs): + print(f'epochs: {e}') + transformer.train() + agg_content_loss = 0. + agg_style_loss = 0. + count = 0 + for batch_id, (x, _) in enumerate(train_loader): + # print(f'batch {batch_id}') + n_batch = len(x) + count += n_batch + optimizer.zero_grad() + + x = x.to(device) + y = transformer(x) + + y = utils.normalize_batch(y) + x = utils.normalize_batch(x) + + features_y = vgg(y) + features_x = vgg(x) + + content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2) + + style_loss = 0. 
+ for ft_y, gm_s in zip(features_y, gram_style): + gm_y = utils.gram_matrix(ft_y) + style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :]) + style_loss *= args.style_weight + + total_loss = content_loss + style_loss + total_loss.backward() + optimizer.step() + + agg_content_loss += content_loss.item() + agg_style_loss += style_loss.item() + + if (batch_id + 1) % args.log_interval == 0: + mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format( + time.ctime(), e + 1, count, len(train_dataset), + agg_content_loss / (batch_id + 1), + agg_style_loss / (batch_id + 1), + (agg_content_loss + agg_style_loss) / (batch_id + 1) + ) + print(mesg) + + if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0: + transformer.eval().cpu() + ckpt_model_filename = get_file_name(args) + "_flash_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".model" + ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename) + torch.save(transformer.state_dict(), ckpt_model_path) + transformer.to(device).train() + + # save model + transformer.eval().cpu() + # save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str( + # args.content_weight) + "_" + str(args.style_weight) + ".model" + save_model_filename = get_file_name(args) + '.model' + save_model_path = os.path.join(args.save_model_dir, save_model_filename) + torch.save(transformer.state_dict(), save_model_path) + + print("\nDone, trained model saved at", save_model_path) + + +def export_model(args,device=None): + if device is None: + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + with torch.no_grad(): + style_model = TransformerNet() + state_dict = torch.load(args.model) + # remove saved deprecated running_* keys in InstanceNorm from the checkpoint + for k in list(state_dict.keys()): + if re.search(r'in\d+\.running_(mean|var)$', 
k): + del state_dict[k] + style_model.load_state_dict(state_dict) + style_model.to(device) + style_model.eval() + + from pathlib import Path + model_name = Path(args.model).stem + + export_dir = Path(f'models/exports/{device.type}') + export_dir.mkdir(parents=True, exist_ok=True) + + sm = torch.jit.script(style_model.to(torch.float32)) + sm.save(f"models/exports/{device.type}/{model_name}_float32.pt") + + sm = torch.jit.script(style_model.to(torch.float16)) + sm.save(f"models/exports/{device.type}/{model_name}_float16.pt") + + +def stylize(args): + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + content_image = utils.load_image(args.content_image, scale=args.content_scale) + content_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + content_image = content_transform(content_image) + content_image = content_image.unsqueeze(0).to(device) + + if args.model.endswith(".onnx"): + output = stylize_onnx(content_image, args) + else: + with torch.no_grad(): + style_model = TransformerNet() + state_dict = torch.load(args.model) + # remove saved deprecated running_* keys in InstanceNorm from the checkpoint + for k in list(state_dict.keys()): + if re.search(r'in\d+\.running_(mean|var)$', k): + del state_dict[k] + style_model.load_state_dict(state_dict) + style_model.to(device) + style_model.eval() + if args.export_onnx: + assert args.export_onnx.endswith(".onnx"), "Export model file should end with .onnx" + output = torch.onnx._export( + style_model, content_image, args.export_onnx, opset_version=11, + ).cpu() + else: + print('Content image shape:', content_image.shape) + output = style_model(content_image).cpu() + + utils.save_image(args.output_image, output[0]) + from pathlib import Path + model_name = Path(args.model).stem + + sm = torch.jit.script(style_model.to(torch.float32)) + sm.save(f"models/used/{model_name}_float32.pt") + + sm = 
torch.jit.script(style_model.to(torch.float16)) + sm.save(f"models/used/{model_name}_float16.pt") + + utils.save_image(args.output_image, output[0]) + + +def stylize_onnx(content_image, args): + """ + Read ONNX model and run it using onnxruntime + """ + + assert not args.export_onnx + + import onnxruntime + + ort_session = onnxruntime.InferenceSession(args.model) + + def to_numpy(tensor): + return ( + tensor.detach().cpu().numpy() + if tensor.requires_grad + else tensor.cpu().numpy() + ) + + ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(content_image)} + ort_outs = ort_session.run(None, ort_inputs) + img_out_y = ort_outs[0] + + return torch.from_numpy(img_out_y) + + +def main(): + print('im in main') + main_arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style") + subparsers = main_arg_parser.add_subparsers(title="subcommands", dest="subcommand") + + train_arg_parser = subparsers.add_parser("train", help="parser for training arguments") + train_arg_parser.add_argument("--epochs", type=int, default=2, + help="number of training epochs, default is 2") + train_arg_parser.add_argument("--batch-size", type=int, default=4, + help="batch size for training, default is 4") + train_arg_parser.add_argument("--dataset", type=str, required=True, + help="path to training dataset, the path should point to a folder " + "containing another folder with all the training images") + train_arg_parser.add_argument("--style-image", type=str, default="images/style-images/mosaic.jpg", + help="path to style-image") + train_arg_parser.add_argument("--save-model-dir", type=str, required=True, + help="path to folder where trained model will be saved.") + train_arg_parser.add_argument("--checkpoint-model-dir", type=str, default=None, + help="path to folder where checkpoints of trained models will be saved") + train_arg_parser.add_argument("--image-size", type=int, default=256, + help="size of training images, default is 256 X 256") + 
train_arg_parser.add_argument("--style-size", type=int, default=None, + help="size of style-image, default is the original size of style image") + train_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + train_arg_parser.add_argument("--seed", type=int, default=42, + help="random seed for training") + train_arg_parser.add_argument("--content-weight", type=float, default=1e5, + help="weight for content-loss, default is 1e5") + train_arg_parser.add_argument("--style-weight", type=float, default=1e10, + help="weight for style-loss, default is 1e10") + train_arg_parser.add_argument("--lr", type=float, default=1e-3, + help="learning rate, default is 1e-3") + train_arg_parser.add_argument("--log-interval", type=int, default=500, + help="number of images after which the training loss is logged, default is 500") + train_arg_parser.add_argument("--checkpoint-interval", type=int, default=2000, + help="number of batches after which a checkpoint of the trained model will be created") + + eval_arg_parser = subparsers.add_parser("eval", help="parser for evaluation/stylizing arguments") + eval_arg_parser.add_argument("--content-image", type=str, required=True, + help="path to content image you want to stylize") + eval_arg_parser.add_argument("--content-scale", type=float, default=None, + help="factor for scaling down the content image") + eval_arg_parser.add_argument("--output-image", type=str, required=True, + help="path for saving the output image") + eval_arg_parser.add_argument("--model", type=str, required=True, + help="saved model to be used for stylizing the image. 
If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") + eval_arg_parser.add_argument("--export_onnx", type=str, + help="export ONNX model to a given file") + eval_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + + + export_arg_parser = subparsers.add_parser("export", help="parser for exporting trained style transfer model") + # export_arg_parser.add_argument("--content-scale", type=float, default=None, + # help="factor for scaling down the content image") + export_arg_parser.add_argument("--model", type=str, required=True, + help="saved model to be used for stylizing the image. If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") + export_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + + args = main_arg_parser.parse_args() + + print('args parsed') + print(vars(args)) + + + + if args.subcommand is None: + print("ERROR: specify either train or eval") + sys.exit(1) + # if args.accel and not available_accelerator(): + # print("ERROR: accelerator is not available, try running on CPU") + # sys.exit(1) + # if not args.accel and available_accelerator(): + # print("WARNING: accelerator is available, run with --accel to enable it") + + if args.subcommand == "train": + check_paths(args) + print("Training...") + train(args) + elif args.subcommand == "eval": + stylize(args) + elif args.subcommand == "export": + export_model(args,device=torch.device('cpu')) + export_model(args) + + +if __name__ == "__main__": + main() diff --git a/demos/video/style-transfer/saved_models.zip b/demos/video/style-transfer/saved_models.zip new file mode 100644 index 000000000..cdb980749 Binary files /dev/null and b/demos/video/style-transfer/saved_models.zip differ diff --git a/demos/video/style-transfer/saved_models/candy.pth b/demos/video/style-transfer/saved_models/candy.pth new file mode 100644 index 000000000..f7767f70a Binary files /dev/null and 
b/demos/video/style-transfer/saved_models/candy.pth differ diff --git a/demos/video/style-transfer/saved_models/mosaic.pth b/demos/video/style-transfer/saved_models/mosaic.pth new file mode 100644 index 000000000..7b75d0d82 Binary files /dev/null and b/demos/video/style-transfer/saved_models/mosaic.pth differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5.model new file mode 100644 index 000000000..440729bd8 Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000.model new file mode 100644 index 000000000..b2d557b09 Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_1_batch_id_8000.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000.model new file mode 100644 index 000000000..33b76bff7 Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_16000.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000.model new file mode 100644 index 000000000..eb16d2571 Binary files /dev/null and 
b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_flash_epoch_2_batch_id_2000.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5.model new file mode 100644 index 000000000..f4e67ce14 Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000.model new file mode 100644 index 000000000..02481fbb4 Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_16000.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000.model new file mode 100644 index 000000000..2a362bd23 Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw1e11_cw_1e5_flash_epoch_2_batch_id_2000.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000.model new file mode 100644 index 000000000..0dbf5eefb Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_12000.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000.model 
b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000.model new file mode 100644 index 000000000..4bc641533 Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_0_batch_id_20000.model differ diff --git a/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000.model b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000.model new file mode 100644 index 000000000..c8457f6b4 Binary files /dev/null and b/demos/video/style-transfer/saved_models/nature_oil_painting_ep4_bt4_sw5e9_cw_1e5_flash_epoch_1_batch_id_8000.model differ diff --git a/demos/video/style-transfer/saved_models/oil_h_bt4_5e11_ep4_epoch_0_batch_id_18000.pth b/demos/video/style-transfer/saved_models/oil_h_bt4_5e11_ep4_epoch_0_batch_id_18000.pth new file mode 100644 index 000000000..71ed76034 Binary files /dev/null and b/demos/video/style-transfer/saved_models/oil_h_bt4_5e11_ep4_epoch_0_batch_id_18000.pth differ diff --git a/demos/video/style-transfer/saved_models/rain_princess.pth b/demos/video/style-transfer/saved_models/rain_princess.pth new file mode 100644 index 000000000..3a20f1710 Binary files /dev/null and b/demos/video/style-transfer/saved_models/rain_princess.pth differ diff --git a/demos/video/style-transfer/saved_models/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800.pth b/demos/video/style-transfer/saved_models/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800.pth new file mode 100644 index 000000000..521125012 Binary files /dev/null and b/demos/video/style-transfer/saved_models/starry_h_bt10_1e10_ep5_epoch_2_batch_id_4800.pth differ diff --git a/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2.model b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2.model new file mode 100644 index 000000000..7cbcd7233 Binary files /dev/null and 
b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2.model differ diff --git a/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_18000.pth b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_18000.pth new file mode 100644 index 000000000..c761339be Binary files /dev/null and b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_18000.pth differ diff --git a/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_4000.pth b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_4000.pth new file mode 100644 index 000000000..2a4a2ad72 Binary files /dev/null and b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_4000.pth differ diff --git a/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_6000.pth b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_6000.pth new file mode 100644 index 000000000..ba2a8f97f Binary files /dev/null and b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_0_batch_id_6000.pth differ diff --git a/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_1_batch_id_20000.pth b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_1_batch_id_20000.pth new file mode 100644 index 000000000..0374a90b0 Binary files /dev/null and b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_1_batch_id_20000.pth differ diff --git a/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_1_batch_id_8000.pth b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_1_batch_id_8000.pth new file mode 100644 index 000000000..735729f26 Binary files /dev/null and b/demos/video/style-transfer/saved_models/starry_v_bt4_1e10_ep2_epoch_1_batch_id_8000.pth differ diff --git a/demos/video/style-transfer/saved_models/udnie.pth 
b/demos/video/style-transfer/saved_models/udnie.pth new file mode 100644 index 000000000..85e384569 Binary files /dev/null and b/demos/video/style-transfer/saved_models/udnie.pth differ diff --git a/demos/video/style-transfer/sobel.ipynb b/demos/video/style-transfer/sobel.ipynb new file mode 100644 index 000000000..1eb6e48ee --- /dev/null +++ b/demos/video/style-transfer/sobel.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 11, + "id": "8299f21a", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "\n", + "sobel_dx = torch.tensor([[-1, 0, 1],\n", + " [-2, 0, 2],\n", + " [-1, 0, 1]], dtype=torch.float32)\n", + "\n", + "sobel_dy = torch.tensor([[-1, -2, -1],\n", + " [ 0, 0, 0],\n", + " [ 1, 2, 1]], dtype=torch.float32)\n", + "\n", + "kernel = torch.stack([sobel_dx, sobel_dy]) # [2,3,3]\n", + "kernel = kernel.unsqueeze(1).repeat(1, 3, 1, 1) # [2,3,3,3]\n", + "\n", + "def sobel_filter(img: torch.Tensor) -> torch.Tensor:\n", + " \"\"\"\n", + " img: Nx3xHxW float32 in [0,1] or [0,255]\n", + " returns: Nx2xHxW (channel 0 = ∂I/∂x, channel 1 = ∂I/∂y)\n", + " \"\"\"\n", + " return F.conv2d(img, kernel, padding=1)\n", + "\n", + "def sobel_magnitude(img: torch.Tensor) -> torch.Tensor:\n", + " g = sobel_filter(img)\n", + " return (g ** 2).sum(1, keepdim=True).sqrt()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "76b4a97a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([1, 2, 1428, 1904])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "img = torch.rand(1, 3, 1428, 1904)\n", + "sobel_filter(img).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2bb81778", + "metadata": {}, + "outputs": [], + "source": [ + "class Sobel(torch.nn.Module):\n", + " def __init__(self):\n", + " super(Sobel, self).__init__()\n", + " sobel_dx 
= torch.tensor([[-1, 0, 1],\n", + " [-2, 0, 2],\n", + " [-1, 0, 1]], dtype=torch.float32)\n", + "\n", + " sobel_dy = torch.tensor([[-1, -2, -1],\n", + " [ 0, 0, 0],\n", + " [ 1, 2, 1]], dtype=torch.float32)\n", + "\n", + " sobel_kernel = torch.stack([sobel_dx, sobel_dy]) # [2,3,3]\n", + " sobel_kernel = sobel_kernel.unsqueeze(1).repeat(1, 3, 1, 1) # [2,3,3,3]\n", + " sobel_kernel = sobel_kernel.to(torch.float32)\n", + "\n", + " self.sobel_kernel = torch.nn.Parameter(sobel_kernel, requires_grad=False)\n", + " # self.sobel_cnn = torch.nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1, bias=False).to(torch.float16)\n", + " # self.sobel_cnn.weight = torch.nn.Parameter(sobel_kernel, requires_grad=False)\n", + "\n", + " def forward(self, x):\n", + " # return self.sobel_cnn(x)\n", + " return F.conv2d(x, self.sobel_kernel, padding=1,stride=1)\n", + "\n", + "sobel = Sobel().to('mps').to(torch.float32)\n", + "sm = torch.jit.script(sobel)\n", + "sm.save(\"models/sobel_float32.pt\")\n", + "sobel = Sobel().to('mps').to(torch.float16)\n", + "sm = torch.jit.script(sobel)\n", + "sm.save(\"models/sobel_float16.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48b8c033", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "805e5f83", + "metadata": {}, + "outputs": [], + "source": [ + "m = torch.jit.load(\"models/sobel.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2b79f222", + "metadata": {}, + "outputs": [], + "source": [ + "img = torch.rand(1, 3, 1428, 1904).to(torch.float16)\n", + "existing_model_output = sobel(img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b29aea58", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "torch.Size([1, 2, 1428, 1904])\n", + "torch.Size([1, 2, 1428, 1904])\n" + ] + } + ], + "source": [ + "loaded_model_output = m(img)\n", + 
"print(torch.allclose(existing_model_output, loaded_model_output, atol=1e-5))\n", + "print(existing_model_output.shape)\n", + "print(loaded_model_output.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24038040", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-05-06 14:52:39.256 Python[52849:2569153] WARNING: AVCaptureDeviceTypeExternal is deprecated for Continuity Cameras. Please use AVCaptureDeviceTypeContinuityCamera and add NSCameraUseContinuityCameraDeviceType to your Info.plist.\n" + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", + "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", + "\u001b[1;31mClick here for more info. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], + "source": [ + "import cv2\n", + "\n", + "# Open the default camera\n", + "cam = cv2.VideoCapture(0)\n", + "\n", + "# Get the default frame width and height\n", + "frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))\n", + "frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))\n", + "\n", + "# Define the codec and create VideoWriter object\n", + "fourcc = cv2.VideoWriter_fourcc(*'mp4v')\n", + "out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (frame_width, frame_height))\n", + "\n", + "while True:\n", + " ret, frame = cam.read()\n", + "\n", + " # Write the frame to the output file\n", + " out.write(frame)\n", + "\n", + " # Display the captured frame\n", + " cv2.imshow('Camera', frame)\n", + "\n", + " # Press 'q' to exit the loop\n", + " if cv2.waitKey(1) == ord('q'):\n", + " break\n", + "\n", + "# Release the capture and writer objects\n", + "cam.release()\n", + "out.release()\n", + "cv2.destroyAllWindows()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad2f6628", + 
"metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/demos/video/style-transfer/sobel.py b/demos/video/style-transfer/sobel.py new file mode 100644 index 000000000..1f920047f --- /dev/null +++ b/demos/video/style-transfer/sobel.py @@ -0,0 +1,267 @@ +import cv2 +import torch +import numpy as np +import utils +import torchvision +import argparse + + + +# default_device = torch.device('cpu') +# if torch.backends.mps.is_available(): +# default_device = torch.device('mps') +# print('using mps') + +# if torch.backends.cuda.is_available(): +# default_device = torch.device('cuda') +# print('using cuda') + +# print('using default device:', default_device) + +# torch.set_default_device(default_device) + + + +# Open the default camera +cam = cv2.VideoCapture(0) + +# Get the default frame width and height +frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH)) +frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT)) + +# Define the codec and create VideoWriter object +fourcc = cv2.VideoWriter_fourcc(*'mp4v') +# out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (frame_width, frame_height)) + + +import torch.nn.functional as F + + +# sobel_dx = torch.tensor([[-1, 0, 1], +# [-2, 0, 2], +# [-1, 0, 1]], dtype=torch.float32) + +# sobel_dy = torch.tensor([[-1, -2, -1], +# [ 0, 0, 0], +# [ 1, 2, 1]], dtype=torch.float32) + +# kernel = torch.cat([sobel_dx.unsqueeze(0), sobel_dy.unsqueeze(0)],0) # [2,3,3] +# kernel = kernel.unsqueeze(1).to('mps') # [2,3,3,3] + +# # kernel = kernel.unsqueeze(1).repeat(1, 3, 1, 1).to('mps') # [2,3,3,3] + +# def sobel_filter(img: 
torch.Tensor) -> torch.Tensor: +# """ +# img: Nx3xHxW float32 in [0,1] or [0,255] +# returns: Nx2xHxW (channel 0 = ∂I/∂x, channel 1 = ∂I/∂y) +# """ +# return F.conv2d(img, kernel, padding=1) + +# def sobel_magnitude(img: torch.Tensor) -> torch.Tensor: +# g = sobel_filter(img) +# return (g ** 2).sum(1, keepdim=True).sqrt() + + +def sobel_edges(rgb: torch.Tensor) -> torch.Tensor: + """ + rgb : (N, 3, H, W) float tensor in the range [0, 1] or [-1, 1] + (any range is fine as long as it's float) + + Returns + ------- + edges : (N, 3, H, W) tensor – per‑channel Sobel edge magnitude, + same H and W as the input (no cropping or padding artifacts). + """ + # --- 1. Build Sobel kernels ------------------------------------------------ + sobel_x = torch.tensor([[-1., 0., 1.], + [-2., 0., 2.], + [-1., 0., 1.]],requires_grad=False).to('mps') + sobel_y = sobel_x.T + + # Each colour channel must be convolved with *its own* kernel. + # We therefore use depth‑wise (grouped) convolution with groups=3. + # Weight shape for conv2d: (out_channels, in_channels/groups, kH, kW) + # Here: out_channels = in_channels = 3 and groups = 3 + weight_x = sobel_x.expand(3, 1, 3, 3).to(rgb) # (3,1,3,3) + weight_y = sobel_y.expand(3, 1, 3, 3).to(rgb) + + # --- 2. Apply the 2D convolutions ----------------------------------------- + # Kernel size is 3 ⇒ one‑pixel border is enough to keep size unchanged. + grad_x = F.conv2d(rgb, weight_x, padding=1, groups=3) + grad_y = F.conv2d(rgb, weight_y, padding=1, groups=3) + + # --- 3. Edge magnitude per channel ---------------------------------------- + # A small epsilon avoids a zero‑gradient sqrt warning. 
+ edges = torch.sqrt(grad_x**2 + grad_y**2 + 1e-6) + + return edges + + +def tensor_to_bgr(frame_tensor, *, undo_normalise=False, mean=None, std=None): + """ + Args + ---- + frame_tensor : torch.Tensor + (C,H,W) or (1,C,H,W) ― float or half ― RGB + undo_normalise : bool + True if you previously applied (x - mean) / std + mean, std : list/tuple of 3 floats + Same numbers you used for normalising (e.g. ImageNet) + Returns + ------- + frame_bgr : np.ndarray (H,W,3) uint8 BGR contiguous + """ + # 1) squeeze batch dimension if present + if frame_tensor.ndim == 4: + frame_tensor = frame_tensor[0] + + # 2) move to CPU & float32 for math + img = frame_tensor.detach() + + # 3) (optional) reverse mean/std normalisation + if undo_normalise: + if mean is None or std is None: + raise ValueError("Supply mean and std to undo normalisation") + mean = torch.tensor(mean).to(img).view(3,1,1) + std = torch.tensor(std).to(img).view(3,1,1) + img = img * std + mean + + # 4) scale back to 0‑255, clamp, uint8 + img = (img * 255.0) + # img = img # .to(torch.float16) + img = img.clamp(0,255).byte() + + # 5) channel‑last & numpy + img = img.permute(1,2,0).cpu().numpy() # H,W,C RGB + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) # → BGR + img = np.ascontiguousarray(img) # ensure OpenCV‑happy + return img + + +def undo_normalize(tensor): + mean = (0.485, 0.456, 0.406) # ImageNet defaults (RGB) + std = (0.229, 0.224, 0.225) + mean = torch.tensor(mean, dtype=tensor.dtype, device=tensor.device)[:, None, None] + std = torch.tensor(std, dtype=tensor.dtype, device=tensor.device)[:, None, None] + return (tensor * std + mean).clamp(0, 1) + + +class Sobel(torch.nn.Module): + def __init__(self): + super(Sobel, self).__init__() + + # self.sobel_kernel = torch.nn.Parameter(sobel_kernel, requires_grad=False) + # self.sobel_cnn = torch.nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1, bias=False).to(torch.float16) + # self.sobel_cnn.weight = torch.nn.Parameter(sobel_kernel, requires_grad=False) + + def 
forward(self, rgb): + # return self.sobel_cnn(x) + sobel_x = torch.tensor([[-1., 0., 1.], + [-2., 0., 2.], + [-1., 0., 1.]],requires_grad=False) + sobel_y = sobel_x.T + + # Each colour channel must be convolved with *its own* kernel. + # We therefore use depth‑wise (grouped) convolution with groups=3. + # Weight shape for conv2d: (out_channels, in_channels/groups, kH, kW) + # Here: out_channels = in_channels = 3 and groups = 3 + weight_x = sobel_x.expand(3, 1, 3, 3).to(rgb) # (3,1,3,3) + weight_y = sobel_y.expand(3, 1, 3, 3).to(rgb) + + # --- 2. Apply the 2D convolutions ----------------------------------------- + # Kernel size is 3 ⇒ one‑pixel border is enough to keep size unchanged. + grad_x = F.conv2d(rgb, weight_x, padding=1, groups=3) + grad_y = F.conv2d(rgb, weight_y, padding=1, groups=3) + + # --- 3. Edge magnitude per channel ---------------------------------------- + # A small epsilon avoids a zero‑gradient sqrt warning. + edges = torch.sqrt(grad_x**2 + grad_y**2 + 1e-6) + # edges = grad_x + grad_y + return edges + + +sobel = Sobel().to('mps').to(torch.float32) +sm = torch.jit.script(sobel) +sm.save("models/sobel_edge_float32.pt") + +sm = torch.jit.load("models/sobel_edge_float32.pt") +# sm = torch.jit.load("models/mosaic_float32.pt") +# sm.to('mps') + +mosaic = torch.jit.load("models/mosaic_float16.pt") +mosaic.to('mps') + +# print(sm) + +import sys +# sys.exit(0) +import time + +ticks = 1 + +while True: + ret, frame_bgr = cam.read() + + # Write the frame to the output file + # out.write(frame) + + frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB) + + # 3) Ensure the array is contiguous (torch needs it) ------------------------- + frame_rgb = np.ascontiguousarray(frame_rgb) + + # 4) numpy -> torch, move channels, scale, add batch if wanted -------------- + tensor = torch.from_numpy(frame_rgb) # H x W x C, uint8 → int tensor + tensor = tensor.to("mps", non_blocking=True) + + tensor = tensor.permute(2, 0, 1) # C x H x W + tensor = 
tensor.to(torch.float32).div(255.0) # float32, [0,1] + + # normalize tensor to ImageNet mean and std + # mean = (0.485, 0.456, 0.406) # ImageNet defaults (RGB) + # std = (0.229, 0.224, 0.225) + # mean = torch.tensor(mean, dtype=tensor.dtype, device=tensor.device)[:, None, None] + # std = torch.tensor(std, dtype=tensor.dtype, device=tensor.device)[:, None, None] + # tensor.sub_(mean).div_(std) + + # 5) (Optional) add a batch dim and push to GPU ------------------------------ + tensor = tensor.unsqueeze(0) # 1 x C x H x W + + # if ticks == 3: + # tensor = tensor.to(torch.float16) + # mosaic = torch.jit.load("models/mosaic_float16.pt") + # mosaic.to('mps') + # mosaic_output = mosaic(tensor) / 255.0 + # # mosaic_output = undo_normalize(mosaic_output) + # print('input:',tensor.shape,tensor.dtype) + # print('mosaic output:',mosaic_output.shape,mosaic_output.dtype) + # torchvision.utils.save_image(tensor[0], 'input_tensor.png') + # torchvision.utils.save_image(mosaic_output[0], 'mosaic_output.png') + + # sys.exit(0) + + output_tensor = sm(tensor.to(torch.float16)) + # print('input:',tensor.shape,tensor.dtype) + # print('output:',output_tensor.shape) + + + # frame_bgr_out = tensor_to_bgr(output_tensor, undo_normalise=True,mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + frame_bgr_out = tensor_to_bgr(output_tensor) + + # Display the captured frame + cv2.imshow('Camera', frame_bgr_out) + + # time.sleep(1.0) + + # Press 'q' to exit the loop + # if ticks > 10: + # break + + if cv2.waitKey(1) == ord('q'): + break + ticks += 1 + +# Release the capture and writer objects +cam.release() +# out.release() +cv2.destroyAllWindows() \ No newline at end of file diff --git a/demos/video/style-transfer/style_transfer.cpp b/demos/video/style-transfer/style_transfer.cpp new file mode 100644 index 000000000..09fd1462e --- /dev/null +++ b/demos/video/style-transfer/style_transfer.cpp @@ -0,0 +1,312 @@ +#include +#include +#include +#include +#include + +#include +#include +#include 
+#include +#include + + + +int run_webcam_model(torch::jit::Module& module, int cam_index, int max_fps, bool is_video_loop, std::string vid_path); + +static torch::Device default_device_st = torch::Device(torch::kMPS); + + +torch::jit::Module load_model(const std::string& model_path) { + std::cout << "Loading model from path: " << model_path << std::endl; + torch::jit::Module module; + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). + module = torch::jit::load(model_path); + std::cout << "Model loaded successfully." << std::endl; + + std::cout << "Moving model to device..." << std::endl; + auto device = cvtool::get_default_device(); + module.to(device); + std::cout << "Model moved to device." << std::endl; + + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n" << e.msg(); + } + std::cout << "Model loaded successfully." << std::endl; + return module; + +} + +torch::Tensor preprocess_input(const torch::Tensor& input) { + // Preprocess the input tensor as needed + // For example, normalize the input tensor + // auto mean = torch::tensor({0.485, 0.456, 0.406}).view({1, 3, 1, 1}); + // auto std = torch::tensor({0.229, 0.224, 0.225}).view({1, 3, 1, 1}); + // return (input - mean) / std; + return input; +} + +at::Tensor run_model(torch::jit::Module& module, const at::Tensor& input) { + + auto input_dtype = input.dtype(); + // std::cout.flush(); + // std::cout << "Input dtype: " << input.dtype() << std::endl; + // std::cout << "Input sizes: " << input.sizes() << std::endl; + // std::cout << "Input device: " << input.device() << std::endl; + // std::cout.flush(); + // std::system("pause"); + + // auto model_dtype = module.dtype(); + // std::cout << "Module: " << module << std::endl; + + + // module.to(torch::kMPS); + // module.eval(); + + + std::vector inputs; + inputs.push_back(input); + + // std::cout << "Input tensor: " << input.sizes() << std::endl; + // auto output = module.forward(inputs).toTensor(); + auto output 
= module.forward(inputs).toTensor(); + + // std::cout << "Model output: " << output.sizes() << std::endl; + return output; +} + +torch::Tensor eval_model(torch::jit::Module& module, const torch::Tensor& input) { + std::vector inputs; + inputs.push_back(input); + + // Forward pass + auto output = module.forward(inputs).toTensor(); + + return output; +} + +torch::indexing::Slice slice() { + return torch::indexing::Slice(); +} + +torch::Tensor test_channel(torch::Tensor& input) { + std::cout << "Input device: " << input.device() << std::endl; + + int channel_to_disable = 0; + // auto img = input.select(1, channel_to_disable).zero(); + auto output = input.clone(); + output.select(1, channel_to_disable).zero_(); + // auto output = img; + return output; +} + + + +int main() { + // Load the model + // std::string model_path = "style-transfer/models/my_module.pt"; + // torch::jit::Module module = load_model(model_path); + // torch::Tensor input = torch::randn({10}); + // torch::Tensor output = run_model(module, input); + + + if (torch::mps::is_available()) { + default_device_st = torch::Device(torch::kMPS); + std::cout << "MPS is available and set as the default device." << std::endl; + } else { + default_device_st = torch::Device(torch::kCPU); + std::cout << "MPS is not available. Using CPU instead. 
" << std::endl; + } + cvtool::set_default_device(default_device_st); + + auto device = cvtool::get_default_device(); + + // default_device = default_device_st; + + // std::string model_path = "style-transfer/models/mosaic_float32.pt"; + std::string model_path = "style-transfer/models/mosaic_float16.pt" ; + torch::jit::Module module = load_model(model_path); +/* + // module.to(torch::kFloat16); + torch::Tensor input = torch::randn({1, 3, 1080, 1920}, device); + std::cout << "Input tensor: " << input.sizes() << std::endl; + std::cout << "Input tensor dtype: " << input.dtype() << std::endl; + std::cout << "Input tensor device: " << input.device() << std::endl; + // std::cout << "Model device: " << module.device() << std::endl; + // std::cout << "Model dtype: " << module.dtype() << std::endl; + + torch::Tensor output = run_model(module, input); + + // Print the output tensor + std::cout << "Output tensor: " << output.sizes() << std::endl; +*/ + return run_webcam_model(module, 0, 60, false, ""); + +} + + +int run_webcam_model(torch::jit::Module& module, int cam_index, int max_fps, bool is_video_loop, std::string vid_path = "") { + + torch::Device device = cvtool::get_default_device(); + + module.eval(); + module.to(device); + + bool video_loop = false; + cv::VideoCapture cap; + if (is_video_loop) { + cap = open_camera(vid_path); + video_loop = true; + } else { + cap = open_camera(cam_index); + } + + auto camera_resolution = get_camera_resolution(cap); + int height = std::get<0>(camera_resolution); + int width = std::get<1>(camera_resolution); + + + cv::Mat frame_bgr; + cv::Mat output_bgr; + + const auto to_mps = [&](torch::Tensor& t){ return device.is_mps() ? 
t.to(device, /*non_blocking=*/true) : t; }; + + torch::NoGradGuard no_grad; // inference only + + std::chrono::time_point start_total = std::chrono::system_clock::now(); + std::chrono::time_point last_update = std::chrono::system_clock::now(); + + size_t frame_count = 0; + size_t last_frame_count = 0; + + while (true) { + // std::cout << "\r[INFO] Processing frame... " << frame_count + 1 << std::flush; + + if (!cap.read(frame_bgr) || frame_bgr.empty()) { + if (video_loop && frame_count > 0) { + cap = open_camera(vid_path); + frame_count = 0; + last_frame_count = 0; + start_total = std::chrono::system_clock::now(); + last_update = std::chrono::system_clock::now(); // ??? not sure + cap.set(cv::CAP_PROP_POS_FRAMES, 0); + std::cout << "[INFO] Replaying video..." << std::endl; + continue; + } + std::cerr << "[WARN] Empty frame, exiting" << std::endl; + break; + } + + + ++frame_count; + const std::chrono::time_point now = std::chrono::system_clock::now(); + auto delta = now - last_update; + double delta_time = std::chrono::duration_cast>(delta).count(); + auto fps = 1.0 / delta_time; + std::cout << "\r[INFO] FPS: " << fps << " fps" << std::flush; + double sleep_time = (1.0 / ((double)max_fps)) - delta_time; + std::this_thread::sleep_for(std::chrono::duration(sleep_time)); + + + + bool skip = true; + if (skip) { + + auto start = std::chrono::high_resolution_clock::now(); + + cv::Mat frame_rgb; + cv::cvtColor(frame_bgr, frame_rgb, cv::COLOR_BGR2RGB); + + auto input_tensor = to_tensor(frame_rgb,device); + + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + std::cout << "Elapsed time (1): " << elapsed.count() * 1000.0 << " ms\n"; + + + // // // works + start = std::chrono::high_resolution_clock::now(); + auto input = input_tensor.div_(255.0); + end = std::chrono::high_resolution_clock::now(); + elapsed = end - start; + std::cout << "Elapsed time (2): " << elapsed.count() * 1000.0 << " ms\n"; + + // auto input = 
input_tensor.to(device,true).to(torch::kFloat16) / 255.0; + + start = std::chrono::high_resolution_clock::now(); + auto model_output = run_model(module,input).div_(255.0); + end = std::chrono::high_resolution_clock::now(); + elapsed = end - start; + std::cout << "Elapsed time (3): " << elapsed.count() * 1000.0 << " ms\n"; + + + start = std::chrono::high_resolution_clock::now(); + output_bgr = to_mat(model_output, cv::COLOR_RGB2BGR); + end = std::chrono::high_resolution_clock::now(); + elapsed = end - start; + std::cout << "Elapsed time (4): " << elapsed.count() * 1000.0 << " ms\n"; + + // // works + // auto processed_input = prepped_input; + // auto out_processed_input = processed_input.to(torch::kCPU,true); + // frame_rgb = to_mat(out_processed_input); + // cv::cvtColor(frame_rgb, output_bgr, cv::COLOR_RGB2BGR); + + // works + // auto out_mps_tensor = mps_tensor.to(torch::kCPU,true); + // frame_rgb = to_mat(out_mps_tensor); + // cv::cvtColor(frame_rgb, output_bgr, cv::COLOR_RGB2BGR); + + + + // // works + // frame_rgb = to_mat(input_tensor); + // cv::cvtColor(frame_rgb, output_bgr, cv::COLOR_RGB2BGR); + + + + } else { + + auto input_tensor = to_tensor(frame_bgr); + + auto mps_tensor = input_tensor.to(device,true); + + auto prepped_input = preprocess_input(mps_tensor); + + // Forward pass + auto output = eval_model(module, prepped_input); + auto processed_output = output.to(torch::kCPU,true); + + output_bgr = to_mat(processed_output); + } + + cv::imshow("webcam", output_bgr); + + // Display FPS + + // std::cout << "[INFO] dt: " << delta_time << std::endl; + // std::cout << "[INFO] FPS: " << fps << std::endl; + + + + + // std::thread::sleep_for(std::chrono::milliseconds(700)); + + // std::thread::sleep_for() + + + // std::thread::sleep_for(std::chrono::milliseconds(expected_time_index - (delta_time + last_time_index))); + + + last_frame_count = frame_count; + last_update = now; // std::chrono::system_clock::now(); + if (cv::waitKey(1) == 27) { // ESC key + break; + 
} + } + + cap.release(); + cv::destroyAllWindows(); + return 0; +} \ No newline at end of file diff --git a/demos/video/style-transfer/style_transfer_test.py b/demos/video/style-transfer/style_transfer_test.py new file mode 100644 index 000000000..7bc5436bf --- /dev/null +++ b/demos/video/style-transfer/style_transfer_test.py @@ -0,0 +1,205 @@ +import argparse +import os +import sys +import time +from pathlib import Path + + +import numpy as np +import torch +import torch.nn as nn +import torchvision +import cv2 +import utils + +def dir_path(path): + if os.path.isdir(path): + return path + else: + raise argparse.ArgumentTypeError(f'readable_dir:{path} is not a valid path') + +def torch_device(device_name): + if device_name == 'cpu': + return torch.device('cpu') + elif device_name == 'cuda': + if torch.cuda.is_available(): + return torch.device('cuda') + else: + raise argparse.ArgumentTypeError(f'cuda is not available') + elif device_name == 'mps': + if torch.backends.mps.is_available(): + return torch.device('mps') + else: + raise argparse.ArgumentTypeError(f'mps is not available') + elif device_name == None: + if torch.backends.mps.is_available(): + return torch.device('mps') + elif torch.cuda.is_available(): + return torch.device('cuda') + else: + return torch.device('cpu') + else: + raise argparse.ArgumentTypeError(f'unknown device name: {device_name}') + +parser = argparse.ArgumentParser(description='Process files in a directory.') +parser.add_argument('--device', dest='device', type=torch_device, default=None, + help='Device to use for computation (default: cpu).') + +parser.add_argument('--model-file', type=Path, required=True, + help='Path to the model file (e.g., .pt).') + +parser.add_argument('--use-webcam', action='store_true', + help='Use webcam for input (default: False).') + +parser.add_argument('--input-video-file', type=Path, help='Path to the input video file (default: webcam).', default=None) +parser.add_argument('--output-video-file', type=Path, 
help='Path to the output video file (default: webcam).', default=None) + +parser.add_argument('--show-output', action='store_true', + help='Show output video in a window (default: False).') + + +args = parser.parse_args() + +arg_dict = vars(args) +for arg in arg_dict: + print(f'args.{arg}: {arg_dict[arg]}') + +if args.use_webcam or args.input_video_file: + if args.input_video_file and args.use_webcam: + raise argparse.ArgumentTypeError('Cannot use both webcam and input video file at the same time.') + if args.input_video_file: + print('using input video file:', args.input_video_file) + args.use_webcam = False + else: + args.input_video_file = None + args.use_webcam = True + print('using webcam for input video') + + + +default_device = args.device +if default_device is None: + default_device = torch.device('cpu') + if torch.backends.mps.is_available(): + default_device = torch.device('mps') + print('using mps') + + if torch.cuda.is_available(): + default_device = torch.device('cuda') + print('using cuda') + +print('using default device:', default_device) + +torch.set_default_device(default_device) + + +# Open the default camera +cam = cv2.VideoCapture(str(args.input_video_file) if args.input_video_file else 0) +if not cam.isOpened(): + print("Error: Could not open video.") + sys.exit() + +# Get the default frame width and height +frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH)) +frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT)) +capture_fps = int(cam.get(cv2.CAP_PROP_FPS)) +# Define the codec and create VideoWriter object +if args.output_video_file: + if not args.output_video_file.exists(): + args.output_video_file.parent.mkdir(parents=True, exist_ok=True) + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + out = out = cv2.VideoWriter(str(args.output_video_file), fourcc, capture_fps, (frame_width, frame_height)) + + + +model = torch.jit.load(str(args.model_file)) +model = model.to(default_device) +model.eval() +print('Loaded model:', args.model_file) + + 
+done_writing_to_output = False + +while True: + ret, frame_bgr = cam.read() + if not ret: + if args.use_webcam: + print("Error: Could not read frame from webcam.") + if args.input_video_file and args.show_output: + done_writing_to_output = True + cam = cv2.VideoCapture(str(args.input_video_file)) + ret, frame_bgr = cam.read() + if not ret: + print("Error: Could not read frame from input video file.") + break + else: + continue + else: + break + + + + frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB) + + # 3) Ensure the array is contiguous (torch needs it) ------------------------- + frame_rgb = np.ascontiguousarray(frame_rgb) + + # 4) numpy -> torch, move channels, scale, add batch if wanted -------------- + tensor = torch.from_numpy(frame_rgb) # H x W x C, uint8 → int tensor + tensor = tensor.to(default_device, non_blocking=True) + + tensor = tensor.permute(2, 0, 1) # C x H x W + tensor = tensor.to(torch.float32).div(255.0) # float32, [0,1] + + # normalize tensor to ImageNet mean and std + # mean = (0.485, 0.456, 0.406) # ImageNet defaults (RGB) + # std = (0.229, 0.224, 0.225) + # mean = torch.tensor(mean, dtype=tensor.dtype, device=tensor.device)[:, None, None] + # std = torch.tensor(std, dtype=tensor.dtype, device=tensor.device)[:, None, None] + # tensor.sub_(mean).div_(std) + + # 5) (Optional) add a batch dim and push to GPU ------------------------------ + tensor = tensor.unsqueeze(0) # 1 x C x H x W + + # if ticks == 3: + # tensor = tensor.to(torch.float16) + # mosaic = torch.jit.load("models/mosaic_float16.pt") + # mosaic.to('mps') + # mosaic_output = mosaic(tensor) / 255.0 + # # mosaic_output = undo_normalize(mosaic_output) + # print('input:',tensor.shape,tensor.dtype) + # print('mosaic output:',mosaic_output.shape,mosaic_output.dtype) + # torchvision.utils.save_image(tensor[0], 'input_tensor.png') + # torchvision.utils.save_image(mosaic_output[0], 'mosaic_output.png') + + # sys.exit(0) + + + if args.model_file.name == 'sobel_edge_float32.pt': + 
output_tensor = model(tensor.to(torch.float16)) + else: + output_tensor = model(tensor.to(torch.float16)) / 255.0 + # print('input:',tensor.shape,tensor.dtype) + # print('output:',output_tensor.shape) + + + # frame_bgr_out = tensor_to_bgr(output_tensor, undo_normalise=True,mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + frame_bgr_out = utils.tensor_to_bgr(output_tensor) + + + if args.show_output or args.use_webcam: + cv2.imshow('Frame', frame_bgr_out) + if cv2.waitKey(25) & 0xFF == ord('q'): + break + + if args.output_video_file and done_writing_to_output: + out.write(frame_bgr_out) + + +# Release the capture and writer objects +if args.output_video_file: + out.release() +if args.use_webcam: + cam.release() +if args.show_output: + cv2.destroyAllWindows() diff --git a/demos/video/style-transfer/transformer_net.py b/demos/video/style-transfer/transformer_net.py new file mode 100644 index 000000000..c0f69a9a3 --- /dev/null +++ b/demos/video/style-transfer/transformer_net.py @@ -0,0 +1,103 @@ +import torch + + +class TransformerNet(torch.nn.Module): + def __init__(self): + super(TransformerNet, self).__init__() + # Initial convolution layers + self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1) + self.in1 = torch.nn.InstanceNorm2d(32, affine=True) + self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2) + self.in2 = torch.nn.InstanceNorm2d(64, affine=True) + self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2) + self.in3 = torch.nn.InstanceNorm2d(128, affine=True) + # Residual layers + self.res1 = ResidualBlock(128) + self.res2 = ResidualBlock(128) + self.res3 = ResidualBlock(128) + self.res4 = ResidualBlock(128) + self.res5 = ResidualBlock(128) + # Upsampling Layers + self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2) + self.in4 = torch.nn.InstanceNorm2d(64, affine=True) + self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2) + self.in5 = torch.nn.InstanceNorm2d(32, affine=True) + self.deconv3 = 
ConvLayer(32, 3, kernel_size=9, stride=1) + # Non-linearities + self.relu = torch.nn.ReLU() + + def forward(self, X): + y = self.relu(self.in1(self.conv1(X))) + y = self.relu(self.in2(self.conv2(y))) + y = self.relu(self.in3(self.conv3(y))) + y = self.res1(y) + y = self.res2(y) + y = self.res3(y) + y = self.res4(y) + y = self.res5(y) + y = self.relu(self.in4(self.deconv1(y))) + y = self.relu(self.in5(self.deconv2(y))) + y = self.deconv3(y) + return y + + +class ConvLayer(torch.nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride): + super(ConvLayer, self).__init__() + reflection_padding = kernel_size // 2 + self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding) + self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride) + + def forward(self, x): + out = self.reflection_pad(x) + out = self.conv2d(out) + return out + + +class ResidualBlock(torch.nn.Module): + """ResidualBlock + introduced in: https://arxiv.org/abs/1512.03385 + recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html + """ + + def __init__(self, channels): + super(ResidualBlock, self).__init__() + self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1) + self.in1 = torch.nn.InstanceNorm2d(channels, affine=True) + self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1) + self.in2 = torch.nn.InstanceNorm2d(channels, affine=True) + self.relu = torch.nn.ReLU() + + def forward(self, x): + residual = x + out = self.relu(self.in1(self.conv1(x))) + out = self.in2(self.conv2(out)) + out = out + residual + return out + + +class UpsampleConvLayer(torch.nn.Module): + """UpsampleConvLayer + Upsamples the input and then does a convolution. This method gives better results + compared to ConvTranspose2d. 
+ ref: http://distill.pub/2016/deconv-checkerboard/ + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride, upsample): + super(UpsampleConvLayer, self).__init__() + # self.upsample = upsample + self.upsample = torch.nn.Upsample(scale_factor=2, mode='nearest') + reflection_padding = kernel_size // 2 + self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding) + self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride) + + def forward(self, x): + x_in = x + # print('upsample', self.upsample) + # x_in = torch.nn.functional.interpolate(x_in, mode='nearest', scale_factor=self.upsample) + # if self.upsample: + # x_in = torch.nn.functional.interpolate(x_in, mode='nearest', scale_factor=self.upsample) + out = self.upsample(x_in) + out = self.reflection_pad(out) + out = self.conv2d(out) + return out diff --git a/demos/video/style-transfer/utils.py b/demos/video/style-transfer/utils.py new file mode 100644 index 000000000..bbfa07a34 --- /dev/null +++ b/demos/video/style-transfer/utils.py @@ -0,0 +1,140 @@ +import torch +from PIL import Image +import PIL + +import cv2 +import numpy as np + +def load_image(filename, size=None, scale=None): + img = Image.open(filename).convert('RGB') + if size is not None: + img = img.resize((size, size), PIL.Image.Resampling.LANCZOS) + elif scale is not None: + img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), PIL.Image.Resampling.LANCZOS) + return img + + +def save_image(filename, data): + img = data.clone().clamp(0, 255).numpy() + img = img.transpose(1, 2, 0).astype("uint8") + img = Image.fromarray(img) + img.save(filename) + + +def gram_matrix(y): + (b, ch, h, w) = y.size() + features = y.view(b, ch, w * h) + features_t = features.transpose(1, 2) + gram = features.bmm(features_t) / (ch * h * w) + return gram + + +def normalize_batch(batch): + # normalize using imagenet mean and std + mean = batch.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1) + std = 
batch.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1) + batch = batch.div_(255.0) + return (batch - mean) / std + + +def bgr_to_tensor( + frame_bgr: np.ndarray, + *, + normalize: bool = False, + mean: tuple[float, float, float] | None = None, + std: tuple[float, float, float] | None = None, + add_batch: bool = False, + device: torch.device | str | None = None, +) -> torch.Tensor: + """ + Convert an OpenCV BGR frame (H x W x 3, uint8) to a PyTorch tensor + (C x H x W, float32, RGB). Optionally normalise with mean / std. + + Parameters + ---------- + frame_bgr : np.ndarray + Raw image from cv2 (BGR, uint8, H x W x 3). + normalize : bool, default False + If True, apply (tensor - mean) / std after scaling to [0,1]. + mean, std : tuple of 3 floats, optional + Normalisation stats in **RGB** order. If `normalize` is True and + these are omitted, ImageNet values are used. + add_batch : bool, default False + If True, adds a leading batch dim → (1, C, H, W). + device : torch.device | str | None + Target device (e.g. "cuda"). If None, tensor stays on CPU. + + Returns + ------- + torch.Tensor + The converted (and optionally normalised) tensor. + """ + # --- 1. BGR → RGB ------------------------------------------------------- + frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB) + + # --- 2. Ensure contiguous memory for zero‑copy conversion -------------- + frame_rgb = np.ascontiguousarray(frame_rgb) + + # --- 3. numpy → torch, reorder to C,H,W -------------------------------- + tensor = torch.from_numpy(frame_rgb).permute(2, 0, 1).float() # C,H,W + + # --- 4. Scale to [0,1] -------------------------------------------------- + tensor = tensor.div_(255.0) + + # --- 5. 
Optional normalisation ----------------------------------------- + if normalize: + if mean is None or std is None: + mean = (0.485, 0.456, 0.406) # ImageNet defaults (RGB) + std = (0.229, 0.224, 0.225) + mean = torch.tensor(mean, dtype=tensor.dtype, device=tensor.device)[:, None, None] + std = torch.tensor(std, dtype=tensor.dtype, device=tensor.device)[:, None, None] + tensor.sub_(mean).div_(std) + + # --- 6. Optional batch and device move --------------------------------- + if add_batch: + tensor = tensor.unsqueeze(0) # N,C,H,W + if device is not None: + tensor = tensor.to(device, non_blocking=True) + + return tensor + + +def tensor_to_bgr(frame_tensor, *, undo_normalise=False, mean=None, std=None): + """ + Args + ---- + frame_tensor : torch.Tensor + (C,H,W) or (1,C,H,W) ― float or half ― RGB + undo_normalise : bool + True if you previously applied (x - mean) / std + mean, std : list/tuple of 3 floats + Same numbers you used for normalising (e.g. ImageNet) + Returns + ------- + frame_bgr : np.ndarray (H,W,3) uint8 BGR contiguous + """ + # 1) squeeze batch dimension if present + if frame_tensor.ndim == 4: + frame_tensor = frame_tensor[0] + + # 2) move to CPU & float32 for math + img = frame_tensor.detach() + + # 3) (optional) reverse mean/std normalisation + if undo_normalise: + if mean is None or std is None: + raise ValueError("Supply mean and std to undo normalisation") + mean = torch.tensor(mean).to(img).view(3,1,1) + std = torch.tensor(std).to(img).view(3,1,1) + img = img * std + mean + + # 4) scale back to 0‑255, clamp, uint8 + img = (img * 255.0) + # img = img # .to(torch.float16) + img = img.clamp(0,255).byte() + + # 5) channel‑last & numpy + img = img.permute(1,2,0).cpu().numpy() # H,W,C RGB + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) # → BGR + img = np.ascontiguousarray(img) # ensure OpenCV‑happy + return img diff --git a/demos/video/style-transfer/vgg.py b/demos/video/style-transfer/vgg.py new file mode 100644 index 000000000..35fd25848 --- /dev/null 
+++ b/demos/video/style-transfer/vgg.py @@ -0,0 +1,38 @@ +from collections import namedtuple + +import torch +from torchvision import models + + +class Vgg16(torch.nn.Module): + def __init__(self, requires_grad=False): + super(Vgg16, self).__init__() + vgg_pretrained_features = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features + self.slice1 = torch.nn.Sequential() + self.slice2 = torch.nn.Sequential() + self.slice3 = torch.nn.Sequential() + self.slice4 = torch.nn.Sequential() + for x in range(4): + self.slice1.add_module(str(x), vgg_pretrained_features[x]) + for x in range(4, 9): + self.slice2.add_module(str(x), vgg_pretrained_features[x]) + for x in range(9, 16): + self.slice3.add_module(str(x), vgg_pretrained_features[x]) + for x in range(16, 23): + self.slice4.add_module(str(x), vgg_pretrained_features[x]) + if not requires_grad: + for param in self.parameters(): + param.requires_grad = False + + def forward(self, X): + h = self.slice1(X) + h_relu1_2 = h + h = self.slice2(h) + h_relu2_2 = h + h = self.slice3(h) + h_relu3_3 = h + h = self.slice4(h) + h_relu4_3 = h + vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']) + out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3) + return out diff --git a/demos/video/style-transfer/videos/candy_deer.mp4 b/demos/video/style-transfer/videos/candy_deer.mp4 new file mode 100644 index 000000000..b10efa9aa Binary files /dev/null and b/demos/video/style-transfer/videos/candy_deer.mp4 differ diff --git a/demos/video/style-transfer/videos/deer.mp4 b/demos/video/style-transfer/videos/deer.mp4 new file mode 100644 index 000000000..54ff7126b Binary files /dev/null and b/demos/video/style-transfer/videos/deer.mp4 differ diff --git a/demos/video/style-transfer/videos/edge_deer.mp4 b/demos/video/style-transfer/videos/edge_deer.mp4 new file mode 100644 index 000000000..c19d5eaf5 Binary files /dev/null and b/demos/video/style-transfer/videos/edge_deer.mp4 differ diff --git 
a/demos/video/style-transfer/videos/mosaic_deer.mp4 b/demos/video/style-transfer/videos/mosaic_deer.mp4 new file mode 100644 index 000000000..b54e27793 Binary files /dev/null and b/demos/video/style-transfer/videos/mosaic_deer.mp4 differ diff --git a/demos/video/style-transfer/videos/udnie_deer.mp4 b/demos/video/style-transfer/videos/udnie_deer.mp4 new file mode 100644 index 000000000..22d6070eb Binary files /dev/null and b/demos/video/style-transfer/videos/udnie_deer.mp4 differ diff --git a/demos/video/webcam_infer.cpp b/demos/video/webcam-capture/webcam_infer.cpp similarity index 100% rename from demos/video/webcam_infer.cpp rename to demos/video/webcam-capture/webcam_infer.cpp diff --git a/examples/.gitignore b/examples/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 361a58c0e..277b468c9 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,3 +3,6 @@ add_subdirectory(my_example) add_subdirectory(torch_model_loading) + + +add_subdirectory(split_loop) \ No newline at end of file diff --git a/examples/pytorch-examples/fast_neural_style/.gitignore b/examples/pytorch-examples/fast_neural_style/.gitignore new file mode 100644 index 000000000..e5694acc2 --- /dev/null +++ b/examples/pytorch-examples/fast_neural_style/.gitignore @@ -0,0 +1 @@ +saved_models/* diff --git a/examples/pytorch-examples/fast_neural_style/README.md b/examples/pytorch-examples/fast_neural_style/README.md new file mode 100644 index 000000000..9b5834ed7 --- /dev/null +++ b/examples/pytorch-examples/fast_neural_style/README.md @@ -0,0 +1,66 @@ +# fast-neural-style :city_sunrise: :rocket: + +This repository contains a pytorch implementation of an algorithm for artistic style transfer. The algorithm can be used to mix the content of an image with the style of another image. For example, here is a photograph of a door arch rendered in the style of a stained glass painting. 
+ +The model uses the method described in [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155) along with [Instance Normalization](https://arxiv.org/pdf/1607.08022.pdf). The saved-models for examples shown in the README can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0). + +

+ + + +

+ +## Requirements + +The program is written in Python, and uses [pytorch](http://pytorch.org/), [scipy](https://www.scipy.org). A GPU is not necessary, but can provide a significant speed up especially for training a new model. Regular sized images can be styled on a laptop or desktop using saved models. + +## Usage + +Stylize image + +``` +python neural_style/neural_style.py eval --content-image --model --output-image --accel +``` + +- `--content-image`: path to content image you want to stylize. +- `--model`: saved model to be used for stylizing the image (eg: `mosaic.pth`) +- `--output-image`: path for saving the output image. +- `--content-scale`: factor for scaling down the content image if memory is an issue (eg: value of 2 will halve the height and width of content-image) +- `--accel`: use accelerator + +Train model + +```bash +python neural_style/neural_style.py train --dataset --style-image --save-model-dir --epochs 2 --accel +``` + +There are several command line arguments, the important ones are listed below + +- `--dataset`: path to training dataset, the path should point to a folder containing another folder with all the training images. I used COCO 2014 Training images dataset [80K/13GB] [(download)](https://cocodataset.org/#download). +- `--style-image`: path to style-image. +- `--save-model-dir`: path to folder where trained model will be saved. +- `--accel`: use accelerator. + +If `--accel` argument is given, pytorch will search for available hardware acceleration device and attempt to use it. This example is known to work on CUDA, MPS and XPU devices. + +Refer to `neural_style/neural_style.py` for other command line arguments. For training new models you might have to tune the values of `--content-weight` and `--style-weight`. The mosaic style model shown above was trained with `--content-weight 1e5` and `--style-weight 1e10`. 
The remaining 3 models were also trained with similar order of weight parameters with slight variation in the `--style-weight` (`5e10` or `1e11`). + +## Models + +Models for the examples shown below can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0) or by running the script `download_saved_models.py`. + +
+ +
+ +
+ + + + +
+ + + + +
diff --git a/examples/pytorch-examples/fast_neural_style/accel.ipynb b/examples/pytorch-examples/fast_neural_style/accel.ipynb new file mode 100644 index 000000000..8d6273572 --- /dev/null +++ b/examples/pytorch-examples/fast_neural_style/accel.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "63f6c39d", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d4a5bb07", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.backends.mps.is_available()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc478ec7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/pytorch-examples/fast_neural_style/download_saved_models.py b/examples/pytorch-examples/fast_neural_style/download_saved_models.py new file mode 100644 index 000000000..691c2c0a3 --- /dev/null +++ b/examples/pytorch-examples/fast_neural_style/download_saved_models.py @@ -0,0 +1,28 @@ +import os +import zipfile + +# PyTorch 1.1 moves _download_url_to_file +# from torch.utils.model_zoo to torch.hub +# PyTorch 1.0 exists another _download_url_to_file +# 2 argument +# TODO: If you remove support PyTorch 1.0 or older, +# You should remove torch.utils.model_zoo +# Ref. 
PyTorch #18758 +# https://github.com/pytorch/pytorch/pull/18758/commits +try: + from torch.utils.model_zoo import _download_url_to_file +except ImportError: + try: + from torch.hub import download_url_to_file as _download_url_to_file + except ImportError: + from torch.hub import _download_url_to_file + + +def unzip(source_filename, dest_dir): + with zipfile.ZipFile(source_filename) as zf: + zf.extractall(path=dest_dir) + + +if __name__ == '__main__': + _download_url_to_file('https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1', 'saved_models.zip', None, True) + unzip('saved_models.zip', '.') diff --git a/examples/pytorch-examples/fast_neural_style/iain_hike-mosaic.jpg b/examples/pytorch-examples/fast_neural_style/iain_hike-mosaic.jpg new file mode 100644 index 000000000..438f5c26f Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/iain_hike-mosaic.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/iain_hike.jpg b/examples/pytorch-examples/fast_neural_style/iain_hike.jpg new file mode 100644 index 000000000..c174227e0 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/iain_hike.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/iain_river-mosaic.jpg b/examples/pytorch-examples/fast_neural_style/iain_river-mosaic.jpg new file mode 100644 index 000000000..d0c849105 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/iain_river-mosaic.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/iain_river.jpeg b/examples/pytorch-examples/fast_neural_style/iain_river.jpeg new file mode 100644 index 000000000..8efcf6294 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/iain_river.jpeg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/content-images/amber.jpg b/examples/pytorch-examples/fast_neural_style/images/content-images/amber.jpg new file mode 100644 index 000000000..22f6390f4 Binary files /dev/null and 
b/examples/pytorch-examples/fast_neural_style/images/content-images/amber.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/output-images/amber-candy.jpg b/examples/pytorch-examples/fast_neural_style/images/output-images/amber-candy.jpg new file mode 100644 index 000000000..f585fdaae Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/output-images/amber-candy.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/output-images/amber-mosaic.jpg b/examples/pytorch-examples/fast_neural_style/images/output-images/amber-mosaic.jpg new file mode 100644 index 000000000..5af32e759 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/output-images/amber-mosaic.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/output-images/amber-rain-princess.jpg b/examples/pytorch-examples/fast_neural_style/images/output-images/amber-rain-princess.jpg new file mode 100644 index 000000000..4f9efeb20 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/output-images/amber-rain-princess.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/output-images/amber-udnie.jpg b/examples/pytorch-examples/fast_neural_style/images/output-images/amber-udnie.jpg new file mode 100644 index 000000000..4e7261603 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/output-images/amber-udnie.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/style-images/candy.jpg b/examples/pytorch-examples/fast_neural_style/images/style-images/candy.jpg new file mode 100644 index 000000000..f40e5a33e Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/style-images/candy.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/style-images/mosaic.jpg b/examples/pytorch-examples/fast_neural_style/images/style-images/mosaic.jpg new file mode 100644 index 
000000000..63aa06fe4 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/style-images/mosaic.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/style-images/rain-princess-cropped.jpg b/examples/pytorch-examples/fast_neural_style/images/style-images/rain-princess-cropped.jpg new file mode 100644 index 000000000..00a83ea48 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/style-images/rain-princess-cropped.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/style-images/rain-princess.jpg b/examples/pytorch-examples/fast_neural_style/images/style-images/rain-princess.jpg new file mode 100644 index 000000000..520f6a227 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/style-images/rain-princess.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/images/style-images/udnie.jpg b/examples/pytorch-examples/fast_neural_style/images/style-images/udnie.jpg new file mode 100644 index 000000000..3dbb29cf8 Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/images/style-images/udnie.jpg differ diff --git a/examples/pytorch-examples/fast_neural_style/models/mosaic_float16.pt b/examples/pytorch-examples/fast_neural_style/models/mosaic_float16.pt new file mode 100644 index 000000000..9718339de Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/models/mosaic_float16.pt differ diff --git a/examples/pytorch-examples/fast_neural_style/models/mosaic_float32.pt b/examples/pytorch-examples/fast_neural_style/models/mosaic_float32.pt new file mode 100644 index 000000000..e03440a7d Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/models/mosaic_float32.pt differ diff --git a/examples/pytorch-examples/fast_neural_style/neural_style/__init__.py b/examples/pytorch-examples/fast_neural_style/neural_style/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/pytorch-examples/fast_neural_style/neural_style/neural_style.py b/examples/pytorch-examples/fast_neural_style/neural_style/neural_style.py new file mode 100644 index 000000000..9695d088f --- /dev/null +++ b/examples/pytorch-examples/fast_neural_style/neural_style/neural_style.py @@ -0,0 +1,293 @@ +import argparse +import os +import sys +import time +import re + +import numpy as np +import torch +from torch.optim import Adam +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision import transforms +import torch.onnx + +import utils +from transformer_net import TransformerNet +from vgg import Vgg16 + + +def available_accelerator(): + """ + Check if accelerator is available. + """ + return torch.cuda.is_available() or torch.backends.mps.is_available() + +def current_accelerator(args): + """ + Get the current accelerator. + """ + if args.accel and available_accelerator(): + if torch.cuda.is_available(): + return torch.device("cuda") + elif torch.backends.mps.is_available(): + return torch.device("mps") + else: + raise RuntimeError("No accelerator available") + else: + return torch.device("cpu") + +def check_paths(args): + try: + if not os.path.exists(args.save_model_dir): + os.makedirs(args.save_model_dir) + if args.checkpoint_model_dir is not None and not (os.path.exists(args.checkpoint_model_dir)): + os.makedirs(args.checkpoint_model_dir) + except OSError as e: + print(e) + sys.exit(1) + + +def train(args): + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + np.random.seed(args.seed) + torch.manual_seed(args.seed) + + transform = transforms.Compose([ + transforms.Resize(args.image_size), + transforms.CenterCrop(args.image_size), + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + train_dataset = datasets.ImageFolder(args.dataset, transform) + train_loader = DataLoader(train_dataset, batch_size=args.batch_size) + + 
transformer = TransformerNet().to(device) + optimizer = Adam(transformer.parameters(), args.lr) + mse_loss = torch.nn.MSELoss() + + vgg = Vgg16(requires_grad=False).to(device) + style_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + style = utils.load_image(args.style_image, size=args.style_size) + style = style_transform(style) + style = style.repeat(args.batch_size, 1, 1, 1).to(device) + + features_style = vgg(utils.normalize_batch(style)) + gram_style = [utils.gram_matrix(y) for y in features_style] + + for e in range(args.epochs): + transformer.train() + agg_content_loss = 0. + agg_style_loss = 0. + count = 0 + for batch_id, (x, _) in enumerate(train_loader): + n_batch = len(x) + count += n_batch + optimizer.zero_grad() + + x = x.to(device) + y = transformer(x) + + y = utils.normalize_batch(y) + x = utils.normalize_batch(x) + + features_y = vgg(y) + features_x = vgg(x) + + content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2) + + style_loss = 0. 
+ for ft_y, gm_s in zip(features_y, gram_style): + gm_y = utils.gram_matrix(ft_y) + style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :]) + style_loss *= args.style_weight + + total_loss = content_loss + style_loss + total_loss.backward() + optimizer.step() + + agg_content_loss += content_loss.item() + agg_style_loss += style_loss.item() + + if (batch_id + 1) % args.log_interval == 0: + mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format( + time.ctime(), e + 1, count, len(train_dataset), + agg_content_loss / (batch_id + 1), + agg_style_loss / (batch_id + 1), + (agg_content_loss + agg_style_loss) / (batch_id + 1) + ) + print(mesg) + + if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0: + transformer.eval().cpu() + ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth" + ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename) + torch.save(transformer.state_dict(), ckpt_model_path) + transformer.to(device).train() + + # save model + transformer.eval().cpu() + save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str( + args.content_weight) + "_" + str(args.style_weight) + ".model" + save_model_path = os.path.join(args.save_model_dir, save_model_filename) + torch.save(transformer.state_dict(), save_model_path) + + print("\nDone, trained model saved at", save_model_path) + + +def stylize(args): + if args.accel: + device = current_accelerator(args) + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + content_image = utils.load_image(args.content_image, scale=args.content_scale) + content_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x.mul(255)) + ]) + content_image = content_transform(content_image) + content_image = content_image.unsqueeze(0).to(device) + + if args.model.endswith(".onnx"): + output = stylize_onnx(content_image, 
args) + else: + with torch.no_grad(): + style_model = TransformerNet() + state_dict = torch.load(args.model) + # remove saved deprecated running_* keys in InstanceNorm from the checkpoint + for k in list(state_dict.keys()): + if re.search(r'in\d+\.running_(mean|var)$', k): + del state_dict[k] + style_model.load_state_dict(state_dict) + style_model.to(device) + style_model.eval() + if args.export_onnx: + assert args.export_onnx.endswith(".onnx"), "Export model file should end with .onnx" + output = torch.onnx._export( + style_model, content_image, args.export_onnx, opset_version=11, + ).cpu() + else: + print('Content image shape:', content_image.shape) + output = style_model(content_image).cpu() + + utils.save_image(args.output_image, output[0]) + from pathlib import Path + model_name = Path(args.model).stem + + sm = torch.jit.script(style_model.to(torch.float32)) + sm.save(f"models/{model_name}_float32.pt") + + sm = torch.jit.script(style_model.to(torch.float16)) + sm.save(f"models/{model_name}_float16.pt") + + utils.save_image(args.output_image, output[0]) + + +def stylize_onnx(content_image, args): + """ + Read ONNX model and run it using onnxruntime + """ + + assert not args.export_onnx + + import onnxruntime + + ort_session = onnxruntime.InferenceSession(args.model) + + def to_numpy(tensor): + return ( + tensor.detach().cpu().numpy() + if tensor.requires_grad + else tensor.cpu().numpy() + ) + + ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(content_image)} + ort_outs = ort_session.run(None, ort_inputs) + img_out_y = ort_outs[0] + + return torch.from_numpy(img_out_y) + + +def main(): + main_arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style") + subparsers = main_arg_parser.add_subparsers(title="subcommands", dest="subcommand") + + train_arg_parser = subparsers.add_parser("train", help="parser for training arguments") + train_arg_parser.add_argument("--epochs", type=int, default=2, + help="number of training epochs, default 
is 2") + train_arg_parser.add_argument("--batch-size", type=int, default=4, + help="batch size for training, default is 4") + train_arg_parser.add_argument("--dataset", type=str, required=True, + help="path to training dataset, the path should point to a folder " + "containing another folder with all the training images") + train_arg_parser.add_argument("--style-image", type=str, default="images/style-images/mosaic.jpg", + help="path to style-image") + train_arg_parser.add_argument("--save-model-dir", type=str, required=True, + help="path to folder where trained model will be saved.") + train_arg_parser.add_argument("--checkpoint-model-dir", type=str, default=None, + help="path to folder where checkpoints of trained models will be saved") + train_arg_parser.add_argument("--image-size", type=int, default=256, + help="size of training images, default is 256 X 256") + train_arg_parser.add_argument("--style-size", type=int, default=None, + help="size of style-image, default is the original size of style image") + train_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + train_arg_parser.add_argument("--seed", type=int, default=42, + help="random seed for training") + train_arg_parser.add_argument("--content-weight", type=float, default=1e5, + help="weight for content-loss, default is 1e5") + train_arg_parser.add_argument("--style-weight", type=float, default=1e10, + help="weight for style-loss, default is 1e10") + train_arg_parser.add_argument("--lr", type=float, default=1e-3, + help="learning rate, default is 1e-3") + train_arg_parser.add_argument("--log-interval", type=int, default=500, + help="number of images after which the training loss is logged, default is 500") + train_arg_parser.add_argument("--checkpoint-interval", type=int, default=2000, + help="number of batches after which a checkpoint of the trained model will be created") + + eval_arg_parser = subparsers.add_parser("eval", help="parser for evaluation/stylizing arguments") 
+ eval_arg_parser.add_argument("--content-image", type=str, required=True, + help="path to content image you want to stylize") + eval_arg_parser.add_argument("--content-scale", type=float, default=None, + help="factor for scaling down the content image") + eval_arg_parser.add_argument("--output-image", type=str, required=True, + help="path for saving the output image") + eval_arg_parser.add_argument("--model", type=str, required=True, + help="saved model to be used for stylizing the image. If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") + eval_arg_parser.add_argument("--export_onnx", type=str, + help="export ONNX model to a given file") + eval_arg_parser.add_argument('--accel', action='store_true', + help='use accelerator') + + args = main_arg_parser.parse_args() + + if args.subcommand is None: + print("ERROR: specify either train or eval") + sys.exit(1) + if args.accel and not available_accelerator(): + print("ERROR: accelerator is not available, try running on CPU") + sys.exit(1) + if not args.accel and available_accelerator(): + print("WARNING: accelerator is available, run with --accel to enable it") + + if args.subcommand == "train": + check_paths(args) + train(args) + else: + stylize(args) + + +if __name__ == "__main__": + main() diff --git a/examples/pytorch-examples/fast_neural_style/neural_style/transformer_net.py b/examples/pytorch-examples/fast_neural_style/neural_style/transformer_net.py new file mode 100644 index 000000000..c0f69a9a3 --- /dev/null +++ b/examples/pytorch-examples/fast_neural_style/neural_style/transformer_net.py @@ -0,0 +1,103 @@ +import torch + + +class TransformerNet(torch.nn.Module): + def __init__(self): + super(TransformerNet, self).__init__() + # Initial convolution layers + self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1) + self.in1 = torch.nn.InstanceNorm2d(32, affine=True) + self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2) + self.in2 = torch.nn.InstanceNorm2d(64, affine=True) + self.conv3 = 
ConvLayer(64, 128, kernel_size=3, stride=2) + self.in3 = torch.nn.InstanceNorm2d(128, affine=True) + # Residual layers + self.res1 = ResidualBlock(128) + self.res2 = ResidualBlock(128) + self.res3 = ResidualBlock(128) + self.res4 = ResidualBlock(128) + self.res5 = ResidualBlock(128) + # Upsampling Layers + self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2) + self.in4 = torch.nn.InstanceNorm2d(64, affine=True) + self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2) + self.in5 = torch.nn.InstanceNorm2d(32, affine=True) + self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1) + # Non-linearities + self.relu = torch.nn.ReLU() + + def forward(self, X): + y = self.relu(self.in1(self.conv1(X))) + y = self.relu(self.in2(self.conv2(y))) + y = self.relu(self.in3(self.conv3(y))) + y = self.res1(y) + y = self.res2(y) + y = self.res3(y) + y = self.res4(y) + y = self.res5(y) + y = self.relu(self.in4(self.deconv1(y))) + y = self.relu(self.in5(self.deconv2(y))) + y = self.deconv3(y) + return y + + +class ConvLayer(torch.nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride): + super(ConvLayer, self).__init__() + reflection_padding = kernel_size // 2 + self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding) + self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride) + + def forward(self, x): + out = self.reflection_pad(x) + out = self.conv2d(out) + return out + + +class ResidualBlock(torch.nn.Module): + """ResidualBlock + introduced in: https://arxiv.org/abs/1512.03385 + recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html + """ + + def __init__(self, channels): + super(ResidualBlock, self).__init__() + self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1) + self.in1 = torch.nn.InstanceNorm2d(channels, affine=True) + self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1) + self.in2 = torch.nn.InstanceNorm2d(channels, 
affine=True) + self.relu = torch.nn.ReLU() + + def forward(self, x): + residual = x + out = self.relu(self.in1(self.conv1(x))) + out = self.in2(self.conv2(out)) + out = out + residual + return out + + +class UpsampleConvLayer(torch.nn.Module): + """UpsampleConvLayer + Upsamples the input and then does a convolution. This method gives better results + compared to ConvTranspose2d. + ref: http://distill.pub/2016/deconv-checkerboard/ + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride, upsample): + super(UpsampleConvLayer, self).__init__() + # self.upsample = upsample + self.upsample = torch.nn.Upsample(scale_factor=2, mode='nearest') + reflection_padding = kernel_size // 2 + self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding) + self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride) + + def forward(self, x): + x_in = x + # print('upsample', self.upsample) + # x_in = torch.nn.functional.interpolate(x_in, mode='nearest', scale_factor=self.upsample) + # if self.upsample: + # x_in = torch.nn.functional.interpolate(x_in, mode='nearest', scale_factor=self.upsample) + out = self.upsample(x_in) + out = self.reflection_pad(out) + out = self.conv2d(out) + return out diff --git a/examples/pytorch-examples/fast_neural_style/neural_style/utils.py b/examples/pytorch-examples/fast_neural_style/neural_style/utils.py new file mode 100644 index 000000000..801ed82d0 --- /dev/null +++ b/examples/pytorch-examples/fast_neural_style/neural_style/utils.py @@ -0,0 +1,34 @@ +import torch +from PIL import Image +import PIL + +def load_image(filename, size=None, scale=None): + img = Image.open(filename).convert('RGB') + if size is not None: + img = img.resize((size, size), PIL.Image.Resampling.LANCZOS) + elif scale is not None: + img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), PIL.Image.Resampling.LANCZOS) + return img + + +def save_image(filename, data): + img = data.clone().clamp(0, 255).numpy() + img = 
img.transpose(1, 2, 0).astype("uint8") + img = Image.fromarray(img) + img.save(filename) + + +def gram_matrix(y): + (b, ch, h, w) = y.size() + features = y.view(b, ch, w * h) + features_t = features.transpose(1, 2) + gram = features.bmm(features_t) / (ch * h * w) + return gram + + +def normalize_batch(batch): + # normalize using imagenet mean and std + mean = batch.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1) + std = batch.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1) + batch = batch.div_(255.0) + return (batch - mean) / std diff --git a/examples/pytorch-examples/fast_neural_style/neural_style/vgg.py b/examples/pytorch-examples/fast_neural_style/neural_style/vgg.py new file mode 100644 index 000000000..35fd25848 --- /dev/null +++ b/examples/pytorch-examples/fast_neural_style/neural_style/vgg.py @@ -0,0 +1,38 @@ +from collections import namedtuple + +import torch +from torchvision import models + + +class Vgg16(torch.nn.Module): + def __init__(self, requires_grad=False): + super(Vgg16, self).__init__() + vgg_pretrained_features = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features + self.slice1 = torch.nn.Sequential() + self.slice2 = torch.nn.Sequential() + self.slice3 = torch.nn.Sequential() + self.slice4 = torch.nn.Sequential() + for x in range(4): + self.slice1.add_module(str(x), vgg_pretrained_features[x]) + for x in range(4, 9): + self.slice2.add_module(str(x), vgg_pretrained_features[x]) + for x in range(9, 16): + self.slice3.add_module(str(x), vgg_pretrained_features[x]) + for x in range(16, 23): + self.slice4.add_module(str(x), vgg_pretrained_features[x]) + if not requires_grad: + for param in self.parameters(): + param.requires_grad = False + + def forward(self, X): + h = self.slice1(X) + h_relu1_2 = h + h = self.slice2(h) + h_relu2_2 = h + h = self.slice3(h) + h_relu3_3 = h + h = self.slice4(h) + h_relu4_3 = h + vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']) + out = vgg_outputs(h_relu1_2, 
h_relu2_2, h_relu3_3, h_relu4_3)
+        return out
diff --git a/examples/pytorch-examples/fast_neural_style/requirements.txt b/examples/pytorch-examples/fast_neural_style/requirements.txt
new file mode 100644
index 000000000..54d4c008f
--- /dev/null
+++ b/examples/pytorch-examples/fast_neural_style/requirements.txt
@@ -0,0 +1,3 @@
+# NOTE(review): neural_style/utils.py imports PIL, but Pillow is not listed here -
+# presumably it arrives as a torchvision dependency; confirm or list it explicitly.
+numpy
+torch>=2.6
+torchvision
diff --git a/examples/pytorch-examples/fast_neural_style/run.md b/examples/pytorch-examples/fast_neural_style/run.md
new file mode 100644
index 000000000..5a06a3eb9
--- /dev/null
+++ b/examples/pytorch-examples/fast_neural_style/run.md
@@ -0,0 +1,6 @@
+
+
+
+```bash
+python3 neural_style/neural_style.py eval --content-image iain_hike.jpg --model saved_models/mosaic.pth --output-image iain_hike-mosaic.jpg --content-scale 3 --accel
+```
\ No newline at end of file
diff --git a/examples/pytorch-examples/fast_neural_style/saved_models.zip b/examples/pytorch-examples/fast_neural_style/saved_models.zip
new file mode 100644
index 000000000..cdb980749
Binary files /dev/null and b/examples/pytorch-examples/fast_neural_style/saved_models.zip differ
diff --git a/examples/split_loop/CMakeLists.txt b/examples/split_loop/CMakeLists.txt
new file mode 100644
index 000000000..0cca4d929
--- /dev/null
+++ b/examples/split_loop/CMakeLists.txt
@@ -0,0 +1,86 @@
+# OpenCV is used by the webcam code compiled from bridge.cpp.
+find_package(OpenCV 4 REQUIRED)
+
+# Apple system frameworks linked into bridge_cv below.
+find_library(ACCELERATE Accelerate REQUIRED)
+find_library(METAL Metal REQUIRED)
+find_library(FOUNDATION Foundation REQUIRED)
+
+
+
+# OBJECT library: bridge.cpp is compiled to object files only (no archive, no link).
+add_library(bridge_cv OBJECT ${BRIDGE_DIR}/include/bridge.h ${BRIDGE_DIR}/lib/bridge.cpp)
+
+# Search path for the raw -ltorch/-lc10 flags listed below.
+target_link_directories(bridge_cv PRIVATE ${LIBTORCH_DIR}/lib)
+
+target_link_libraries(
+    bridge_cv
+    PRIVATE
+    -ltorch
+    -ltorch_cpu
+    -lc10
+    -ltorch_global_deps
+    ${OpenCV_LIBS}
+    # ${TORCH_LIBRARIES}
+    ${ACCELERATE}
+    ${METAL}
+    ${FOUNDATION}
+)
+
+# Header search paths: the bridge's own headers plus the libtorch C++ API headers.
+target_include_directories(
+    bridge_cv
+    PRIVATE
+    ${BRIDGE_DIR}/include
+    ${LIBTORCH_DIR}/include
+    ${LIBTORCH_DIR}/include/torch/csrc/api/include
+    # 
${BRIDGE_DIR}/util
+)
+
+# if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+#     target_compile_options(bridge_cv PRIVATE -Ofast -flto -ffast-math)
+#     target_link_options(bridge_cv PRIVATE -flto)
+# endif()
+
+
+# Object files produced by the bridge_cv OBJECT library; they reach the SplitLoop
+# link line through CHAI_CV_LINKER_ARGS below.
+set(BRIDGE_CV_OBJECT_FILES $<TARGET_OBJECTS:bridge_cv>)
+
+
+# NOTE(review): -M and --ldflags look like Chapel compiler (chpl) options, so the
+# SplitLoop link step is presumably driven by chpl - confirm against the top-level build.
+set(CHAI_CV_LINKER_ARGS
+    -M ${PROJECT_ROOT_DIR}/lib
+    ${BRIDGE_DIR}/include/bridge.h
+    ${BRIDGE_CV_OBJECT_FILES}
+    -L ${LIBTORCH_DIR}/lib
+    ${LIBTORCH_LIBS_LINKER_ARGS}
+    --ldflags "-Wl,-rpath,${LIBTORCH_DIR}/lib"
+)
+
+
+
+
+
+
+
+
+add_executable(SplitLoop
+    ${CMAKE_CURRENT_SOURCE_DIR}/split_loop.chpl
+    ${CHAI_LIB_FILES}
+)
+
+# bridge_cv must be built first so its object files exist when SplitLoop links.
+add_dependencies(SplitLoop bridge_cv)
+# add_dependencies(SplitLoop ChAI)
+target_link_options(SplitLoop
+    PRIVATE
+    ${CHAI_CV_LINKER_ARGS}
+)
+
+# Configure-time debug output of the resolved values.
+cmake_print_variables(CHAI_CV_LINKER_ARGS)
+cmake_print_variables(OpenCV_LIBS)
+cmake_print_variables(ACCELERATE)
+cmake_print_variables(METAL)
+cmake_print_variables(FOUNDATION)
+
+# Place the executable directly in the top-level build directory.
+set_target_properties(SplitLoop PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
+)
+
+# if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+#     target_compile_options(SplitLoop PRIVATE -Ofast -flto -ffast-math)
+#     target_link_options(SplitLoop PRIVATE -flto)
+# endif()
\ No newline at end of file
diff --git a/examples/split_loop/split_loop.chpl b/examples/split_loop/split_loop.chpl
new file mode 100644
index 000000000..9c633b35a
--- /dev/null
+++ b/examples/split_loop/split_loop.chpl
@@ -0,0 +1,27 @@
+use Tensor;
+use CTypes;
+
+// Demo: run a C-side counting loop in one task while another task samples the
+// counter, then hand control to the C webcam routine.
+proc main(args: [] string) {
+    writeln("Hello, world!");
+
+    // cobegin {
+    //     for i in 0..<100 {
+    //         begin Bridge.splitLoop(i,100);
+    //     }
+    // }
+    // The C side writes `n` through the pointer `nr`; the loop below reads it back.
+    var n: int(64) = 0;
+    var nr = c_ptrTo(n);
+    cobegin {
+        // NOTE(review): `begin` inside `cobegin` presumably spawns a task the cobegin
+        // does not wait for, so the filler may still be writing `n` after this
+        // block ends - confirm that is intended.
+        begin Bridge.splitLoopFiller(1000000,nr);
+
+        // NOTE(review): this reads `n` with no synchronization against the C writer,
+        // so the printed values depend on scheduling - confirm this is acceptable.
+        for i in 0..<10 {
+            writeln("Hello from ", nr.deref());
+        }
+
+    }
+
+
+    Bridge.showWebcam();
+
+    writeln("Done!");
+}
\ No newline at end of file
diff --git a/lib/Bridge.chpl b/lib/Bridge.chpl
index c0216c3b5..06e90e529 100644
--- a/lib/Bridge.chpl
+++ b/lib/Bridge.chpl
@@ -87,6 +87,12 @@ module Bridge {
         in a: 
bridge_tensor_t,
         in b: bridge_tensor_t): bridge_tensor_t;
 
+    // Binds C `split_loop` (bridge.cpp): prints n lines of the form
+    // "idx(<idx>,<n>) = <i>" to stdout.
+    extern "split_loop" proc splitLoop(idx: int(64), n: int(64)): void;
+
+    // Binds C `split_loop_filler` (bridge.cpp): stores 0, 1, ..., n-1 into `ret` in
+    // turn, with a zero-length sleep after each store.
+    extern "split_loop_filler" proc splitLoopFiller(n: int(64),ret: c_ptr(int(64))): void;
+
+    // Binds C `show_webcam` (bridge.cpp): opens camera index 0 and reads frames in a loop.
+    extern "show_webcam" proc showWebcam(): void;
+
     // extern "capture_webcam_bridge" proc captureWebcam(
     //         in cam_index: int(32)): bridge_tensor_t;
 
diff --git a/syntax.tentract b/syntax.tentract
new file mode 100644
index 000000000..50b0e6316
--- /dev/null
+++ b/syntax.tentract
@@ -0,0 +1,40 @@
+
+
+// Builtin constructs
+operators + * - / ;
+
+// Kronecker delta
+d[i,...,j] = if i = ... = j then 1 else 0;
+
+//
+
+
+// `_` presumably stands for any builtin operator applied element-wise; confirm.
+(A _ B)[i] = A[i] _ B[i];
+
+(A + B)[i,j] = A[i,j] + B[i,j];
+
+// NOTE(review): `+` is defined with `*` on the right and `i` appears only on the
+// right-hand side - looks like a contraction example or a typo; confirm.
+(A + B)[j,k] = A[i,j,k] * B[i,j,k];
+
+(A + B)[:i] = A[:i] + B[:i];
+
+
+// Index k appears only on the right - presumably summed over (Einstein convention); confirm.
+(A @ B)[i,j] = A[i,k] * B[k,j];
+(A @ B)[:,i,j] = A[:,i,k] * B[:,k,j];
+
+(A ** B)[i,j,k,l] = A[i,m,k] * B[j,l,m];
+
+(transpose A)[i,j] = A[j,i];
+
+
+
+
+// NOTE(review): mat_mul is declared twice below (signature + equation, then an in/out
+// block) - presumably two candidate spellings of the same kernel; confirm.
+kernel mat_mul(A: float<2>, B: float<2>) -> float<2>;
+mat_mul A B = A @ B;
+
+kernel mat_mul(in A: float<2>, in B: float<2>, out C: float<2>) {
+    C = A @ B;
+}
+
+// NOTE(review): declares both an `out C` parameter and a `-> float<2>` result; confirm
+// which one carries the derivative.
+kernel mat_mul_delta(in A: float<2>, in B: float<2>, out C: float<2>) -> float<2>;
+mat_mul_delta A B = ∂(A @ B) / ∂A;
+