Iainmon · Iainmon · May 23, 2025 · May 21, 2025 · May 22, 2025 · May 22, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -228,12 +228,8 @@ add_dependencies(TinyLayerTest ChAI)
 target_link_options(TinyLayerTest
     PRIVATE
         --main-module layer_test.chpl
-        -M ${PROJECT_ROOT_DIR}/lib
-        ${BRIDGE_DIR}/include/bridge.h
-        ${BRIDGE_OBJECT_FILES}
-        -L ${LIBTORCH_DIR}/lib
-        ${LIBTORCH_LIBS_LINKER_ARGS}
-        --ldflags "-Wl,-rpath,${LIBTORCH_DIR}/lib"
+        # -M ${PROJECT_ROOT_DIR}/lib
+        # NOTE(review): CHAI_LINKER_ARGS is defined outside this hunk; presumably it
+        # bundles the bridge header/objects, the libtorch -L/-l flags and the rpath
+        # that used to be listed inline here -- confirm.
+        ${CHAI_LINKER_ARGS}
 )
 # chpl test/tiny/layer_test.chpl -M lib bridge/include/bridge.h build/CMakeFiles/bridge.dir/bridge/lib/bridge.cpp.o -L libtorch/lib -ltorch -ltorch_cpu -lc10 -ltorch_global_deps --ldflags "-Wl,-rpath,libtorch/lib"
 

diff --git a/bridge/.DS_Store b/bridge/.DS_Store
diff --git a/bridge/include/bridge.h b/bridge/include/bridge.h
@@ -17,6 +17,8 @@ typedef unsigned char uint8_t;
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
 
+/* Switch the CPU-only debug override on (non-zero mode) or off (zero mode);
+ * the bridge re-selects its inference device accordingly. */
+void debug_cpu_only_mode(bool_t mode);
+
 typedef struct bridge_tensor_t {
     float* data;
     int* sizes;
@@ -51,9 +53,9 @@ bridge_tensor_t load_run_model(const uint8_t* model_path, bridge_tensor_t input)
 
 bridge_pt_model_t load_model(const uint8_t* model_path);
 
-bridge_tensor_t model_forward(bridge_pt_model_t model, bridge_tensor_t input);
-
+/* Reports whether the bridge's currently selected device is CUDA or MPS
+ * rather than the CPU. */
+bool_t accelerator_available(void);
 
+bridge_tensor_t model_forward(bridge_pt_model_t model, bridge_tensor_t input);
 bridge_tensor_t model_forward_style_transfer(bridge_pt_model_t model, bridge_tensor_t input);
 
 bridge_tensor_t resize(bridge_tensor_t input,int height,int width);

diff --git a/bridge/lib/bridge.cpp b/bridge/lib/bridge.cpp
@@ -1,6 +1,8 @@
 #include <bridge.h>
 
 #include <torch/torch.h>
+#include <ATen/ATen.h>
+
 #include <torch/script.h>
 
 // #include <torch/script.h>
@@ -27,6 +29,58 @@
 
 
 
+// Globals
+
+
+// Forward declarations: both selectors are defined further down but are
+// already needed by the initializers below.
+torch::Device get_best_device();
+torch::ScalarType get_best_dtype();
+
+// Device and dtype used e.g. by load_model() and model_forward(). They are
+// computed once when this translation unit is initialized; best_device can
+// later be reassigned through debug_cpu_only_mode().
+auto best_device = get_best_device();
+auto best_dtype = get_best_dtype();
+
+// Inference-only setup: both objects switch gradient tracking off and, being
+// globals, stay alive for the whole process.
+// NOTE(review): libtorch grad mode is presumably thread-local, so these guards
+// may only cover the thread that runs static initialization -- confirm for
+// calls that arrive on other threads.
+torch::NoGradGuard no_grad;
+torch::AutoGradMode enable_grad(false);
+
+// When true, get_best_device() always answers CPU; set via debug_cpu_only_mode().
+bool debug_cpu_only = false;
+
+
+
+// Choose the device used for inference. The CPU is the answer whenever the
+// debug override is active; otherwise MPS is preferred, then CUDA, and the
+// CPU remains the fallback when neither backend is reported.
+torch::Device get_best_device() {
+    auto chosen = torch::kCPU;
+    if (!debug_cpu_only) {
+        // Backends are only probed when the override is off.
+        if (torch::hasMPS())
+            chosen = torch::kMPS;
+        else if (torch::hasCUDA())
+            chosen = torch::kCUDA;
+    }
+    return torch::Device(chosen);
+}
+
+// Enable or disable the CPU-only debug override and refresh the globally
+// selected device and dtype to match.
+//
+// mode: non-zero forces CPU execution; zero returns to automatic selection.
+//
+// Only the globals are updated here; nothing in this function touches a
+// module that load_model() has already moved to the previous device.
+extern "C" void debug_cpu_only_mode(bool_t mode) {
+    debug_cpu_only = mode;
+
+    // get_best_device() already answers CPU while the override is set, so a
+    // single call covers both directions of the toggle.
+    best_device = get_best_device();
+
+    // Keep the dtype in step with the device: forcing the CPU must also drop
+    // back to float32, otherwise a machine with MPS/CUDA would keep float16
+    // while running on the CPU.
+    best_dtype = debug_cpu_only ? torch::kFloat32 : get_best_dtype();
+}
+
+// Tell the caller whether inference currently targets an accelerator, i.e.
+// whether best_device equals the default CUDA device or the default MPS device.
+extern "C" bool_t accelerator_available() {
+    const torch::Device cuda_device(torch::kCUDA);
+    const torch::Device mps_device(torch::kMPS);
+    const bool on_accelerator = (best_device == cuda_device) || (best_device == mps_device);
+    return on_accelerator;
+}
+
+// Choose the tensor dtype that goes with the device picked by
+// get_best_device(): float16 when an accelerator (MPS or CUDA) is used,
+// float32 when running on the CPU.
+torch::ScalarType get_best_dtype() {
+    // Mirror get_best_device(): with the CPU-only override active no
+    // accelerator is used, so half precision must not be selected either.
+    if (debug_cpu_only)
+        return torch::kFloat32;
+
+    // Both accelerator backends use the same half-precision dtype.
+    if (torch::hasMPS() || torch::hasCUDA())
+        return torch::kFloat16;
+
+    return torch::kFloat32;
+}
+
 int bridge_tensor_elements(bridge_tensor_t &bt) {
     int size = 1;
     for (int i = 0; i < bt.dim; ++i) {
@@ -39,14 +93,14 @@ size_t bridge_tensor_size(bridge_tensor_t &bt) {
     return sizeof(float32_t) * bridge_tensor_elements(bt);
 }
 
-void store_tensor(torch::Tensor &input, float32_t* dest) {
+// Copy all input.numel() elements of `input` into the caller-provided buffer
+// `dest` as raw float32 values; `dest` must have room for that many floats.
+// NOTE(review): the flat memcpy presumably requires `input` to be a contiguous
+// float32 CPU tensor -- confirm at the call sites.
+void store_tensor(at::Tensor &input, float32_t* dest) {
     float32_t * data = input.data_ptr<float32_t>();
     size_t bytes_size = sizeof(float32_t) * input.numel();
     // std::memmove(dest,data,bytes_size);
     std::memcpy(dest,data,bytes_size);
 }
 
-bridge_tensor_t torch_to_bridge(torch::Tensor &tensor) {
+bridge_tensor_t torch_to_bridge(at::Tensor &tensor) {
     bridge_tensor_t result;
     result.created_by_c = true;
     result.dim = tensor.dim();
@@ -59,13 +113,13 @@ bridge_tensor_t torch_to_bridge(torch::Tensor &tensor) {
     return result;
 }
 
-torch::Tensor bridge_to_torch(bridge_tensor_t &bt) {
+// Wrap a bridge tensor as a float tensor whose shape is bt.sizes[0 .. bt.dim).
+// The tensor is created with torch::from_blob directly on bt.data; this
+// function performs no copy of its own.
+// NOTE(review): the caller presumably has to keep bt.data alive for as long as
+// the returned tensor is used -- confirm.
+at::Tensor bridge_to_torch(bridge_tensor_t &bt) {
     std::vector<int64_t> sizes_vec(bt.sizes, bt.sizes + bt.dim);
     auto shape = torch::IntArrayRef(sizes_vec);
     return torch::from_blob(bt.data, shape, torch::kFloat);
 }
 
-torch::Tensor bridge_to_torch(bridge_tensor_t &bt,torch::Device device, bool copy,torch::ScalarType dtype = torch::kFloat32) {
+at::Tensor bridge_to_torch(bridge_tensor_t &bt,torch::Device device, bool copy,torch::ScalarType dtype = torch::kFloat32) {
     std::vector<int64_t> sizes_vec(bt.sizes, bt.sizes + bt.dim);
     auto shape = torch::IntArrayRef(sizes_vec);
     auto t = torch::from_blob(bt.data, shape, torch::kFloat);
@@ -144,6 +198,8 @@ extern "C" bridge_tensor_t load_run_model(const uint8_t* model_path, bridge_tens
 }
 
 
+
+
 extern "C" bridge_pt_model_t load_model(const uint8_t* model_path) {
 
     std::cout << "Begin loading model from path: " << model_path << std::endl;
@@ -153,21 +209,12 @@ extern "C" bridge_pt_model_t load_model(const uint8_t* model_path) {
     std::cout.flush();
 
     try {
-
         auto* module = new torch::jit::Module(torch::jit::load(path));
-        module->to(torch::kMPS,torch::kFloat16,false);
+        module->to(best_device,best_dtype,false);
         module->eval();
         std::cout << "Model loaded successfully!" << std::endl;
         std::cout.flush();
         return { static_cast<void*>(module) };
-
-        // torch::jit::Module tmp = torch::jit::load(path);
-        // std::cout << "Model loaded successfully!" << std::endl;
-        // std::cout.flush();
-        // auto* module = new torch::jit::Module(std::move(tmp));
-        // std::cout << "Model moved successfully!" << std::endl;
-        // std::cout.flush();
-        // return { static_cast<void*>(module) };
     } catch (const c10::Error& e) {
         std::cerr << "error loading the model\n" << e.msg();
         std::cout << "error loading the model\n" << e.msg();
@@ -178,49 +225,30 @@ extern "C" bridge_pt_model_t load_model(const uint8_t* model_path) {
     std::cout.flush();
 
     return { nullptr };
-
-
-
-    // bridge_pt_model_t model_wrapper;
-    // torch::jit::Module* pt_module = new torch::jit::Module(); // = (torch::jit::Module*) model_wrapper.pt_module;
-    // try {
-    //     *pt_module = torch::jit::load(mp);
-    //     std::cout << "Model loaded successfully!" << std::endl;
-    //     std::cout.flush();
-    //     model_wrapper.pt_module = pt_module;
-    // } catch (const c10::Error& e) {
-    //     std::cerr << "error loading the model\n" << e.msg();
-    //     std::cout << "error loading the model\n" << e.msg();
-    //     std::cout.flush();
-    //     std::cerr.flush();
-    // }
-
-    // std::cout << pt_module->dump_to_str(false,false,false) << std::endl;
-    // std::cout.flush();
-
-    // return model_wrapper;
 }
 
 
 
+// Run the TorchScript module stored in model.pt_module on `input` and return
+// the result as a bridge tensor (converted to float32 on the CPU first).
+// When is_vgg_based_model is true the output is additionally divided by 255.
+// NOTE(review): model.pt_module is dereferenced without a null check although
+// load_model() has a `return { nullptr }` path -- confirm callers guard this.
 bridge_tensor_t model_forward(bridge_pt_model_t model, bridge_tensor_t input, bool is_vgg_based_model) {
-
-    auto tn_mps = bridge_to_torch(input,torch::kMPS,true,torch::kFloat16);
-    // auto tn_mps = tn.to(torch::kMPS,false,true);
+    // Despite its name, tn_mps is requested on best_device with best_dtype.
+    auto tn_mps = bridge_to_torch(input,best_device,true,best_dtype);
+    // tn_mps = tn_mps.permute({2, 0, 1}).contiguous();
+    // tn_mps.unsqueeze_(0);//.contiguous();
+    // Move the last axis to the front and prepend a size-1 axis
+    // (presumably HWC image -> NCHW batch of one -- TODO confirm).
     auto tn = tn_mps.permute({2, 0, 1}).unsqueeze(0).contiguous();
 
     std::vector<torch::jit::IValue> ins;
     ins.push_back(tn);
 
     auto* module = static_cast<torch::jit::Module*>(model.pt_module);
     auto o = module->forward(ins).toTensor();
+    // auto tn_out = o.squeeze(0).permute({1, 2, 0}).contiguous();
+    // Reverse the input layout change: squeeze axis 0, then move the first
+    // remaining axis to the end.
     auto tn_out = o.squeeze(0).contiguous().permute({1, 2, 0}).contiguous();
 
     if (is_vgg_based_model) {
-        tn_out = tn_out / 255.0;
+        // In-place division (replaces the earlier `tn_out / 255.0` reassignment).
+        tn_out.div_(255.0);
     }
 
+    // Convert to float32 on the CPU before exporting through torch_to_bridge().
     auto tn_out_cpu = tn_out.to(torch::kCPU,torch::kFloat32,false,true);
+
     return torch_to_bridge(tn_out_cpu);
 
 }
@@ -233,6 +261,22 @@ extern "C" bridge_tensor_t model_forward_style_transfer(bridge_pt_model_t model,
     return model_forward(model, input, true);
 }
 
+// std::tuple<uint64_t, uint64_t> get_cpu_frame_size(uint64_t width, uint64_t height, float32_t scale_factor) {
+//     // if (best_device == torch::kMPS || best_device == torch::kCUDA)
+//     if (accelerator_available())
+//         return std::make_tuple(width, height);
+//     uint64_t new_width = static_cast<uint64_t>(width * scale_factor);
+//     uint64_t new_height = static_cast<uint64_t>(height * scale_factor);
+//     return std::make_tuple(new_width, new_height);
+// }
+
+// extern "C" uint64_t get_cpu_frame_width(uint64_t width,float32_t scale_factor) {
+//     return std::get<0>(get_cpu_frame_size(width, 0, scale_factor));
+// }
+// extern "C" uint64_t get_cpu_frame_height(uint64_t height,float32_t scale_factor) {
+//     return std::get<1>(get_cpu_frame_size(0, height, scale_factor));
+// }
+
 
 extern "C" void hello_world(void) {
     std::cout << "Hello from C++!" << std::endl;

diff --git a/demos/models/readme.md b/demos/models/readme.md
@@ -0,0 +1 @@
+This folder contains the model architectures used in the demos.
diff --git a/demos/models/transformer_net.py b/demos/models/transformer_net.py
@@ -0,0 +1,103 @@
+import torch
+
+
+class TransformerNet(torch.nn.Module):
+    """Feed-forward image network.
+
+    Layout: three conv + instance-norm stages, five residual blocks, two
+    upsampling conv stages and a final 9x9 convolution back to 3 channels.
+    """
+
+    def __init__(self):
+        super().__init__()
+        norm = torch.nn.InstanceNorm2d
+        # Downsampling convolutions, each paired with an affine instance norm
+        self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
+        self.in1 = norm(32, affine=True)
+        self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
+        self.in2 = norm(64, affine=True)
+        self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
+        self.in3 = norm(128, affine=True)
+        # Residual trunk at 128 channels
+        self.res1 = ResidualBlock(128)
+        self.res2 = ResidualBlock(128)
+        self.res3 = ResidualBlock(128)
+        self.res4 = ResidualBlock(128)
+        self.res5 = ResidualBlock(128)
+        # Upsampling stages followed by the output convolution
+        self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
+        self.in4 = norm(64, affine=True)
+        self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
+        self.in5 = norm(32, affine=True)
+        self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
+        # Shared activation
+        self.relu = torch.nn.ReLU()
+
+    def forward(self, X):
+        # Encoder: conv -> instance norm -> ReLU, three times
+        feat = self.conv1(X)
+        feat = self.relu(self.in1(feat))
+        feat = self.conv2(feat)
+        feat = self.relu(self.in2(feat))
+        feat = self.conv3(feat)
+        feat = self.relu(self.in3(feat))
+        # Residual trunk
+        feat = self.res1(feat)
+        feat = self.res2(feat)
+        feat = self.res3(feat)
+        feat = self.res4(feat)
+        feat = self.res5(feat)
+        # Decoder: two upsample-conv stages, then the final convolution
+        # (no normalisation or activation after the last layer)
+        feat = self.deconv1(feat)
+        feat = self.relu(self.in4(feat))
+        feat = self.deconv2(feat)
+        feat = self.relu(self.in5(feat))
+        return self.deconv3(feat)
+
+
+class ConvLayer(torch.nn.Module):
+    """2-D convolution preceded by reflection padding of ``kernel_size // 2``
+    on every side."""
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride):
+        super().__init__()
+        pad = kernel_size // 2
+        self.reflection_pad = torch.nn.ReflectionPad2d(pad)
+        self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
+
+    def forward(self, x):
+        # Pad with reflected borders first, then convolve.
+        return self.conv2d(self.reflection_pad(x))
+
+
+class ResidualBlock(torch.nn.Module):
+    """ResidualBlock
+    Two 3x3 conv + instance-norm stages whose result is added to the block input.
+    introduced in: https://arxiv.org/abs/1512.03385
+    recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
+    """
+
+    def __init__(self, channels):
+        super().__init__()
+        self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
+        self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
+        self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
+        self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
+        self.relu = torch.nn.ReLU()
+
+    def forward(self, x):
+        # Only the first stage is followed by ReLU; the skip connection is
+        # added to the un-activated output of the second stage.
+        branch = self.relu(self.in1(self.conv1(x)))
+        branch = self.in2(self.conv2(branch))
+        return branch + x
+
+
+class UpsampleConvLayer(torch.nn.Module):
+    """UpsampleConvLayer
+    Upsamples the input (nearest neighbour) and then does a reflection-padded
+    convolution. This method gives better results compared to ConvTranspose2d.
+    ref: http://distill.pub/2016/deconv-checkerboard/
+
+    Args:
+        in_channels, out_channels, kernel_size, stride: forwarded to
+            ``torch.nn.Conv2d``.
+        upsample: scale factor of the nearest-neighbour upsampling; a falsy
+            value (``None`` or ``0``) skips the upsampling step.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride, upsample):
+        super(UpsampleConvLayer, self).__init__()
+        # Honour the `upsample` argument instead of a hard-coded factor of 2.
+        # The resize stays a submodule so forward() needs no branching;
+        # Identity covers the "no upsampling" case.
+        if upsample:
+            self.upsample = torch.nn.Upsample(scale_factor=upsample, mode='nearest')
+        else:
+            self.upsample = torch.nn.Identity()
+        reflection_padding = kernel_size // 2
+        self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
+        self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
+
+    def forward(self, x):
+        out = self.upsample(x)
+        out = self.reflection_pad(out)
+        out = self.conv2d(out)
+        return out
diff --git a/demos/video/chapel-webcam/lib/Makefile.smol b/demos/video/chapel-webcam/lib/Makefile.smol
@@ -6,7 +6,7 @@ CHPL_THIRD_PARTY = /opt/homebrew/Cellar/chapel/2.4.0_1/libexec/third-party
 
 CHPL_HOME = /opt/homebrew/Cellar/chapel/2.4.0_1/libexec
 
-CHPL_CFLAGS = -Ilib -Wno-unused -Wno-uninitialized -Wno-pointer-sign -Wno-incompatible-pointer-types -Wno-tautological-compare -I/opt/homebrew/Cellar/chapel/2.4.0_1/libexec/modules/internal -I/opt/homebrew/Cellar/chapel/2.4.0_1/libexec/modules/packages -I$(CHPL_RUNTIME_INCL)/localeModels/flat -I$(CHPL_RUNTIME_INCL)/localeModels -I$(CHPL_RUNTIME_INCL)/comm/none -I$(CHPL_RUNTIME_INCL)/comm -I$(CHPL_RUNTIME_INCL)/tasks/qthreads -I$(CHPL_RUNTIME_INCL)/. -I$(CHPL_RUNTIME_INCL)/./qio -I$(CHPL_RUNTIME_INCL)/./atomics/cstdlib -I$(CHPL_RUNTIME_INCL)/./mem/jemalloc -I$(CHPL_THIRD_PARTY)/utf8-decoder -I$(CHPL_THIRD_PARTY)/qthread/install/darwin-arm64-native-llvm-none-flat-jemalloc-system/include -Wno-error=unused-variable -I$(CHPL_THIRD_PARTY)/re2/install/darwin-arm64-native-llvm-none/include -I. -I/opt/homebrew/Cellar/gmp/6.3.0/include -I/opt/homebrew/Cellar/hwloc/2.12.0/include -I/opt/homebrew/Cellar/jemalloc/5.3.0/include -I/opt/homebrew/include
+CHPL_CFLAGS = -Ilib -Wno-unused -Wno-uninitialized -Wno-pointer-sign -Wno-incompatible-pointer-types -Wno-tautological-compare -I/opt/homebrew/Cellar/chapel/2.4.0_1/libexec/modules/internal -I/opt/homebrew/Cellar/chapel/2.4.0_1/libexec/modules/packages -I../../../lib -I$(CHPL_RUNTIME_INCL)/localeModels/flat -I$(CHPL_RUNTIME_INCL)/localeModels -I$(CHPL_RUNTIME_INCL)/comm/none -I$(CHPL_RUNTIME_INCL)/comm -I$(CHPL_RUNTIME_INCL)/tasks/qthreads -I$(CHPL_RUNTIME_INCL)/. -I$(CHPL_RUNTIME_INCL)/./qio -I$(CHPL_RUNTIME_INCL)/./atomics/cstdlib -I$(CHPL_RUNTIME_INCL)/./mem/jemalloc -I$(CHPL_THIRD_PARTY)/utf8-decoder -I$(CHPL_THIRD_PARTY)/qthread/install/darwin-arm64-native-llvm-none-flat-jemalloc-system/include -Wno-error=unused-variable -I$(CHPL_THIRD_PARTY)/re2/install/darwin-arm64-native-llvm-none/include -I. -I/opt/homebrew/Cellar/gmp/6.3.0/include -I/opt/homebrew/Cellar/hwloc/2.12.0/include -I/opt/homebrew/Cellar/jemalloc/5.3.0/include -I/opt/homebrew/include
 
 CHPL_LDFLAGS = -Llib -lsmol -ltorch -ltorch_cpu -lc10 -ltorch_global_deps -lbridge_objs -L$(CHPL_RUNTIME_LIB)/darwin/llvm/arm64/cpu-native/loc-flat/comm-none/tasks-qthreads/tmr-generic/unwind-none/mem-jemalloc/atomics-cstdlib/hwloc-system/re2-bundled/fs-none/lib_pic-none/san-none -lchpl -L$(CHPL_THIRD_PARTY)/qthread/install/darwin-arm64-native-llvm-none-flat-jemalloc-system/lib -Wl,-rpath,$(CHPL_THIRD_PARTY)/qthread/install/darwin-arm64-native-llvm-none-flat-jemalloc-system/lib -lqthread -L/opt/homebrew/Cellar/hwloc/2.12.0/lib -L$(CHPL_THIRD_PARTY)/re2/install/darwin-arm64-native-llvm-none/lib -lre2 -Wl,-rpath,$(CHPL_THIRD_PARTY)/re2/install/darwin-arm64-native-llvm-none/lib -lm -lpthread -L/opt/homebrew/Cellar/gmp/6.3.0/lib -lgmp -L/opt/homebrew/Cellar/hwloc/2.12.0/lib -Wl,-rpath,/opt/homebrew/Cellar/hwloc/2.12.0/lib -lhwloc -L/opt/homebrew/Cellar/jemalloc/5.3.0/lib -Wl,-rpath,/opt/homebrew/Cellar/jemalloc/5.3.0/lib -ljemalloc -L/opt/homebrew/lib
 

diff --git a/demos/video/chapel-webcam/lib/smol.cmake b/demos/video/chapel-webcam/lib/smol.cmake
@@ -6,7 +6,7 @@ set(CHPL_THIRD_PARTY /opt/homebrew/Cellar/chapel/2.4.0_1/libexec/third-party)
 
 set(CHPL_HOME /opt/homebrew/Cellar/chapel/2.4.0_1/libexec)
 
-set(smol_INCLUDE_DIRS ${CMAKE_CURRENT_LIST_DIR}  /opt/homebrew/Cellar/chapel/2.4.0_1/libexec/modules/internal /opt/homebrew/Cellar/chapel/2.4.0_1/libexec/modules/packages ${CHPL_RUNTIME_INCL}/localeModels/flat ${CHPL_RUNTIME_INCL}/localeModels ${CHPL_RUNTIME_INCL}/comm/none ${CHPL_RUNTIME_INCL}/comm ${CHPL_RUNTIME_INCL}/tasks/qthreads ${CHPL_RUNTIME_INCL}/. ${CHPL_RUNTIME_INCL}/./qio ${CHPL_RUNTIME_INCL}/./atomics/cstdlib ${CHPL_RUNTIME_INCL}/./mem/jemalloc ${CHPL_THIRD_PARTY}/utf8-decoder ${CHPL_THIRD_PARTY}/qthread/install/darwin-arm64-native-llvm-none-flat-jemalloc-system/include -Wno-error=unused-variable ${CHPL_THIRD_PARTY}/re2/install/darwin-arm64-native-llvm-none/include . /opt/homebrew/Cellar/gmp/6.3.0/include /opt/homebrew/Cellar/hwloc/2.12.0/include /opt/homebrew/Cellar/jemalloc/5.3.0/include /opt/homebrew/include)
+set(smol_INCLUDE_DIRS ${CMAKE_CURRENT_LIST_DIR}  /opt/homebrew/Cellar/chapel/2.4.0_1/libexec/modules/internal /opt/homebrew/Cellar/chapel/2.4.0_1/libexec/modules/packages ../../../lib ${CHPL_RUNTIME_INCL}/localeModels/flat ${CHPL_RUNTIME_INCL}/localeModels ${CHPL_RUNTIME_INCL}/comm/none ${CHPL_RUNTIME_INCL}/comm ${CHPL_RUNTIME_INCL}/tasks/qthreads ${CHPL_RUNTIME_INCL}/. ${CHPL_RUNTIME_INCL}/./qio ${CHPL_RUNTIME_INCL}/./atomics/cstdlib ${CHPL_RUNTIME_INCL}/./mem/jemalloc ${CHPL_THIRD_PARTY}/utf8-decoder ${CHPL_THIRD_PARTY}/qthread/install/darwin-arm64-native-llvm-none-flat-jemalloc-system/include -Wno-error=unused-variable ${CHPL_THIRD_PARTY}/re2/install/darwin-arm64-native-llvm-none/include . /opt/homebrew/Cellar/gmp/6.3.0/include /opt/homebrew/Cellar/hwloc/2.12.0/include /opt/homebrew/Cellar/jemalloc/5.3.0/include /opt/homebrew/include)
 
 set(smol_LINK_LIBS -L${CMAKE_CURRENT_LIST_DIR} -lsmol  -ltorch -ltorch_cpu -lc10 -ltorch_global_deps -lbridge_objs -L${CHPL_RUNTIME_LIB}/darwin/llvm/arm64/cpu-native/loc-flat/comm-none/tasks-qthreads/tmr-generic/unwind-none/mem-jemalloc/atomics-cstdlib/hwloc-system/re2-bundled/fs-none/lib_pic-none/san-none -lchpl -L${CHPL_THIRD_PARTY}/qthread/install/darwin-arm64-native-llvm-none-flat-jemalloc-system/lib -Wl,-rpath,${CHPL_THIRD_PARTY}/qthread/install/darwin-arm64-native-llvm-none-flat-jemalloc-system/lib -lqthread -L/opt/homebrew/Cellar/hwloc/2.12.0/lib -L${CHPL_THIRD_PARTY}/re2/install/darwin-arm64-native-llvm-none/lib -lre2 -Wl,-rpath,${CHPL_THIRD_PARTY}/re2/install/darwin-arm64-native-llvm-none/lib -lm -lpthread -L/opt/homebrew/Cellar/gmp/6.3.0/lib -lgmp -L/opt/homebrew/Cellar/hwloc/2.12.0/lib -Wl,-rpath,/opt/homebrew/Cellar/hwloc/2.12.0/lib -lhwloc -L/opt/homebrew/Cellar/jemalloc/5.3.0/lib -Wl,-rpath,/opt/homebrew/Cellar/jemalloc/5.3.0/lib -ljemalloc -L/opt/homebrew/lib -lsmol)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		This folder contains the model architectures used in the demos.