@@ -640,10 +640,94 @@ class llama_model_kv_override(ctypes.Structure):
640
640
value : Union [int , float , bool , bytes ]
641
641
642
642
643
+
644
# struct ggml_backend_buffer_type_i {
#     const char *          (*get_name)      (ggml_backend_buffer_type_t buft);
#     // allocate a buffer of this type
#     ggml_backend_buffer_t (*alloc_buffer)  (ggml_backend_buffer_type_t buft, size_t size);
#     // tensor alignment
#     size_t                (*get_alignment) (ggml_backend_buffer_type_t buft);
#     // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
#     size_t                (*get_max_size)  (ggml_backend_buffer_type_t buft);
#     // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
#     size_t                (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
#     // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
#     bool                  (*is_host)       (ggml_backend_buffer_type_t buft);
# };
class ggml_backend_buffer_type_i(ctypes.Structure):
    """Mirror of ggml's ``struct ggml_backend_buffer_type_i`` (a vtable of
    C function pointers).

    Only the memory layout matters on the Python side — none of the slots
    are ever invoked from here — so every member is declared as a plain
    ``c_void_p``, which has the same size/alignment as a function pointer.
    """

    # The six slots, in the exact order of the C struct; order must not
    # change, since it defines the binary layout.
    _fields_ = [
        (slot, ctypes.c_void_p)  # NOTE: Unused from Python
        for slot in (
            "get_name",
            "alloc_buffer",
            "get_alignment",
            "get_max_size",
            "get_alloc_size",
            "is_host",
        )
    ]
666
+
667
# typedef struct ggml_backend_device * ggml_backend_dev_t;
# Opaque device handle: the pointed-to struct is never dereferenced here,
# so a bare void pointer is sufficient.
ggml_backend_dev_t = ctypes.c_void_p  # NOTE: Unused
670
# struct ggml_backend_buffer_type {
#     struct ggml_backend_buffer_type_i iface;
#     ggml_backend_dev_t device;
#     void * context;
# };
class ggml_backend_buffer_type(ctypes.Structure):
    """Mirror of ggml's ``struct ggml_backend_buffer_type``.

    Layout (must match the C struct exactly):
      - ``iface``:   embedded ``ggml_backend_buffer_type_i`` vtable (by value)
      - ``device``:  opaque ``ggml_backend_dev_t`` handle
      - ``context``: backend-private data pointer
    """

    _fields_ = [
        ("iface", ggml_backend_buffer_type_i),
        ("device", ggml_backend_dev_t),
        ("context", ctypes.c_void_p),
    ]
681
+
682
# typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
# Pointer-to-struct handle; this is the type the llama.cpp API functions
# below accept and return.
ggml_backend_buffer_type_t = ctypes.POINTER(ggml_backend_buffer_type)
643
685
# struct llama_model_tensor_buft_override {
#     const char * pattern;
#     ggml_backend_buffer_type_t buft;
# };
class llama_model_tensor_buft_override(ctypes.Structure):
    """Mirror of llama.cpp's ``struct llama_model_tensor_buft_override``:
    one entry mapping a tensor-name pattern to a buffer type.

    Fields (must match the C layout):
      - ``pattern``: C string matched against tensor names (matching
        semantics are defined by llama.cpp)
      - ``buft``:    buffer type to use for matching tensors
    """

    _fields_ = [
        ("pattern", ctypes.c_char_p),
        ("buft", ggml_backend_buffer_type_t),
    ]
694
+
695
+
696
# GGML_API size_t ggml_backend_dev_count(void);
@ctypes_function("ggml_backend_dev_count", [], ctypes.c_size_t)
def ggml_backend_dev_count() -> int:
    """Return the number of ggml backend devices available."""
    ...
704
+
705
# GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
@ctypes_function("ggml_backend_dev_get", [ctypes.c_size_t], ggml_backend_dev_t)
def ggml_backend_dev_get(index: int, /) -> ggml_backend_dev_t:
    """Return the device handle at *index*.

    Valid indices are ``0 <= index < ggml_backend_dev_count()``.
    """
    ...
713
+
714
# GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
@ctypes_function(
    "ggml_backend_dev_buffer_type",
    [ggml_backend_dev_t],
    ggml_backend_buffer_type_t,
)
def ggml_backend_dev_buffer_type(
    device: ggml_backend_dev_t, /
) -> ggml_backend_buffer_type_t:
    """Return the buffer type associated with *device*."""
    ...
722
+
723
# GGML_API const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft);
@ctypes_function(
    "ggml_backend_buft_name",
    [ggml_backend_buffer_type_t],
    ctypes.c_char_p,
)
def ggml_backend_buft_name(buft: ggml_backend_buffer_type_t, /) -> bytes:
    """Return the name of buffer type *buft* as raw bytes."""
    ...
647
731
648
732
649
733
# struct llama_model_params {
@@ -703,7 +787,7 @@ class llama_model_params(ctypes.Structure):
703
787
704
788
if TYPE_CHECKING :
705
789
devices : CtypesArray [ctypes .c_void_p ] # NOTE: unused
706
- tensor_buft_overrides : CtypesArray [llama_model_tensor_buft_override ] # NOTE: unused
790
+ tensor_buft_overrides : CtypesArray [llama_model_tensor_buft_override ]
707
791
n_gpu_layers : int
708
792
split_mode : int
709
793
main_gpu : int
@@ -718,7 +802,7 @@ class llama_model_params(ctypes.Structure):
718
802
719
803
_fields_ = [
720
804
("devices" , ctypes .c_void_p ), # NOTE: unnused
721
- ("tensor_buft_overrides" , ctypes .c_void_p ), # NOTE: unused
805
+ ("tensor_buft_overrides" , ctypes .POINTER ( llama_model_tensor_buft_override )),
722
806
("n_gpu_layers" , ctypes .c_int32 ),
723
807
("split_mode" , ctypes .c_int ),
724
808
("main_gpu" , ctypes .c_int32 ),
0 commit comments