Skip to content

Commit f917222

Browse files
committed
Fix compilation errors after 0.10.1 merge: API compat fixes
1 parent 02d80c2 commit f917222

22 files changed

Lines changed: 118 additions & 102 deletions

File tree

candle-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ criterion = { workspace = true }
4646

4747
[features]
4848
default = []
49-
cuda = ["cudarc", "dep:candle-kernels", "float8/cuda", "candle-ug?/cuda"]
49+
cuda = ["cudarc", "dep:candle-kernels", "candle-ug?/cuda"]
5050
cudnn = ["cuda", "cudarc/cudnn"]
5151
nccl = ["cuda", "cudarc/nccl"]
5252
mkl = ["dep:libc", "dep:intel-mkl-src"]

candle-core/src/cpu_backend/mod.rs

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2745,7 +2745,7 @@ impl BackendStorage for CpuStorage {
27452745
let kernel_l = Layout::contiguous_with_offset((1, n, k), kernel_l.start_offset())
27462746
.transpose(1, 2)?
27472747
.broadcast_as((b, k, n))?;
2748-
col.matmul(kernel, (b, m, n, k), &col_l, &kernel_l)?
2748+
col.matmul_with_alpha(kernel, None, (b, m, n, k), &col_l, &kernel_l)?
27492749
} else {
27502750
// Make the kernel contiguous if not already the case.
27512751
let mut kernel_c = unsafe {
@@ -2756,7 +2756,7 @@ impl BackendStorage for CpuStorage {
27562756
let kernel_l = Layout::contiguous_with_offset((1, n, k), kernel_l.start_offset())
27572757
.transpose(1, 2)?
27582758
.broadcast_as((b, k, n))?;
2759-
col.matmul(kernel, (b, m, n, k), &col_l, &kernel_l)?
2759+
col.matmul_with_alpha(kernel, None, (b, m, n, k), &col_l, &kernel_l)?
27602760
};
27612761
let res_l = Layout::contiguous((b, l_out, params.c_out)).transpose(1, 2)?;
27622762
let mut res_t = unsafe { self.device().alloc_uninit(res_l.shape(), res.dtype())? };
@@ -2797,8 +2797,9 @@ impl BackendStorage for CpuStorage {
27972797
vec![0, k_size * c_out, 1],
27982798
kernel_l.start_offset(),
27992799
);
2800-
self.matmul(
2800+
self.matmul_with_alpha(
28012801
kernel,
2802+
None,
28022803
(
28032804
b_size,
28042805
/* m */ l_in,
@@ -2942,14 +2943,39 @@ impl BackendStorage for CpuStorage {
29422943
}
29432944
}
29442945

2945-
fn matmul(
2946+
fn matmul_with_alpha_beta(
29462947
&self,
29472948
rhs: &Self,
2949+
c: &mut Self,
2950+
s: Option<f64>,
2951+
bmnk: (usize, usize, usize, usize),
2952+
lhs_l: &Layout,
2953+
rhs_l: &Layout,
2954+
c_layout: &Layout,
2955+
) -> Result<()> {
2956+
let mm = self.matmul_with_alpha(rhs, s, bmnk, lhs_l, rhs_l)?;
2957+
let mm_l = Layout::contiguous(c_layout.shape());
2958+
*c = c.binary_impl::<crate::op::Add>(&mm, c_layout, &mm_l)?;
2959+
Ok(())
2960+
}
2961+
2962+
fn matmul_with_alpha(
2963+
&self,
2964+
rhs: &Self,
2965+
s: Option<f64>,
29482966
bmnk: (usize, usize, usize, usize),
29492967
lhs_l: &Layout,
29502968
rhs_l: &Layout,
29512969
) -> Result<Self> {
2952-
MatMul(bmnk).map(self, lhs_l, rhs, rhs_l)
2970+
let mm = MatMul(bmnk).map(self, lhs_l, rhs, rhs_l)?;
2971+
match s {
2972+
None => Ok(mm),
2973+
Some(alpha) => {
2974+
let (b, m, n, _) = bmnk;
2975+
let mm_l = Layout::contiguous((b, m, n));
2976+
mm.affine(&mm_l, alpha, 0.0)
2977+
}
2978+
}
29532979
}
29542980

29552981
fn device(&self) -> &Self::Device {

candle-core/src/dtype.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,6 @@ with_dtype!(bf16, BF16, bf16::from_f64, bf16::to_f64);
237237
with_dtype!(f32, F32, |v: f64| v as f32, |v: f32| v as f64);
238238
with_dtype!(f64, F64, |v: f64| v, |v: f64| v);
239239
with_dtype!(f8e4m3, F8E4M3, f8e4m3::from_f64, |v: f8e4m3| v.to_f64());
240-
with_dtype!(f8e4m3, F8E4M3, f8e4m3::from_f64, |v: f8e4m3| v.to_f64());
241240

242241
pub trait IntDType: WithDType + num_traits::Bounded {
243242
fn is_true(&self) -> bool;

candle-core/src/storage.rs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -791,10 +791,9 @@ impl Storage {
791791
(Self::Metal(lhs), Self::Metal(rhs), Self::Metal(c)) => {
792792
lhs.matmul_with_alpha_beta(rhs, c, s, bmnk, lhs_layout, rhs_layout, c_layout)
793793
}
794-
(lhs, rhs, c) => Err(Error::DeviceMismatchBinaryOp3 {
794+
(lhs, rhs, _c) => Err(Error::DeviceMismatchBinaryOp {
795795
lhs: lhs.device().location(),
796796
rhs: rhs.device().location(),
797-
c: c.device().location(),
798797
op: "matmul_with_alpha_beta",
799798
}
800799
.bt()),

candle-core/src/tensor.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1527,8 +1527,9 @@ impl Tensor {
15271527
.bt())?
15281528
}
15291529

1530-
let storage = self.storage().matmul(
1530+
let storage = self.storage().matmul_with_alpha(
15311531
&rhs.storage(),
1532+
None,
15321533
(batching, m, n, k),
15331534
self.layout(),
15341535
rhs.layout(),

candle-examples/examples/mamba-minimal/model.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
/// https://github.com/johnma2006/mamba-minimal/blob/master/model.py
33
/// Simple, minimal implementation of Mamba in one file of PyTorch.
44
use candle::{IndexOp, Module, Result, Tensor, D};
5-
use candle_nn::{layer_norm::RmsNormNonQuantized, RmsNorm, VarBuilder};
5+
use candle_nn::{RmsNorm, VarBuilder};
66

77
use candle_transformers::models::with_tracing::{linear, linear_no_bias, Linear};
88

@@ -144,12 +144,12 @@ impl Module for MambaBlock {
144144
#[derive(Clone, Debug)]
145145
pub struct ResidualBlock {
146146
mixer: MambaBlock,
147-
norm: RmsNorm<RmsNormNonQuantized>,
147+
norm: RmsNorm,
148148
}
149149

150150
impl ResidualBlock {
151151
pub fn new(cfg: &Config, vb: VarBuilder) -> Result<Self> {
152-
let norm = candle_nn::rms_norm_non_quant(cfg.d_model, 1e-5, vb.pp("norm"))?;
152+
let norm = candle_nn::rms_norm(cfg.d_model, 1e-5, vb.pp("norm"))?;
153153
let mixer = MambaBlock::new(cfg, vb.pp("mixer"))?;
154154
Ok(Self { mixer, norm })
155155
}
@@ -166,7 +166,7 @@ impl Module for ResidualBlock {
166166
pub struct Model {
167167
embedding: candle_nn::Embedding,
168168
layers: Vec<ResidualBlock>,
169-
norm_f: RmsNorm<RmsNormNonQuantized>,
169+
norm_f: RmsNorm,
170170
lm_head: Linear,
171171
}
172172

@@ -179,7 +179,7 @@ impl Model {
179179
let layer = ResidualBlock::new(cfg, vb_l.pp(layer_idx))?;
180180
layers.push(layer)
181181
}
182-
let norm_f = candle_nn::rms_norm_non_quant(cfg.d_model, 1e-5, vb.pp("norm_f"))?;
182+
let norm_f = candle_nn::rms_norm(cfg.d_model, 1e-5, vb.pp("norm_f"))?;
183183
let lm_head = Linear::from_weights(embedding.embeddings().clone(), None);
184184
Ok(Self {
185185
embedding,

candle-nn/Cargo.toml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,6 @@ cudnn = ["candle/cudnn"]
3939
mkl = ["dep:intel-mkl-src", "candle/mkl"]
4040
metal = ["candle/metal", "dep:candle-metal-kernels", "dep:objc2-metal"]
4141
flash-attn = ["cuda", "dep:candle-flash-attn"]
42-
metal = ["candle/metal", "dep:candle-metal-kernels", "dep:objc2-metal"]
43-
flash-attn = ["cuda", "dep:candle-flash-attn"]
4442

4543
[[bench]]
4644
name = "bench_main"

candle-pyo3/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@ half = { workspace = true }
2222
float8 = { workspace = true }
2323
intel-mkl-src = { workspace = true, optional = true }
2424
pyo3 = { version = "0.27", features = ["extension-module", "abi3-py313"] }
25-
float8 = { workspace = true }
2625

2726
[build-dependencies]
2827
pyo3-build-config = "0.27"

candle-transformers/src/models/based.rs

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@
77
88
use candle::{DType, Device, IndexOp, Module, Result, Tensor, D};
99
use candle_nn::{
10-
conv1d_no_bias, layer_norm::RmsNormNonQuantized, linear, linear_no_bias, ops::softmax_last_dim,
11-
rms_norm_non_quant, Conv1d, Conv1dConfig, Func, Linear, RmsNorm, VarBuilder,
10+
conv1d_no_bias, linear, linear_no_bias, ops::softmax_last_dim, rms_norm, Conv1d,
11+
Conv1dConfig, Func, Linear, RmsNorm, VarBuilder,
1212
};
1313
use std::sync::Arc;
1414

@@ -459,16 +459,16 @@ impl SequenceMixer {
459459
#[derive(Debug, Clone)]
460460
struct DecoderLayer {
461461
mlp: MLP,
462-
norm1: RmsNorm<RmsNormNonQuantized>,
463-
norm2: RmsNorm<RmsNormNonQuantized>,
462+
norm1: RmsNorm,
463+
norm2: RmsNorm,
464464
mixer: SequenceMixer,
465465
}
466466

467467
impl DecoderLayer {
468468
fn new(layer_idx: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> {
469469
let mlp = MLP::new(cfg, vb.pp("mlp"))?;
470-
let norm1 = rms_norm_non_quant(cfg.hidden_size, cfg.layer_norm_epsilon, vb.pp("norm1"))?;
471-
let norm2 = rms_norm_non_quant(cfg.hidden_size, cfg.layer_norm_epsilon, vb.pp("norm2"))?;
470+
let norm1 = rms_norm(cfg.hidden_size, cfg.layer_norm_epsilon, vb.pp("norm1"))?;
471+
let norm2 = rms_norm(cfg.hidden_size, cfg.layer_norm_epsilon, vb.pp("norm2"))?;
472472

473473
let l_attn = cfg.alt_mixer_layers.contains(&layer_idx);
474474
let sw_attn = cfg.alt_mixer_2_layers.contains(&layer_idx);
@@ -509,7 +509,7 @@ impl DecoderLayer {
509509
pub struct Model {
510510
embed_tokens: super::with_tracing::Embedding,
511511
layers: Vec<DecoderLayer>,
512-
norm: RmsNorm<RmsNormNonQuantized>,
512+
norm: RmsNorm,
513513
lm_head: Linear,
514514
sliding_window: usize,
515515
device: Device,
@@ -528,7 +528,7 @@ impl Model {
528528
let layer = DecoderLayer::new(layer_idx, cfg, vb_l.pp(layer_idx))?;
529529
layers.push(layer)
530530
}
531-
let norm = rms_norm_non_quant(cfg.hidden_size, cfg.layer_norm_epsilon, vb_m.pp("ln_f"))?;
531+
let norm = rms_norm(cfg.hidden_size, cfg.layer_norm_epsilon, vb_m.pp("ln_f"))?;
532532
Ok(Self {
533533
embed_tokens,
534534
layers,

candle-transformers/src/models/beit.rs

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -86,34 +86,34 @@ impl Attention {
8686
.contiguous()?;
8787

8888
let relative_coords = relative_coords.slice_assign(
89-
&[&(0..w_area), &(0..w_area), &(0..1)],
89+
&[0..w_area, 0..w_area, 0..1],
9090
&(relative_coords.i((0..w_area, 0..w_area, 0..1))? + (WINDOW_SIZE - 1) as f64)?,
9191
)?;
9292
let relative_coords = relative_coords.slice_assign(
93-
&[&(0..w_area), &(0..w_area), &(1..2)],
93+
&[0..w_area, 0..w_area, 1..2],
9494
&(relative_coords.i((0..w_area, 0..w_area, 1..2))? + (WINDOW_SIZE - 1) as f64)?,
9595
)?;
9696
let relative_coords = relative_coords.slice_assign(
97-
&[&(0..w_area), &(0..w_area), &(0..1)],
97+
&[0..w_area, 0..w_area, 0..1],
9898
&(relative_coords.i((.., .., 0..1))? * (2. * (WINDOW_SIZE as f64) - 1.))?,
9999
)?;
100100

101101
Tensor::zeros((w_area + 1, w_area + 1), DType::I64, device)?
102-
.slice_assign(&[&(1..), &(1..)], &relative_coords.sum(2)?)?
102+
.slice_assign(&[1.., 1..], &relative_coords.sum(2)?)?
103103
.slice_assign(
104-
&[&(0..1), &(0..(w_area + 1))],
104+
&[0..1, 0..(w_area + 1)],
105105
&(Tensor::ones((1, w_area + 1), DType::I64, device)?
106106
* ((num_relative_distance - 3) as f64))?
107107
.to_dtype(DType::I64)?,
108108
)?
109109
.slice_assign(
110-
&[&(0..(w_area + 1)), &(0..1)],
110+
&[0..(w_area + 1), 0..1],
111111
&(Tensor::ones((w_area + 1, 1), DType::I64, device)?
112112
* ((num_relative_distance - 2) as f64))?
113113
.to_dtype(DType::I64)?,
114114
)?
115115
.slice_assign(
116-
&[&(0..1), &(0..1)],
116+
&[0..1, 0..1],
117117
&(Tensor::ones((1, 1), DType::I64, device)?
118118
* ((num_relative_distance - 1) as f64))?
119119
.to_dtype(DType::I64)?,

0 commit comments

Comments
 (0)