mlx_rs/nn/linear.rs

use std::iter::once;

use crate::{error::Exception, quantization::Quantizable, Array};
use mlx_internal_macros::{Buildable, Builder};

use crate::{
    macros::ModuleParameters,
    module::{Module, Param},
};

use super::QuantizedLinear;

/// Builder for [`Linear`] module
#[derive(Debug, Clone, Builder)]
#[builder(
    root = crate,
    build_with = build_linear,
    err = Exception,
)]
pub struct LinearBuilder {
    /// The number of input dimensions.
    pub input_dims: i32,

    /// The number of output dimensions.
    pub output_dims: i32,

    /// Whether to include a bias in the linear layer. Defaults to [`Linear::DEFAULT_BIAS`].
    #[builder(optional, default = Linear::DEFAULT_BIAS)]
    pub bias: bool,
}

/// Builds a new [`Linear`] layer.
fn build_linear(builder: LinearBuilder) -> Result<Linear, Exception> {
    let input_dims = builder.input_dims;
    let output_dims = builder.output_dims;
    let with_bias = builder.bias;

    // Initialize the weight uniformly in [-1/sqrt(input_dims), 1/sqrt(input_dims)]
    let scale = f32::sqrt(1.0 / (input_dims as f32));
    let weight = crate::random::uniform::<_, f32>(-scale, scale, &[output_dims, input_dims], None)?;

    let bias = if with_bias {
        Some(crate::random::uniform::<_, f32>(
            -scale,
            scale,
            &[output_dims],
            None,
        )?)
    } else {
        None
    };

    Ok(Linear {
        weight: Param::new(weight),
        bias: Param::new(bias),
    })
}

/// Applies an affine transformation to the input.
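///
/// Concretely, computes `y = x W^T + b`, where the weight `W` has shape
/// `[output_dims, input_dims]` and the bias `b` is optional.
///
/// # Example
///
/// A minimal usage sketch; the paths assume the layer is re-exported as
/// `mlx_rs::nn::Linear`:
///
/// ```rust,ignore
/// use mlx_rs::{module::Module, nn::Linear};
///
/// // 16 input features -> 5 output features, with a bias (the default).
/// let mut layer = Linear::new(16, 5).unwrap();
/// let x = mlx_rs::random::uniform::<_, f32>(0.0, 1.0, &[2, 8, 16], None).unwrap();
/// let y = layer.forward(&x).unwrap(); // shape: [2, 8, 5]
/// ```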
#[derive(Debug, Clone, ModuleParameters, Buildable)]
#[module(root = crate)]
#[buildable(root = crate)]
pub struct Linear {
    /// The weight of the linear layer.
    #[param]
    pub weight: Param<Array>,

    /// The bias of the linear layer.
    #[param]
    pub bias: Param<Option<Array>>,
}

impl Linear {
    /// Default value for the `bias` option.
    pub const DEFAULT_BIAS: bool = true;

    /// Returns the shape of the linear layer's weight as `(output_dims, input_dims)`.
    pub fn shape(&self) -> (i32, i32) {
        let weight_shape = self.weight.as_ref().shape();
        (weight_shape[0], weight_shape[1])
    }
}

impl Module<&Array> for Linear {
    type Error = Exception;
    type Output = Array;

    fn forward(&mut self, x: &Array) -> Result<Array, Self::Error> {
        // y = x W^T + b, using a fused multiply-add (addmm) when a bias is present
        match &self.bias.value {
            Some(bias) => crate::ops::addmm(bias, x, self.weight.value.t(), None, None),
            None => crate::ops::matmul(x, self.weight.value.t()),
        }
    }

    fn training_mode(&mut self, _: bool) {}
}

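/// Converts a [`Linear`] layer into a [`QuantizedLinear`] layer.
///
/// # Example
///
/// A hedged sketch of the conversion; the group size and bit width below are
/// illustrative values, not library defaults confirmed here:
///
/// ```rust,ignore
/// use mlx_rs::{nn::Linear, quantization::Quantizable};
///
/// let layer = Linear::new(512, 512).unwrap();
/// // Quantize the weights with a group size of 64 and 4 bits per weight.
/// let quantized = layer.try_into_quantized(64, 4).unwrap();
/// ```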
impl Quantizable for Linear {
    type Quantized = QuantizedLinear;
    type QuantizationError = Exception;

    fn try_into_quantized(
        self,
        group_size: i32,
        bits: i32,
    ) -> Result<Self::Quantized, Self::QuantizationError> {
        QuantizedLinear::try_from_linear(self, group_size, bits)
    }
}

/// Builder for [`Bilinear`] module
#[derive(Debug, Clone, Builder)]
#[builder(
    root = crate,
    build_with = build_bilinear,
    err = Exception,
)]
pub struct BilinearBuilder {
    /// The number of input dimensions for the first input.
    pub input_dims_1: i32,

    /// The number of input dimensions for the second input.
    pub input_dims_2: i32,

    /// The number of output dimensions.
    pub output_dims: i32,

    /// Whether to include a bias in the bilinear layer. Defaults to [`Bilinear::DEFAULT_BIAS`].
    #[builder(optional, default = Bilinear::DEFAULT_BIAS)]
    pub bias: bool,
}

/// Builds a new [`Bilinear`] layer.
fn build_bilinear(builder: BilinearBuilder) -> Result<Bilinear, Exception> {
    let input_dims_1 = builder.input_dims_1;
    let input_dims_2 = builder.input_dims_2;
    let output_dims = builder.output_dims;
    let with_bias = builder.bias;

    // Initialize the weights uniformly in [-1/sqrt(input_dims_1), 1/sqrt(input_dims_1)]
    let scale = f32::sqrt(1.0 / (input_dims_1 as f32));
    let weights = crate::random::uniform::<_, f32>(
        -scale,
        scale,
        &[output_dims, input_dims_2, input_dims_1],
        None,
    )?;

    let bias = if with_bias {
        Some(crate::random::uniform::<_, f32>(
            -scale,
            scale,
            &[output_dims],
            None,
        )?)
    } else {
        None
    };

    Ok(Bilinear {
        weights: Param::new(weights),
        bias: Param::new(bias),
    })
}

/// Applies a bilinear transformation to the inputs.
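///
/// For each output dimension `k`, computes `y_k = x2^T W_k x1 + b_k`, where the
/// weight `W` has shape `[output_dims, input_dims_2, input_dims_1]` and the bias
/// `b` is optional. The current [`Module`] implementation derives both `x1` and
/// `x2` from the single input array by reshaping it.
///
/// # Example
///
/// A minimal usage sketch; it assumes the builder macros generate a
/// `Bilinear::new(input_dims_1, input_dims_2, output_dims)` constructor analogous
/// to `Linear::new`, and that the layer is re-exported as `mlx_rs::nn::Bilinear`:
///
/// ```rust,ignore
/// use mlx_rs::{module::Module, nn::Bilinear};
///
/// // 8 features on each side of the bilinear form -> 4 output features.
/// let mut layer = Bilinear::new(8, 8, 4).unwrap();
/// let x = mlx_rs::random::uniform::<_, f32>(0.0, 1.0, &[2, 8], None).unwrap();
/// let y = layer.forward(&x).unwrap(); // shape: [2, 4]
/// ```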
#[derive(Debug, Clone, ModuleParameters, Buildable)]
#[module(root = crate)]
#[buildable(root = crate)]
pub struct Bilinear {
    /// The weight of the bilinear layer.
    #[param]
    pub weights: Param<Array>,

    /// The bias of the bilinear layer.
    #[param]
    pub bias: Param<Option<Array>>,
}

impl Bilinear {
    /// Default value for the `bias` option.
    pub const DEFAULT_BIAS: bool = true;
}

impl Module<&Array> for Bilinear {
    type Error = Exception;
    type Output = Array;

    fn forward(&mut self, x: &Array) -> Result<Array, Self::Error> {
        let shape = self.weights.shape();
        let (out, in2, in1) = (shape[0], shape[1], shape[2]);
        // leading (batch) dimensions of the input, restored on the output below
        let x_shape = &x.shape()[..x.shape().len() - 1];
        let x1 = x.reshape(&[-1, in1])?;
        let x2 = x.reshape(&[-1, 1, in2])?;

        // perform the bilinear transform
        let w = self.weights.reshape(&[out * in2, in1])?;
        let mut y = crate::ops::matmul(&x1, w.t())?;
        y = y.reshape(&[-1, out, in2])?.swap_axes(-2, -1)?;
        y = crate::ops::matmul(&x2, &y)?;
        y = y.squeeze(&[1])?;

        // restore the original batch dimensions on the output
        let new_shape = x_shape.iter().cloned().chain(once(out)).collect::<Vec<_>>();
        y = y.reshape(&new_shape)?;

        if let Some(bias) = &self.bias.value {
            y = crate::ops::add(&y, bias)?;
        }

        Ok(y)
    }

    fn training_mode(&mut self, _: bool) {}
}

// The following tests are ported from the Swift bindings:
// mlx-swift/Tests/MLXTests/IntegrationTests.swift
#[cfg(test)]
mod tests {
    use crate::{random::uniform, Dtype};
    use float_eq::assert_float_eq;

    use super::*;

    #[test]
    fn test_linear() {
        crate::random::seed(744).unwrap();
        let a = uniform::<_, f32>(0.0, 1.0, &[2, 8, 16], None).unwrap();
        assert_eq!(a.shape(), &[2, 8, 16]);
        assert_eq!(a.dtype(), Dtype::Float32);
        assert_float_eq!(
            a.mean(None, None).unwrap().item::<f32>(),
            0.508_688_57,
            abs <= 0.010_173_771_5
        );
        assert_float_eq!(
            a.sum(None, None).unwrap().item::<f32>(),
            130.224_27,
            abs <= 2.604_485_5
        );
        let result = Linear::new(16, 5).unwrap().forward(&a).unwrap();
        assert_eq!(result.shape(), &[2, 8, 5]);
        assert_eq!(result.dtype(), Dtype::Float32);
        assert_float_eq!(
            result.mean(None, None).unwrap().item::<f32>(),
            0.104_193_09,
            abs <= 0.002_083_861_7
        );
        assert_float_eq!(
            result.sum(None, None).unwrap().item::<f32>(),
            8.335_447,
            abs <= 0.166_708_95
        );
    }
}