pub fn quantized_matmul_device(
    x: impl AsRef<Array>,
    w: impl AsRef<Array>,
    scales: impl AsRef<Array>,
    biases: impl AsRef<Array>,
    transpose: impl Into<Option<bool>>,
    group_size: impl Into<Option<i32>>,
    bits: impl Into<Option<i32>>,
    stream: impl AsRef<Stream>,
) -> Result<Array>
Perform the matrix multiplication with the quantized matrix `w`. The quantization uses one
floating-point scale and bias per group of `group_size` elements. Each element in `w` takes
`bits` bits and is packed in an unsigned 32-bit integer.