From 1d2fd99d2f507476bba475976b1629b5ed2c69fa Mon Sep 17 00:00:00 2001
From: Aisuko <urakiny@gmail.com>
Date: Thu, 2 Nov 2023 12:53:19 +1100
Subject: [PATCH] Implement MNIST model and inference

Signed-off-by: Aisuko <urakiny@gmail.com>
---
 .gitignore                                |   1 +
 backend/rust/Makefile                     |  10 ++
 backend/rust/backend-burn/Cargo.toml      |   1 +
 backend/rust/backend-burn/src/main.rs     |  21 ++-
 backend/rust/models/Cargo.toml            |   9 +-
 backend/rust/models/src/lib.rs            |  10 +-
 backend/rust/models/src/mnist/mnist.rs    | 185 ++++++++++++++++++++++
 backend/rust/models/src/mnist/mod.rs      |  33 ++++
 backend/rust/models/src/onnx/inference.rs |   1 -
 backend/rust/models/src/onnx/mod.rs       |  90 -----------
 10 files changed, 267 insertions(+), 94 deletions(-)
 create mode 100644 backend/rust/models/src/mnist/mnist.rs
 create mode 100644 backend/rust/models/src/mnist/mod.rs
 delete mode 100644 backend/rust/models/src/onnx/inference.rs
 delete mode 100644 backend/rust/models/src/onnx/mod.rs
diff --git a/.gitignore b/.gitignore
index 384da58dc30..ef15c70d86e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,4 @@ prepare
 /ggml-metal.metal
 target/
 Cargo.lock
+model.bin
diff --git a/backend/rust/Makefile b/backend/rust/Makefile
index 2b25e025084..ad550d67e9b 100644
--- a/backend/rust/Makefile
+++ b/backend/rust/Makefile
@@ -34,6 +34,16 @@ burn:
 	@echo "Burning..."
 	@cargo run --bin server --package backend-burn
 
+
+############################################################################################################
+# gRPC testing commands
+
+
+.PHONY: list
+list:
+	@echo "Burning..."
+	@grpcurl -plaintext -import-path ../../../pkg/grpc/proto -proto backend.proto list backend.Backend
+
 .PHONY: health
 health:
 	@echo "Burning..."
diff --git a/backend/rust/backend-burn/Cargo.toml b/backend/rust/backend-burn/Cargo.toml
index 007b3b44a4a..f97347d324b 100644
--- a/backend/rust/backend-burn/Cargo.toml
+++ b/backend/rust/backend-burn/Cargo.toml
@@ -13,6 +13,7 @@ path = "src/main.rs"
 
 # import bunker here
 bunker = { path = "../bunker" }
+models = { path = "../models" }
 
 tokio = "1.33.0"
 async-trait = "0.1.74"
diff --git a/backend/rust/backend-burn/src/main.rs b/backend/rust/backend-burn/src/main.rs
index 6bbb2f7c6b7..9ee1d96cf4c 100644
--- a/backend/rust/backend-burn/src/main.rs
+++ b/backend/rust/backend-burn/src/main.rs
@@ -14,6 +14,7 @@ use async_trait::async_trait;
 
 use tracing::{event, span, Level};
 
+use models::*;
 // implement BackendService trait in bunker
 
 #[derive(Default, Debug)]
@@ -35,7 +36,25 @@ impl BackendService for BurnBackend {
 
     #[tracing::instrument]
     async fn predict(&self, request: Request<PredictOptions>) -> Result<Response<Reply>, Status> {
-        todo!()
+        let mut models: Vec<Box<dyn LLM>> = vec![Box::new(models::MNINST::new())];
+        let result = models[0].predict(request.into_inner());
+
+        match result {
+            Ok(res) => {
+                let reply = Reply {
+                    message: res.into(),
+                };
+                let res = Response::new(reply);
+                Ok(res)
+            }
+            Err(e) => {
+                let reply = Reply {
+                    message: e.to_string().into(),
+                };
+                let res = Response::new(reply);
+                Ok(res)
+            }
+        }
     }
 
     #[tracing::instrument]
diff --git a/backend/rust/models/Cargo.toml b/backend/rust/models/Cargo.toml
index f092f75b6c9..8cb651876cf 100644
--- a/backend/rust/models/Cargo.toml
+++ b/backend/rust/models/Cargo.toml
@@ -5,6 +5,13 @@ edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
+[features]
+default = ["ndarray"]
+
+ndarray = ["burn/ndarray"]
+wgpu = ["burn/wgpu"]
+
 [dependencies]
-burn = { version="0.10.0", features=["ndarray"] } # https://github.com/mudler/LocalAI/discussions/1219
+bunker = { path = "../bunker" }
+burn = { version="0.10.0", features=["ndarray","wgpu"] } # https://github.com/mudler/LocalAI/discussions/1219
 serde = "1.0.190"
diff --git a/backend/rust/models/src/lib.rs b/backend/rust/models/src/lib.rs
index bb2b1591647..f3302e83ef7 100644
--- a/backend/rust/models/src/lib.rs
+++ b/backend/rust/models/src/lib.rs
@@ -1 +1,9 @@
-pub(crate) mod onnx;
+pub(crate) mod mnist;
+pub use mnist::mnist::MNINST;
+
+use bunker::pb::{ModelOptions, PredictOptions};
+
+pub trait LLM {
+    fn load_model(&mut self, request: ModelOptions) -> Result<String, Box<dyn std::error::Error>>;
+    fn predict(&mut self, request: PredictOptions) -> Result<String, Box<dyn std::error::Error>>;
+}
diff --git a/backend/rust/models/src/mnist/mnist.rs b/backend/rust/models/src/mnist/mnist.rs
new file mode 100644
index 00000000000..995b2706ed0
--- /dev/null
+++ b/backend/rust/models/src/mnist/mnist.rs
@@ -0,0 +1,185 @@
+//! Defination of a mninst model and config of it.
+//! The source code is from https://github.com/burn-rs/burn/blob/main/examples/mnist-inference-web/src/model.rs
+//! The license is Apache-2.0 and MIT.
+//! Adapter by Aisuko
+
+use burn::{
+    backend::wgpu::{compute::init_async, AutoGraphicsApi, WgpuDevice},
+    module::Module,
+    nn::{self, BatchNorm, PaddingConfig2d},
+    record::{BinBytesRecorder, FullPrecisionSettings, Recorder},
+    tensor::{backend::Backend, Tensor},
+};
+
+// https://github.com/burn-rs/burn/blob/main/examples/mnist-inference-web/model.bin
+static STATE_ENCODED: &[u8] = include_bytes!("model.bin");
+
+const NUM_CLASSES: usize = 10;
+
+#[derive(Module, Debug)]
+/// A struct representing an MNINST model.
+pub struct MNINST<B: Backend> {
+    /// The first convolutional block of the model.
+    conv1: ConvBlock<B>,
+    /// The second convolutional block of the model.
+    conv2: ConvBlock<B>,
+    /// The third convolutional block of the model.
+    conv3: ConvBlock<B>,
+    /// A dropout layer used in the model.
+    dropout: nn::Dropout,
+    /// The first fully connected layer of the model.
+    fc1: nn::Linear<B>,
+    /// The second fully connected layer of the model.
+    fc2: nn::Linear<B>,
+    /// The activation function used in the model.
+    activation: nn::GELU,
+}
+
+impl<B: Backend> MNINST<B> {
+    pub fn new() -> Self {
+        let conv1 = ConvBlock::new([1, 8], [3, 3]); // 1 input channel, 8 output channels, 3x3 kernel size
+        let conv2 = ConvBlock::new([8, 16], [3, 3]); // 8 input channels, 16 output channels, 3x3 kernel size
+        let conv3 = ConvBlock::new([16, 24], [3, 3]); // 16 input channels, 24 output channels, 3x3 kernel size
+        let hidden_size = 24 * 22 * 22;
+        let fc1 = nn::LinearConfig::new(hidden_size, 32)
+            .with_bias(false)
+            .init();
+        let fc2 = nn::LinearConfig::new(32, NUM_CLASSES)
+            .with_bias(false)
+            .init();
+
+        let dropout = nn::DropoutConfig::new(0.5).init();
+
+        let instance = Self {
+            conv1: conv1,
+            conv2: conv2,
+            conv3: conv3,
+            dropout: dropout,
+            fc1: fc1,
+            fc2: fc2,
+            activation: nn::GELU::new(),
+        };
+        let record = BinBytesRecorder::<FullPrecisionSettings>::default()
+            .load(STATE_ENCODED.to_vec())
+            .expect("Failed to decode state");
+
+        instance.load_record(record)
+    }
+
+    /// Applies the forward pass of the neural network on the given input tensor.
+    ///
+    /// # Arguments
+    ///
+    /// * `input` - A 3-dimensional tensor of shape [batch_size, height, width].
+    ///
+    /// # Returns
+    ///
+    /// A 2-dimensional tensor of shape [batch_size, num_classes] containing the output of the neural network.
+    pub fn forward(&self, input: Tensor<B, 3>) -> Tensor<B, 2> {
+        // Get the dimensions of the input tensor
+        let [batch_size, height, width] = input.dims();
+        // Reshape the input tensor to have a shape of [batch_size, 1, height, width] and detach it
+        let x = input.reshape([batch_size, 1, height, width]).detach();
+        // Apply the first convolutional layer to the input tensor
+        let x = self.conv1.forward(x);
+        // Apply the second convolutional layer to the output of the first convolutional layer
+        let x = self.conv2.forward(x);
+        // Apply the third convolutional layer to the output of the second convolutional layer
+        let x = self.conv3.forward(x);
+
+        // Get the dimensions of the output tensor from the third convolutional layer
+        let [batch_size, channels, height, width] = x.dims();
+        // Reshape the output tensor to have a shape of [batch_size, channels*height*width]
+        let x = x.reshape([batch_size, channels * height * width]);
+
+        // Apply dropout to the output of the third convolutional layer
+        let x = self.dropout.forward(x);
+        // Apply the first fully connected layer to the output of the dropout layer
+        let x = self.fc1.forward(x);
+        // Apply the activation function to the output of the first fully connected layer
+        let x = self.activation.forward(x);
+
+        // Apply the second fully connected layer to the output of the activation function
+        self.fc2.forward(x)
+    }
+
+    pub fn inference(&mut self, input: &[f32]) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
+        // Reshape from the 1D array to 3d tensor [batch, height, width]
+        let input: Tensor<B, 3> = Tensor::from_floats(input).reshape([1, 28, 28]);
+
+        // Normalize input: make between [0,1] and make the mean=0 and std=1
+        // values mean=0.1307, std=0.3081
+        // Source: https://github.com/pytorch/examples/blob/54f4572509891883a947411fd7239237dd2a39c3/mnist/main.py#L122
+        let input = ((input / 255) - 0.1307) / 0.3081;
+
+        // Run the tensor input through the model
+        let output: Tensor<B, 2> = self.forward(input);
+
+        // Convert the model output into probalibility distribution using softmax formula
+        let output = burn::tensor::activation::softmax(output, 1);
+
+        // Flatten oupuut tensor with [1,10] shape into boxed slice of [f32]
+        let output = output.into_data().convert::<f32>().value;
+
+        Ok(output)
+    }
+}
+
+/// A struct representing a convolutional block in a neural network model.
+#[derive(Module, Debug)]
+pub struct ConvBlock<B: Backend> {
+    /// A 2D convolutional layer.
+    conv: nn::conv::Conv2d<B>,
+    /// A batch normalization layer.
+    norm: BatchNorm<B, 2>,
+    /// A GELU activation function.
+    activation: nn::GELU,
+}
+
+/// A convolutional block with batch normalization and GELU activation.
+impl<B: Backend> ConvBlock<B> {
+    /// Creates a new `ConvBlock` with the given number of output channels and kernel size.
+    pub fn new(channels: [usize; 2], kernel_size: [usize; 2]) -> Self {
+        // Initialize a 2D convolutional layer with the given output channels and kernel size,
+        // and set the padding to "valid".
+        let conv = nn::conv::Conv2dConfig::new(channels, kernel_size)
+            .with_padding(PaddingConfig2d::Valid)
+            .init();
+
+        // Initialize a batch normalization layer with the number of channels in the second dimension of the output.
+        let norm = nn::BatchNormConfig::new(channels[1]).init();
+
+        // Create a new `ConvBlock` with the initialized convolutional and batch normalization layers,
+        // and a GELU activation function.
+        Self {
+            conv: conv,
+            norm: norm,
+            activation: nn::GELU::new(),
+        }
+    }
+
+    /// Applies the convolutional block to the given input tensor.
+    pub fn forward(&self, input: Tensor<B, 4>) -> Tensor<B, 4> {
+        // Apply the convolutional layer to the input tensor.
+        let x = self.conv.forward(input);
+
+        // Apply the batch normalization layer to the output of the convolutional layer.
+        let x = self.norm.forward(x);
+
+        // Apply the GELU activation function to the output of the batch normalization layer.
+        self.activation.forward(x)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[cfg(feature = "ndarray")]
+    pub type Backend = burn::backend::NdArrayBackend<f32>;
+    #[test]
+    fn test_inference() {
+        let mut model = MNINST::<Backend>::new();
+        let output = model.inference(&[0.0; 28 * 28]).unwrap();
+        assert_eq!(output.len(), 10);
+    }
+}
diff --git a/backend/rust/models/src/mnist/mod.rs b/backend/rust/models/src/mnist/mod.rs
new file mode 100644
index 00000000000..d53b76c6c7a
--- /dev/null
+++ b/backend/rust/models/src/mnist/mod.rs
@@ -0,0 +1,33 @@
+use crate::LLM;
+use bunker::pb::{ModelOptions, PredictOptions};
+
+pub(crate) mod mnist;
+
+#[cfg(feature = "ndarray")]
+pub type Backend = burn::backend::NdArrayBackend<f32>;
+
+impl LLM for mnist::MNINST<Backend> {
+    fn load_model(&mut self, request: ModelOptions) -> Result<String, Box<dyn std::error::Error>> {
+        todo!("load model")
+    }
+
+    fn predict(&mut self, pre_ops: PredictOptions) -> Result<String, Box<dyn std::error::Error>> {
+        // convert prost::alloc::string::String to &[f32]
+        let input = pre_ops.prompt.as_bytes();
+        let input = input.iter().map(|x| *x as f32).collect::<Vec<f32>>();
+
+        let result = self.inference(&input);
+
+        match result {
+            Ok(output) => {
+                let output = output
+                    .iter()
+                    .map(|f| f.to_string())
+                    .collect::<Vec<String>>()
+                    .join(",");
+                Ok(output)
+            }
+            Err(e) => Err(e),
+        }
+    }
+}
diff --git a/backend/rust/models/src/onnx/inference.rs b/backend/rust/models/src/onnx/inference.rs
deleted file mode 100644
index febee9cf4b0..00000000000
--- a/backend/rust/models/src/onnx/inference.rs
+++ /dev/null
@@ -1 +0,0 @@
-use std::env::args;
diff --git a/backend/rust/models/src/onnx/mod.rs b/backend/rust/models/src/onnx/mod.rs
deleted file mode 100644
index 6cb6eb00045..00000000000
--- a/backend/rust/models/src/onnx/mod.rs
+++ /dev/null
@@ -1,90 +0,0 @@
-//! Defination of a mninst model and config of it.
-//! The source code is from https://github.com/burn-rs/burn/blob/main/examples/mnist-inference-web/src/model.rs
-//! The license is Apache-2.0 and MIT.
-//! Adapter by Aisuko
-
-pub(crate) mod inference;
-use inference::*;
-
-use burn::{
-    module::Module,
-    nn::{self, BatchNorm, PaddingConfig2d},
-    tensor::{backend::Backend, Tensor},
-};
-
-const NUM_CLASSES: usize = 10;
-
-#[derive(Module, Debug)]
-/// A struct representing an ONNX model.
-pub struct Model<B: Backend> {
-    /// The first convolutional block of the model.
-    conv1: ConvBlock<B>,
-    /// The second convolutional block of the model.
-    conv2: ConvBlock<B>,
-    /// The third convolutional block of the model.
-    conv3: ConvBlock<B>,
-    /// A dropout layer used in the model.
-    dropout: nn::Dropout,
-    /// The first fully connected layer of the model.
-    fc1: nn::Linear<B>,
-    /// The second fully connected layer of the model.
-    fc2: nn::Linear<B>,
-    /// The activation function used in the model.
-    activation: nn::GELU,
-}
-
-impl<B: Backend> Model<B> {
-    pub fn new() -> Self {
-        todo!("Implement the Model::new() function")
-    }
-
-    pub fn forward(&self, input: Tensor<B, 3>) -> Tensor<B, 2> {
-        todo!("Implement the Model::forward() function")
-    }
-}
-
-/// A struct representing a convolutional block in a neural network model.
-#[derive(Module, Debug)]
-pub struct ConvBlock<B: Backend> {
-    /// A 2D convolutional layer.
-    conv: nn::conv::Conv2d<B>,
-    /// A batch normalization layer.
-    norm: BatchNorm<B, 2>,
-    /// A GELU activation function.
-    activation: nn::GELU,
-}
-
-/// A convolutional block with batch normalization and GELU activation.
-impl<B: Backend> ConvBlock<B> {
-    /// Creates a new `ConvBlock` with the given number of output channels and kernel size.
-    pub fn new(channels: [usize; 2], kernel_size: [usize; 2]) -> Self {
-        // Initialize a 2D convolutional layer with the given output channels and kernel size,
-        // and set the padding to "valid".
-        let conv = nn::conv::Conv2dConfig::new(channels, kernel_size)
-            .with_padding(PaddingConfig2d::Valid)
-            .init();
-
-        // Initialize a batch normalization layer with the number of channels in the second dimension of the output.
-        let norm = nn::BatchNormConfig::new(channels[1]).init();
-
-        // Create a new `ConvBlock` with the initialized convolutional and batch normalization layers,
-        // and a GELU activation function.
-        Self {
-            conv: conv,
-            norm: norm,
-            activation: nn::GELU::new(),
-        }
-    }
-
-    /// Applies the convolutional block to the given input tensor.
-    pub fn forward(&self, input: Tensor<B, 4>) -> Tensor<B, 4> {
-        // Apply the convolutional layer to the input tensor.
-        let x = self.conv.forward(input);
-
-        // Apply the batch normalization layer to the output of the convolutional layer.
-        let x = self.norm.forward(x);
-
-        // Apply the GELU activation function to the output of the batch normalization layer.
-        self.activation.forward(x)
-    }
-}