Skip to content

Commit

Permalink
Tweaks
Browse files (browse the repository at this point in the history)
  • Loading branch information
EricLBuehler committed Oct 12, 2024
1 parent 818df9d commit a8acde3
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 17 deletions.
4 changes: 3 additions & 1 deletion examples/server/stresstest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import httpx
import textwrap
import json
import time


def log_response(response: httpx.Response):
Expand Down Expand Up @@ -40,6 +41,7 @@ def log_response(response: httpx.Response):
for i in range(1000):
messages.append({"role": "user", "content": "Hello! How are you? Please write generic binary search function in Rust."})
print("Sending", i)
start = time.time()
completion = client.chat.completions.create(
model="mistral",
messages=messages,
Expand All @@ -49,4 +51,4 @@ def log_response(response: httpx.Response):
temperature=0,
)
resp = completion.choices[0].message.content
print("Done")
print("Done", time.time()-start)
2 changes: 0 additions & 2 deletions mistralrs-core/src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ impl Engine {
let current_completion_ids: Vec<usize> =
scheduled.completion.iter().map(|seq| *seq.id()).collect();

println!("Running...");
let res = {
let mut pipeline = get_mut_arcmutex!(self.pipeline);
let pre_op = if !self.no_kv_cache
Expand Down Expand Up @@ -180,7 +179,6 @@ impl Engine {
)
.await
};
println!("Ran...");

handle_pipeline_forward_error!(
"completion step",
Expand Down
2 changes: 0 additions & 2 deletions mistralrs-core/src/pipeline/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,6 @@ pub trait Pipeline:

match &logits[0] {
ForwardInputsResult::CausalGeneration { .. } => {
println!("Sampling...");
self.sample_causal_gen(
input_seqs,
logits
Expand All @@ -389,7 +388,6 @@ pub trait Pipeline:
rng,
)
.await?;
println!("Sampled!");
}
ForwardInputsResult::Image { .. } => {
send_responses(
Expand Down
4 changes: 0 additions & 4 deletions mistralrs-core/src/sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -677,9 +677,7 @@ impl Sequence {
}

pub fn add_choice_to_group(&self, choice: Choice) {
println!("Adding choice");
get_mut_group!(self).choices.push(choice);
println!("Added choice");
self.update_time_info();
}

Expand Down Expand Up @@ -819,9 +817,7 @@ impl SequenceGroup {
sender: Sender<Response>,
) -> Result<(), SendError<Response>> {
if self.choices.len() == self.n_choices {
println!("Sending response");
sender.send(Response::Done(response)).await?;
println!("Sent response");
}

Ok(())
Expand Down
20 changes: 12 additions & 8 deletions mistralrs/examples/stresstest/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use anyhow::Result;
use mistralrs::{IsqType, MemoryUsage, TextMessageRole, TextMessages, TextModelBuilder};
use mistralrs::{IsqType, MemoryUsage, RequestBuilder, TextMessageRole, TextModelBuilder};

const N_ITERS: u64 = 1000;
const BYTES_TO_MB: usize = 1024 * 1024;
Expand All @@ -8,24 +8,28 @@ const BYTES_TO_MB: usize = 1024 * 1024;
async fn main() -> Result<()> {
let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
.with_isq(IsqType::Q4K)
.with_prefix_cache_n(None)
.with_logging()
// .with_paged_attn(|| mistralrs::PagedAttentionMetaBuilder::default().build())?
.build()
.await?;

for i in 0..N_ITERS {
let messages = TextMessages::new().add_message(
TextMessageRole::User,
"Hello! How are you? Please write generic binary search function in Rust.",
);
let messages = RequestBuilder::new()
.add_message(
TextMessageRole::User,
"Hello! How are you? Please write generic binary search function in Rust.",
)
.set_deterministic_sampler();

println!("Sending request {}...", i+1);
let _response = model.send_chat_request(messages).await?;
println!("Sending request {}...", i + 1);
let response = model.send_chat_request(messages).await?;

let amount = MemoryUsage.get_memory_available(&model.config().device)? / BYTES_TO_MB;

println!("{amount}");

println!("{}", response.usage.total_time_sec);
println!("{:?}", response.choices[0].message.content);
}

Ok(())
Expand Down

0 comments on commit a8acde3

Please sign in to comment.