From 81c1910c8634f36a183378c27d1e69096c0ca619 Mon Sep 17 00:00:00 2001 From: Jari Van Melckebeke Date: Tue, 25 Apr 2023 14:56:21 +0200 Subject: [PATCH] fixed small typo in code example (#22982) fixed typo in code example fixed a really small typo in the docs of single gpu inference --- docs/source/en/perf_infer_gpu_one.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/en/perf_infer_gpu_one.mdx b/docs/source/en/perf_infer_gpu_one.mdx index 55b3b9fd99..d97e163a64 100644 --- a/docs/source/en/perf_infer_gpu_one.mdx +++ b/docs/source/en/perf_infer_gpu_one.mdx @@ -71,7 +71,7 @@ model_name = "bigscience/bloom-2b5" tokenizer = AutoTokenizer.from_pretrained(model_name) model_8bit = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True) -text = "Hello, my llama is cute" +prompt = "Hello, my llama is cute" inputs = tokenizer(prompt, return_tensors="pt").to("cuda") generated_ids = model.generate(**inputs) outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) @@ -105,4 +105,4 @@ Check out the demo for running T5-11b (42GB in fp32)! Using 8-bit quantization o Or this demo for BLOOM-3B: -[![Open In Colab: BLOOM-3b demo](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1qOjXfQIAULfKvZqwCen8-MoWKGdSatZ4?usp=sharing) \ No newline at end of file +[![Open In Colab: BLOOM-3b demo](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1qOjXfQIAULfKvZqwCen8-MoWKGdSatZ4?usp=sharing)