From 98ed6d3a666e3924410d34568b3e1919709656a2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:50:56 -0700 Subject: [PATCH] Don't use flash attention on Google Colab --- Colab-TextGen-GPU.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 739232a4..8e305e1d 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -74,7 +74,7 @@ "# Parameters\n", "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n", "branch = \"8.0bpw\" #@param {type:\"string\"}\n", - "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n", + "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n", "api = False #@param {type:\"boolean\"}\n", "\n", "if api:\n", @@ -114,4 +114,4 @@ "outputs": [] } ] -} \ No newline at end of file +}