Don't use flash attention on Google Colab

oobabooga 2024-07-23 19:50:56 -07:00
parent 9d5513fda0
commit 98ed6d3a66

@@ -74,7 +74,7 @@
"# Parameters\n",
"model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n",
"branch = \"8.0bpw\" #@param {type:\"string\"}\n",
"command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n",
"command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n",
"api = False #@param {type:\"boolean\"}\n",
"\n",
"if api:\n",