mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-09-21 02:55:10 +02:00
commit
dc6adefd87
19 changed files with 230 additions and 130 deletions
14
README.md
14
README.md
|
@ -75,12 +75,12 @@ conda activate textgen
|
||||||
|
|
||||||
| System | GPU | Command |
|
| System | GPU | Command |
|
||||||
|--------|---------|---------|
|
|--------|---------|---------|
|
||||||
| Linux/WSL | NVIDIA | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121` |
|
| Linux/WSL | NVIDIA | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu121` |
|
||||||
| Linux/WSL | CPU only | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu` |
|
| Linux/WSL | CPU only | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cpu` |
|
||||||
| Linux | AMD | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6` |
|
| Linux | AMD | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/rocm5.6` |
|
||||||
| MacOS + MPS | Any | `pip3 install torch torchvision torchaudio` |
|
| MacOS + MPS | Any | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.*` |
|
||||||
| Windows | NVIDIA | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121` |
|
| Windows | NVIDIA | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu121` |
|
||||||
| Windows | CPU only | `pip3 install torch torchvision torchaudio` |
|
| Windows | CPU only | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.*` |
|
||||||
|
|
||||||
The up-to-date commands can be found here: https://pytorch.org/get-started/locally/.
|
The up-to-date commands can be found here: https://pytorch.org/get-started/locally/.
|
||||||
|
|
||||||
|
@ -145,7 +145,7 @@ Then browse to
|
||||||
1) For Kepler GPUs and older, you will need to install CUDA 11.8 instead of 12:
|
1) For Kepler GPUs and older, you will need to install CUDA 11.8 instead of 12:
|
||||||
|
|
||||||
```
|
```
|
||||||
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu118
|
||||||
conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime
|
conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -408,6 +408,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
||||||
margin-bottom: 0 !important;
|
margin-bottom: 0 !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#default-tab .prose pre, #notebook-tab .prose pre {
|
||||||
|
overflow: scroll;
|
||||||
|
}
|
||||||
|
|
||||||
.message-body code {
|
.message-body code {
|
||||||
white-space: pre-wrap !important;
|
white-space: pre-wrap !important;
|
||||||
word-wrap: break-word !important;
|
word-wrap: break-word !important;
|
||||||
|
|
63
modules/llama_cpp_python_hijack.py
Normal file
63
modules/llama_cpp_python_hijack.py
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
from typing import Sequence
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
# Every llama-cpp-python build is optional. The CPU package and the two CUDA
# variants are imported under different module names; a build that cannot be
# imported is recorded as None so later code can test for its presence.
#
# `except Exception` (rather than a bare `except`) still covers ImportError and
# failures raised while loading the native library, but lets KeyboardInterrupt
# and SystemExit propagate instead of silently swallowing them.
try:
    import llama_cpp
except Exception:
    llama_cpp = None

try:
    import llama_cpp_cuda
except Exception:
    llama_cpp_cuda = None

try:
    import llama_cpp_cuda_tensorcores
except Exception:
    llama_cpp_cuda_tensorcores = None
|
||||||
|
|
||||||
|
|
||||||
|
def eval_with_progress(self, tokens: Sequence[int]):
    """
    Replacement for ``Llama.eval`` that shows prompt processing progress.

    The logic follows the upstream implementation in

        https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py

    and only adds a tqdm bar around the batch loop. ``self`` is the object
    the function is installed on; ``tokens`` is the sequence of token ids to
    evaluate. Nothing is returned: ``self.input_ids``, ``self.scores`` and
    ``self.n_tokens`` are updated in place.
    """
    assert self._ctx.ctx is not None
    assert self._batch.batch is not None
    # NOTE(review): presumably drops cached entries from position n_tokens
    # onward for all sequences -- confirm against the llama.cpp KV cache API.
    self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)

    total = len(tokens)
    batch_starts = range(0, total, self.n_batch)
    if total > 1:
        # Calls with at most one token skip the bar and iterate the plain range.
        batch_starts = tqdm(batch_starts, desc="Prompt evaluation", leave=False)

    for start in batch_starts:
        chunk = tokens[start: min(total, start + self.n_batch)]
        past = self.n_tokens
        count = len(chunk)
        keep_all_logits = self.context_params.logits_all

        self._batch.set_batch(batch=chunk, n_past=past, logits_all=keep_all_logits)
        self._ctx.decode(self._batch)

        # Save tokens
        self.input_ids[past: past + count] = chunk

        # Save logits
        # NOTE: Only save the last token logits if logits_all is False
        if keep_all_logits:
            first_row = 0
        else:
            first_row = count - 1

        vocab_size = self._n_vocab
        logits = self._ctx.get_logits()[first_row * vocab_size: count * vocab_size]
        target_rows = self.scores[past + first_row: past + count, :]
        # Assign through the flattened result of reshape so the flat logits
        # buffer is copied into the selected rows of self.scores.
        target_rows.reshape(-1)[:] = logits

        # Update n_tokens
        self.n_tokens += count
|
||||||
|
|
||||||
|
|
||||||
|
# Install the progress-reporting eval on every llama.cpp binding that was
# imported successfully; bindings that failed to import are None and skipped.
for lib in (llama_cpp, llama_cpp_cuda, llama_cpp_cuda_tensorcores):
    if lib is None:
        continue

    lib.Llama.eval = eval_with_progress
|
|
@ -7,7 +7,7 @@ from torch.nn import CrossEntropyLoss
|
||||||
from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
|
from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
|
||||||
from transformers.modeling_outputs import CausalLMOutputWithPast
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
||||||
|
|
||||||
from modules import RoPE, shared
|
from modules import RoPE, llama_cpp_python_hijack, shared
|
||||||
from modules.logging_colors import logger
|
from modules.logging_colors import logger
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -4,7 +4,7 @@ from functools import partial
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from modules import RoPE, shared
|
from modules import RoPE, llama_cpp_python_hijack, shared
|
||||||
from modules.callbacks import Iteratorize
|
from modules.callbacks import Iteratorize
|
||||||
from modules.logging_colors import logger
|
from modules.logging_colors import logger
|
||||||
from modules.text_generation import get_max_prompt_length
|
from modules.text_generation import get_max_prompt_length
|
||||||
|
|
|
@ -54,7 +54,7 @@ sampler_hijack.hijack_samplers()
|
||||||
|
|
||||||
|
|
||||||
def load_model(model_name, loader=None):
|
def load_model(model_name, loader=None):
|
||||||
logger.info(f"Loading {model_name}")
|
logger.info(f"Loading \"{model_name}\"")
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
|
||||||
shared.is_seq2seq = False
|
shared.is_seq2seq = False
|
||||||
|
@ -246,7 +246,7 @@ def llamacpp_loader(model_name):
|
||||||
else:
|
else:
|
||||||
model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*.gguf'))[0]
|
model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*.gguf'))[0]
|
||||||
|
|
||||||
logger.info(f"llama.cpp weights detected: {model_file}")
|
logger.info(f"llama.cpp weights detected: \"{model_file}\"")
|
||||||
model, tokenizer = LlamaCppModel.from_pretrained(model_file)
|
model, tokenizer = LlamaCppModel.from_pretrained(model_file)
|
||||||
return model, tokenizer
|
return model, tokenizer
|
||||||
|
|
||||||
|
@ -257,7 +257,7 @@ def llamacpp_HF_loader(model_name):
|
||||||
for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]:
|
for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]:
|
||||||
path = Path(f'{shared.args.model_dir}/{fname}')
|
path = Path(f'{shared.args.model_dir}/{fname}')
|
||||||
if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']):
|
if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']):
|
||||||
logger.info(f'Using tokenizer from: {path}')
|
logger.info(f'Using tokenizer from: \"{path}\"')
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
|
logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
|
||||||
|
@ -298,7 +298,7 @@ def ctransformers_loader(model_name):
|
||||||
logger.error("Could not find a model for ctransformers.")
|
logger.error("Could not find a model for ctransformers.")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
logger.info(f'ctransformers weights detected: {model_file}')
|
logger.info(f'ctransformers weights detected: \"{model_file}\"')
|
||||||
model, tokenizer = ctrans.from_pretrained(model_file)
|
model, tokenizer = ctrans.from_pretrained(model_file)
|
||||||
return model, tokenizer
|
return model, tokenizer
|
||||||
|
|
||||||
|
@ -393,7 +393,7 @@ def HQQ_loader(model_name):
|
||||||
from hqq.core.quantize import HQQBackend, HQQLinear
|
from hqq.core.quantize import HQQBackend, HQQLinear
|
||||||
from hqq.engine.hf import HQQModelForCausalLM
|
from hqq.engine.hf import HQQModelForCausalLM
|
||||||
|
|
||||||
logger.info(f"Loading HQQ model with backend: {shared.args.hqq_backend}")
|
logger.info(f"Loading HQQ model with backend: \"{shared.args.hqq_backend}\"")
|
||||||
|
|
||||||
model_dir = Path(f'{shared.args.model_dir}/{model_name}')
|
model_dir = Path(f'{shared.args.model_dir}/{model_name}')
|
||||||
model = HQQModelForCausalLM.from_quantized(str(model_dir))
|
model = HQQModelForCausalLM.from_quantized(str(model_dir))
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import functools
|
import functools
|
||||||
|
import pprint
|
||||||
import random
|
import random
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
@ -90,7 +91,25 @@ def random_preset(state):
|
||||||
'eta_cutoff': [3, 6, 9, 12, 15, 18],
|
'eta_cutoff': [3, 6, 9, 12, 15, 18],
|
||||||
},
|
},
|
||||||
'flatten_distribution': {
|
'flatten_distribution': {
|
||||||
'temperature': [0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0],
|
'temperature': [0.1, 0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0, 5.0],
|
||||||
|
'dynamic_temperature': [
|
||||||
|
[0.1, 1],
|
||||||
|
[0.1, 1.5],
|
||||||
|
[0.1, 2],
|
||||||
|
[0.1, 5],
|
||||||
|
[0.5, 1],
|
||||||
|
[0.5, 1.5],
|
||||||
|
[0.5, 2],
|
||||||
|
[0.5, 5],
|
||||||
|
[0.8, 1],
|
||||||
|
[0.8, 1.5],
|
||||||
|
[0.8, 2],
|
||||||
|
[0.8, 5],
|
||||||
|
[1, 1.5],
|
||||||
|
[1, 2],
|
||||||
|
[1, 5]
|
||||||
|
],
|
||||||
|
'smoothing_factor': [0.2, 0.3, 0.6, 1.2]
|
||||||
},
|
},
|
||||||
'repetition': {
|
'repetition': {
|
||||||
'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
|
'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
|
||||||
|
@ -106,26 +125,42 @@ def random_preset(state):
|
||||||
for cat in params_and_values:
|
for cat in params_and_values:
|
||||||
choices = list(params_and_values[cat].keys())
|
choices = list(params_and_values[cat].keys())
|
||||||
if shared.args.loader is not None:
|
if shared.args.loader is not None:
|
||||||
choices = [x for x in choices if x in loaders_samplers[shared.args.loader]]
|
choices = [x for x in choices if loader_contains(x)]
|
||||||
|
|
||||||
if len(choices) > 0:
|
if len(choices) > 0:
|
||||||
choice = random.choice(choices)
|
choice = random.choice(choices)
|
||||||
generate_params[choice] = random.choice(params_and_values[cat][choice])
|
value = random.choice(params_and_values[cat][choice])
|
||||||
|
if choice == 'dynamic_temperature':
|
||||||
|
generate_params['dynamic_temperature'] = True
|
||||||
|
generate_params['dynatemp_low'] = value[0]
|
||||||
|
generate_params['dynatemp_high'] = value[1]
|
||||||
|
else:
|
||||||
|
generate_params[choice] = value
|
||||||
|
|
||||||
state.update(generate_params)
|
state.update(generate_params)
|
||||||
|
logger.info("GENERATED_PRESET=")
|
||||||
|
pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(state))
|
||||||
return state, *[generate_params[k] for k in presets_params()]
|
return state, *[generate_params[k] for k in presets_params()]
|
||||||
|
|
||||||
|
|
||||||
def generate_preset_yaml(state):
|
def loader_contains(sampler):
|
||||||
|
if sampler == 'dynamic_temperature' and 'dynatemp_low' in loaders_samplers[shared.args.loader]:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return sampler in loaders_samplers[shared.args.loader]
|
||||||
|
|
||||||
|
|
||||||
|
def remove_defaults(state):
|
||||||
defaults = default_preset()
|
defaults = default_preset()
|
||||||
data = {k: state[k] for k in presets_params()}
|
data = {k: state[k] for k in presets_params()}
|
||||||
|
|
||||||
# Remove entries that are identical to the defaults.
|
|
||||||
# sampler_priority is always saved because it is experimental
|
|
||||||
# and the default order may change.
|
|
||||||
|
|
||||||
for k in list(data.keys()):
|
for k in list(data.keys()):
|
||||||
if data[k] == defaults[k] and k != 'sampler_priority':
|
if data[k] == defaults[k]:
|
||||||
del data[k]
|
del data[k]
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def generate_preset_yaml(state):
|
||||||
|
data = remove_defaults(state)
|
||||||
return yaml.dump(data, sort_keys=False)
|
return yaml.dump(data, sort_keys=False)
|
||||||
|
|
|
@ -431,6 +431,7 @@ def get_logits_warper_patch(self, generation_config):
|
||||||
if shared.args.verbose:
|
if shared.args.verbose:
|
||||||
logger.info("WARPERS=")
|
logger.info("WARPERS=")
|
||||||
pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint([x.__class__.__name__ for x in warpers])
|
pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint([x.__class__.__name__ for x in warpers])
|
||||||
|
print()
|
||||||
|
|
||||||
if normalize is not None:
|
if normalize is not None:
|
||||||
warpers.append(normalize)
|
warpers.append(normalize)
|
||||||
|
|
|
@ -290,9 +290,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
|
||||||
if k in state:
|
if k in state:
|
||||||
generate_params[k] = state[k]
|
generate_params[k] = state[k]
|
||||||
|
|
||||||
if isinstance(state['sampler_priority'], list):
|
if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
|
||||||
generate_params['sampler_priority'] = state['sampler_priority']
|
generate_params['sampler_priority'] = state['sampler_priority']
|
||||||
elif isinstance(state['sampler_priority'], str):
|
elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
|
||||||
generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
|
generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
|
||||||
|
|
||||||
if state['negative_prompt'] != '':
|
if state['negative_prompt'] != '':
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64"
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
@ -29,32 +28,33 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, AVX2)
|
# llama-cpp-python (CPU only, AVX2)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
||||||
# llama-cpp-python (CUDA, no tensor cores)
|
# llama-cpp-python (CUDA, no tensor cores)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
||||||
# llama-cpp-python (CUDA, tensor cores)
|
# llama-cpp-python (CUDA, tensor cores)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64"
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
@ -29,17 +28,18 @@ bitsandbytes==0.38.1; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, AVX2)
|
# llama-cpp-python (CPU only, AVX2)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
||||||
# AMD wheels
|
# AMD wheels
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.38+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.42+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.38+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.42+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64"
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
@ -29,15 +28,16 @@ bitsandbytes==0.38.1; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, no AVX2)
|
# llama-cpp-python (CPU only, no AVX2)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
||||||
# AMD wheels
|
# AMD wheels
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
@ -29,9 +28,10 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# Mac wheels
|
# Mac wheels
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
@ -29,11 +28,12 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# Mac wheels
|
# Mac wheels
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.38-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
@ -29,7 +28,7 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, AVX2)
|
# llama-cpp-python (CPU only, AVX2)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
@ -29,7 +28,7 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, no AVX2)
|
# llama-cpp-python (CPU only, no AVX2)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64"
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
@ -29,32 +28,33 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, no AVX2)
|
# llama-cpp-python (CPU only, no AVX2)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
||||||
# llama-cpp-python (CUDA, no tensor cores)
|
# llama-cpp-python (CUDA, no tensor cores)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
||||||
# llama-cpp-python (CUDA, tensor cores)
|
# llama-cpp-python (CUDA, tensor cores)
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
|
|
|
@ -2,16 +2,15 @@ accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.12
|
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
hqq==0.1.2.post1
|
hqq==0.1.3
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
lm_eval==0.3.0
|
lm_eval==0.3.0
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.26.*
|
||||||
optimum==1.16.*
|
optimum==1.16.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.7.*
|
peft==0.8.*
|
||||||
Pillow>=9.5.0
|
Pillow>=9.5.0
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
|
|
|
@ -187,7 +187,7 @@ if __name__ == "__main__":
|
||||||
settings_file = Path('settings.json')
|
settings_file = Path('settings.json')
|
||||||
|
|
||||||
if settings_file is not None:
|
if settings_file is not None:
|
||||||
logger.info(f"Loading settings from {settings_file}")
|
logger.info(f"Loading settings from \"{settings_file}\"")
|
||||||
file_contents = open(settings_file, 'r', encoding='utf-8').read()
|
file_contents = open(settings_file, 'r', encoding='utf-8').read()
|
||||||
new_settings = json.loads(file_contents) if settings_file.suffix == "json" else yaml.safe_load(file_contents)
|
new_settings = json.loads(file_contents) if settings_file.suffix == "json" else yaml.safe_load(file_contents)
|
||||||
shared.settings.update(new_settings)
|
shared.settings.update(new_settings)
|
||||||
|
|
Loading…
Reference in a new issue