diff --git a/extensions/multimodal/README.md b/extensions/multimodal/README.md index 9a33ebe3..d462d435 100644 --- a/extensions/multimodal/README.md +++ b/extensions/multimodal/README.md @@ -13,7 +13,10 @@ https://user-images.githubusercontent.com/3718215/233817203-69b57e77-0c55-4fd6-b To run this extension, download a LLM that supports multimodality, and then start server.py with the appropriate `--multimodal-pipeline` argument. Examples: ``` +# LLaVA 1.5 13B has the best performance python server.py --model liuhaotian_llava-v1.5-13b --multimodal-pipeline llava-v1.5-13b --load-in-4bit +# LLaVA 1.5 7B is relatively weaker, but requires less memory +python server.py --model liuhaotian_llava-v1.5-7b --multimodal-pipeline llava-v1.5-7b --load-in-4bit python server.py --model TheBloke_llava-v1.5-13B-GPTQ_gptq-4bit-32g-actorder_True --multimodal-pipeline llava-v1.5-13b --disable_exllama --loader autogptq python server.py --model wojtab_llava-7b-v0-4bit-128g --multimodal-pipeline llava-7b python server.py --model wojtab_llava-13b-v0-4bit-128g --multimodal-pipeline llava-13b diff --git a/extensions/multimodal/pipelines/llava/llava.py b/extensions/multimodal/pipelines/llava/llava.py index db0afb1c..09b5aff7 100644 --- a/extensions/multimodal/pipelines/llava/llava.py +++ b/extensions/multimodal/pipelines/llava/llava.py @@ -248,3 +248,15 @@ class LLaVA_v1_5_13B_Pipeline(LLaVA_v0_13B_Pipeline): @staticmethod def placeholder_embeddings() -> torch.Tensor: return LLaVA_v0_Pipeline.embed_tokens(encode("<unk>"*576, add_bos_token=False)[0]) + +class LLaVA_v1_5_7B_Pipeline(LLaVA_v1_5_13B_Pipeline): + @staticmethod + def name() -> str: + return "llava-v1.5-7b" + + @staticmethod + def llava_projector_shape() -> Tuple[int, int]: + return (1024, 4096, 4096) + @staticmethod + def llava_projector_repo() -> str: + return "liuhaotian/llava-v1.5-7b" \ No newline at end of file diff --git a/extensions/multimodal/pipelines/llava/pipelines.py b/extensions/multimodal/pipelines/llava/pipelines.py 
index 975ddb84..e6833ed6 100644 --- a/extensions/multimodal/pipelines/llava/pipelines.py +++ b/extensions/multimodal/pipelines/llava/pipelines.py @@ -2,7 +2,7 @@ from typing import Optional from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline -available_pipelines = ['llava-7b', 'llava-13b', 'llava-llama-2-13b', 'llava-v1.5-13b'] +available_pipelines = ['llava-7b', 'llava-13b', 'llava-llama-2-13b', 'llava-v1.5-13b', 'llava-v1.5-7b'] def get_pipeline(name: str, params: dict) -> Optional[AbstractMultimodalPipeline]: @@ -15,6 +15,9 @@ def get_pipeline(name: str, params: dict) -> Optional[AbstractMultimodalPipeline if name == 'llava-llama-2-13b': from .llava import LLaVA_LLaMA_2_13B_Pipeline return LLaVA_LLaMA_2_13B_Pipeline(params) + if name == 'llava-v1.5-7b': + from .llava import LLaVA_v1_5_7B_Pipeline + return LLaVA_v1_5_7B_Pipeline(params) if name == 'llava-v1.5-13b': from .llava import LLaVA_v1_5_13B_Pipeline return LLaVA_v1_5_13B_Pipeline(params) @@ -32,6 +35,9 @@ def get_pipeline_from_model_name(model_name: str, params: dict) -> Optional[Abst if '13b' in model_name.lower(): from .llava import LLaVA_v1_5_13B_Pipeline return LLaVA_v1_5_13B_Pipeline(params) + if '7b' in model_name.lower(): + from .llava import LLaVA_v1_5_7B_Pipeline + return LLaVA_v1_5_7B_Pipeline(params) else: if '7b' in model_name.lower(): from .llava import LLaVA_v0_7B_Pipeline