diff --git a/docs/04 - Model Tab.md b/docs/04 - Model Tab.md
index 2585f544..762f85e8 100644
--- a/docs/04 - Model Tab.md	
+++ b/docs/04 - Model Tab.md	
@@ -47,6 +47,10 @@ Examples:
 * **no_flash_attn**: Disables flash attention. Otherwise, it is automatically used as long as the library is installed.
 * **cache_8bit**: Create a 8-bit precision cache instead of a 16-bit one. This saves VRAM but increases perplexity (I don't know by how much).
 
+### ExLlamav2
+
+The same as ExLlamav2_HF but using the internal samplers of ExLlamav2 instead of the ones in the Transformers library.
+
 ### AutoGPTQ
 
 Loads: GPTQ models.
diff --git a/docs/What Works.md b/docs/What Works.md
index 343343a1..354da1dd 100644
--- a/docs/What Works.md	
+++ b/docs/What Works.md	
@@ -6,6 +6,7 @@
 | llama.cpp      |       ❌       |           ❌            |       ❌       |          ❌          |    use llamacpp_HF    |
 | llamacpp_HF    |       ❌       |           ❌            |       ❌       |          ❌          |           ✅          |
 | ExLlamav2_HF   |       ✅       |           ✅            |       ❌       |          ❌          |           ✅          |
+| ExLlamav2      |       ✅       |           ✅            |       ❌       |          ❌          |   use ExLlamav2_HF    |
 | AutoGPTQ       |       ✅       |           ❌            |       ❌       |          ✅          |           ✅          |
 | AutoAWQ        |       ?        |           ❌            |       ?        |          ?           |           ✅          |
 | GPTQ-for-LLaMa |       ✅\*\*   |           ✅\*\*\*      |       ✅       |          ✅          |           ✅          |
diff --git a/modules/shared.py b/modules/shared.py
index 78966617..5c81c1c7 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -50,7 +50,6 @@ settings = {
     'prompt_lookup_num_tokens': 0,
     'custom_stopping_strings': '',
     'custom_token_bans': '',
-    'sampler_priority': 'temperature,top_k,top_p,typical_p,epsilon_cutoff,eta_cutoff,tfs,top_a,min_p,dynamic_temperature,quadratic_sampling,mirostat',
     'auto_max_new_tokens': False,
     'ban_eos_token': False,
     'add_bos_token': True,
@@ -130,7 +129,7 @@ group.add_argument('--numa', action='store_true', help='Activate NUMA task alloc
 group.add_argument('--logits_all', action='store_true', help='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.')
 group.add_argument('--no_offload_kqv', action='store_true', help='Do not offload the  K, Q, V to the GPU. This saves VRAM but reduces the performance.')
 group.add_argument('--cache-capacity', type=str, help='Maximum cache capacity (llama-cpp-python). Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed.')
-group.add_argument('--row_split', action='store_true', help='Split multi-gpu by row instead of layer. Faster on some cards.')
+group.add_argument('--row_split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.')
 
 # ExLlama
 group = parser.add_argument_group('ExLlama')