diff --git a/text-generation-webui/custom_monkey_patch.py b/text-generation-webui/custom_monkey_patch.py
index 0f4d370..2e11db6 100644
--- a/text-generation-webui/custom_monkey_patch.py
+++ b/text-generation-webui/custom_monkey_patch.py
@@ -16,7 +16,7 @@ def load_model_llama(*args, **kwargs):
     print("Loading {} ...".format(model_path))
     t0 = time.time()
 
-    model, tokenizer = load_llama_model_4bit_low_ram(config_path, model_path, groupsize=-1)
+    model, tokenizer = load_llama_model_4bit_low_ram(config_path, model_path, groupsize=-1, is_v1_model=True)
 
     model = PeftModel.from_pretrained(model, lora_path, device_map={'': 0}, torch_dtype=torch.float32)
     print('{} Lora Applied.'.format(lora_path))