diff --git a/autograd_4bit.py b/autograd_4bit.py
index 476da12..5a47921 100644
--- a/autograd_4bit.py
+++ b/autograd_4bit.py
@@ -248,7 +248,7 @@ def load_llama_model_4bit_low_ram_and_offload(config_path, model_path, lora_path
 
     if lora_path is not None:
         from peft import PeftModel
-        from peft.tuners.lora import Linear4bitLt
+        from monkeypatch.peft_tuners_lora_monkey_patch import Linear4bitLt
         model = PeftModel.from_pretrained(model, lora_path, device_map={'': 'cpu'}, torch_dtype=torch.float32)
         print(Style.BRIGHT + Fore.GREEN + '{} Lora Applied.'.format(lora_path))
 
diff --git a/text-generation-webui/custom_monkey_patch.py b/text-generation-webui/custom_monkey_patch.py
index c59132e..28bb53e 100644
--- a/text-generation-webui/custom_monkey_patch.py
+++ b/text-generation-webui/custom_monkey_patch.py
@@ -3,7 +3,8 @@ import torch
 import autograd_4bit
 from autograd_4bit import load_llama_model_4bit_low_ram, Autograd4bitQuantLinear
 from peft import PeftModel
-from peft.tuners.lora import Linear4bitLt
+from monkeypatch.peft_tuners_lora_monkey_patch import replace_peft_model_with_gptq_lora_model, Linear4bitLt
+replace_peft_model_with_gptq_lora_model()
 
 patch_encode_func = False