diff --git a/matmul_utils_4bit.py b/matmul_utils_4bit.py index b476812..90c4f8e 100644 --- a/matmul_utils_4bit.py +++ b/matmul_utils_4bit.py @@ -1,6 +1,6 @@ import torch import numpy as np -import quant_cuda +from gptq_llama import quant_cuda # Global Buffer diff --git a/requirements.txt b/requirements.txt index c752c4b..605c0d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ accelerate bitsandbytes datasets sentencepiece +safetensors git+https://github.com/huggingface/transformers.git git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit git+https://github.com/sterlind/peft.git