diff --git a/server/server.py b/server/server.py index 7fbe833..0544070 100644 --- a/server/server.py +++ b/server/server.py @@ -1,8 +1,7 @@ -from .. import autograd_4bit import time import torch -from ..autograd_4bit import load_llama_model_4bit_low_ram, Autograd4bitQuantLinear -from alpaca_lora_4bit.model_attn_mlp_patch import make_quant_attn, make_fused_mlp, inject_lora_layers +from autograd_4bit import load_llama_model_4bit_low_ram, Autograd4bitQuantLinear +from model_attn_mlp_patch import make_quant_attn, make_fused_mlp, inject_lora_layers import zmq from transformers import StoppingCriteria, StoppingCriteriaList from io import BytesIO diff --git a/text-generation-webui/generate_monkey_patch.py b/text-generation-webui/generate_monkey_patch.py index d3c59b1..3848840 100644 --- a/text-generation-webui/generate_monkey_patch.py +++ b/text-generation-webui/generate_monkey_patch.py @@ -1,6 +1,6 @@ import modules.text_generation from modules.text_generation import * -from alpaca_lora_4bit.server import _SentinelTokenStoppingCriteria +from server import _SentinelTokenStoppingCriteria def generate_reply_patched(question, state, eos_token=None, stopping_strings=[]): if shared.model_name == 'None' or shared.model is None: