From d6791790ed2e967f968b528e3d7660acc53fa940 Mon Sep 17 00:00:00 2001
From: John Smith
Date: Wed, 26 Apr 2023 13:13:54 +0800
Subject: [PATCH] fix dtype keyword in inject_lora_layers call

inject_lora_layers expects the keyword argument `dtype`, not
`torch_dtype`.
---
 server/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/server.py b/server/server.py
index ef3dae8..de40fc5 100644
--- a/server/server.py
+++ b/server/server.py
@@ -88,7 +88,7 @@ class ModelServer:
             print('Quantized attention applied.')
 
         if self.lora_path is not None:
-            inject_lora_layers(model, self.lora_path, device='cuda', torch_dtype=torch.float16)
+            inject_lora_layers(model, self.lora_path, device='cuda', dtype=torch.float16)
 
         self.model, self.tokenizer = model, tokenizer
         print("Loaded in {:.2f} seconds.".format(time.time() - t0))
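
The definition of inject_lora_layers is not part of this diff, so the
sketch below uses a minimal stand-in, assuming only that the loader
signature takes `dtype` (as the patched call implies). The stub body,
the 'adapter.bin' path, and the CPU device are illustrative, not the
server's real code. It shows why the pre-patch keyword fails: Python
rejects an unexpected keyword argument with a TypeError before any
LoRA logic runs.

    import torch
    import torch.nn as nn

    # Hypothetical stand-in for the helper patched above; only the
    # keyword names matter here, not the actual LoRA injection logic.
    def inject_lora_layers(model: nn.Module, lora_path: str,
                           device: str = 'cuda',
                           dtype: torch.dtype = torch.float16) -> None:
        # A real implementation would load adapter weights from
        # lora_path and attach them to `model`; elided in this sketch.
        pass

    model = nn.Linear(4, 4)

    # Pre-patch call: `torch_dtype` is not a parameter of the function,
    # so Python raises TypeError before any weights are touched.
    try:
        inject_lora_layers(model, 'adapter.bin', device='cpu',
                           torch_dtype=torch.float16)
    except TypeError as err:
        print(err)  # ... got an unexpected keyword argument 'torch_dtype'

    # Post-patch call: matches the signature and succeeds.
    inject_lora_layers(model, 'adapter.bin', device='cpu',
                       dtype=torch.float16)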