Fix bug: pass `dtype` instead of `torch_dtype` to `inject_lora_layers`

2023-04-26 13:13:54 +08:00 · 2023-04-26 13:13:54 +08:00 · d6791790ed
parent 1abdc99675
commit d6791790ed
1 changed file with 1 addition and 1 deletion
--- a/server/server.py
+++ b/server/server.py
@@ -88,7 +88,7 @@ class ModelServer:
            print('Quantized attention applied.')

            if self.lora_path is not None:
-                inject_lora_layers(model, self.lora_path, device='cuda', torch_dtype=torch.float16)
+                inject_lora_layers(model, self.lora_path, device='cuda', dtype=torch.float16)
        
        self.model, self.tokenizer = model, tokenizer
        print("Loaded in {:.2f} seconds.".format(time.time() - t0))