Fix bug: pass `dtype` instead of `torch_dtype` to `inject_lora_layers`
This commit is contained in:
parent
1abdc99675
commit
d6791790ed
|
|
@@ -88,7 +88,7 @@ class ModelServer:
|
|||
print('Quantized attention applied.')
|
||||
|
||||
if self.lora_path is not None:
|
||||
inject_lora_layers(model, self.lora_path, device='cuda', torch_dtype=torch.float16)
|
||||
inject_lora_layers(model, self.lora_path, device='cuda', dtype=torch.float16)
|
||||
|
||||
self.model, self.tokenizer = model, tokenizer
|
||||
print("Loaded in {:.2f} seconds.".format(time.time() - t0))
|
||||
|
|
|
|||
Loading…
Reference in New Issue