fix bug: pass dtype instead of torch_dtype to inject_lora_layers
parent 1abdc99675
commit d6791790ed
@@ -88,7 +88,7 @@ class ModelServer:
         print('Quantized attention applied.')

         if self.lora_path is not None:
-            inject_lora_layers(model, self.lora_path, device='cuda', torch_dtype=torch.float16)
+            inject_lora_layers(model, self.lora_path, device='cuda', dtype=torch.float16)

         self.model, self.tokenizer = model, tokenizer
         print("Loaded in {:.2f} seconds.".format(time.time() - t0))
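For context, a minimal sketch of the keyword mismatch this commit fixes. The body of inject_lora_layers is not part of this diff, so the signature and the adapter path below are assumptions for illustration only; the point is that a helper declaring a dtype parameter rejects the torch_dtype keyword the old call site passed.

import torch
from torch import nn

# Hypothetical signature for illustration -- the real inject_lora_layers
# is not shown in this diff. What matters is the keyword name: dtype.
def inject_lora_layers(model: nn.Module, lora_path: str,
                       device: str = 'cuda',
                       dtype: torch.dtype = torch.float16) -> None:
    # Load LoRA weights and attach them to matching layers (details
    # elided; this sketch only demonstrates the keyword contract).
    ...

model = nn.Linear(4, 4)

# Old call site: TypeError -- unexpected keyword argument 'torch_dtype'.
# inject_lora_layers(model, 'adapter.bin', device='cuda', torch_dtype=torch.float16)

# Fixed call site, matching this commit ('adapter.bin' is a placeholder):
inject_lora_layers(model, 'adapter.bin', device='cuda', dtype=torch.float16)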