Merge pull request #30 from winglian/features/python-fixes
Add backwards compatibility for Python versions before 3.10 (replace `match`/`case` with `if`/`elif`), and add the `datasets` requirement used in training
This commit is contained in:
commit
667e43cb5b
|
|
@ -72,14 +72,13 @@ tokenizer.pad_token_id = 0
|
|||
if not ft_config.skip:
|
||||
# Load Data
|
||||
data = None
|
||||
match ft_config.ds_type:
|
||||
case "txt" if not ft_config.skip:
|
||||
#### LLaMA
|
||||
if ft_config.ds_type == "txt" and not ft_config.skip:
|
||||
#### LLaMa
|
||||
data = train_data.TrainTxt(ft_config.dataset, ft_config.val_set_size, tokenizer, ft_config.cutoff_len)
|
||||
case "alpaca" if not ft_config.skip:
|
||||
elif ft_config.ds_type == "alpaca" and not ft_config.skip:
|
||||
#### Stanford Alpaca-like Data
|
||||
data = train_data.TrainSAD(ft_config.dataset, ft_config.val_set_size, tokenizer, ft_config.cutoff_len)
|
||||
case _:
|
||||
else:
|
||||
raise NotImplementedError("ERROR: Unknown dataset format")
|
||||
data.prepare_data()
|
||||
####
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
torch
|
||||
accelerate
|
||||
bitsandbytes
|
||||
datasets
|
||||
sentencepiece
|
||||
git+https://github.com/huggingface/transformers.git
|
||||
git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit
|
||||
git+https://github.com/sterlind/peft.git
|
||||
|
|
|
|||
Loading…
Reference in New Issue