backwards support for pre-py3.10, add datasets requirement used in train

This commit is contained in:
Wing Lian 2023-03-27 16:08:20 -04:00
parent 6c8c07e7ad
commit 62e54ac1c7
2 changed files with 11 additions and 11 deletions

View File

@@ -72,15 +72,14 @@ tokenizer.pad_token_id = 0
if not ft_config.skip: if not ft_config.skip:
# Load Data # Load Data
data = None data = None
match ft_config.ds_type: if ft_config.ds_type == "txt" and not ft_config.skip:
case "txt" if not ft_config.skip: #### LLaMa
#### LLaMA data = train_data.TrainTxt(ft_config.dataset, ft_config.val_set_size, tokenizer, ft_config.cutoff_len)
data = train_data.TrainTxt(ft_config.dataset, ft_config.val_set_size, tokenizer, ft_config.cutoff_len) elif ft_config.ds_type == "alpaca" and not ft_config.skip:
case "alpaca" if not ft_config.skip: #### Stanford Alpaca-like Data
#### Stanford Alpaca-like Data data = train_data.TrainSAD(ft_config.dataset, ft_config.val_set_size, tokenizer, ft_config.cutoff_len)
data = train_data.TrainSAD(ft_config.dataset, ft_config.val_set_size, tokenizer, ft_config.cutoff_len) else:
case _: raise NotImplementedError("ERROR: Unknown dataset format")
raise NotImplementedError("ERROR: Unknown dataset format")
data.prepare_data() data.prepare_data()
#### ####

View File

@@ -1,6 +1,7 @@
torch torch
accelerate accelerate
bitsandbytes bitsandbytes
datasets
git+https://github.com/huggingface/transformers.git git+https://github.com/huggingface/transformers.git
git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit
git+https://github.com/sterlind/peft.git git+https://github.com/sterlind/peft.git