From 31614fc2c4f239f2ed23f6c431ce53b4eb8bd03c Mon Sep 17 00:00:00 2001
From: Andy Barry
Date: Sat, 8 Apr 2023 01:21:17 -0400
Subject: [PATCH] Move 7bn changes into dockerfile.

---
 Dockerfile                                   |  4 +
 requirements2.txt                            | 96 --------------------
 text-generation-webui/custom_monkey_patch.py | 10 +-
 3 files changed, 8 insertions(+), 102 deletions(-)
 delete mode 100644 requirements2.txt

diff --git a/Dockerfile b/Dockerfile
index b9849fc..e34d510 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -70,6 +70,10 @@ RUN mv -f text-generation-webui-tmp/* text-generation-webui/
 
 # Symlink for monkeypatch
 RUN cd text-generation-webui && ln -s ../autograd_4bit.py ./autograd_4bit.py && ln -s ../matmul_utils_4bit.py .
+
+# Swap to the 7bn parameter model
+RUN sed -i 's/llama-13b-4bit/llama-7b-4bit/g' text-generation-webui/custom_monkey_patch.py && sed -i 's/alpaca13b_lora/alpaca7b_lora/g' text-generation-webui/custom_monkey_patch.py
+
 # Run the server
 WORKDIR /alpaca_lora_4bit/text-generation-webui
 CMD ["python", "-u", "server.py", "--listen", "--chat"]
\ No newline at end of file
diff --git a/requirements2.txt b/requirements2.txt
deleted file mode 100644
index 439e9fa..0000000
--- a/requirements2.txt
+++ /dev/null
@@ -1,96 +0,0 @@
-accelerate==0.18.0
-aiofiles==23.1.0
-aiohttp==3.8.4
-aiosignal==1.3.1
-altair==4.2.2
-anyio==3.6.2
-async-timeout==4.0.2
-attrs==22.2.0
-bitsandbytes==0.37.2
-certifi==2022.12.7
-charset-normalizer==3.1.0
-click==8.1.3
-cmake==3.26.1
-contourpy==1.0.7
-cycler==0.11.0
-datasets==2.11.0
-dill==0.3.6
-entrypoints==0.4
-fastapi==0.95.0
-ffmpy==0.3.0
-filelock==3.10.7
-fonttools==4.39.3
-frozenlist==1.3.3
-fsspec==2023.3.0
-gradio==3.24.1
-gradio_client==0.0.7
-h11==0.14.0
-httpcore==0.16.3
-httpx==0.23.3
-huggingface-hub==0.13.3
-idna==3.4
-Jinja2==3.1.2
-jsonschema==4.17.3
-kiwisolver==1.4.4
-linkify-it-py==2.0.0
-lit==16.0.0
-Markdown==3.4.3
-markdown-it-py==2.2.0
-MarkupSafe==2.1.2
-matplotlib==3.7.1
-mdit-py-plugins==0.3.3
-mdurl==0.1.2
-mpmath==1.3.0
-multidict==6.0.4
-multiprocess==0.70.14
-networkx==3.1
-numpy==1.24.2
-nvidia-cublas-cu11==11.10.3.66
-nvidia-cuda-cupti-cu11==11.7.101
-nvidia-cuda-nvrtc-cu11==11.7.99
-nvidia-cuda-runtime-cu11==11.7.99
-nvidia-cudnn-cu11==8.5.0.96
-nvidia-cufft-cu11==10.9.0.58
-nvidia-curand-cu11==10.2.10.91
-nvidia-cusolver-cu11==11.4.0.1
-nvidia-cusparse-cu11==11.7.4.91
-nvidia-nccl-cu11==2.14.3
-nvidia-nvtx-cu11==11.7.91
-orjson==3.8.9
-packaging==23.0
-pandas==2.0.0
-Pillow==9.5.0
-psutil==5.9.4
-pyarrow==11.0.0
-pydantic==1.10.7
-pydub==0.25.1
-pyparsing==3.0.9
-pyrsistent==0.19.3
-python-dateutil==2.8.2
-python-multipart==0.0.6
-pytz==2023.3
-PyYAML==6.0
-regex==2023.3.23
-requests==2.28.2
-responses==0.18.0
-rfc3986==1.5.0
-safetensors==0.3.0
-semantic-version==2.10.0
-sentencepiece==0.1.97
-six==1.16.0
-sniffio==1.3.0
-starlette==0.26.1
-sympy==1.11.1
-tokenizers==0.13.3
-toolz==0.12.0
-torch==2.0.0
-tqdm==4.65.0
-triton==2.0.0
-typing_extensions==4.5.0
-tzdata==2023.3
-uc-micro-py==1.0.1
-urllib3==1.26.15
-uvicorn==0.21.1
-websockets==11.0
-xxhash==3.2.0
-yarl==1.8.2
diff --git a/text-generation-webui/custom_monkey_patch.py b/text-generation-webui/custom_monkey_patch.py
index 85d2179..0f4d370 100644
--- a/text-generation-webui/custom_monkey_patch.py
+++ b/text-generation-webui/custom_monkey_patch.py
@@ -9,9 +9,9 @@ patch_encode_func = False
 
 def load_model_llama(*args, **kwargs):
 
-    config_path = '../llama-7b-4bit/'
-    model_path = '../llama-7b-4bit.pt'
-    lora_path = '../alpaca7b_lora/'
+    config_path = '../llama-13b-4bit/'
+    model_path = '../llama-13b-4bit.pt'
+    lora_path = '../alpaca13b_lora/'
 
     print("Loading {} ...".format(model_path))
     t0 = time.time()
@@ -27,8 +27,6 @@ def load_model_llama(*args, **kwargs):
         if m.groupsize == -1:
             m.zeros = m.zeros.half()
         m.scales = m.scales.half()
-
-        # This line failed for me, commenting it out seems to work...
         m.bias = m.bias.half()
     autograd_4bit.use_new = True
     autograd_4bit.auto_switch = True
@@ -39,7 +37,7 @@ def load_model_llama(*args, **kwargs):
 from modules import models
 from modules import shared
 models.load_model = load_model_llama
-shared.args.model = 'llama-7b-4bit'
+shared.args.model = 'llama-13b-4bit'
 shared.settings['name1'] = 'You'
 shared.settings['name2'] = 'Assistant'
 shared.settings['chat_prompt_size_max'] = 2048
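
Note (outside the patch, for reference): the "Swap to the 7bn parameter model" step is a plain textual rewrite of custom_monkey_patch.py at image build time. The sketch below is a minimal Python equivalent of the two sed commands, illustrative only; it assumes it runs from the repo root, where text-generation-webui/custom_monkey_patch.py still holds the 13b defaults shown in the last hunk.

#!/usr/bin/env python3
# Mirrors the Dockerfile step:
#   sed -i 's/llama-13b-4bit/llama-7b-4bit/g' text-generation-webui/custom_monkey_patch.py
#   sed -i 's/alpaca13b_lora/alpaca7b_lora/g' text-generation-webui/custom_monkey_patch.py
from pathlib import Path

path = Path("text-generation-webui/custom_monkey_patch.py")
src = path.read_text()
out = src.replace("llama-13b-4bit", "llama-7b-4bit")
out = out.replace("alpaca13b_lora", "alpaca7b_lora")
# Sanity check: no 13b references should survive the rewrite.
assert "llama-13b-4bit" not in out and "alpaca13b_lora" not in out
path.write_text(out)
print("Rewrote {} for the 7b model".format(path))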