Move the 7B-model changes into the Dockerfile.

2023-04-08 01:21:17 -04:00 · 2023-04-08 01:21:17 -04:00 · 31614fc2c4
parent 2e5aaf6dd6
commit 31614fc2c4
3 changed files with 8 additions and 102 deletions
--- a/4
+++ b/4
@ -70,6 +70,10 @@ RUN mv -f text-generation-webui-tmp/* text-generation-webui/
 # Symlink for monkeypatch
 RUN cd text-generation-webui && ln -s ../autograd_4bit.py ./autograd_4bit.py && ln -s ../matmul_utils_4bit.py .
 # Swap to the 7B-parameter model by rewriting the 13B model and LoRA paths in custom_monkey_patch.py
 RUN sed -i 's/llama-13b-4bit/llama-7b-4bit/g' text-generation-webui/custom_monkey_patch.py && sed -i 's/alpaca13b_lora/alpaca7b_lora/g' text-generation-webui/custom_monkey_patch.py
 # Run the server
 WORKDIR /alpaca_lora_4bit/text-generation-webui
 CMD ["python", "-u", "server.py", "--listen", "--chat"]
--- a/requirements2.txt
+++ b/requirements2.txt
@ -1,96 +0,0 @@
 accelerate==0.18.0
 aiofiles==23.1.0
 aiohttp==3.8.4
 aiosignal==1.3.1
 altair==4.2.2
 anyio==3.6.2
 async-timeout==4.0.2
 attrs==22.2.0
 bitsandbytes==0.37.2
 certifi==2022.12.7
 charset-normalizer==3.1.0
 click==8.1.3
 cmake==3.26.1
 contourpy==1.0.7
 cycler==0.11.0
 datasets==2.11.0
 dill==0.3.6
 entrypoints==0.4
 fastapi==0.95.0
 ffmpy==0.3.0
 filelock==3.10.7
 fonttools==4.39.3
 frozenlist==1.3.3
 fsspec==2023.3.0
 gradio==3.24.1
 gradio_client==0.0.7
 h11==0.14.0
 httpcore==0.16.3
 httpx==0.23.3
 huggingface-hub==0.13.3
 idna==3.4
 Jinja2==3.1.2
 jsonschema==4.17.3
 kiwisolver==1.4.4
 linkify-it-py==2.0.0
 lit==16.0.0
 Markdown==3.4.3
 markdown-it-py==2.2.0
 MarkupSafe==2.1.2
 matplotlib==3.7.1
 mdit-py-plugins==0.3.3
 mdurl==0.1.2
 mpmath==1.3.0
 multidict==6.0.4
 multiprocess==0.70.14
 networkx==3.1
 numpy==1.24.2
 nvidia-cublas-cu11==11.10.3.66
 nvidia-cuda-cupti-cu11==11.7.101
 nvidia-cuda-nvrtc-cu11==11.7.99
 nvidia-cuda-runtime-cu11==11.7.99
 nvidia-cudnn-cu11==8.5.0.96
 nvidia-cufft-cu11==10.9.0.58
 nvidia-curand-cu11==10.2.10.91
 nvidia-cusolver-cu11==11.4.0.1
 nvidia-cusparse-cu11==11.7.4.91
 nvidia-nccl-cu11==2.14.3
 nvidia-nvtx-cu11==11.7.91
 orjson==3.8.9
 packaging==23.0
 pandas==2.0.0
 Pillow==9.5.0
 psutil==5.9.4
 pyarrow==11.0.0
 pydantic==1.10.7
 pydub==0.25.1
 pyparsing==3.0.9
 pyrsistent==0.19.3
 python-dateutil==2.8.2
 python-multipart==0.0.6
 pytz==2023.3
 PyYAML==6.0
 regex==2023.3.23
 requests==2.28.2
 responses==0.18.0
 rfc3986==1.5.0
 safetensors==0.3.0
 semantic-version==2.10.0
 sentencepiece==0.1.97
 six==1.16.0
 sniffio==1.3.0
 starlette==0.26.1
 sympy==1.11.1
 tokenizers==0.13.3
 toolz==0.12.0
 torch==2.0.0
 tqdm==4.65.0
 triton==2.0.0
 typing_extensions==4.5.0
 tzdata==2023.3
 uc-micro-py==1.0.1
 urllib3==1.26.15
 uvicorn==0.21.1
 websockets==11.0
 xxhash==3.2.0
 yarl==1.8.2
--- a/text-generation-webui/custom_monkey_patch.py
+++ b/text-generation-webui/custom_monkey_patch.py
@ -9,9 +9,9 @@ patch_encode_func = False
 def load_model_llama(*args, **kwargs):
-    config_path = '../llama-7b-4bit/'
+    config_path = '../llama-13b-4bit/'
-    model_path = '../llama-7b-4bit.pt'
+    model_path = '../llama-13b-4bit.pt'
-    lora_path = '../alpaca7b_lora/'
+    lora_path = '../alpaca13b_lora/'
    print("Loading {} ...".format(model_path))
    t0 = time.time()
@ -27,8 +27,6 @@ def load_model_llama(*args, **kwargs):
            if m.groupsize == -1:
                m.zeros = m.zeros.half()
            m.scales = m.scales.half()
            # This line failed for me, commenting it out seems to work...
            m.bias = m.bias.half()
    autograd_4bit.use_new = True
    autograd_4bit.auto_switch = True
@ -39,7 +37,7 @@ def load_model_llama(*args, **kwargs):
 from modules import models
 from modules import shared
 models.load_model = load_model_llama
-shared.args.model = 'llama-7b-4bit'
+shared.args.model = 'llama-13b-4bit'
 shared.settings['name1'] = 'You'
 shared.settings['name2'] = 'Assistant'
 shared.settings['chat_prompt_size_max'] = 2048