From 31614fc2c4f239f2ed23f6c431ce53b4eb8bd03c Mon Sep 17 00:00:00 2001
From: Andy Barry
Date: Sat, 8 Apr 2023 01:21:17 -0400
Subject: [PATCH] Move 7bn changes into dockerfile.

---
 Dockerfile                                   |  4 +
 requirements2.txt                            | 96 --------------------
 text-generation-webui/custom_monkey_patch.py | 10 +-
 3 files changed, 8 insertions(+), 102 deletions(-)
 delete mode 100644 requirements2.txt

diff --git a/Dockerfile b/Dockerfile
index b9849fc..e34d510 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -70,6 +70,10 @@ RUN mv -f text-generation-webui-tmp/* text-generation-webui/
 
 # Symlink for monkeypatch
 RUN cd text-generation-webui && ln -s ../autograd_4bit.py ./autograd_4bit.py && ln -s ../matmul_utils_4bit.py .
+
+# Swap to the 7bn parameter model
+RUN sed -i 's/llama-13b-4bit/llama-7b-4bit/g' text-generation-webui/custom_monkey_patch.py && sed -i 's/alpaca13b_lora/alpaca7b_lora/g' text-generation-webui/custom_monkey_patch.py
+
 # Run the server
 WORKDIR /alpaca_lora_4bit/text-generation-webui
 CMD ["python", "-u", "server.py", "--listen", "--chat"]
\ No newline at end of file
diff --git a/requirements2.txt b/requirements2.txt
deleted file mode 100644
index 439e9fa..0000000
--- a/requirements2.txt
+++ /dev/null
@@ -1,96 +0,0 @@
-accelerate==0.18.0
-aiofiles==23.1.0
-aiohttp==3.8.4
-aiosignal==1.3.1
-altair==4.2.2
-anyio==3.6.2
-async-timeout==4.0.2
-attrs==22.2.0
-bitsandbytes==0.37.2
-certifi==2022.12.7
-charset-normalizer==3.1.0
-click==8.1.3
-cmake==3.26.1
-contourpy==1.0.7
-cycler==0.11.0
-datasets==2.11.0
-dill==0.3.6
-entrypoints==0.4
-fastapi==0.95.0
-ffmpy==0.3.0
-filelock==3.10.7
-fonttools==4.39.3
-frozenlist==1.3.3
-fsspec==2023.3.0
-gradio==3.24.1
-gradio_client==0.0.7
-h11==0.14.0
-httpcore==0.16.3
-httpx==0.23.3
-huggingface-hub==0.13.3
-idna==3.4
-Jinja2==3.1.2
-jsonschema==4.17.3
-kiwisolver==1.4.4
-linkify-it-py==2.0.0
-lit==16.0.0
-Markdown==3.4.3
-markdown-it-py==2.2.0
-MarkupSafe==2.1.2
-matplotlib==3.7.1
-mdit-py-plugins==0.3.3
-mdurl==0.1.2
-mpmath==1.3.0
-multidict==6.0.4
-multiprocess==0.70.14
-networkx==3.1
-numpy==1.24.2
-nvidia-cublas-cu11==11.10.3.66
-nvidia-cuda-cupti-cu11==11.7.101
-nvidia-cuda-nvrtc-cu11==11.7.99
-nvidia-cuda-runtime-cu11==11.7.99
-nvidia-cudnn-cu11==8.5.0.96
-nvidia-cufft-cu11==10.9.0.58
-nvidia-curand-cu11==10.2.10.91
-nvidia-cusolver-cu11==11.4.0.1
-nvidia-cusparse-cu11==11.7.4.91
-nvidia-nccl-cu11==2.14.3
-nvidia-nvtx-cu11==11.7.91
-orjson==3.8.9
-packaging==23.0
-pandas==2.0.0
-Pillow==9.5.0
-psutil==5.9.4
-pyarrow==11.0.0
-pydantic==1.10.7
-pydub==0.25.1
-pyparsing==3.0.9
-pyrsistent==0.19.3
-python-dateutil==2.8.2
-python-multipart==0.0.6
-pytz==2023.3
-PyYAML==6.0
-regex==2023.3.23
-requests==2.28.2
-responses==0.18.0
-rfc3986==1.5.0
-safetensors==0.3.0
-semantic-version==2.10.0
-sentencepiece==0.1.97
-six==1.16.0
-sniffio==1.3.0
-starlette==0.26.1
-sympy==1.11.1
-tokenizers==0.13.3
-toolz==0.12.0
-torch==2.0.0
-tqdm==4.65.0
-triton==2.0.0
-typing_extensions==4.5.0
-tzdata==2023.3
-uc-micro-py==1.0.1
-urllib3==1.26.15
-uvicorn==0.21.1
-websockets==11.0
-xxhash==3.2.0
-yarl==1.8.2
diff --git a/text-generation-webui/custom_monkey_patch.py b/text-generation-webui/custom_monkey_patch.py
index 85d2179..0f4d370 100644
--- a/text-generation-webui/custom_monkey_patch.py
+++ b/text-generation-webui/custom_monkey_patch.py
@@ -9,9 +9,9 @@ patch_encode_func = False
 
 def load_model_llama(*args, **kwargs):
 
-    config_path = '../llama-7b-4bit/'
-    model_path = '../llama-7b-4bit.pt'
-    lora_path = '../alpaca7b_lora/'
+    config_path = '../llama-13b-4bit/'
+    model_path = '../llama-13b-4bit.pt'
+    lora_path = '../alpaca13b_lora/'
 
     print("Loading {} ...".format(model_path))
     t0 = time.time()
@@ -27,8 +27,6 @@ def load_model_llama(*args, **kwargs):
         if m.groupsize == -1:
             m.zeros = m.zeros.half()
         m.scales = m.scales.half()
-
-        # This line failed for me, commenting it out seems to work...
         m.bias = m.bias.half()
     autograd_4bit.use_new = True
     autograd_4bit.auto_switch = True
@@ -39,7 +37,7 @@ def load_model_llama(*args, **kwargs):
 from modules import models
 from modules import shared
 models.load_model = load_model_llama
-shared.args.model = 'llama-7b-4bit'
+shared.args.model = 'llama-13b-4bit'
 shared.settings['name1'] = 'You'
 shared.settings['name2'] = 'Assistant'
 shared.settings['chat_prompt_size_max'] = 2048
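
Note (outside the patch, for reference): the "Swap to the 7bn parameter model" step is a plain textual rewrite of custom_monkey_patch.py at image build time. The sketch below is a minimal Python equivalent of the two sed commands, illustrative only; it assumes it runs from the repo root, where text-generation-webui/custom_monkey_patch.py still holds the 13b defaults shown in the last hunk.

#!/usr/bin/env python3
# Mirrors the Dockerfile step:
#   sed -i 's/llama-13b-4bit/llama-7b-4bit/g' text-generation-webui/custom_monkey_patch.py
#   sed -i 's/alpaca13b_lora/alpaca7b_lora/g' text-generation-webui/custom_monkey_patch.py
from pathlib import Path

path = Path("text-generation-webui/custom_monkey_patch.py")
src = path.read_text()
out = src.replace("llama-13b-4bit", "llama-7b-4bit")
out = out.replace("alpaca13b_lora", "alpaca7b_lora")
# Sanity check: no 13b references should survive the rewrite.
assert "llama-13b-4bit" not in out and "alpaca13b_lora" not in out
path.write_text(out)
print("Rewrote {} for the 7b model".format(path))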