Fix after merge.

commit e854f5d111
parent 8435b2c7f2

Dockerfile | 83
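For context, the rewritten Dockerfile below leans on two BuildKit features: cache mounts (RUN --mount=type=cache,...) so pip's download cache is reused across builds, and a multi-stage build in which packages installed with pip install --user in a builder stage are copied into the final image from /root/.local. A minimal sketch of that pattern, separate from this commit (the base image and package are placeholders, not taken from the diff):

    # syntax = docker/dockerfile:experimental
    FROM python:3.10 AS builder
    # cache mount: pip's cache lives outside the image layers and survives rebuilds
    RUN --mount=type=cache,target=/root/.cache/pip pip install --user requests

    FROM python:3.10
    # pip install --user (run as root) puts packages under /root/.local
    COPY --from=builder /root/.local /root/.local
    ENV PATH=/root/.local/bin:$PATH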
Dockerfile
@@ -1,44 +1,75 @@
-#FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel
+# syntax = docker/dockerfile:experimental
-FROM nvidia/cuda:11.7.0-devel-ubuntu22.04

-# Get
+# Dockerfile is split into parts because we want to cache building the requirements and downloading the model, both of which can take a long time.

-RUN apt-get update && apt-get install -y git wget python3 python3-pip
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 AS builder
-RUN ln -s `which python3` /usr/bin/python

-RUN pip3 install --upgrade pip requests tqdm
+RUN apt-get update && apt-get install -y python3 python3-pip git

+RUN pip3 install --upgrade pip

 # Some of the requirements expect some python packages in their setup.py, just install them first.
-RUN pip install torch==2.0.0
+RUN --mount=type=cache,target=/root/.cache/pip pip install --user torch==2.0.0
-RUN pip install semantic-version==2.10.0
+RUN --mount=type=cache,target=/root/.cache/pip pip install --user semantic-version==2.10.0 requests tqdm

-RUN git clone --depth=1 --branch main https://github.com/andybarry/alpaca_lora_4bit_docker.git alpaca_lora_4bit && cd alpaca_lora_4bit

-WORKDIR alpaca_lora_4bit

-COPY requirements2.txt requirements2.txt
-RUN pip install -r requirements2.txt

 # The docker build environment has trouble detecting CUDA version, build for all reasonable archs
 ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6"
 COPY requirements.txt requirements.txt
-RUN pip install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache pip install --user -r requirements.txt

-RUN git clone --depth=1 --branch main https://github.com/andybarry/text-generation-webui-4bit.git text-generation-webui-tmp && cd text-generation-webui-tmp
+# -------------------------------

+# Download the model
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 AS downloader
+RUN apt-get update && apt-get install -y wget

+RUN wget --progress=bar:force:noscroll https://huggingface.co/decapoda-research/llama-7b-hf-int4/resolve/main/llama-7b-4bit.pt

+# -------------------------------

+#FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04

+RUN --mount=type=cache,target=/var/cache/apt apt-get update && apt-get install -y git python3 python3-pip

+RUN ln -s `which python3` /usr/bin/python

+# Copy the installed packages from the first stage
+COPY --from=builder /root/.local /root/.local

+RUN mkdir alpaca_lora_4bit
+WORKDIR alpaca_lora_4bit

+COPY --from=downloader llama-7b-4bit.pt llama-7b-4bit.pt

+#RUN git clone --depth=1 --branch main https://github.com/andybarry/text-generation-webui-4bit.git text-generation-webui-tmp
+RUN git clone --depth=1 --branch main https://github.com/oobabooga/text-generation-webui.git text-generation-webui-tmp

+RUN --mount=type=cache,target=/root/.cache pip install --user markdown

+# Apply monkey patch
+RUN cd text-generation-webui-tmp && printf '%s'"import custom_monkey_patch # apply monkey patch\nimport gc\n\n" | cat - server.py > tmpfile && mv tmpfile server.py

+# Get the model config
+RUN cd text-generation-webui-tmp && python download-model.py --text-only decapoda-research/llama-7b-hf && mv models/decapoda-research_llama-7b-hf ../llama-7b-4bit

+# Get LoRA
+RUN cd text-generation-webui-tmp && python download-model.py samwit/alpaca7b-lora && mv loras/samwit_alpaca7b-lora ../alpaca7b_lora

+COPY *.py .
+COPY text-generation-webui text-generation-webui
+RUN ls -l
+COPY monkeypatch .

 RUN mv -f text-generation-webui-tmp/* text-generation-webui/

-# Get the model
-RUN cd text-generation-webui && python download-model.py --text-only decapoda-research/llama-7b-hf && mv models/decapoda-research_llama-7b-hf ../llama-7b-4bit

-RUN wget https://huggingface.co/decapoda-research/llama-7b-hf-int4/resolve/main/llama-7b-4bit.pt -O llama-7b-4bit.pt

-# Get LoRA
-RUN cd text-generation-webui && python download-model.py samwit/alpaca7B-lora && mv loras/samwit_alpaca7B-lora ../alpaca7b_lora

 # Symlink for monkeypatch
 RUN cd text-generation-webui && ln -s ../autograd_4bit.py ./autograd_4bit.py && ln -s ../matmul_utils_4bit.py .

 # Run the server
 WORKDIR /alpaca_lora_4bit/text-generation-webui
 CMD ["python", "-u", "server.py", "--listen", "--chat"]
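A build-and-run sketch for the new multi-stage Dockerfile, assuming Docker with BuildKit enabled and an NVIDIA runtime; port 7860 is text-generation-webui's usual Gradio default, not something this commit sets:

    DOCKER_BUILDKIT=1 docker build -t alpaca_lora_4bit .
    docker run --rm --gpus all -p 7860:7860 alpaca_lora_4bit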
@@ -107,6 +107,7 @@ class Autograd4bitQuantLinear(nn.Module):
         self.bits = bits
         self.maxq = 2 ** self.bits - 1
         self.groupsize = groupsize
+        self.g_idx = 0
         if groupsize == -1:
             self.register_buffer('zeros', torch.empty((out_features, 1)))
             self.register_buffer('scales', torch.empty((out_features, 1)))
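The maxq line in the hunk above is the largest value a quantized weight can take at the configured bit width; a quick check of the arithmetic for the 4-bit weights this image targets:

    # illustrative only, mirroring self.maxq = 2 ** self.bits - 1 from the hunk
    bits = 4
    maxq = 2 ** bits - 1  # 15, the largest unsigned 4-bit value

The added self.g_idx = 0 only gives the layer a default group-index attribute; which code path reads it is not stated in this commit.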
@@ -29,7 +29,7 @@ def load_model_llama(*args, **kwargs):
         m.scales = m.scales.half()

         # This line failed for me, commenting it out seems to work...
-        #m.bias = m.bias.half()
+        m.bias = m.bias.half()
     autograd_4bit.use_new = True
     autograd_4bit.auto_switch = True
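The last hunk re-enables halving the bias while keeping the earlier note that the line had failed before. If that failure came from layers without a bias, a guarded variant would sidestep it; this is a defensive sketch, not what the commit does:

    # hypothetical guard, not part of this commit
    if getattr(m, "bias", None) is not None:
        m.bias = m.bias.half()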