diff --git a/Dockerfile b/Dockerfile
index d268b2d..b9849fc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,44 +1,75 @@
-#FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel
-FROM nvidia/cuda:11.7.0-devel-ubuntu22.04
+# syntax = docker/dockerfile:experimental
 
-# Get
+# Dockerfile is split into parts because we want to cache building the requirements and downloading the model, both of which can take a long time.
 
-RUN apt-get update && apt-get install -y git wget python3 python3-pip
-RUN ln -s `which python3` /usr/bin/python
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 AS builder
 
-RUN pip3 install --upgrade pip requests tqdm
+RUN apt-get update && apt-get install -y python3 python3-pip git
+
+RUN pip3 install --upgrade pip
 
 # Some of the requirements expect some python packages in their setup.py, just install them first.
-RUN pip install torch==2.0.0
-RUN pip install semantic-version==2.10.0
-
-RUN git clone --depth=1 --branch main https://github.com/andybarry/alpaca_lora_4bit_docker.git alpaca_lora_4bit && cd alpaca_lora_4bit
-
-WORKDIR alpaca_lora_4bit
-
-COPY requirements2.txt requirements2.txt
-RUN pip install -r requirements2.txt
+RUN --mount=type=cache,target=/root/.cache/pip pip install --user torch==2.0.0
+RUN --mount=type=cache,target=/root/.cache/pip pip install --user semantic-version==2.10.0 requests tqdm
 
 # The docker build environment has trouble detecting CUDA version, build for all reasonable archs
 ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6"
 
 COPY requirements.txt requirements.txt
-RUN pip install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache pip install --user -r requirements.txt
 
-RUN git clone --depth=1 --branch main https://github.com/andybarry/text-generation-webui-4bit.git text-generation-webui-tmp && cd text-generation-webui-tmp
+# -------------------------------
+
+# Download the model
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 AS downloader
+RUN apt-get update && apt-get install -y wget
+
+RUN wget --progress=bar:force:noscroll https://huggingface.co/decapoda-research/llama-7b-hf-int4/resolve/main/llama-7b-4bit.pt
+
+
+
+# -------------------------------
+
+#FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04
+
+RUN --mount=type=cache,target=/var/cache/apt apt-get update && apt-get install -y git python3 python3-pip
+
+RUN ln -s `which python3` /usr/bin/python
+
+
+# Copy the installed packages from the first stage
+COPY --from=builder /root/.local /root/.local
+
+RUN mkdir alpaca_lora_4bit
 WORKDIR alpaca_lora_4bit
+
+COPY --from=downloader llama-7b-4bit.pt llama-7b-4bit.pt
+
+#RUN git clone --depth=1 --branch main https://github.com/andybarry/text-generation-webui-4bit.git text-generation-webui-tmp
+
+RUN git clone --depth=1 --branch main https://github.com/oobabooga/text-generation-webui.git text-generation-webui-tmp
+
+RUN --mount=type=cache,target=/root/.cache pip install --user markdown
+
+# Apply monkey patch
+RUN cd text-generation-webui-tmp && printf '%s'"import custom_monkey_patch # apply monkey patch\nimport gc\n\n" | cat - server.py > tmpfile && mv tmpfile server.py
+
+# Get the model config
+RUN cd text-generation-webui-tmp && python download-model.py --text-only decapoda-research/llama-7b-hf && mv models/decapoda-research_llama-7b-hf ../llama-7b-4bit
+
+
+# Get LoRA
+RUN cd text-generation-webui-tmp && python download-model.py samwit/alpaca7b-lora && mv loras/samwit_alpaca7b-lora ../alpaca7b_lora
+
+COPY *.py .
+COPY text-generation-webui text-generation-webui
+RUN ls -l
+COPY monkeypatch .
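+# Merge the patched upstream checkout into the local webui tree: same-named
+# files are overwritten, local extras (e.g. custom_monkey_patch.py) survive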
 RUN mv -f text-generation-webui-tmp/* text-generation-webui/
 
-# Get the model
-RUN cd text-generation-webui && python download-model.py --text-only decapoda-research/llama-7b-hf && mv models/decapoda-research_llama-7b-hf ../llama-7b-4bit
-
-RUN wget https://huggingface.co/decapoda-research/llama-7b-hf-int4/resolve/main/llama-7b-4bit.pt -O llama-7b-4bit.pt
-
-# Get LoRA
-RUN cd text-generation-webui && python download-model.py samwit/alpaca7B-lora && mv loras/samwit_alpaca7B-lora ../alpaca7b_lora
-
 # Symlink for monkeypatch
 RUN cd text-generation-webui && ln -s ../autograd_4bit.py ./autograd_4bit.py && ln -s ../matmul_utils_4bit.py .
-
 # Run the server
 WORKDIR /alpaca_lora_4bit/text-generation-webui
 CMD ["python", "-u", "server.py", "--listen", "--chat"]
\ No newline at end of file
diff --git a/autograd_4bit.py b/autograd_4bit.py
index 5d19908..be7d73f 100644
--- a/autograd_4bit.py
+++ b/autograd_4bit.py
@@ -107,6 +107,7 @@ class Autograd4bitQuantLinear(nn.Module):
         self.bits = bits
         self.maxq = 2 ** self.bits - 1
         self.groupsize = groupsize
+        self.g_idx = 0
         if groupsize == -1:
             self.register_buffer('zeros', torch.empty((out_features, 1)))
             self.register_buffer('scales', torch.empty((out_features, 1)))
diff --git a/text-generation-webui/custom_monkey_patch.py b/text-generation-webui/custom_monkey_patch.py
index e67f11c..85d2179 100644
--- a/text-generation-webui/custom_monkey_patch.py
+++ b/text-generation-webui/custom_monkey_patch.py
@@ -29,7 +29,6 @@ def load_model_llama(*args, **kwargs):
         m.scales = m.scales.half()
-        # This line failed for me, commenting it out seems to work...
-        #m.bias = m.bias.half()
+        m.bias = m.bias.half()
         autograd_4bit.use_new = True
         autograd_4bit.auto_switch = True
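
Build/run sketch: the RUN --mount cache instructions above only take effect
under BuildKit, so a typical invocation looks like the lines below (the
alpaca_lora_4bit image tag is illustrative, not taken from this diff):

    DOCKER_BUILDKIT=1 docker build -t alpaca_lora_4bit .
    docker run --rm --gpus all -p 7860:7860 alpaca_lora_4bit

The container serves text-generation-webui on port 7860, its default, with
server.py --listen binding on all interfaces.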