From 1af4023883508fdad1b2d68cbca3281ac7210ebf Mon Sep 17 00:00:00 2001 From: Andy Barry Date: Wed, 5 Apr 2023 23:29:10 -0400 Subject: [PATCH] Fix some issues. --- Dockerfile | 6 ++---- README.md | 12 ++++-------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index d795a74..d268b2d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,8 +12,7 @@ RUN pip3 install --upgrade pip requests tqdm RUN pip install torch==2.0.0 RUN pip install semantic-version==2.10.0 -RUN git clone --depth=1 --branch main https://github.com/andybarry/alpaca_lora_4bit_docker.git && cd alpaca_lora_4bit -# && git checkout 86387a0a3575c82e689a452c20b2c9a5cc94a0f3 +RUN git clone --depth=1 --branch main https://github.com/andybarry/alpaca_lora_4bit_docker.git alpaca_lora_4bit && cd alpaca_lora_4bit WORKDIR alpaca_lora_4bit @@ -26,7 +25,6 @@ COPY requirements.txt requirements.txt RUN pip install -r requirements.txt RUN git clone --depth=1 --branch main https://github.com/andybarry/text-generation-webui-4bit.git text-generation-webui-tmp && cd text-generation-webui-tmp -# && git checkout 378d21e80c3d6f11a4835e57597c69e340008e2c RUN mv -f text-generation-webui-tmp/* text-generation-webui/ @@ -43,4 +41,4 @@ RUN cd text-generation-webui && ln -s ../autograd_4bit.py ./autograd_4bit.py && # Run the server WORKDIR /alpaca_lora_4bit/text-generation-webui -CMD ["python", "server.py"] \ No newline at end of file +CMD ["python", "-u", "server.py", "--listen", "--chat"] \ No newline at end of file diff --git a/README.md b/README.md index 1ee9ee5..21a834b 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ Based on https://github.com/johnsmith0031/alpaca_lora_4bit Can run real-time LLM chat using alpaca on a 8GB NVIDIA/CUDA GPU (ie 3070 Ti mobile) ## Requirements -- linux with docker -- nvidia GPU +- Docker +- NVIDIA GPU ## Installation @@ -19,13 +19,9 @@ docker run -p 7086:7086 alpaca_lora_4bit Point your browser to http://localhost:7086 ## Results -It's fast on a 3070 Ti. 
- -### Discussion -The model isn't all that good, sometimes it goes crazy. But hey, "when 4-bits _you reach_ look this good you will not." - -But it is fast (on my 3070 Ti mobile at least) +It's fast on a 3070 Ti mobile. Uses 5-6 GB of GPU RAM. +The model isn't all that good; sometimes it goes crazy. But hey, as I always say, "when 4-bits _you reach_ look this good you will not." ## References