Skip to content

Commit

Permalink
add docker support (#75)
Browse files Browse the repository at this point in the history
* 添加docker支持

* 添加支持openai api的版本

* 添加量化模型

* 更新openai_api.py版本,支持function_call及top_p、temperature

* 改为在线clone模型及在线获取字体
  • Loading branch information
ichaobuster authored Sep 14, 2023
1 parent 9525f70 commit a0f9057
Show file tree
Hide file tree
Showing 7 changed files with 714 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ build
.DS_Store

/private/
Qwen-VL-Chat/
Qwen-VL-Chat-Int4/
SimSun.ttf
41 changes: 41 additions & 0 deletions BUILD.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
## qwen web demo

### build

```
docker build -t qwen-vl-chat:webdemo --platform linux/amd64 -f Dockerfile.qwendemo .
```

### run

```
docker run -it --gpus device=0 -d --restart always -v /var/run/docker.sock:/var/run/docker.sock --name qwen-vl-chat -p 8000:8000 --user=20001:20001 --platform linux/amd64 qwen-vl-chat:webdemo
```

## qwen openai api

### build

```
docker build -t qwen-vl-chat:openai --platform linux/amd64 -f Dockerfile.qwenopenai .
```

### run

```
docker run -it --gpus device=0 -d --restart always -v /var/run/docker.sock:/var/run/docker.sock --name qwen-vl-chat -p 8080:8080 --user=20001:20001 --platform linux/amd64 qwen-vl-chat:openai
```

## qwen-int4 openai api

### build

```
docker build -t qwen-vl-chat:int4-openai --platform linux/amd64 -f Dockerfile.qwenint4openai .
```

### run

```
docker run -it --gpus device=0 -d --restart always -v /var/run/docker.sock:/var/run/docker.sock --name qwen-vl-chat-int4 -p 8080:8080 --user=20001:20001 --platform linux/amd64 qwen-vl-chat:int4-openai
```
48 changes: 48 additions & 0 deletions Dockerfile.qwendemo
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# python 3.8 and above
# pytorch 1.12 and above, 2.0 and above are recommended
# CUDA 11.4 and above are recommended (this is for GPU users, flash-attention users, etc.)

# based on the modelscope docker image; mirror registries:
#   registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
#   registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
FROM registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0

ARG workdir=/var/app

# Required so the `git clone` of model weights below pulls the LFS blobs.
RUN git lfs install

# WORKDIR creates the directory if it does not exist; no separate mkdir needed.
WORKDIR ${workdir}

# Install dependencies before copying app code so these layers stay cached
# across source-only changes. --no-cache-dir keeps pip's wheel cache out of
# the image layer (DL3042).
COPY requirements.txt requirements_web_demo.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir -r requirements_web_demo.txt

# Offline mode at runtime — model weights are baked into the image below.
# See https://huggingface.co/docs/transformers/v4.15.0/installation#offline-mode
ENV HF_DATASETS_OFFLINE=1 \
    TRANSFORMERS_OFFLINE=1 \
    TZ=Asia/Shanghai

# World-writable logs dir so the container works under any --user override.
# VOLUME is declared only after the path is created and chmod'ed (content
# written after VOLUME would be discarded).
RUN mkdir -p ${workdir}/logs && chmod 777 ${workdir}/logs
VOLUME /var/app/logs

# Unprivileged runtime user; uid 20001 matches the --chown flags below and
# the --user=20001:20001 used in the documented `docker run` commands.
RUN useradd -r -m appuser -u 20001 -g 0

WORKDIR ${workdir}
# Bake the model weights into the image (large download at build time).
RUN git clone https://huggingface.co/Qwen/Qwen-VL-Chat
# COPY --chown=20001:20001 Qwen-VL-Chat ./Qwen-VL-Chat
# Chinese font used by the web demo.
# NOTE(review): bare `ADD <url>` is unpinned; prefer `ADD --checksum=sha256:...`
# once the file's digest is known, so builds are reproducible.
ADD --chown=20001:20001 https://github.com/StellarCN/scp_zh/raw/master/fonts/SimSun.ttf ./
# COPY --chown=20001:20001 SimSun.ttf ./
# Main application.
COPY --chown=20001:20001 web_demo_mm.py ./

# Default to the unprivileged user (docs also pass --user=20001:20001).
USER appuser

# Demo listens on 8000 (the earlier comment claiming 8080 was wrong).
EXPOSE 8000
CMD ["python3", "web_demo_mm.py", "-c", "./Qwen-VL-Chat", "--server-name", "0.0.0.0", "--server-port", "8000"]
61 changes: 61 additions & 0 deletions Dockerfile.qwenint4openai
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# python 3.8 and above
# pytorch 1.12 and above, 2.0 and above are recommended
# CUDA 11.4 and above are recommended (this is for GPU users, flash-attention users, etc.)

# based on the modelscope docker image; mirror registries:
#   registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
#   registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
FROM registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0

ARG workdir=/var/app

# Required so the `git clone` of model weights below pulls the LFS blobs.
RUN git lfs install

# WORKDIR creates the directory if it does not exist; no separate mkdir needed.
WORKDIR ${workdir}

# Install dependencies before copying app code so these layers stay cached.
# NOTE(review): requirements_web_demo.txt is installed even though this image
# only serves the OpenAI-compatible API — possibly unnecessary; kept for
# parity with the original build, confirm before removing.
COPY requirements.txt requirements_web_demo.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir -r requirements_web_demo.txt

# Offline mode at runtime — model weights are baked into the image below.
# See https://huggingface.co/docs/transformers/v4.15.0/installation#offline-mode
ENV HF_DATASETS_OFFLINE=1 \
    TRANSFORMERS_OFFLINE=1 \
    TZ=Asia/Shanghai

# World-writable logs dir so the container works under any --user override.
# VOLUME is declared only after the path is created and chmod'ed.
RUN mkdir -p ${workdir}/logs && chmod 777 ${workdir}/logs
VOLUME /var/app/logs

# Unprivileged runtime user; uid 20001 matches the --chown flags below and
# the --user=20001:20001 used in the documented `docker run` commands.
RUN useradd -r -m appuser -u 20001 -g 0

WORKDIR ${workdir}
# Bake the quantized model weights into the image (large download at build time).
RUN git clone https://huggingface.co/Qwen/Qwen-VL-Chat-Int4
# COPY --chown=20001:20001 Qwen-VL-Chat-Int4 ./Qwen-VL-Chat-Int4

# AutoGPTQ runtime for the Int4 (GPTQ-quantized) checkpoint; cu117 wheels
# match the base image's CUDA 11.7.
RUN pip install --no-cache-dir optimum \
    && pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/
# Alternative: build AutoGPTQ from source
# RUN git clone https://github.com/JustinLin610/AutoGPTQ.git && \
#	cd AutoGPTQ && \
#	pip install -v .

# OpenAI-compatible API server dependencies.
COPY requirements_openai_api.txt ./
RUN pip install --no-cache-dir -r requirements_openai_api.txt
# Chinese font used when rendering boxes/labels.
# NOTE(review): bare `ADD <url>` is unpinned; prefer `ADD --checksum=sha256:...`
# once the file's digest is known, so builds are reproducible.
ADD --chown=20001:20001 https://github.com/StellarCN/scp_zh/raw/master/fonts/SimSun.ttf ./
# COPY --chown=20001:20001 SimSun.ttf ./
# Main application.
COPY --chown=20001:20001 openai_api.py ./

# Default to the unprivileged user (docs also pass --user=20001:20001).
USER appuser

EXPOSE 8080
# Full-precision variant kept for reference:
# CMD ["python3", "openai_api.py", "-c", "./Qwen-VL-Chat", "--server-name", "0.0.0.0", "--server-port", "8080"]
CMD ["python3", "openai_api.py", "-c", "./Qwen-VL-Chat-Int4", "--server-name", "0.0.0.0", "--server-port", "8080"]
53 changes: 53 additions & 0 deletions Dockerfile.qwenopenai
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# python 3.8 and above
# pytorch 1.12 and above, 2.0 and above are recommended
# CUDA 11.4 and above are recommended (this is for GPU users, flash-attention users, etc.)

# based on the modelscope docker image; mirror registries:
#   registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
#   registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
FROM registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0

ARG workdir=/var/app

# Required so the `git clone` of model weights below pulls the LFS blobs.
RUN git lfs install

# WORKDIR creates the directory if it does not exist; no separate mkdir needed.
WORKDIR ${workdir}

# Install dependencies before copying app code so these layers stay cached.
# NOTE(review): requirements_web_demo.txt is installed even though this image
# only serves the OpenAI-compatible API — possibly unnecessary; kept for
# parity with the original build, confirm before removing.
COPY requirements.txt requirements_web_demo.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir -r requirements_web_demo.txt

# Offline mode at runtime — model weights are baked into the image below.
# See https://huggingface.co/docs/transformers/v4.15.0/installation#offline-mode
ENV HF_DATASETS_OFFLINE=1 \
    TRANSFORMERS_OFFLINE=1 \
    TZ=Asia/Shanghai

# World-writable logs dir so the container works under any --user override.
# VOLUME is declared only after the path is created and chmod'ed.
RUN mkdir -p ${workdir}/logs && chmod 777 ${workdir}/logs
VOLUME /var/app/logs

# Unprivileged runtime user; uid 20001 matches the --chown flags below and
# the --user=20001:20001 used in the documented `docker run` commands.
RUN useradd -r -m appuser -u 20001 -g 0

WORKDIR ${workdir}
# Bake the model weights into the image (large download at build time).
RUN git clone https://huggingface.co/Qwen/Qwen-VL-Chat
# COPY --chown=20001:20001 Qwen-VL-Chat ./Qwen-VL-Chat

# OpenAI-compatible API server dependencies.
COPY requirements_openai_api.txt ./
RUN pip install --no-cache-dir -r requirements_openai_api.txt
# Chinese font used when rendering boxes/labels.
# NOTE(review): bare `ADD <url>` is unpinned; prefer `ADD --checksum=sha256:...`
# once the file's digest is known, so builds are reproducible.
ADD --chown=20001:20001 https://github.com/StellarCN/scp_zh/raw/master/fonts/SimSun.ttf ./
# COPY --chown=20001:20001 SimSun.ttf ./
# Main application.
COPY --chown=20001:20001 openai_api.py ./

# Default to the unprivileged user (docs also pass --user=20001:20001).
USER appuser

EXPOSE 8080
CMD ["python3", "openai_api.py", "-c", "./Qwen-VL-Chat", "--server-name", "0.0.0.0", "--server-port", "8080"]
Loading

0 comments on commit a0f9057

Please sign in to comment.