Skip to content

Commit

Permalink
add docker support (#75)
Browse files Browse the repository at this point in the history
* 添加docker支持

* 添加支持openai api的版本

* 添加量化模型

* 更新openai_api.py版本,支持function_call及top_p、temperature

* 改为在线clone模型及在线获取字体
  • Loading branch information
ichaobuster authored Sep 14, 2023
1 parent 9525f70 commit a0f9057
Show file tree
Hide file tree
Showing 7 changed files with 714 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ build
.DS_Store

/private/
Qwen-VL-Chat/
Qwen-VL-Chat-Int4/
SimSun.ttf
41 changes: 41 additions & 0 deletions BUILD.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
## qwen web demo

### build

```
docker build -t qwen-vl-chat:webdemo --platform linux/amd64 -f Dockerfile.qwendemo .
```

### run

```
docker run -it --gpus device=0 -d --restart always -v /var/run/docker.sock:/var/run/docker.sock --name qwen-vl-chat -p 8000:8000 --user=20001:20001 --platform linux/amd64 qwen-vl-chat:webdemo
```

## qwen openai api

### build

```
docker build -t qwen-vl-chat:openai --platform linux/amd64 -f Dockerfile.qwenopenai .
```

### run

```
docker run -it --gpus device=0 -d --restart always -v /var/run/docker.sock:/var/run/docker.sock --name qwen-vl-chat -p 8080:8080 --user=20001:20001 --platform linux/amd64 qwen-vl-chat:openai
```

## qwen-int4 openai api

### build

```
docker build -t qwen-vl-chat:int4-openai --platform linux/amd64 -f Dockerfile.qwenint4openai .
```

### run

```
docker run -it --gpus device=0 -d --restart always -v /var/run/docker.sock:/var/run/docker.sock --name qwen-vl-chat-int4 -p 8080:8080 --user=20001:20001 --platform linux/amd64 qwen-vl-chat:int4-openai
```
48 changes: 48 additions & 0 deletions Dockerfile.qwendemo
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# python 3.8 and above
# pytorch 1.12 and above, 2.0 and above are recommended
# CUDA 11.4 and above are recommended (this is for GPU users, flash-attention users, etc.)

# based on the modelscope docker image; mirror registries:
#   registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
#   registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
FROM registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0

ARG workdir=/var/app

# Required so the `git clone` of model weights below pulls the LFS blobs.
RUN git lfs install

# WORKDIR creates the directory if it does not exist; no separate mkdir needed.
WORKDIR ${workdir}

# Install dependencies before copying app code so these layers stay cached
# across source-only changes. --no-cache-dir keeps pip's wheel cache out of
# the image layer (DL3042).
COPY requirements.txt requirements_web_demo.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir -r requirements_web_demo.txt

# Offline mode at runtime — model weights are baked into the image below.
# See https://huggingface.co/docs/transformers/v4.15.0/installation#offline-mode
ENV HF_DATASETS_OFFLINE=1 \
    TRANSFORMERS_OFFLINE=1 \
    TZ=Asia/Shanghai

# World-writable logs dir so the container works under any --user override.
# VOLUME is declared only after the path is created and chmod'ed (content
# written after VOLUME would be discarded).
RUN mkdir -p ${workdir}/logs && chmod 777 ${workdir}/logs
VOLUME /var/app/logs

# Unprivileged runtime user; uid 20001 matches the --chown flags below and
# the --user=20001:20001 used in the documented `docker run` commands.
RUN useradd -r -m appuser -u 20001 -g 0

WORKDIR ${workdir}
# Bake the model weights into the image (large download at build time).
RUN git clone https://huggingface.co/Qwen/Qwen-VL-Chat
# COPY --chown=20001:20001 Qwen-VL-Chat ./Qwen-VL-Chat
# Chinese font used by the web demo.
# NOTE(review): bare `ADD <url>` is unpinned; prefer `ADD --checksum=sha256:...`
# once the file's digest is known, so builds are reproducible.
ADD --chown=20001:20001 https://github.com/StellarCN/scp_zh/raw/master/fonts/SimSun.ttf ./
# COPY --chown=20001:20001 SimSun.ttf ./
# Main application.
COPY --chown=20001:20001 web_demo_mm.py ./

# Default to the unprivileged user (docs also pass --user=20001:20001).
USER appuser

# Demo listens on 8000 (the earlier comment claiming 8080 was wrong).
EXPOSE 8000
CMD ["python3", "web_demo_mm.py", "-c", "./Qwen-VL-Chat", "--server-name", "0.0.0.0", "--server-port", "8000"]
61 changes: 61 additions & 0 deletions Dockerfile.qwenint4openai
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# python 3.8 and above
# pytorch 1.12 and above, 2.0 and above are recommended
# CUDA 11.4 and above are recommended (this is for GPU users, flash-attention users, etc.)

# based on the modelscope docker image; mirror registries:
#   registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
#   registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
FROM registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0

ARG workdir=/var/app

# Required so the `git clone` of model weights below pulls the LFS blobs.
RUN git lfs install

# WORKDIR creates the directory if it does not exist; no separate mkdir needed.
WORKDIR ${workdir}

# Install dependencies before copying app code so these layers stay cached.
# NOTE(review): requirements_web_demo.txt is installed even though this image
# only serves the OpenAI-compatible API — possibly unnecessary; kept for
# parity with the original build, confirm before removing.
COPY requirements.txt requirements_web_demo.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir -r requirements_web_demo.txt

# Offline mode at runtime — model weights are baked into the image below.
# See https://huggingface.co/docs/transformers/v4.15.0/installation#offline-mode
ENV HF_DATASETS_OFFLINE=1 \
    TRANSFORMERS_OFFLINE=1 \
    TZ=Asia/Shanghai

# World-writable logs dir so the container works under any --user override.
# VOLUME is declared only after the path is created and chmod'ed.
RUN mkdir -p ${workdir}/logs && chmod 777 ${workdir}/logs
VOLUME /var/app/logs

# Unprivileged runtime user; uid 20001 matches the --chown flags below and
# the --user=20001:20001 used in the documented `docker run` commands.
RUN useradd -r -m appuser -u 20001 -g 0

WORKDIR ${workdir}
# Bake the quantized model weights into the image (large download at build time).
RUN git clone https://huggingface.co/Qwen/Qwen-VL-Chat-Int4
# COPY --chown=20001:20001 Qwen-VL-Chat-Int4 ./Qwen-VL-Chat-Int4

# AutoGPTQ runtime for the Int4 (GPTQ-quantized) checkpoint; cu117 wheels
# match the base image's CUDA 11.7.
RUN pip install --no-cache-dir optimum \
    && pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/
# Alternative: build AutoGPTQ from source
# RUN git clone https://github.com/JustinLin610/AutoGPTQ.git && \
#	cd AutoGPTQ && \
#	pip install -v .

# OpenAI-compatible API server dependencies.
COPY requirements_openai_api.txt ./
RUN pip install --no-cache-dir -r requirements_openai_api.txt
# Chinese font used when rendering boxes/labels.
# NOTE(review): bare `ADD <url>` is unpinned; prefer `ADD --checksum=sha256:...`
# once the file's digest is known, so builds are reproducible.
ADD --chown=20001:20001 https://github.com/StellarCN/scp_zh/raw/master/fonts/SimSun.ttf ./
# COPY --chown=20001:20001 SimSun.ttf ./
# Main application.
COPY --chown=20001:20001 openai_api.py ./

# Default to the unprivileged user (docs also pass --user=20001:20001).
USER appuser

EXPOSE 8080
# Full-precision variant kept for reference:
# CMD ["python3", "openai_api.py", "-c", "./Qwen-VL-Chat", "--server-name", "0.0.0.0", "--server-port", "8080"]
CMD ["python3", "openai_api.py", "-c", "./Qwen-VL-Chat-Int4", "--server-name", "0.0.0.0", "--server-port", "8080"]
53 changes: 53 additions & 0 deletions Dockerfile.qwenopenai
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# python 3.8 and above
# pytorch 1.12 and above, 2.0 and above are recommended
# CUDA 11.4 and above are recommended (this is for GPU users, flash-attention users, etc.)

# based on the modelscope docker image; mirror registries:
#   registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
#   registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0
FROM registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.7.1-py38-torch2.0.1-tf1.15.5-1.8.0

ARG workdir=/var/app

# Required so the `git clone` of model weights below pulls the LFS blobs.
RUN git lfs install

# WORKDIR creates the directory if it does not exist; no separate mkdir needed.
WORKDIR ${workdir}

# Install dependencies before copying app code so these layers stay cached.
# NOTE(review): requirements_web_demo.txt is installed even though this image
# only serves the OpenAI-compatible API — possibly unnecessary; kept for
# parity with the original build, confirm before removing.
COPY requirements.txt requirements_web_demo.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir -r requirements_web_demo.txt

# Offline mode at runtime — model weights are baked into the image below.
# See https://huggingface.co/docs/transformers/v4.15.0/installation#offline-mode
ENV HF_DATASETS_OFFLINE=1 \
    TRANSFORMERS_OFFLINE=1 \
    TZ=Asia/Shanghai

# World-writable logs dir so the container works under any --user override.
# VOLUME is declared only after the path is created and chmod'ed.
RUN mkdir -p ${workdir}/logs && chmod 777 ${workdir}/logs
VOLUME /var/app/logs

# Unprivileged runtime user; uid 20001 matches the --chown flags below and
# the --user=20001:20001 used in the documented `docker run` commands.
RUN useradd -r -m appuser -u 20001 -g 0

WORKDIR ${workdir}
# Bake the model weights into the image (large download at build time).
RUN git clone https://huggingface.co/Qwen/Qwen-VL-Chat
# COPY --chown=20001:20001 Qwen-VL-Chat ./Qwen-VL-Chat

# OpenAI-compatible API server dependencies.
COPY requirements_openai_api.txt ./
RUN pip install --no-cache-dir -r requirements_openai_api.txt
# Chinese font used when rendering boxes/labels.
# NOTE(review): bare `ADD <url>` is unpinned; prefer `ADD --checksum=sha256:...`
# once the file's digest is known, so builds are reproducible.
ADD --chown=20001:20001 https://github.com/StellarCN/scp_zh/raw/master/fonts/SimSun.ttf ./
# COPY --chown=20001:20001 SimSun.ttf ./
# Main application.
COPY --chown=20001:20001 openai_api.py ./

# Default to the unprivileged user (docs also pass --user=20001:20001).
USER appuser

EXPOSE 8080
CMD ["python3", "openai_api.py", "-c", "./Qwen-VL-Chat", "--server-name", "0.0.0.0", "--server-port", "8080"]
Loading

0 comments on commit a0f9057

Please sign in to comment.