Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 85 additions & 4 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,104 @@ on:
push:
branches:
- main
pull_request:
workflow_dispatch:

jobs:
  # Builds and pushes the base image required to build the full GPU-capable,
  # CUDA-enabled Docker images (one base image per CUDA version in the matrix).
  build-base-gpu-image:
    runs-on: ubuntu-latest
    # The build script pushes to ghcr.io, so GITHUB_TOKEN needs package write
    # access; contents: read is what the checkout step requires.
    permissions:
      contents: read
      packages: write
    strategy:
      fail-fast: true
      matrix:
        # Full set: 12.0.1, 12.1.1, 12.2.2, 12.3.2, 12.4.1, 12.5.0
        cuda: ['12.5.0']
    steps:
      - name: 'Checkout GitHub Action'
        uses: actions/checkout@v4
      - name: 'Login to GitHub Container Registry'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: 'Build Base GPU Image'
        # The script reads CUDA_VERSION from its environment.
        env:
          CUDA_VERSION: ${{ matrix.cuda }}
        run: ./ci/bash/build_base_gpu_image.sh

  # Builds and pushes the base image required to build the ONNX CPU-bound
  # Docker images.
  build-base-cpu-image:
    runs-on: ubuntu-latest
    # The build script pushes to ghcr.io, so GITHUB_TOKEN needs package write
    # access; contents: read is what the checkout step requires.
    permissions:
      contents: read
      packages: write
    steps:
      - name: 'Checkout GitHub Action'
        uses: actions/checkout@v4
      - name: 'Login to GitHub Container Registry'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: 'Build Base CPU Image'
        run: ./ci/bash/build_base_cpu_image.sh

  # Builds the ONNX (CPU) release image for each enabled model/architecture
  # pair. The image is built only; this job does not tag or push it.
  build-cpu-image:
    runs-on: ubuntu-latest
    needs: [build-base-cpu-image]
    strategy:
      fail-fast: false
      matrix:
        # Commented-out entries are currently disabled.
        # NOTE(review): confirm 'BAAI/bge-medium-en-v1.5' exists before
        # enabling it; the published mid-size model may be 'bge-base-en-v1.5'.
        model:
          - BAAI/bge-small-en-v1.5
          # - BAAI/bge-medium-en-v1.5
          # - BAAI/bge-large-en-v1.5
          # - Snowflake/snowflake-arctic-embed-xs
          # - Snowflake/snowflake-arctic-embed-s
          # - Snowflake/snowflake-arctic-embed-m
          # - Snowflake/snowflake-arctic-embed-l
          # - mixedbread-ai/mxbai-embed-large-v1
          # - sentence-transformers/all-MiniLM-L6-v2
          # - sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
        # Full set: amd64, arm64
        targetarch: [arm64]
    steps:
      - name: 'Checkout GitHub Action'
        uses: actions/checkout@v4
      # Login is needed to pull the base image from ghcr.io.
      - name: 'Login to GitHub Container Registry'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: 'Build Production CPU Image'
        # Matrix values go through the environment so the shell line never
        # contains spliced-in template text.
        env:
          MODEL_NAME: ${{ matrix.model }}
          TARGETARCH: ${{ matrix.targetarch }}
        run: >-
          docker build .
          --build-arg "MODEL_NAME=${MODEL_NAME}"
          --build-arg "TARGETARCH=${TARGETARCH}"
          --file Dockerfile.cpu
          --target release

  # Builds the CUDA (GPU) release image for each enabled
  # CUDA-version/model/architecture combination. The base image is provided
  # by build-base-gpu-image, so no base/root image is rebuilt here.
  build-gpu-image:
    runs-on: gpu-t4-4-core
    needs: [build-base-gpu-image]
    strategy:
      fail-fast: false
      matrix:
        # Full set: 12.0.1, 12.1.1, 12.2.2, 12.3.2, 12.4.1, 12.5.0
        cuda: ['12.5.0']
        # Commented-out entries are currently disabled.
        # NOTE(review): confirm 'BAAI/bge-medium-en-v1.5' exists before
        # enabling it; the published mid-size model may be 'bge-base-en-v1.5'.
        model:
          - BAAI/bge-small-en-v1.5
          # - BAAI/bge-medium-en-v1.5
          # - BAAI/bge-large-en-v1.5
          # - Snowflake/snowflake-arctic-embed-xs
          # - Snowflake/snowflake-arctic-embed-s
          # - Snowflake/snowflake-arctic-embed-m
          # - Snowflake/snowflake-arctic-embed-l
          # - mixedbread-ai/mxbai-embed-large-v1
          # - sentence-transformers/all-MiniLM-L6-v2
          # - sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
        # Full set: amd64, arm64
        targetarch: [arm64]
    steps:
      - name: 'Checkout GitHub Action'
        uses: actions/checkout@v4
      # Login is needed to pull the base image from ghcr.io.
      - name: 'Login to GitHub Container Registry'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: 'Build Production GPU Image'
        # Matrix values go through the environment so the shell line never
        # contains spliced-in template text.
        env:
          MODEL_NAME: ${{ matrix.model }}
          TARGETARCH: ${{ matrix.targetarch }}
          CUDA_VERSION: ${{ matrix.cuda }}
        run: >-
          docker build .
          --build-arg "MODEL_NAME=${MODEL_NAME}"
          --build-arg "TARGETARCH=${TARGETARCH}"
          --build-arg "CUDA_VERSION=${CUDA_VERSION}"
          --file Dockerfile.gpu
          --target release
1 change: 1 addition & 0 deletions Dockerfile.root.cpu → Dockerfile.base.cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Base image for the CPU toolchain image.
FROM ubuntu:22.04
# OCI source label pointing at the repository this image is built from.
# Written in key="value" form; the space-separated LABEL form is legacy syntax.
LABEL org.opencontainers.image.source="https://github.com/weaviate/t2v-transformers-models-rs"

# Install rust dependencies
RUN apt-get update -qq \
Expand Down
File renamed without changes.
27 changes: 2 additions & 25 deletions Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -1,35 +1,12 @@
FROM ubuntu:22.04 as sys

# Install rust dependencies
RUN apt-get update -qq \
&& apt-get install -qq -y vim gcc g++ curl git build-essential libssl-dev openssl pkg-config \
&& rm -rf /var/lib/apt/lists/*

# Install Rust
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

# Install Python
RUN apt-get update && \
apt-get install -y python3-pip python3-dev python3-venv && \
rm -rf /var/lib/apt/lists/*

FROM ghcr.io/weaviate/t2v-transformers-models-rs/base-cpu:latest as build
RUN . venv/bin/activate
WORKDIR /app

# Copy our manifests
COPY ./Cargo.lock ./Cargo.lock
COPY ./Cargo.toml ./Cargo.toml
COPY ./src ./src
COPY ./ci ./ci

FROM sys as build
WORKDIR /app

RUN python3 -m venv venv
RUN . venv/bin/activate
# Install Python scripts for model download
RUN pip install -r ci/requirements.txt

ARG TARGETARCH
ARG MODEL_NAME

Expand Down
3 changes: 1 addition & 2 deletions Dockerfile.gpu
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
ARG CUDA_VERSION
FROM ghcr.io/weaviate/t2v-transformers-models-rs/t2v-rs-root-gpu-${CUDA_VERSION}:0.0.1 as build
FROM ghcr.io/weaviate/t2v-transformers-models-rs/base-gpu-${CUDA_VERSION}:latest as build
RUN . venv/bin/activate

WORKDIR /app

# Copy our manifests
COPY ./Cargo.lock ./Cargo.lock
COPY ./Cargo.toml ./Cargo.toml
COPY ./src ./src
COPY ./ci ./ci
Expand Down
10 changes: 10 additions & 0 deletions ci/bash/build_base_cpu_image.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Builds the CPU base image from Dockerfile.base.cpu and pushes it to the
# GitHub Container Registry as base-cpu:latest.
#
# The script does not log in itself, so the caller must already be
# authenticated against ghcr.io. Paths are relative, so run it from the
# repository root (the build context is ".").

# Stop at the first failing command or unset variable. Without this the
# script's exit status is that of the final `docker logout`, which hides a
# failed build or push from CI.
set -euo pipefail

DOCKER_REGISTRY="ghcr.io/weaviate/t2v-transformers-models-rs"
IMAGE="${DOCKER_REGISTRY}/base-cpu:latest"

# Offer the previously published image as a layer-cache source.
docker build --cache-from "${IMAGE}" -t base-cpu -f Dockerfile.base.cpu .

docker tag base-cpu "${IMAGE}"

docker push "${IMAGE}"

# Log out of the registry host (the part of DOCKER_REGISTRY before the first
# "/"). A bare `docker logout` only targets the daemon's default registry.
docker logout "${DOCKER_REGISTRY%%/*}"
10 changes: 10 additions & 0 deletions ci/bash/build_base_gpu_image.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Builds the GPU base image for one CUDA version from Dockerfile.base.gpu and
# pushes it to the GitHub Container Registry as base-gpu-<CUDA_VERSION>:latest.
#
# Environment:
#   CUDA_VERSION  CUDA version to build for (required), e.g. 12.5.0. It is
#                 forwarded as a build argument and embedded in the image name.
#
# The script does not log in itself, so the caller must already be
# authenticated against ghcr.io. Paths are relative, so run it from the
# repository root (the build context is ".").

# Stop at the first failing command or unset variable. Without this the
# script's exit status is that of the final `docker logout`, which hides a
# failed build or push from CI.
set -euo pipefail

# Fail with a clear message instead of producing an image named "base-gpu-".
: "${CUDA_VERSION:?CUDA_VERSION must be set, e.g. CUDA_VERSION=12.5.0}"

DOCKER_REGISTRY="ghcr.io/weaviate/t2v-transformers-models-rs"
IMAGE="${DOCKER_REGISTRY}/base-gpu-${CUDA_VERSION}:latest"

# Offer the previously published image as a layer-cache source.
docker build --cache-from "${IMAGE}" --build-arg "CUDA_VERSION=${CUDA_VERSION}" -t base-gpu -f Dockerfile.base.gpu .

docker tag base-gpu "${IMAGE}"

docker push "${IMAGE}"

# Log out of the registry host (the part of DOCKER_REGISTRY before the first
# "/"). A bare `docker logout` only targets the daemon's default registry.
docker logout "${DOCKER_REGISTRY%%/*}"
9 changes: 0 additions & 9 deletions ci/bash/build_root_gpu_image.sh

This file was deleted.

86 changes: 43 additions & 43 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,50 +1,50 @@
---
version: '3.4'
services:
t2v-transformers-rs-gpu:
build:
context: .
dockerfile: Dockerfile.gpu
target: release
args:
- MODEL_NAME: "BAAI/bge-small-en-v1.5"
- TARGETARCH: "arm64"
- CUDA_VERSION: "12.4.1"
ports:
- 3000:3000
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
t2v-transformers-rs-cpu:
build:
context: .
dockerfile: Dockerfile.cpu
target: release
args:
- MODEL_NAME: "BAAI/bge-small-en-v1.5"
- TARGETARCH: "arm64"
ports:
- 3000:3000
t2v-transformers-py-gpu:
image: semitechnologies/transformers-inference:baai-bge-small-en-v1.5
ports:
- 8080:8080
environment:
ENABLE_CUDA: "true"
T2V_TRANSFORMERS_DIRECT_TOKENIZE: "true"
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
# t2v-transformers-rs-gpu:
# build:
# context: .
# dockerfile: Dockerfile.gpu
# target: release
# args:
# - MODEL_NAME: "BAAI/bge-small-en-v1.5"
# - TARGETARCH: "arm64"
# - CUDA_VERSION: "12.4.1"
# ports:
# - 3000:3000
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
# t2v-transformers-rs-cpu:
# build:
# context: .
# dockerfile: Dockerfile.cpu
# target: release
# args:
# - MODEL_NAME: "BAAI/bge-small-en-v1.5"
# - TARGETARCH: "arm64"
# ports:
# - 3000:3000
# t2v-transformers-py-gpu:
# image: semitechnologies/transformers-inference:baai-bge-small-en-v1.5
# ports:
# - 8080:8080
# environment:
# ENABLE_CUDA: "true"
# T2V_TRANSFORMERS_DIRECT_TOKENIZE: "true"
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
t2v-transformers-py-cpu:
image: semitechnologies/transformers-inference:baai-bge-small-en-v1.5-onnx
image: semitechnologies/transformers-inference:mixedbread-ai-mxbai-embed-large-v1-onnx
ports:
- 8080:8080
environment:
Expand Down