add comfyui backend

tungedng2710 · Oct 31, 2024 · ea27963 · ea27963
1 parent 202b917
commit ea27963
Show file tree

Hide file tree

Showing 6 changed files with 298 additions and 38 deletions.
diff --git a/app.py b/app.py
@@ -9,6 +9,7 @@
 from utils import *
 
 from diffusers.utils import logging
+from query_comfyui import *
 
 logging.set_verbosity_info()
 logging.get_logger("diffusers").setLevel(logging.ERROR)
@@ -29,9 +30,23 @@ def gen_image(prompt, negative_prompt, width, height,
         progress(i / num_steps * 100, f"Processing step {i} of {num_steps}...")
 
     images = [Image.open("stuffs/logo.png")]
+
     if len(prompt) == 0:
         gr.Info("Please input prompt!", duration=5)
         return images
+
+    # Query ComfyUI backend
+    if "Stable Diffusion 3.5" in mode:
+        if "Medium" in mode:
+            ckpt_name = "sd3.5_medium.safetensors"
+        else:
+            ckpt_name = "sd3.5_large.safetensors"
+        images = query_sd35(ckpt_name, prompt, negative_prompt,
+                            int(width), int(height),
+                            int(num_images), int(seed),
+                            float(guidance_scale), int(num_steps))
+        return images
+
     model = TEXT_TO_IMAGE_DICTIONARY[mode]
     use_lora = False
     _, current_max_memory = get_gpu_info(width, height, num_images)
@@ -142,12 +157,12 @@ def gen_image(prompt, negative_prompt, width, height,
                         placeholder="Describe the image you want to generate")
                 with gr.Row():
                     width = gr.components.Slider(
-                        minimum=512, maximum=2048, value=1024, step=8,
+                        minimum=512, maximum=1920, value=1024, step=8,
                         label="Width",
                         scale=1
                     )
                     height = gr.components.Slider(
-                        minimum=512, maximum=2048, value=1024, step=8,
+                        minimum=512, maximum=1920, value=1024, step=8,
                         label="Height",
                         scale=1
                     )
@@ -166,13 +181,13 @@ def gen_image(prompt, negative_prompt, width, height,
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
-                    value='',
+                    value="ugly, disfigured, deformed",
                     placeholder="Instruct the AI model that it should not include")
                 with gr.Row():
                     with gr.Column(scale=4):
                         with gr.Row():
                             num_steps = gr.components.Slider(
-                                minimum=3, maximum=60, value=8, step=1,
+                                minimum=3, maximum=50, value=20, step=1,
                                 label="Inference Steps",
                                 scale=2
                             )
@@ -224,7 +239,6 @@ def gen_image(prompt, negative_prompt, width, height,
                 format="png",
                 elem_id="gallery",
                 columns=2, rows=2,
-                show_share_button=True,
                 preview=True,
                 object_fit="contain")
             click_button_behavior = {

diff --git a/output.png b/output.png
diff --git a/query_comfyui.py b/query_comfyui.py
@@ -0,0 +1,97 @@
+# Client for a ComfyUI server: queues a workflow over HTTP, waits on the websocket until execution
+# finishes, then downloads the resulting images through the /history and /view endpoints.
+
+import websocket #NOTE: websocket-client (https://github.com/websocket-client/websocket-client)
+import uuid
+import json
+import urllib.request
+import urllib.parse
+import json
+
+server_address = "127.0.0.1:7864"
+client_id = str(uuid.uuid4())
+
+def queue_prompt(prompt):
+    """Submit a workflow to the server's ``/prompt`` endpoint.
+
+    Args:
+        prompt: JSON-serializable workflow graph to queue.
+
+    Returns:
+        The server's JSON response decoded into Python objects.
+    """
+    # The same client_id is used when query_sd35 opens the websocket.
+    payload = json.dumps({"prompt": prompt, "client_id": client_id}).encode('utf-8')
+    req = urllib.request.Request("http://{}/prompt".format(server_address), data=payload)
+    # Close the HTTP response deterministically instead of leaking it.
+    with urllib.request.urlopen(req) as response:
+        return json.loads(response.read())
+
+def get_image(filename, subfolder, folder_type):
+    """Return the raw bytes served by ``/view`` for one stored image.
+
+    Args:
+        filename: Value sent as the ``filename`` query parameter.
+        subfolder: Value sent as the ``subfolder`` query parameter.
+        folder_type: Value sent as the ``type`` query parameter.
+
+    Returns:
+        The response body as ``bytes``.
+    """
+    query = urllib.parse.urlencode(
+        {"filename": filename, "subfolder": subfolder, "type": folder_type}
+    )
+    view_url = "http://{}/view?{}".format(server_address, query)
+    with urllib.request.urlopen(view_url) as resp:
+        return resp.read()
+
+def get_history(prompt_id):
+    """Fetch ``/history/<prompt_id>`` and return the decoded JSON body."""
+    history_url = "http://{}/history/{}".format(server_address, prompt_id)
+    with urllib.request.urlopen(history_url) as resp:
+        body = resp.read()
+    return json.loads(body)
+
+def get_images(ws, prompt):
+    """Queue a workflow, wait for it to finish, and collect its images.
+
+    Args:
+        ws: Connected websocket whose ``recv()`` yields server messages.
+        prompt: Workflow graph passed to ``queue_prompt``.
+
+    Returns:
+        A dict mapping each output node id to a list of raw image bytes
+        (an empty list for nodes whose output has no ``'images'`` entry).
+    """
+    prompt_id = queue_prompt(prompt)['prompt_id']
+
+    # Block until the server reports that our prompt has finished executing.
+    while True:
+        frame = ws.recv()
+        if not isinstance(frame, str):
+            # Non-text frames are binary previews; to decode one, wrap
+            # frame[8:] in BytesIO and open it with PIL.
+            continue
+        message = json.loads(frame)
+        if message['type'] != 'executing':
+            continue
+        data = message['data']
+        if data['node'] is None and data['prompt_id'] == prompt_id:
+            break  # Execution is done
+
+    outputs = get_history(prompt_id)[prompt_id]['outputs']
+    output_images = {}
+    for node_id, node_output in outputs.items():
+        if 'images' in node_output:
+            output_images[node_id] = [
+                get_image(entry['filename'], entry['subfolder'], entry['type'])
+                for entry in node_output['images']
+            ]
+        else:
+            output_images[node_id] = []
+
+    return output_images
+
+def query_sd35(ckpt_name: str = "sd3.5_medium.safetensors",
+               prompt: str = "a capybara",
+               negative_prompt: str = "ugly, disfigured, deformed",
+               width: int = 1024,
+               height: int = 1024,
+               batch_size: int = 1,
+               seed: int = 77498386,
+               cfg: float = 3.0,
+               step: int = 20):
+    """Run the SD3.5 workflow template on the ComfyUI server.
+
+    Loads ``stuffs/comfyui_workflow_api/sd3_5_workflow_api.json``, overrides
+    its inputs with the arguments, executes it, and returns the outputs.
+
+    Args:
+        ckpt_name: Checkpoint file name written to node "4".
+        prompt: Positive prompt text written to node "16".
+        negative_prompt: Negative prompt text written to node "40".
+        width: Latent width written to node "53".
+        height: Latent height written to node "53".
+        batch_size: Batch size written to node "53".
+        seed: Sampler seed written to node "3".
+        cfg: Sampler cfg value written to node "3".
+        step: Sampler step count written to node "3".
+
+    Returns:
+        A list of PIL images, one per image returned by the workflow.
+    """
+    # Local imports: the module's top-level imports do not include these.
+    import io
+    from PIL import Image
+
+    with open('stuffs/comfyui_workflow_api/sd3_5_workflow_api.json') as f:
+        prompt_config = json.load(f)
+
+    sampler_inputs = prompt_config["3"]["inputs"]
+    sampler_inputs["seed"] = seed
+    sampler_inputs["cfg"] = cfg
+    # The template's sampler input is named "steps"; writing to "step"
+    # would leave the template's own step count in effect.
+    sampler_inputs["steps"] = step
+    prompt_config["4"]["inputs"]["ckpt_name"] = ckpt_name
+    prompt_config["16"]["inputs"]["text"] = prompt
+    prompt_config["40"]["inputs"]["text"] = negative_prompt
+    latent_inputs = prompt_config["53"]["inputs"]
+    latent_inputs["width"] = width
+    latent_inputs["height"] = height
+    latent_inputs["batch_size"] = batch_size
+
+    ws = websocket.WebSocket()
+    ws.connect("ws://{}/ws?clientId={}".format(server_address, client_id))
+    try:
+        images = get_images(ws, prompt_config)
+    finally:
+        # Always close, even on failure: this function is called repeatedly
+        # (e.g. from a Gradio app), and leftover connections can cause
+        # random connection timeouts.
+        ws.close()
+
+    return [Image.open(io.BytesIO(image_data))
+            for node_images in images.values()
+            for image_data in node_images]
+
+
+# query_sd35(prompt="a cat")
diff --git a/stuffs/comfyui_workflow_api/sd3_5_workflow_api.json b/stuffs/comfyui_workflow_api/sd3_5_workflow_api.json
@@ -0,0 +1,139 @@
+{
+  "3": {
+    "inputs": {
+      "seed": 715383868423439,
+      "steps": 25,
+      "cfg": 3,
+      "sampler_name": "euler",
+      "scheduler": "sgm_uniform",
+      "denoise": 1,
+      "model": [
+        "4",
+        0
+      ],
+      "positive": [
+        "16",
+        0
+      ],
+      "negative": [
+        "40",
+        0
+      ],
+      "latent_image": [
+        "53",
+        0
+      ]
+    },
+    "class_type": "KSampler",
+    "_meta": {
+      "title": "KSampler"
+    }
+  },
+  "4": {
+    "inputs": {
+      "ckpt_name": "sd3.5_medium.safetensors"
+    },
+    "class_type": "CheckpointLoaderSimple",
+    "_meta": {
+      "title": "Load Checkpoint"
+    }
+  },
+  "8": {
+    "inputs": {
+      "samples": [
+        "3",
+        0
+      ],
+      "vae": [
+        "4",
+        2
+      ]
+    },
+    "class_type": "VAEDecode",
+    "_meta": {
+      "title": "VAE Decode"
+    }
+  },
+  "9": {
+    "inputs": {
+      "filename_prefix": "ComfyUI",
+      "images": [
+        "8",
+        0
+      ]
+    },
+    "class_type": "PreviewImage",
+    "_meta": {
+      "title": "Save Image"
+    }
+  },
+  "16": {
+    "inputs": {
+      "text": "Painting of Taylor Swift in the style of Vincent van Gogh.",
+      "clip": [
+        "43",
+        0
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "Positive Prompt"
+    }
+  },
+  "40": {
+    "inputs": {
+      "text": "ugly, disfigured, deformed",
+      "clip": [
+        "43",
+        0
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "Negative Prompt"
+    }
+  },
+  "41": {
+    "inputs": {
+      "clip_name": "t5xxl_fp16.safetensors",
+      "type": "sd3"
+    },
+    "class_type": "CLIPLoader",
+    "_meta": {
+      "title": "Load CLIP"
+    }
+  },
+  "42": {
+    "inputs": {
+      "clip_name1": "clip_l.safetensors",
+      "clip_name2": "clip_g.safetensors",
+      "type": "sd3"
+    },
+    "class_type": "DualCLIPLoader",
+    "_meta": {
+      "title": "DualCLIPLoader"
+    }
+  },
+  "43": {
+    "inputs": {
+      "clip_name1": "clip_l.safetensors",
+      "clip_name2": "clip_g.safetensors",
+      "clip_name3": "t5xxl_fp16.safetensors"
+    },
+    "class_type": "TripleCLIPLoader",
+    "_meta": {
+      "title": "TripleCLIPLoader"
+    }
+  },
+  "53": {
+    "inputs": {
+      "width": 1024,
+      "height": 1024,
+      "batch_size": 1
+    },
+    "class_type": "EmptySD3LatentImage",
+    "_meta": {
+      "title": "EmptySD3LatentImage"
+    }
+  }
+}
diff --git a/stuffs/html/tonai_creative_info.html b/stuffs/html/tonai_creative_info.html
@@ -39,7 +39,7 @@
     </head>
 <body>
     <div class="header">
-        <img src="file/stuffs/splash.png" alt="Logo" class="logo">
+        <img src="/gradio_api/file/stuffs/splash.png" alt="Logo" class="logo">
         <div class="title-container">
             <div class="title">TonAI Creative</div>
             <div class="subtitle">Unleash Your Imagination With AI</div>