main2.py
from __future__ import annotations

import json
from enum import Enum
from typing import AsyncIterator, Dict, List

from aiohttp import ClientSession
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Example request (the server below listens on port 8001):
# curl -X POST "http://localhost:8001/v1/chat/completions" \
#   -H "Content-Type: application/json" \
#   -d '{"model": "qwen-coder-32b", "messages": [{"type": "human", "content": "Are you qwen?"}]}'
#
# Available models: qwen-coder-32b, chat-gemini-flash, claude-haiku, claude-sonnet, chat-o1-mini
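# The proxy replies with the ChatResponse shape defined below, e.g. {"output": "..."}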
# Type definitions
Messages = List[Dict[str, str]]
AsyncResult = AsyncIterator[str]  # the provider yields response strings
class MessageType(str, Enum):
    HUMAN = "human"
    ASSISTANT = "assistant"
    SYSTEM = "system"


class Message(BaseModel):
    type: MessageType
    content: str


class ChatRequest(BaseModel):
    model: str
    messages: List[Message]
    mode: str = "plan"
    noStream: bool = True


class ChatResponse(BaseModel):
    output: str
# Provider implementation
class GizAI:
    api_endpoint = "https://app.giz.ai/api/data/users/inferenceServer.infer"
    working = True
    supports_stream = False
    supports_system_message = True
    supports_message_history = True

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str | None = None,
        **kwargs,
    ) -> AsyncResult:
        # Browser-like headers expected by the GizAI endpoint
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'DNT': '1',
            'Origin': 'https://app.giz.ai',
            'Pragma': 'no-cache',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Not?A_Brand";v="99", "Chromium";v="130"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Linux"',
        }
        data = {
            "model": "chat",
            "baseModel": model,
            "input": {
                "messages": messages,
                "mode": "chat"
            },
            "noStream": True
        }
        # Print request body for debugging
        print("Request to API endpoint:")
        print(json.dumps(data, indent=2))

        async with ClientSession(headers=headers) as session:
            async with session.post(cls.api_endpoint, json=data, proxy=proxy) as response:
                if response.status == 201:
                    result = await response.json()
                    yield result['output'].strip()
                else:
                    raise Exception(f"Unexpected response status: {response.status}")
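
# Direct usage of the provider, outside the FastAPI app (a sketch; the message
# dict mirrors what the endpoint below builds from ChatRequest):
#
#   import asyncio
#
#   async def _demo():
#       async for chunk in GizAI.create_async_generator(
#           model="qwen-coder-32b",
#           messages=[{"type": "human", "content": "Hello"}],
#       ):
#           print(chunk)
#
#   asyncio.run(_demo())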
# FastAPI application
app = FastAPI(title="LLM Proxy Server")


@app.post("/v1/chat/completions", response_model=ChatResponse)
async def chat_completions(request: ChatRequest):
    try:
        # Convert the request messages to the plain-dict format expected by GizAI
        messages = [
            {"type": msg.type.value, "content": msg.content}
            for msg in request.messages
        ]
        # Create the provider's async generator
        async_gen = GizAI.create_async_generator(
            model=request.model,
            messages=messages
        )
        # Get the first (and only) response
        response = None
        async for result in async_gen:
            response = result
            break
        if response is None:
            raise HTTPException(status_code=500, detail="No response generated")
        return ChatResponse(output=response)
    except HTTPException:
        # Re-raise HTTPExceptions unchanged instead of wrapping them below
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Configuration and startup
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8001)
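
# Example client call (a sketch; assumes the server is running locally on
# port 8001 and that the `requests` package is installed):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8001/v1/chat/completions",
#       json={
#           "model": "qwen-coder-32b",
#           "messages": [{"type": "human", "content": "Are you qwen?"}],
#       },
#   )
#   print(resp.json()["output"])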