main2.py
from __future__ import annotations

import json
from enum import Enum
from typing import AsyncIterator, Dict, List

from aiohttp import ClientSession
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Example request (the server below listens on port 8001):
# curl -X POST "http://localhost:8001/v1/chat/completions" \
#   -H "Content-Type: application/json" \
#   -d '{"model": "qwen-coder-32b", "messages": [{"type": "human", "content": "Are you qwen?"}]}'
#
# Available models: qwen-coder-32b, chat-gemini-flash, claude-haiku, claude-sonnet, chat-o1-mini
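# The proxy replies with the ChatResponse shape defined below, e.g. {"output": "..."}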
# Type definitions
Messages = List[Dict[str, str]]
AsyncResult = AsyncIterator[str]  # the provider yields response strings
class MessageType(str, Enum):
    HUMAN = "human"
    ASSISTANT = "assistant"
    SYSTEM = "system"


class Message(BaseModel):
    type: MessageType
    content: str


class ChatRequest(BaseModel):
    model: str
    messages: List[Message]
    mode: str = "plan"
    noStream: bool = True


class ChatResponse(BaseModel):
    output: str
# Provider implementation
class GizAI:
    api_endpoint = "https://app.giz.ai/api/data/users/inferenceServer.infer"
    working = True
    supports_stream = False
    supports_system_message = True
    supports_message_history = True

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str | None = None,
        **kwargs,
    ) -> AsyncResult:
        # Browser-like headers expected by the GizAI endpoint
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'DNT': '1',
            'Origin': 'https://app.giz.ai',
            'Pragma': 'no-cache',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Not?A_Brand";v="99", "Chromium";v="130"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Linux"',
        }
        data = {
            "model": "chat",
            "baseModel": model,
            "input": {
                "messages": messages,
                "mode": "chat"
            },
            "noStream": True
        }
        # Print request body for debugging
        print("Request to API endpoint:")
        print(json.dumps(data, indent=2))

        async with ClientSession(headers=headers) as session:
            async with session.post(cls.api_endpoint, json=data, proxy=proxy) as response:
                if response.status == 201:
                    result = await response.json()
                    yield result['output'].strip()
                else:
                    raise Exception(f"Unexpected response status: {response.status}")
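
# Direct usage of the provider, outside the FastAPI app (a sketch; the message
# dict mirrors what the endpoint below builds from ChatRequest):
#
#   import asyncio
#
#   async def _demo():
#       async for chunk in GizAI.create_async_generator(
#           model="qwen-coder-32b",
#           messages=[{"type": "human", "content": "Hello"}],
#       ):
#           print(chunk)
#
#   asyncio.run(_demo())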
# FastAPI application
app = FastAPI(title="LLM Proxy Server")


@app.post("/v1/chat/completions", response_model=ChatResponse)
async def chat_completions(request: ChatRequest):
    try:
        # Convert the request messages to the plain-dict format expected by GizAI
        messages = [
            {"type": msg.type.value, "content": msg.content}
            for msg in request.messages
        ]
        # Create the provider's async generator
        async_gen = GizAI.create_async_generator(
            model=request.model,
            messages=messages
        )
        # Get the first (and only) response
        response = None
        async for result in async_gen:
            response = result
            break
        if response is None:
            raise HTTPException(status_code=500, detail="No response generated")
        return ChatResponse(output=response)
    except HTTPException:
        # Re-raise HTTPExceptions unchanged instead of wrapping them below
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Configuration and startup
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8001)
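
# Example client call (a sketch; assumes the server is running locally on
# port 8001 and that the `requests` package is installed):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8001/v1/chat/completions",
#       json={
#           "model": "qwen-coder-32b",
#           "messages": [{"type": "human", "content": "Are you qwen?"}],
#       },
#   )
#   print(resp.json()["output"])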