"""SummarizeServer.py - FastAPI service that asks a vLLM server for Korean summaries of submitted text."""
from fastapi.responses import JSONResponse
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from dotenv import load_dotenv
import os
import httpx
from bs4 import BeautifulSoup
from prometheus_fastapi_instrumentator import Instrumentator
import asyncio
import logging
# Load variables from the .env file into the process environment
load_dotenv()
app = FastAPI()
# Attach Prometheus instrumentation to the app and expose its metrics on the app
instrumentator = Instrumentator().instrument(app).expose(app)
# Read the vLLM server URL from the environment (None when VLLM_SERVER_URL is unset)
VLLM_SERVER_URL = os.getenv("VLLM_SERVER_URL")
# Request model: JSON body accepted by the summarize endpoint
class SummarizeRequest(BaseModel):
    # Post identifier supplied by the caller
    post_id: int
    # Text to summarize; the handler strips HTML tags from it before use
    context: str
# Response model: common envelope returned by the summarize endpoint
class CommonResponse(BaseModel):
    # Status code carried inside the body (the handler in this file sets 200 or 500)
    status: int
    # Human-readable result or error description
    message: str
    # Summary text on success; empty string on failure
    data: str
def strip_html_tags(text):
    """Return the text content of an HTML fragment.

    The markup is parsed with BeautifulSoup using ``html.parser``; the
    extracted text pieces are joined with newlines and leading/trailing
    whitespace is removed from the result.
    """
    return BeautifulSoup(text, "html.parser").get_text(separator="\n").strip()
@app.get("/api/summarize-service/health-check")
async def health_check():
    """Health-check endpoint: always answers HTTP 200 with a fixed JSON body."""
    body = {"status": 200, "message": "서버 상태 확인", "data": "Working"}
    return JSONResponse(status_code=200, content=body)
def _extract_summary(result):
    """Pull the first choice's message content out of a chat-completions reply.

    Returns an empty string whenever the payload does not have the expected
    shape (non-dict body, missing or empty ``choices``, missing ``message``,
    or a ``content`` that is ``None`` / not a string), so the caller can
    report a single, consistent error instead of an incidental
    ``IndexError`` or ``AttributeError``.
    """
    if not isinstance(result, dict):
        return ""
    choices = result.get("choices")
    if not isinstance(choices, list) or not choices:
        return ""
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return ""
    message = first_choice.get("message")
    if not isinstance(message, dict):
        return ""
    content = message.get("content")
    return content.strip() if isinstance(content, str) else ""


@app.post("/api/summarize-service/summarize", response_model=CommonResponse)
async def summarizeText(request: SummarizeRequest):
    """Summarize ``request.context`` in Korean via the vLLM server.

    HTML tags are stripped from the input, the text is sent to
    ``VLLM_SERVER_URL`` as a chat request, and the first choice's content is
    returned in ``CommonResponse.data``.

    Returns:
        CommonResponse with ``status=200`` and the summary on success, or
        ``status=500`` with an explanatory ``message`` and empty ``data``
        when the vLLM server answers with a non-200 status, the reply holds
        no usable content, or any exception occurs. Failures are reported in
        the response body; this handler does not raise.
    """
    try:
        # Chat messages: fixed system prompt plus the HTML-stripped user text
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a summarizing expert. Summarize the following text in Korean.\n"
                    "Important: Output must be only the Korean summary text itself. No explanation, no label, no preamble, no English text.\n"
                    "Conditions:\n"
                    "1. The summary must be complete and end with a full sentence.\n"
                    "2. Keep it within 500 characters.\n"
                    "3. Avoid repeated words or redundant expressions.\n"
                    "4. Only include the key information.\n"
                    "Example:\n"
                    "Input: \"이것은 샘플 텍스트입니다.\"\n"
                    "Output: \"샘플 요약입니다.\"\n"
                    "Now summarize this text:\n"
                )
            },
            {
                "role": "user",
                "content": strip_html_tags(request.context)
            }
        ]
        payload = {
            "model": "google/gemma-3-4b-it",
            "messages": messages,
            "temperature": 0.3
        }

        # httpx applies a 5-second timeout by default, which a text
        # generation request can easily exceed; allow a longer read window
        # while keeping the connect timeout short.
        timeout = httpx.Timeout(60.0, connect=5.0)
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(VLLM_SERVER_URL, json=payload)

        if response.status_code != 200:
            return CommonResponse(
                status=500,
                message=f"vLLM 서버 오류: {response.text}",
                data=""
            )

        summaryText = _extract_summary(response.json())
        if not summaryText:
            return CommonResponse(
                status=500,
                message="vLLM 응답에서 content를 찾을 수 없습니다.",
                data=""
            )

        return CommonResponse(
            status=200,
            message="요청에 성공하였습니다",
            data=summaryText
        )
    except Exception as e:
        # Record the traceback server-side; the client only gets a short message.
        logging.getLogger("uvicorn.error").exception("summarize request failed")
        return CommonResponse(
            status=500,
            # Some exceptions (e.g. httpx timeouts) have an empty str();
            # fall back to the exception class name so the message is never blank.
            message=str(e) or type(e).__name__,
            data=""
        )
# Logger setup (optional): use the logger named "uvicorn.error"
logger = logging.getLogger("uvicorn.error")

# System prompt for the dummy request, one entry per prompt line
# (each line is terminated with "\n" when the prompt is assembled below)
_WARMUP_PROMPT_LINES = (
    "You are a summarizing expert. Summarize the following text in Korean.",
    "Important: Output must be only the Korean summary text itself. No explanation, no label, no preamble, no English text.",
    "Conditions:",
    "1. The summary must be complete and end with a full sentence.",
    "2. Keep it within 500 characters.",
    "3. Avoid repeated words or redundant expressions.",
    "4. Only include the key information.",
    "Example:",
    'Input: "이것은 샘플 텍스트입니다."',
    'Output: "샘플 요약입니다."',
    "Now summarize this text:",
)

# Messages for the dummy request
dummy_messages = [
    dict(
        role="system",
        content="".join(line + "\n" for line in _WARMUP_PROMPT_LINES),
    ),
    dict(
        role="user",
        content="이것은 더미 요청용 텍스트입니다. 서버가 정상 동작하는지 확인합니다.",
    ),
]
async def dummy_warmup_task(interval_seconds=300):
    """Send a dummy summarize request to the vLLM server forever, at a fixed interval.

    Each round posts ``dummy_messages`` to ``VLLM_SERVER_URL`` and logs
    whether the server answered with HTTP 200. Exceptions raised by a round
    are logged and the loop continues, so one failed request never stops
    the task.

    Args:
        interval_seconds: Pause between rounds, in seconds. Defaults to 300
            (5 minutes); pass e.g. 600 for a 10-minute interval.
    """
    # The payload is the same for every round, so build it once.
    payload = {
        "model": "google/gemma-3-4b-it",
        "messages": dummy_messages,
        "temperature": 0.3
    }
    # httpx applies a 5-second timeout by default, which a generation
    # request can exceed; allow a longer read window for the reply.
    timeout = httpx.Timeout(60.0, connect=5.0)

    while True:
        try:
            logger.info("[Warmup] Dummy 요청 전송 시작")
            async with httpx.AsyncClient(timeout=timeout) as client:
                response = await client.post(VLLM_SERVER_URL, json=payload)
            if response.status_code == 200:
                logger.info("[Warmup] Dummy 요청 성공 ✅")
            else:
                logger.warning(f"[Warmup] Dummy 요청 실패 ❌ - 상태코드: {response.status_code}, 응답: {response.text}")
        except Exception as e:
            # repr() keeps the exception type visible even when str(e) is
            # empty, as it can be for httpx timeout errors.
            logger.error("[Warmup] Dummy 요청 중 예외 발생 ❗ - %r", e)
        # Wait before the next round, whether this one succeeded or failed
        await asyncio.sleep(interval_seconds)
@app.on_event("startup")
async def startup_event():
    """Start the periodic warmup task when the application starts."""
    logger.info("서버 시작 시 warmup task 실행")
    # The event loop only keeps weak references to tasks, so a task whose
    # result is discarded may be garbage-collected mid-execution. Store a
    # strong reference on the application state to keep it alive.
    app.state.warmup_task = asyncio.create_task(dummy_warmup_task())
#cd Desktop/vLLM
#uvicorn SummarizeServer:app --host 0.0.0.0 --port 8500 --reload