Skip to content

Commit 0418696

Browse files
committed
chore: 뉴스 크롤링 수정
1 parent 0ea277c commit 0418696

1 file changed

Lines changed: 19 additions & 0 deletions

File tree

src/main/java/com/playhive/batch/crawler/news/esports/EsportsNewsCrawler.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ private void crawlForDate(LocalDate date) {
110110
LocalDateTime postDate = parseRelativeTime(postDateStr);
111111
String title = getTitle(news);
112112
String content = getContent(news);
113+
String thumb = extractImage(news);
114+
115+
if (thumb == null || thumb.isBlank()) {
116+
log.debug("🔍 썸네일 없음, 기본값 또는 상세 진입 고려: {}", source);
117+
}
113118

114119
if (title.isBlank()) {
115120
log.debug("⛔ 무시됨 - 제목 없음: {}", source);
@@ -184,6 +189,20 @@ private LocalDateTime parseRelativeTime(String timeStr) {
184189
}
185190
}
186191

192+
private String extractImage(WebElement news) {
193+
try {
194+
WebElement thumbnailDiv = news.findElement(By.className("news_card_thumbnail__3thTg"));
195+
String style = thumbnailDiv.getAttribute("style"); // style="background-image: url(...)"
196+
Matcher matcher = Pattern.compile("url\\([\"']?(.*?)[\"']?\\)").matcher(style);
197+
if (matcher.find()) {
198+
return matcher.group(1);
199+
}
200+
} catch (NoSuchElementException e) {
201+
log.debug("❌ 썸네일 div 없음");
202+
}
203+
return null;
204+
}
205+
187206
private void save(List<NewsSaveRequest> newsList) {
188207
for (NewsSaveRequest news : newsList) {
189208
try {

0 commit comments

Comments
 (0)