Skip to content

Commit b533c8f

Browse files
committed
chore: 뉴스 크롤링 수정
1 parent edd1888 commit b533c8f

5 files changed

Lines changed: 20 additions & 218 deletions

File tree

src/main/java/com/playhive/batch/crawler/news/FootballBaseballCrawler.java

Lines changed: 0 additions & 200 deletions
This file was deleted.

src/main/java/com/playhive/batch/crawler/news/TestCrawler.java renamed to src/main/java/com/playhive/batch/crawler/news/FootballBaseballNewsCrawler.java

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import org.openqa.selenium.support.ui.WebDriverWait;
2424

2525
@Slf4j
26-
public class TestCrawler {
26+
public class FootballBaseballNewsCrawler {
2727

2828
private static final String DATE_FIELD = "&date=";
2929

@@ -39,7 +39,7 @@ public class TestCrawler {
3939

4040
private final NewsService newsService;
4141

42-
public TestCrawler(NewsService newsService) {
42+
public FootballBaseballNewsCrawler(NewsService newsService) {
4343
this.newsService = newsService;
4444
}
4545

@@ -274,20 +274,22 @@ private LocalDateTime parsePostDate(WebElement item) {
274274
*/
275275
private String getThumbImage(WebElement item) {
276276
try {
277-
WebElement img = item.findElement(By.className(THUMB_CLASS))
278-
.findElement(By.tagName("img"));
279-
return img.getAttribute("src");
280-
} catch (Exception e) {
281-
// 다른 방법으로 이미지 찾기
282-
try {
283-
WebElement img = item.findElement(By.cssSelector("img"));
284-
return img.getAttribute("src");
285-
} catch (Exception e2) {
286-
return null;
277+
WebElement img = item.findElement(By.cssSelector("img"));
278+
String src = img.getAttribute("src");
279+
280+
if (src == null || src.isBlank() || src.startsWith("data:")) {
281+
src = img.getAttribute("data-src");
287282
}
283+
284+
return (src != null && !src.isBlank()) ? src : null;
285+
286+
} catch (Exception e) {
287+
// img 태그 자체가 없거나 접근 실패 → 그냥 null 리턴
288+
return null;
288289
}
289290
}
290291

292+
291293
/**
292294
* 스크롤 수행
293295
*

src/main/java/com/playhive/batch/crawler/news/baseball/BaseballNewsCrawler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package com.playhive.batch.crawler.news.baseball;
22

3+
import com.playhive.batch.crawler.news.FootballBaseballNewsCrawler;
34
import com.playhive.batch.crawler.news.NewsCrawler;
4-
import com.playhive.batch.crawler.news.TestCrawler;
55
import com.playhive.batch.news.entity.NewsCategory;
66
import com.playhive.batch.news.service.NewsService;
77
import java.time.LocalDate;
@@ -10,7 +10,7 @@
1010

1111
@Component
1212
@Transactional
13-
public class BaseballNewsCrawler extends TestCrawler implements NewsCrawler {
13+
public class BaseballNewsCrawler extends FootballBaseballNewsCrawler implements NewsCrawler {
1414

1515
private static final String URL = "https://m.sports.naver.com/kbaseball/news?sectionId=kbaseball&sort=latest";
1616

src/main/java/com/playhive/batch/crawler/news/football/KFootballNewsCrawler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package com.playhive.batch.crawler.news.football;
22

3+
import com.playhive.batch.crawler.news.FootballBaseballNewsCrawler;
34
import com.playhive.batch.crawler.news.NewsCrawler;
4-
import com.playhive.batch.crawler.news.TestCrawler;
55
import com.playhive.batch.news.entity.NewsCategory;
66
import com.playhive.batch.news.service.NewsService;
77
import java.time.LocalDate;
@@ -10,7 +10,7 @@
1010

1111
@Component
1212
@Transactional
13-
public class KFootballNewsCrawler extends TestCrawler implements NewsCrawler {
13+
public class KFootballNewsCrawler extends FootballBaseballNewsCrawler implements NewsCrawler {
1414

1515
private static final String URL = "https://m.sports.naver.com/kfootball/news?sectionId=kfootball&sort=latest";
1616

src/main/java/com/playhive/batch/crawler/news/football/WFootballNewsCrawler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package com.playhive.batch.crawler.news.football;
22

3+
import com.playhive.batch.crawler.news.FootballBaseballNewsCrawler;
34
import com.playhive.batch.crawler.news.NewsCrawler;
4-
import com.playhive.batch.crawler.news.TestCrawler;
55
import com.playhive.batch.news.entity.NewsCategory;
66
import com.playhive.batch.news.service.NewsService;
77
import java.time.LocalDate;
@@ -10,7 +10,7 @@
1010

1111
@Component
1212
@Transactional
13-
public class WFootballNewsCrawler extends TestCrawler implements NewsCrawler {
13+
public class WFootballNewsCrawler extends FootballBaseballNewsCrawler implements NewsCrawler {
1414

1515
private static final String URL = "https://m.sports.naver.com/wfootball/news?sectionId=epl&sort=latest";
1616

0 commit comments

Comments
 (0)