diff --git a/src/main/java/Capstone/AutoScheduler/global/domain/enums/CrawlingType.java b/src/main/java/Capstone/AutoScheduler/global/domain/enums/CrawlingType.java
new file mode 100644
index 0000000..45648f9
--- /dev/null
+++ b/src/main/java/Capstone/AutoScheduler/global/domain/enums/CrawlingType.java
@@ -0,0 +1,6 @@
+package Capstone.AutoScheduler.global.domain.enums;
+
+// Crawling mode: BODY = stylesheet links + <body> only; HTML = full page source.
+public enum CrawlingType {
+    BODY, HTML
+}
diff --git a/src/main/java/Capstone/AutoScheduler/global/service/SeleniumService/WebCrawlerService.java b/src/main/java/Capstone/AutoScheduler/global/service/SeleniumService/WebCrawlerService.java
index 2bba93f..f1e04b0 100644
--- a/src/main/java/Capstone/AutoScheduler/global/service/SeleniumService/WebCrawlerService.java
+++ b/src/main/java/Capstone/AutoScheduler/global/service/SeleniumService/WebCrawlerService.java
@@ -1,4 +1,5 @@
+import Capstone.AutoScheduler.global.domain.enums.CrawlingType;
@@ -21,7 +21,10 @@
 @Service
 public class WebCrawlerService {
 
-    public List<String> getHtmlContent(String url) {
+    // Crawls the given URL with Selenium.
+    // Returns a two-element list: [cssLinkTags, bodyHtml] for BODY mode,
+    // ["", fullPageSource] for HTML mode (never contains null elements).
+    public List<String> getHtmlContent(CrawlingType type, String url) {
         List<String> htmlContent = new ArrayList<>();
         WebDriver driver = null;
@@ -46,18 +49,30 @@
             WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10));
             wait.until(ExpectedConditions.presenceOfElementLocated(org.openqa.selenium.By.tagName("body")));
 
-            // header의 CSS파일 가져오기
-            String cssFiles = "";
-            List<WebElement> links = driver.findElements(By.xpath("//link[@rel='stylesheet']"));
-            for (WebElement link : links) {
-                String cssLink = link.getAttribute("href");
-                cssFiles += "<link rel=\"stylesheet\" href=\"" + cssLink + "\">";
-            }
-            htmlContent.add(cssFiles);
-
-            // html의 body만 가져오기
-            String bodyContent = driver.findElement(By.tagName("body")).getAttribute("outerHTML");
-            htmlContent.add(bodyContent);
+            if (type == CrawlingType.BODY) {
+                // Collect the header's stylesheet <link> tags so the body can be
+                // re-rendered with its original CSS.
+                String cssFiles = "";
+                List<WebElement> links = driver.findElements(By.xpath("//link[@rel='stylesheet']"));
+                for (WebElement link : links) {
+                    String cssLink = link.getAttribute("href");
+                    cssFiles += "<link rel=\"stylesheet\" href=\"" + cssLink + "\">";
+                }
+                htmlContent.add(cssFiles);
+
+                // Extract only the <body> element of the page.
+                String bodyContent = driver.findElement(By.tagName("body")).getAttribute("outerHTML");
+                htmlContent.add(bodyContent);
+            } else {
+                // Full page source. Use an empty CSS slot instead of null so the
+                // returned list keeps its [css, html] shape without null elements.
+                String pageSource = driver.getPageSource();
+                htmlContent.add("");
+                htmlContent.add(pageSource);
+            }
 
             return htmlContent;
diff --git a/src/main/java/Capstone/AutoScheduler/global/web/controller/CrawlerController.java b/src/main/java/Capstone/AutoScheduler/global/web/controller/CrawlerController.java
index 7663751..683e480 100644
--- a/src/main/java/Capstone/AutoScheduler/global/web/controller/CrawlerController.java
+++ b/src/main/java/Capstone/AutoScheduler/global/web/controller/CrawlerController.java
@@ -14,6 +14,8 @@
+import Capstone.AutoScheduler.global.domain.enums.CrawlingType;
 import org.springframework.http.HttpHeaders;
 import org.springframework.http.HttpStatus;
 import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.CrossOrigin;
 import org.springframework.web.bind.annotation.GetMapping;
 import org.springframework.web.bind.annotation.RequestParam;
 import org.springframework.web.bind.annotation.RestController;
@@ -26,6 +28,7 @@
 public class CrawlerController {
 
     @Autowired
     private WebCrawlerService webCrawlerService;
 
+    @CrossOrigin(origins = "http://localhost:3000") // 프론트엔드 서버만 허용
     @Operation(
             summary = "웹 크롤링 API",
             description = "URL을 입력하면 해당 페이지의 HTML 소스를 반환합니다."
@@ -33,11 +36,13 @@
     @GetMapping("/crawl")
     public ApiResponse<GetCrawlingResultDTO> crawl(
+            @Parameter(description = "크롤링 방식 (BODY 또는 HTML)", required = true)
+            @RequestParam CrawlingType type,
             @Parameter(description = "크롤링할 URL", required = true)
             @RequestParam String url
     ) {
         try {
             // HTML 크롤링 결과 가져오기
-            List<String> htmlContent = webCrawlerService.getHtmlContent(url);
+            List<String> htmlContent = webCrawlerService.getHtmlContent(type, url);
             return ApiResponse.onSuccess(SuccessStatus.CRAWLING_OK, CrawlingConverter.toGetCrawlingResultDTO(htmlContent));
         } catch (IllegalArgumentException e) {