diff --git a/src/main/java/Capstone/AutoScheduler/global/apiPayload/code/status/ErrorStatus.java b/src/main/java/Capstone/AutoScheduler/global/apiPayload/code/status/ErrorStatus.java index 04e7511..d304f2a 100644 --- a/src/main/java/Capstone/AutoScheduler/global/apiPayload/code/status/ErrorStatus.java +++ b/src/main/java/Capstone/AutoScheduler/global/apiPayload/code/status/ErrorStatus.java @@ -50,6 +50,10 @@ public enum ErrorStatus implements BaseErrorCode { BOOKMARK_NOT_FOUND(HttpStatus.BAD_REQUEST, "BOOKMARK_4005", "북마크가 없습니다."), BOOKMARK_DUPLICATE(HttpStatus.BAD_REQUEST, "BOOKMARK_4006", "이미 북마크가 되어있습니다."), + // 크롤링 관련 에러 5000 + CRAWLING_NOT_EXIST(HttpStatus.BAD_REQUEST, "CRAWLING_5001", "유효하지 않은 URL 형식입니다."), + CRAWLING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, "CRAWLING_5002", "크롤링 중 오류가 발생했습니다."), + // 카테고리 관련 에러 7000 CATEGORY_NOT_FOUND(HttpStatus.BAD_REQUEST, "CATEGORY_7001", "카테고리가 없습니다."), // 회원별 관심 카테고리 관련 에러 8000 diff --git a/src/main/java/Capstone/AutoScheduler/global/apiPayload/code/status/SuccessStatus.java b/src/main/java/Capstone/AutoScheduler/global/apiPayload/code/status/SuccessStatus.java index b732713..7d18ae6 100644 --- a/src/main/java/Capstone/AutoScheduler/global/apiPayload/code/status/SuccessStatus.java +++ b/src/main/java/Capstone/AutoScheduler/global/apiPayload/code/status/SuccessStatus.java @@ -17,9 +17,9 @@ public enum SuccessStatus implements BaseCode { // 일정 생성기 관련 응답 GENERATOR_OK(HttpStatus.OK, "GENERATOR_3000", "성공입니다."), // 북마크 관련 응답 - BOOKMARK_OK(HttpStatus.OK, "BOOKMARK_4000", "성공입니다.") - - ; + BOOKMARK_OK(HttpStatus.OK, "BOOKMARK_4000", "성공입니다."), + // 크롤링 관련 응답 + CRAWLING_OK(HttpStatus.OK, "CRAWLING_5000", "성공입니다."); private final HttpStatus httpStatus; private final String code; diff --git a/src/main/java/Capstone/AutoScheduler/global/converter/CrawlingConverter.java b/src/main/java/Capstone/AutoScheduler/global/converter/CrawlingConverter.java new file mode 100644 index 0000000..bd3e6d3 --- /dev/null +++ 
b/src/main/java/Capstone/AutoScheduler/global/converter/CrawlingConverter.java @@ -0,0 +1,15 @@ +package Capstone.AutoScheduler.global.converter; + +import Capstone.AutoScheduler.global.web.dto.CrawlingResponseDTO; +import java.util.List; + +public class CrawlingConverter{ + // Convert the crawled result (index 0: CSS file markup, index 1: HTML body) into a DTO + public static CrawlingResponseDTO.GetCrawlingResultDTO toGetCrawlingResultDTO(List<String> htmlContent) { + return CrawlingResponseDTO.GetCrawlingResultDTO.builder() + .cssFile(htmlContent.get(0)) + .htmlBody(htmlContent.get(1)) + .build(); + } +} + diff --git a/src/main/java/Capstone/AutoScheduler/global/service/SeleniumService/WebCrawlerService.java b/src/main/java/Capstone/AutoScheduler/global/service/SeleniumService/WebCrawlerService.java index a1e74bd..2bba93f 100644 --- a/src/main/java/Capstone/AutoScheduler/global/service/SeleniumService/WebCrawlerService.java +++ b/src/main/java/Capstone/AutoScheduler/global/service/SeleniumService/WebCrawlerService.java @@ -1,20 +1,28 @@ package Capstone.AutoScheduler.global.service.SeleniumService; +import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; +import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeOptions; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Service; import java.net.MalformedURLException; import java.net.URL; import java.time.Duration; +import java.util.ArrayList; +import java.util.List; @Service public class WebCrawlerService { - public String getHtmlContent(String url) { + public List<String> getHtmlContent(String url) { + List<String> htmlContent = new ArrayList<>(); WebDriver driver = null; try { @@ -38,8 +46,30 @@ public String getHtmlContent(String url) { WebDriverWait wait = new 
WebDriverWait(driver, Duration.ofSeconds(10)); wait.until(ExpectedConditions.presenceOfElementLocated(org.openqa.selenium.By.tagName("body"))); - // HTML 소스 반환 - return driver.getPageSource(); + // Collect the stylesheet links declared in the page as link-tag markup + String cssFiles = ""; + List<WebElement> links = driver.findElements(By.xpath("//link[@rel='stylesheet']")); + for (WebElement link : links) { + String cssLink = link.getAttribute("href"); + cssFiles += "<link rel=\"stylesheet\" href=\"" + cssLink + "\">"; + } + htmlContent.add(cssFiles); + + // Extract only the body element of the page + String bodyContent = driver.findElement(By.tagName("body")).getAttribute("outerHTML"); + htmlContent.add(bodyContent); + + return htmlContent; + +// // Fetch the HTML source and store it in a variable +// String htmlContent = driver.getPageSource(); +// +// // Return the HTML with its MIME type +// HttpHeaders headers = new HttpHeaders(); +// headers.add(HttpHeaders.CONTENT_TYPE, "text/html; charset=UTF-8"); +// +// // Build a ResponseEntity and return its body via getBody +// return new ResponseEntity<>(htmlContent, headers, HttpStatus.OK).getBody(); } catch (MalformedURLException e) { throw new IllegalArgumentException("유효하지 않은 URL 형식입니다: " + url); } catch (Exception e) { diff --git a/src/main/java/Capstone/AutoScheduler/global/web/controller/CrawlerController.java b/src/main/java/Capstone/AutoScheduler/global/web/controller/CrawlerController.java index ae247c3..7663751 100644 --- a/src/main/java/Capstone/AutoScheduler/global/web/controller/CrawlerController.java +++ b/src/main/java/Capstone/AutoScheduler/global/web/controller/CrawlerController.java @@ -1,6 +1,13 @@ package Capstone.AutoScheduler.global.web.controller; +import Capstone.AutoScheduler.global.apiPayload.ApiResponse; +import Capstone.AutoScheduler.global.apiPayload.code.status.ErrorStatus; +import Capstone.AutoScheduler.global.apiPayload.code.status.SuccessStatus; +import Capstone.AutoScheduler.global.converter.CrawlingConverter; +import Capstone.AutoScheduler.global.converter.EventConverter; import Capstone.AutoScheduler.global.service.SeleniumService.WebCrawlerService; +import 
Capstone.AutoScheduler.global.web.dto.CrawlingResponseDTO; +import Capstone.AutoScheduler.global.web.dto.Event.EventResponseDTO; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; import org.springframework.beans.factory.annotation.Autowired; @@ -11,6 +18,8 @@ import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import java.util.List; + @RestController public class CrawlerController { @@ -22,23 +31,19 @@ public class CrawlerController { description = "URL을 입력하면 해당 페이지의 HTML 소스를 반환합니다." ) @GetMapping("/crawl") - public ResponseEntity crawl( + public ApiResponse crawl( @Parameter(description = "크롤링할 URL", required = true) @RequestParam String url ) { try { // HTML 크롤링 결과 가져오기 - String htmlContent = webCrawlerService.getHtmlContent(url); - - // HTML을 MIME 타입으로 반환 - HttpHeaders headers = new HttpHeaders(); - headers.add(HttpHeaders.CONTENT_TYPE, "text/html; charset=UTF-8"); + List<String> htmlContent = webCrawlerService.getHtmlContent(url); - return new ResponseEntity<>(htmlContent, headers, HttpStatus.OK); + return ApiResponse.onSuccess(SuccessStatus.CRAWLING_OK, CrawlingConverter.toGetCrawlingResultDTO(htmlContent)); } catch (IllegalArgumentException e) { - return new ResponseEntity<>("유효하지 않은 URL 형식입니다: " + e.getMessage(), HttpStatus.BAD_REQUEST); + return ApiResponse.onFailure(ErrorStatus.CRAWLING_NOT_EXIST.getCode(), ErrorStatus.CRAWLING_NOT_EXIST.getMessage(), null); } catch (Exception e) { - return new ResponseEntity<>("크롤링 중 오류가 발생했습니다: " + e.getMessage(), HttpStatus.INTERNAL_SERVER_ERROR); + return ApiResponse.onFailure(ErrorStatus.CRAWLING_ERROR.getCode(), ErrorStatus.CRAWLING_ERROR.getMessage(), null); } } } diff --git a/src/main/java/Capstone/AutoScheduler/global/web/dto/CrawlingResponseDTO.java b/src/main/java/Capstone/AutoScheduler/global/web/dto/CrawlingResponseDTO.java new file mode 100644 index 0000000..023b93b --- /dev/null +++ 
b/src/main/java/Capstone/AutoScheduler/global/web/dto/CrawlingResponseDTO.java @@ -0,0 +1,20 @@ +package Capstone.AutoScheduler.global.web.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +import java.time.LocalDateTime; + +public class CrawlingResponseDTO { + // Crawling result: the CSS file links and the HTML body of the page + @Builder + @Getter + @NoArgsConstructor + @AllArgsConstructor + public static class GetCrawlingResultDTO { + private String cssFile; + private String htmlBody; + } +}