Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package Capstone.AutoScheduler.global.domain.enums;

/**
 * Enumerates the two crawling variants, {@link #BODY} and {@link #HTML}.
 *
 * <p>NOTE(review): the identifier looks like a misspelling of "CrawlingType". It is left
 * unchanged because renaming the type would break every existing reference to it.
 */
public enum CrawlingTpye {
    /** Presumably requests only the page body (with its stylesheet links) — TODO confirm with the crawler service. */
    BODY,

    /** Presumably requests the complete page source — TODO confirm with the crawler service. */
    HTML;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
@Service
public class WebCrawlerService {

public List<String> getHtmlContent(String url) {
public List<String> getHtmlContent(int type, String url) {
List<String> htmlContent = new ArrayList<>();
WebDriver driver = null;

Expand All @@ -46,18 +46,25 @@ public List<String> getHtmlContent(String url) {
WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10));
wait.until(ExpectedConditions.presenceOfElementLocated(org.openqa.selenium.By.tagName("body")));

// header의 CSS파일 가져오기
String cssFiles = "";
List<WebElement> links = driver.findElements(By.xpath("//link[@rel='stylesheet']"));
for (WebElement link : links) {
String cssLink = link.getAttribute("href");
cssFiles += "<link rel=\"stylesheet\" href=\"" + cssLink + "\">";
if(type == 0) {
// header의 CSS파일 가져오기
String cssFiles = "";
List<WebElement> links = driver.findElements(By.xpath("//link[@rel='stylesheet']"));
for (WebElement link : links) {
String cssLink = link.getAttribute("href");
cssFiles += "<link rel=\"stylesheet\" href=\"" + cssLink + "\">";
}
htmlContent.add(cssFiles);

// html의 body만 가져오기
String bodyContent = driver.findElement(By.tagName("body")).getAttribute("outerHTML");
htmlContent.add(bodyContent);
}
else {
String pageSource = driver.getPageSource();
htmlContent.add(null);
htmlContent.add(pageSource);
}
htmlContent.add(cssFiles);

// html의 body만 가져오기
String bodyContent = driver.findElement(By.tagName("body")).getAttribute("outerHTML");
htmlContent.add(bodyContent);

return htmlContent;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
Expand All @@ -26,18 +27,20 @@ public class CrawlerController {
@Autowired
private WebCrawlerService webCrawlerService;

@CrossOrigin(origins = "http://localhost:3000") // 프론트엔드 서버만 허용
@Operation(
summary = "웹 크롤링 API",
description = "URL을 입력하면 해당 페이지의 HTML 소스를 반환합니다."
)
@GetMapping("/crawl")
public ApiResponse<CrawlingResponseDTO.GetCrawlingResultDTO> crawl(
@Parameter(description = "크롤링할 URL", required = true)
@RequestParam int type,
@RequestParam String url
) {
try {
// HTML 크롤링 결과 가져오기
List<String> htmlContent = webCrawlerService.getHtmlContent(url);
List<String> htmlContent = webCrawlerService.getHtmlContent(type, url);

return ApiResponse.onSuccess(SuccessStatus.CRAWLING_OK, CrawlingConverter.toGetCrawlingResultDTO(htmlContent));
} catch (IllegalArgumentException e) {
Expand Down
Loading