Skip to content

Commit

Permalink
[IDLE-000] 크롤링 시작 시간 17시 20분으로 변경
Browse files Browse the repository at this point in the history
  • Loading branch information
wonjunYou committed Oct 31, 2024
1 parent 9565572 commit 830ca30
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class CrawlingJobScheduler(
private val crawlingJobConfig: CrawlingJobConfig,
) {

@Scheduled(cron = "0 30 16 * * *")
@Scheduled(cron = "0 20 17 * * *")
fun scheduleJob() {
val jobParameters: JobParameters = JobParametersBuilder()
.addLong("timestamp", System.currentTimeMillis())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,31 +92,38 @@ object WorknetCrawler {
logger.warn { "pageCount= " + pageCount }

for (i in 1..pageCount) {
if (i >= 2) {
val updatedCrawlingUrl = crawlingUrl
.replace("{yesterday}", yesterday)
.replace(Regex("pageIndex=\\d+"), "pageIndex=${i}")
driver.get(updatedCrawlingUrl)
}

wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector("#list1")))
try {
if (i >= 2) {
val updatedCrawlingUrl = crawlingUrl
.replace("{yesterday}", yesterday)
.replace(Regex("pageIndex=\\d+"), "pageIndex=${i}")
driver.get(updatedCrawlingUrl)
}

crawlPosts(1, JOB_POSTING_COUNT_PER_PAGE, postings)
wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector("#list1")))
crawlPosts(1, JOB_POSTING_COUNT_PER_PAGE, postings)
} catch (e: Exception) {
println("크롤링 오류 발생: 페이지 $i - ${e.message}")
}
}

val lastPageJobPostingCount = jobPostingCount % JOB_POSTING_COUNT_PER_PAGE

if (lastPageJobPostingCount > 0) {
val updateCrawlingUrl = crawlingUrl
.replace("{yesterday}", yesterday)
.replace(Regex("pageIndex=\\d+"), "pageIndex=${pageCount + 1}")
driver.get(updateCrawlingUrl)

wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector("#list1")))
try {
val updateCrawlingUrl = crawlingUrl
.replace("{yesterday}", yesterday)
.replace(Regex("pageIndex=\\d+"), "pageIndex=${pageCount + 1}")
driver.get(updateCrawlingUrl)

crawlPosts(1, lastPageJobPostingCount, postings)
wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector("#list1")))
crawlPosts(1, lastPageJobPostingCount, postings)
} catch (e: Exception) {
println("크롤링 오류 발생: 마지막 페이지 ${pageCount + 1} - ${e.message}")
}
}


driver.quit()
return postings
}
Expand Down

0 comments on commit 830ca30

Please sign in to comment.