diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/common/scheduler/CrawlingJobScheduler.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/common/launcher/CrawlingJobLauncher.kt similarity index 54% rename from idle-batch/src/main/kotlin/com/swm/idle/batch/common/scheduler/CrawlingJobScheduler.kt rename to idle-batch/src/main/kotlin/com/swm/idle/batch/common/launcher/CrawlingJobLauncher.kt index 7192a043..174485e8 100644 --- a/idle-batch/src/main/kotlin/com/swm/idle/batch/common/scheduler/CrawlingJobScheduler.kt +++ b/idle-batch/src/main/kotlin/com/swm/idle/batch/common/launcher/CrawlingJobLauncher.kt @@ -1,16 +1,18 @@ -package com.swm.idle.batch.common.scheduler +package com.swm.idle.batch.common.launcher -import com.swm.idle.batch.job.CrawlingJobConfig +import com.swm.idle.batch.job.JobConfig import org.springframework.batch.core.JobParameters import org.springframework.batch.core.JobParametersBuilder +import org.springframework.batch.core.configuration.JobRegistry import org.springframework.batch.core.launch.JobLauncher import org.springframework.scheduling.annotation.Scheduled import org.springframework.stereotype.Component @Component -class CrawlingJobScheduler( +class CrawlingJobLauncher( private val jobLauncher: JobLauncher, - private val crawlingJobConfig: CrawlingJobConfig, + private val jobRegistry: JobRegistry, + private val crawlingJobConfig: JobConfig, ) { @Scheduled(cron = "0 0 23 * * *") @@ -22,4 +24,11 @@ class CrawlingJobScheduler( jobLauncher.run(crawlingJobConfig.crawlingJob(), jobParameters) } + fun jobStart() { + val jobParameters: JobParameters = JobParametersBuilder() + .addLong("timestamp", System.currentTimeMillis()) + .toJobParameters() + + jobLauncher.run(jobRegistry.getJob("crawlingJob"), jobParameters) + } } diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/CrawlerConsts.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/CrawlerConsts.kt new file mode 100644 index 00000000..df32dc06 --- /dev/null +++ 
b/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/CrawlerConsts.kt
@@ -0,0 +1,72 @@
+package com.swm.idle.batch.crawler
+
+/**
+ * Constants shared by the Worknet crawler: the search URL template, the page size,
+ * XPath locators for the fields of a posting, and ChromeDriver command-line options.
+ *
+ * @property location label under which a failed extraction of this constant is counted
+ * @property value the constant itself (URL template, number, XPath expression, or Chrome argument)
+ */
+enum class CrawlerConsts(val location: String, val value: String) {
+    CRAWLING_TARGET_URL_FORMAT("CRAWLING_TARGET_URL_FORMAT","https://www.work24.go.kr/wk/a/b/1200/retriveDtlEmpSrchList.do?basicSetupYn=&careerTo=&keywordJobCd=&occupation=&seqNo=&cloDateEndtParam=&payGbn=&templateInfo=&rot2WorkYn=&shsyWorkSecd=&srcKeywordParam=%EC%9A%94%EC%96%91%EB%B3%B4%ED%98%B8%EC%82%AC&resultCnt=50&keywordJobCont=&cert=&moreButtonYn=Y&minPay=&codeDepth2Info=11000&currentPageNo=1&eventNo=&mode=&major=&resrDutyExcYn=&eodwYn=&sortField=DATE&staArea=&sortOrderBy=DESC&keyword=%EC%9A%94%EC%96%91%EB%B3%B4%ED%98%B8%EC%82%AC&termSearchGbn=all&carrEssYns=&benefitSrchAndOr=O&disableEmpHopeGbn=&actServExcYn=&keywordStaAreaNm=&maxPay=&emailApplyYn=&codeDepth1Info=11000&keywordEtcYn=&regDateStdtParam={yesterday}&publDutyExcYn=&keywordJobCdSeqNo=&viewType=&exJobsCd=&templateDepthNmInfo=&region=&employGbn=&empTpGbcd=&computerPreferential=&infaYn=&cloDateStdtParam=&siteClcd=WORK&searchMode=Y&birthFromYY=&indArea=&careerTypes=&subEmpHopeYn=&tlmgYn=&academicGbn=&templateDepthNoInfo=&foriegn=&entryRoute=&mealOfferClcd=&basicSetupYnChk=&station=&holidayGbn=&srcKeyword=%EC%9A%94%EC%96%91%EB%B3%B4%ED%98%B8%EC%82%AC&academicGbnoEdu=noEdu&enterPriseGbn=all&cloTermSearchGbn=all&birthToYY=&keywordWantedTitle=&stationNm=&benefitGbn=&notSrcKeywordParam=&keywordFlag=&notSrcKeyword=&essCertChk=&depth2SelCode=&keywordBusiNm=&preferentialGbn=&rot3WorkYn=&regDateEndtParam={yesterday}&pfMatterPreferential=&pageIndex={pageIndex}&termContractMmcnt=&careerFrom=&laborHrShortYn=#scrollLoc"),
+    JOB_POSTING_COUNT_PER_PAGE("JOB_POSTING_COUNT_PER_PAGE","50"),
+    JOB_POSTING_COUNT("JOB_POSTING_COUNT","//*[@id=\"mForm\"]/div[2]/div/div[1]/div[1]/span/span"),
+
+    // Posting info
+    TITLE("TITLE", "//*[@id=\"contents\"]/div/div/div/div[1]/div[3]/div[1]/div[1]/strong"),
+    CONTENT("CONTENT", "//*[@id=\"tab-panel01\"]/div[1]/div"),
+
+    // Work info
+    PAY_INFO("PAY_INFO", "//*[@id=\"tab-panel02\"]/div/table/tbody/tr[1]/td[2]"),
+    WORK_TIME("WORK_TIME","//*[@id=\"tab-panel02\"]/div/table/tbody/tr[2]/td"),
+    WORK_SCHEDULE("WORK_SCHEDULE","//*[@id=\"tab-panel02\"]/div/table/tbody/tr[3]/td[2]"),
+
+    // Recruitment info
+    RECRUITMENT_PROCESS("RECRUITMENT_PROCESS","//*[@id=\"tab-panel05\"]/div[2]/div/div[2]/p[1]"),
+    REQUIRED_DOCUMENT("REQUIRED_DOCUMENT","//*[@id=\"tab-panel05\"]/div[2]/div/div[2]/p[2]"),
+    // NOTE(review): same XPath as RECRUITMENT_PROCESS — confirm this is intended.
+    APPLY_METHOD("APPLY_METHOD","//*[@id=\"tab-panel05\"]/div[2]/div/div[2]/p[1]"),
+    APPLY_DEADLINE("APPLY_DEADLINE","//*[@id=\"tab-panel05\"]/div[2]/div/div[1]/div[1]/p"),
+    CREATED_AT("CREATED_AT","//*[@id=\"contents\"]/div/div/div/div[1]/div[5]/div[11]/div[2]/table/tbody/tr[1]/td[1]"),
+
+    // Center info
+    CENTER_NAME("CENTER_NAME","//*[@id=\"contents\"]/div/div/div/div[1]/div[3]/div[1]/div[1]/p/strong"),
+    CENTER_ADDRESS1("CENTER_ADDRESS1","//*[@id=\"tab-panel02\"]/div/table/tbody/tr[5]/td/div[1]/p"),
+    CENTER_ADDRESS2("CENTER_ADDRESS2","//*[@id=\"tab-panel02\"]/div/table/tbody/tr[5]/td/div[1]/p"),
+    CENTER_ADDRESS3("CENTER_ADDRESS3","//*[@id=\"tab-panel02\"]/div/table/tbody/tr[5]/td/div[1]/p"),
+
+    // Client (elderly) address
+    CLIENT_ADDRESS1("CLIENT_ADDRESS1","//*[@id=\"tab-panel02\"]/div/table/tbody/tr[5]/td/div[1]/p"),
+    CLIENT_ADDRESS2("CLIENT_ADDRESS2","//*[@id=\"tab-panel02\"]/div/table/tbody/tr[5]/td/div[1]/p"),
+
+    // ChromeDriver options
+    HEADLESS("HEADLESS","--headless"),
+    NO_SANDBOX("NO_SANDBOX","--no-sandbox"),
+    DISABLE_DEV_SHM_USAGE("DISABLE_DEV_SHM_USAGE","--disable-dev-shm-usage"),
+    DISABLE_GPU("DISABLE_GPU","--disable-gpu"),
+    WINDOW_SIZE("WINDOW_SIZE","window-size=1920x1080"),
+    DISABLE_SOFTWARE_RASTERIZER("DISABLE_SOFTWARE_RASTERIZER","--disable-software-rasterizer"),
+    IGNORE_SSL_ERRORS("IGNORE_SSL_ERRORS","--ignore-ssl-errors=yes"),
+    IGNORE_CERTIFICATE_ERRORS("IGNORE_CERTIFICATE_ERRORS","--ignore-certificate-errors");
+
+    companion object {
+        /** Returns every ChromeDriver argument defined above, in declaration order. */
+        fun getChromOptions(): Array<String> {
+            return arrayOf(
+                HEADLESS.value,
+                NO_SANDBOX.value,
+                DISABLE_DEV_SHM_USAGE.value,
+                DISABLE_GPU.value,
+                WINDOW_SIZE.value,
+                DISABLE_SOFTWARE_RASTERIZER.value,
+                IGNORE_SSL_ERRORS.value,
+                IGNORE_CERTIFICATE_ERRORS.value
+            )
+        }
+    }
+
+    /** Parses [value] as an integer; throws [NumberFormatException] for non-numeric constants. */
+    fun getIntValue(): Int {
+        return value.toInt()
+    }
+}
\ No newline at end of file
diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/DriverInitializer.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/DriverInitializer.kt
new file mode 100644
index 00000000..21e5db0f
--- /dev/null
+++ b/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/DriverInitializer.kt
@@ -0,0 +1,36 @@
+package com.swm.idle.batch.crawler
+
+import io.github.oshai.kotlinlogging.KotlinLogging
+import org.openqa.selenium.chrome.ChromeDriver
+import org.openqa.selenium.chrome.ChromeDriverService
+import org.openqa.selenium.chrome.ChromeOptions
+import java.io.File
+
+/** Creates [ChromeDriver] instances from the `CHROMEDRIVER_BIN` and `CHROME_BIN` environment variables. */
+object DriverInitializer {
+    private val logger = KotlinLogging.logger { }
+
+    /**
+     * Creates a [ChromeDriver] configured with the arguments from [CrawlerConsts.getChromOptions].
+     *
+     * @throws RuntimeException if construction fails for any reason; the original failure is attached as the cause
+     */
+    fun init(): ChromeDriver {
+        return runCatching {
+            ChromeDriver(
+                ChromeDriverService.Builder()
+                    .usingDriverExecutable(File(System.getenv("CHROMEDRIVER_BIN")))
+                    .build()
+                    .also { logger.info { System.getenv("CHROMEDRIVER_BIN") } },
+                ChromeOptions().apply {
+                    addArguments(*CrawlerConsts.getChromOptions())
+                    setBinary(System.getenv("CHROME_BIN"))
+                }.also { logger.info { System.getenv("CHROME_BIN")} }
+            )
+        }.getOrElse { cause ->
+            logger.error(cause) { "ChromeDriver initialization failed: ${cause.message}" }
+            // Throw so that no later code runs without a driver; keep the cause for diagnosis.
+            throw RuntimeException("ChromeDriver initialization failed, application will exit.", cause)
+        }
+    }
+}
\ No newline at end of file
diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/WorknetPageCrawler.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/WorknetPageCrawler.kt
new file mode 100644
index 00000000..0ed09067
--- /dev/null
+++ b/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/WorknetPageCrawler.kt
@@ -0,0 +1,63 @@
+package com.swm.idle.batch.crawler
+
+import com.swm.idle.batch.step.PostingReader
+import org.openqa.selenium.By
+import org.openqa.selenium.WebDriver
+import org.openqa.selenium.support.ui.ExpectedConditions
+import org.openqa.selenium.support.ui.WebDriverWait
+import java.time.Duration
+import java.time.LocalDate
+import java.time.format.DateTimeFormatter
+
+/**
+ * Opens the first Worknet search-result page and derives paging information
+ * (total postings, page count, size of the last page) for a [PostingReader].
+ */
+class WorknetPageCrawler {
+    private var driver: WebDriver = DriverInitializer.init()
+
+    /**
+     * Loads page 1 of the search and stores `crawlingUrl`, `postingCount`, `pageCount`
+     * and `lastPageJobPostingCount` on [reader].
+     *
+     * The browser is quit before returning, whether or not the lookup succeeds.
+     *
+     * @throws Exception if the reported posting count is not greater than zero
+     */
+    fun initCounts(reader: PostingReader) {
+        try {
+            // NOTE(review): the placeholder is named "yesterday" but today's date is substituted — confirm intent.
+            reader.crawlingUrl = CrawlerConsts.CRAWLING_TARGET_URL_FORMAT.value
+                .replace("{yesterday}", LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd")))
+                .replace("{pageIndex}", "1")
+
+            moveToPage(reader)
+
+            val perPage = CrawlerConsts.JOB_POSTING_COUNT_PER_PAGE.getIntValue()
+            // Strip thousands separators so a count rendered as "1,234" still parses.
+            reader.postingCount = driver
+                .findElement(By.xpath(CrawlerConsts.JOB_POSTING_COUNT.value))
+                .text
+                .replace(",", "")
+                .toInt()
+                .takeIf { it > 0 }
+                ?: throw Exception("크롤링 할 공고가 없습니다.")
+
+            // Ceiling division: a partially filled last page still counts as a page.
+            reader.pageCount = (reader.postingCount + perPage - 1) / perPage
+            reader.lastPageJobPostingCount = reader.postingCount % perPage
+        } finally {
+            // Release the Chrome process on every path, including wait timeouts and parse failures.
+            driver.quit()
+        }
+    }
+
+    /** Navigates to the reader's `crawlingUrl` and waits up to 10 seconds for the posting-count element. */
+    private fun moveToPage(reader: PostingReader) {
+        driver.get(reader.crawlingUrl)
+        WebDriverWait(driver, Duration.ofSeconds(10))
+            .also {
+                it.until(ExpectedConditions.visibilityOfElementLocated(By.xpath(CrawlerConsts.JOB_POSTING_COUNT.value)))
+            }
+    }
+}
\ No newline at end of file
diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/WorknetPostCrawler.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/WorknetPostCrawler.kt
new file mode 100644
index 00000000..9e3f37c5
--- /dev/null
+++ b/idle-batch/src/main/kotlin/com/swm/idle/batch/crawler/WorknetPostCrawler.kt
@@ -0,0 +1,182 @@
+package com.swm.idle.batch.crawler
+
+import com.swm.idle.batch.common.dto.CrawledJobPostingDto
+import io.github.oshai.kotlinlogging.KotlinLogging
+import org.openqa.selenium.By
+import org.openqa.selenium.WebDriver
+import org.openqa.selenium.support.ui.ExpectedConditions
+import org.openqa.selenium.support.ui.WebDriverWait
+import java.time.Duration
+import java.time.LocalDate
+import java.time.format.DateTimeFormatter
+import org.openqa.selenium.WebElement
+
+/**
+ * Crawls the detail pages of the postings listed on one Worknet search-result page.
+ *
+ * Each instance creates its own ChromeDriver, which [crawlPosts] quits before returning.
+ */
+class WorknetPostCrawler {
+    private val logger = KotlinLogging.logger { }
+    private var driver: WebDriver = DriverInitializer.init()
+
+    // Failed extractions per CrawlerConsts.location; printed at the end of crawlPosts.
+    private var errorCountMap: MutableMap<String, Int> = mutableMapOf()
+
+    /**
+     * Opens [url] and visits list rows `list1`..`list{end}`, collecting one DTO per posting.
+     *
+     * Rows whose title link cannot be found are skipped, and a posting is dropped when any
+     * of its fields fails to extract. The browser is quit on every exit path.
+     *
+     * @param end number of list rows to visit
+     * @param url search-result page to open
+     * @return the postings that were extracted successfully
+     */
+    fun crawlPosts(end: Int, url: String): List<CrawledJobPostingDto> {
+        val crawledPostings = mutableListOf<CrawledJobPostingDto>()
+        try {
+            moveToPage(url)
+
+            repeat(end) { i ->
+                val originalWindow = driver.windowHandle
+                val titleElement = findElementSafe(By.xpath("//*[@id=\"list${i+1}\"]/td[1]/div/div[2]/a")) ?: return@repeat
+
+                moveToPostDetailWindow(titleElement, originalWindow)
+
+                try {
+                    val post: CrawledJobPostingDto = createPost()
+                    crawledPostings.add(post)
+                } catch (e: Exception) {
+                    // Log the cause too; the per-field tallies alone do not say what went wrong.
+                    logger.warn(e) { "실패" }
+                }
+
+                backWindow(originalWindow)
+            }
+            errorCountMap.asSequence().forEach { (key, value) -> println("$key -> $value") }
+        } finally {
+            // Otherwise an exception above (e.g. a wait timeout) would leave Chrome running.
+            driver.quit()
+        }
+        return crawledPostings
+    }
+
+    /** Navigates to [url] and waits up to 10 seconds for the first list row (`#list1`) to be visible. */
+    private fun moveToPage(url: String) {
+        driver.get(url)
+        WebDriverWait(driver, Duration.ofSeconds(10))
+            .until(
+                ExpectedConditions.visibilityOfElementLocated(By.cssSelector("#list1"))
+            )
+    }
+
+    /** Builds a DTO from the detail page the driver is currently on; propagates any extraction failure. */
+    private fun createPost(): CrawledJobPostingDto {
+        return CrawledJobPostingDto(
+            title = extractText(CrawlerConsts.TITLE),
+            content = extractText(CrawlerConsts.CONTENT),
+            createdAt = extractText(CrawlerConsts.CREATED_AT),
+            payInfo = extractText(CrawlerConsts.PAY_INFO),
+            workSchedule = extractText(CrawlerConsts.WORK_SCHEDULE),
+            recruitmentProcess = extractText(CrawlerConsts.RECRUITMENT_PROCESS),
+            applyMethod = extractText(CrawlerConsts.APPLY_METHOD),
+            requiredDocument = extractText(CrawlerConsts.REQUIRED_DOCUMENT),
+            centerName = extractText(CrawlerConsts.CENTER_NAME),
+            applyDeadline = extractApplyDeadline(CrawlerConsts.APPLY_DEADLINE),
+            workTime = extractWorkTime(CrawlerConsts.WORK_TIME),
+            centerAddress = extractAddress(
+                CrawlerConsts.CENTER_ADDRESS1,
+                CrawlerConsts.CENTER_ADDRESS2,
+                CrawlerConsts.CENTER_ADDRESS3
+            ),
+            clientAddress = extractAddress(
+                CrawlerConsts.CLIENT_ADDRESS1,
+                CrawlerConsts.CLIENT_ADDRESS2
+            ),
+            directUrl = driver.currentUrl
+        )
+    }
+
+
+    /** Runs [action]; on failure, counts it under [location] and rethrows. */
+    private inline fun <T> errorRecord(location: String, action: () -> T): T {
+        return runCatching { action() }
+            .getOrElse { e ->
+                logError(location)
+                throw e
+            }
+    }
+
+    /** Returns the element matching [by], or `null` if the lookup throws. */
+    private fun findElementSafe(by: By): WebElement? {
+        return runCatching { driver.findElement(by) }.getOrNull()
+    }
+
+    /** Clicks [titleElement], waits up to 10 seconds for a second window, and switches to it. */
+    private fun moveToPostDetailWindow(titleElement: WebElement, originalWindow: String) {
+        titleElement.click()
+        WebDriverWait(driver, Duration.ofSeconds(10))
+            .until(ExpectedConditions.numberOfWindowsToBe(2))
+        driver.switchTo().window(driver.windowHandles.first { it != originalWindow })
+    }
+
+    /** Returns the text of the element located by [con]'s XPath. */
+    private fun extractText(con: CrawlerConsts): String {
+        return errorRecord(con.location) { driver.findElement(By.xpath(con.value)).text }
+    }
+
+    /**
+     * Returns the deadline text; when it contains "채용시까지", substitutes the date
+     * 15 days from today formatted as `yyyyMMdd`.
+     */
+    private fun extractApplyDeadline(con: CrawlerConsts): String {
+        return errorRecord(con.location) {
+            driver.findElement(By.xpath(con.value)).text.let {
+                if (it.contains("채용시까지"))
+                    LocalDate.now().plusDays(15).format(DateTimeFormatter.ofPattern("yyyyMMdd"))
+                else
+                    it
+            }
+        }
+    }
+
+    /**
+     * Returns the address from the first locator in [cons] that resolves, with "지도보기"
+     * and a parenthesised 5-digit code removed.
+     *
+     * @throws NoSuchElementException if none of the locators resolves
+     */
+    private fun extractAddress(vararg cons: CrawlerConsts): String {
+        for (con in cons) {
+            // On a miss, count it and fall through to the next candidate locator.
+            val rawAddress = runCatching { driver.findElement(By.xpath(con.value)).text }
+                .onFailure { logError(con.location) }
+                .getOrNull()
+                ?: continue
+            return rawAddress.replace("지도보기", "").trim().replace(Regex("\\(\\d{5}\\)"), "").trim()
+        }
+        throw NoSuchElementException("Center address not found using any of the provided XPaths")
+    }
+
+    /** Returns the work-time text with "도움말", "(근무시간)" and line breaks removed. */
+    private fun extractWorkTime(con: CrawlerConsts): String {
+        return errorRecord(con.location) {
+            driver.findElement(By.xpath(con.value)).text
+                .replace("도움말", "")
+                .replace("(근무시간)", "")
+                .replace("\n", "")
+        }
+    }
+
+    /** Increments the failure count for [location]. */
+    private fun logError(location: String) {
+        errorCountMap[location] = errorCountMap.getOrDefault(location, 0) + 1
+    }
+
+    /** Closes the current (detail) window and switches back to [originalWindow]. */
+    private fun backWindow(originalWindow: String?) {
+        driver.close()
+        driver.switchTo().window(originalWindow)
+    }
+}
\ No newline at end of file
diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/job/CrawlingJobConfig.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/job/CrawlingJobConfig.kt
deleted file mode 100644
index 7b6e8258..00000000
--- a/idle-batch/src/main/kotlin/com/swm/idle/batch/job/CrawlingJobConfig.kt
+++ /dev/null
@@ -1,34 +0,0 @@
-package com.swm.idle.batch.job
-
-import org.springframework.batch.core.Job
-import org.springframework.batch.core.Step
-import org.springframework.batch.core.job.builder.JobBuilder
-import org.springframework.batch.core.repository.JobRepository
-import org.springframework.batch.core.step.builder.StepBuilder
-import org.springframework.context.annotation.Bean
-import org.springframework.context.annotation.Configuration
-import org.springframework.transaction.PlatformTransactionManager
-
-@Configuration
-class CrawlingJobConfig(
-    private val jobRepository: JobRepository,
-    private val transactionManager: PlatformTransactionManager,
-    private val crawlingJobPostingTasklet: CrawlingJobPostingTasklet,
-) {
-
-    @Bean
-    fun crawlingJob(): Job {
-        return JobBuilder("crawlingJob", jobRepository)
-            .start(crawlingJobPostStep())
-            .build()
-    }
-
-    @Bean
-    fun crawlingJobPostStep(): Step {
-        return StepBuilder("crawlingJobPostStep", jobRepository)
-            .tasklet(crawlingJobPostingTasklet, transactionManager)
-            .allowStartIfComplete(true)
-            .build()
-    }
-
-}
diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/job/CrawlingJobPostingTasklet.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/job/CrawlingJobPostingTasklet.kt
deleted file mode 100644
index 3cc54dbb..00000000
--- a/idle-batch/src/main/kotlin/com/swm/idle/batch/job/CrawlingJobPostingTasklet.kt
+++ /dev/null
@@ -1,87 +0,0 @@
-package com.swm.idle.batch.job
-
-import
com.swm.idle.application.common.converter.PointConverter -import com.swm.idle.application.jobposting.domain.CrawlingJobPostingService -import com.swm.idle.batch.common.dto.CrawledJobPostingDto -import com.swm.idle.batch.util.WorknetCrawler -import com.swm.idle.infrastructure.client.geocode.service.GeoCodeService -import io.github.oshai.kotlinlogging.KotlinLogging -import org.springframework.batch.core.StepContribution -import org.springframework.batch.core.scope.context.ChunkContext -import org.springframework.batch.core.step.tasklet.Tasklet -import org.springframework.batch.repeat.RepeatStatus -import org.springframework.stereotype.Component - -@Component -class CrawlingJobPostingTasklet( - private val crawlingJobPostingService: CrawlingJobPostingService, - private val geoCodeService: GeoCodeService, -) : Tasklet { - - private val logger = KotlinLogging.logger { } - - override fun execute(contribution: StepContribution, chunkContext: ChunkContext): RepeatStatus { - val crawlingJobPostings: List? 
= try { - WorknetCrawler.run() - } catch (e: Exception) { - logger.warn { - e.toString() - } - e.printStackTrace() // 오류 로그 출력 - null // 오류 발생 - } - - if (crawlingJobPostings != null) { - crawlingJobPostings.mapNotNull { crawledJobPosting -> - val clientRoadNameAddress = extractRoadNameAddress(crawledJobPosting.clientAddress) - val clientLocationInfo = geoCodeService.search(clientRoadNameAddress) - - if (clientLocationInfo.addresses.isEmpty()) { - return@mapNotNull null - } - - val clientLocation = PointConverter.convertToPoint( - latitude = clientLocationInfo.addresses[0].y.toDouble(), - longitude = clientLocationInfo.addresses[0].x.toDouble(), - ) - - CrawledJobPostingDto( - title = crawledJobPosting.title, - content = crawledJobPosting.content, - clientAddress = crawledJobPosting.clientAddress, - createdAt = crawledJobPosting.createdAt, - payInfo = crawledJobPosting.payInfo, - workTime = crawledJobPosting.workTime, - workSchedule = crawledJobPosting.workSchedule, - applyDeadline = crawledJobPosting.applyDeadline, - recruitmentProcess = crawledJobPosting.recruitmentProcess, - applyMethod = crawledJobPosting.applyMethod, - requiredDocument = crawledJobPosting.requiredDocument, - centerName = crawledJobPosting.centerName, - centerAddress = crawledJobPosting.centerAddress, - directUrl = crawledJobPosting.directUrl, - ).toDomain(clientLocation) - }.let { - println("크롤링된 data 크기 : ${it.size}") - crawlingJobPostingService.saveAll(it) - } - - return RepeatStatus.FINISHED - } - - return RepeatStatus.FINISHED - } - - private fun extractRoadNameAddress(clientAddress: String): String { - return clientAddress.replace(Regex(REGEX_FORMAT), "") - .substringBefore(SPLIT_DELIMITER) - .trim() - } - - companion object { - - const val REGEX_FORMAT = "\\(.*?\\)" - const val SPLIT_DELIMITER = "," - } - -} diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/job/JobConfig.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/job/JobConfig.kt new file mode 100644 index 
00000000..c5feff94
--- /dev/null
+++ b/idle-batch/src/main/kotlin/com/swm/idle/batch/job/JobConfig.kt
@@ -0,0 +1,98 @@
+package com.swm.idle.batch.job
+
+import com.swm.idle.batch.common.dto.CrawledJobPostingDto
+import com.swm.idle.batch.step.PostingProcessor
+import com.swm.idle.batch.step.PostingReader
+import com.swm.idle.batch.step.PostingReader.Companion.nextPage
+import com.swm.idle.domain.jobposting.entity.jpa.CrawledJobPosting
+import jakarta.persistence.EntityManagerFactory
+import org.springframework.batch.core.Step
+import org.springframework.batch.core.Job
+import org.springframework.context.annotation.Bean
+import org.springframework.context.annotation.Configuration
+import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing
+import org.springframework.batch.core.configuration.annotation.StepScope
+import org.springframework.batch.core.job.builder.JobBuilder
+import org.springframework.batch.core.repository.JobRepository
+import org.springframework.batch.core.step.builder.StepBuilder
+import org.springframework.batch.item.Chunk
+import org.springframework.batch.item.ItemProcessor
+import org.springframework.batch.item.ItemReader
+import org.springframework.batch.item.ItemWriter
+import org.springframework.batch.item.database.JpaItemWriter
+import org.springframework.core.task.SimpleAsyncTaskExecutor
+import org.springframework.transaction.PlatformTransactionManager
+import java.util.concurrent.atomic.AtomicInteger
+
+/**
+ * Spring Batch wiring for the crawling job: one chunk-oriented step that reads a page
+ * of crawled postings, converts them to entities, and writes them through JPA.
+ */
+@Configuration
+@EnableBatchProcessing
+class JobConfig(
+    private val jobRepository: JobRepository,
+    private val transactionManager: PlatformTransactionManager,
+    private val entityManagerFactory: EntityManagerFactory
+) {
+
+    /** Defines the non-restartable `crawlingJob`, which consists of [crawlStep] only. */
+    @Bean
+    fun crawlingJob(): Job {
+        // Runs once, when this bean is created; PostingReader resets the counter again
+        // each time a step-scoped reader instance is constructed.
+        nextPage = AtomicInteger(1)
+        return JobBuilder("crawlingJob", jobRepository)
+            .start(crawlStep())
+            .preventRestart()
+            .build()
+    }
+
+    /**
+     * Defines `crawlStep`: chunk size 1 (one page-sized list per chunk), executed on [taskExecutor].
+     */
+    @Bean
+    fun crawlStep(): Step {
+        return StepBuilder("crawlStep", jobRepository)
+            .chunk<List<CrawledJobPostingDto>,
+                    List<CrawledJobPosting>>(1, transactionManager)
+            .reader(postingReader())
+            .processor(postingProcessor())
+            .writer(postingWriter())
+            .taskExecutor(taskExecutor())
+            .allowStartIfComplete(true)
+            .build()
+    }
+
+    /** Executor for [crawlStep], limited to 4 concurrent tasks. */
+    @Bean
+    fun taskExecutor(): SimpleAsyncTaskExecutor {
+        return SimpleAsyncTaskExecutor().apply {
+            this.setConcurrencyLimit(4)
+        }
+    }
+
+    /** Step-scoped reader that yields one list of crawled postings per result page. */
+    @Bean
+    @StepScope
+    fun postingReader(): ItemReader<List<CrawledJobPostingDto>> = PostingReader()
+
+    /** Processor that converts each crawled DTO list into entities. */
+    @Bean
+    fun postingProcessor(): ItemProcessor<List<CrawledJobPostingDto>, out List<CrawledJobPosting>> = PostingProcessor()
+
+    /** Writer that flattens each chunk of entity lists and delegates to a [JpaItemWriter]. */
+    @Bean
+    fun postingWriter(): ItemWriter<List<CrawledJobPosting>> {
+        val jpaWriter = JpaItemWriter<CrawledJobPosting>()
+        jpaWriter.setEntityManagerFactory(entityManagerFactory)
+
+        return ItemWriter { items ->
+            val chunk = Chunk<CrawledJobPosting>()
+            items.forEach { itemList ->
+                chunk.addAll(itemList)
+            }
+            jpaWriter.write(chunk)
+        }
+    }
+}
\ No newline at end of file
diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/step/PostingProcessor.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/step/PostingProcessor.kt
new file mode 100644
index 00000000..16725903
--- /dev/null
+++ b/idle-batch/src/main/kotlin/com/swm/idle/batch/step/PostingProcessor.kt
@@ -0,0 +1,44 @@
+package com.swm.idle.batch.step
+
+import com.swm.idle.batch.common.dto.CrawledJobPostingDto
+import com.swm.idle.application.common.converter.PointConverter
+import com.swm.idle.domain.jobposting.entity.jpa.CrawledJobPosting
+import com.swm.idle.infrastructure.client.geocode.service.GeoCodeService
+import org.springframework.batch.item.ItemProcessor
+
+/**
+ * Converts crawled DTOs into [CrawledJobPosting] entities, geocoding each client address;
+ * postings whose address yields no geocoding result are dropped.
+ */
+class PostingProcessor : ItemProcessor<List<CrawledJobPostingDto>, List<CrawledJobPosting>> {
+
+    companion object {
+        const val REGEX_FORMAT = "\\(.*?\\)"
+        const val SPLIT_DELIMITER = ","
+    }
+
+    /** Maps every item that can be geocoded to its entity; the result may be empty. */
+    override fun process(item: List<CrawledJobPostingDto>): List<CrawledJobPosting> {
+        return item.asSequence().mapNotNull {
+            val roadNameAddress = extractRoadNameAddress(it.clientAddress)
+            // NOTE(review): search is called on the GeoCodeService type itself — confirm it is an
+            // object/companion member rather than an instance method that needs injection.
+            val geoInfo = GeoCodeService.search(roadNameAddress)
+            if (geoInfo.addresses.isEmpty()) {
+                return@mapNotNull null
+            }
+            val clientLocation = PointConverter.convertToPoint(
+                latitude = geoInfo.addresses[0].y.toDouble(),
+                longitude = geoInfo.addresses[0].x.toDouble(),
+            )
+            it.toDomain(clientLocation)
+        }.toList()
+    }
+
+    /** Strips parenthesised parts, then keeps the trimmed text before the first comma. */
+    private fun extractRoadNameAddress(clientAddress: String): String {
+        return clientAddress.replace(Regex(REGEX_FORMAT), "")
+            .substringBefore(SPLIT_DELIMITER)
+            .trim()
+    }
+}
\ No newline at end of file
diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/step/PostingReader.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/step/PostingReader.kt
new file mode 100644
index 00000000..c5468579
--- /dev/null
+++ b/idle-batch/src/main/kotlin/com/swm/idle/batch/step/PostingReader.kt
@@ -0,0 +1,58 @@
+package com.swm.idle.batch.step
+
+import com.swm.idle.batch.common.dto.CrawledJobPostingDto
+import com.swm.idle.batch.crawler.CrawlerConsts
+import com.swm.idle.batch.crawler.WorknetPageCrawler
+import com.swm.idle.batch.crawler.WorknetPostCrawler
+import org.springframework.batch.item.ItemStreamReader
+import java.util.concurrent.atomic.AtomicInteger
+
+/**
+ * Reader that yields the crawled postings of one Worknet result page per [read] call.
+ *
+ * Construction launches a browser (via [WorknetPageCrawler]) to determine how many pages exist.
+ */
+class PostingReader : ItemStreamReader<List<CrawledJobPostingDto>> {
+    var crawlingUrl: String = ""
+    var postingCount: Int = 1
+    var lastPageJobPostingCount: Int = 1
+    var pageCount: Int = 1
+
+    init {
+        // The page counter is shared through the companion object, so it must be rewound
+        // for every new reader; otherwise a later run would start past the last page and read nothing.
+        nextPage.set(1)
+        WorknetPageCrawler().initCounts(this)
+    }
+
+    companion object {
+        // Next page index to hand out; fetched and incremented atomically by read().
+        var nextPage = AtomicInteger(1)
+    }
+
+    /** Crawls the next unread page, or returns `null` once every page has been handed out. */
+    override fun read(): List<CrawledJobPostingDto>? {
+        val currentPage = nextPage.getAndIncrement()
+        if (currentPage > pageCount) return null
+
+        val amount = getAmount(currentPage)
+        val nowUrl = getPageUrl(currentPage)
+
+        return WorknetPostCrawler().crawlPosts(amount, nowUrl)
+    }
+
+    /** Number of postings on [currentPage]: the remainder on the last page if it is non-zero, else a full page. */
+    private fun getAmount(currentPage: Int): Int {
+        if (currentPage == pageCount && lastPageJobPostingCount > 0) {
+            return lastPageJobPostingCount
+        }
+        return CrawlerConsts.JOB_POSTING_COUNT_PER_PAGE.getIntValue()
+    }
+
+    /** Returns [crawlingUrl] with its `pageIndex` query parameter set to [currentPage]. */
+    private fun getPageUrl(currentPage: Int): String {
+        return crawlingUrl.replace(
+            Regex("pageIndex=\\d+"),
+            "pageIndex=$currentPage")
+    }
+}
\ No newline at end of file
diff --git a/idle-batch/src/main/kotlin/com/swm/idle/batch/util/WorknetCrawler.kt b/idle-batch/src/main/kotlin/com/swm/idle/batch/util/WorknetCrawler.kt
deleted file mode 100644
index 3c8bf139..00000000
--- a/idle-batch/src/main/kotlin/com/swm/idle/batch/util/WorknetCrawler.kt
+++ /dev/null
@@ -1,364 +0,0 @@
-package com.swm.idle.batch.util
-
-import com.swm.idle.batch.common.dto.CrawledJobPostingDto
-import io.github.oshai.kotlinlogging.KotlinLogging
-import org.openqa.selenium.Alert
-import org.openqa.selenium.By
-import org.openqa.selenium.NoAlertPresentException
-import org.openqa.selenium.WebDriver
-import org.openqa.selenium.chrome.ChromeDriver
-import org.openqa.selenium.chrome.ChromeDriverService
-import org.openqa.selenium.chrome.ChromeOptions
-import org.openqa.selenium.support.ui.ExpectedConditions
-import org.openqa.selenium.support.ui.WebDriverWait
-import java.io.File
-import java.time.Duration
-import java.time.LocalDate
-import java.time.format.DateTimeFormatter
-
-object WorknetCrawler {
-
-    private val logger = KotlinLogging.logger { }
-
-    private const val CRAWLING_TARGET_URL_FORMAT =
-
"https://www.work24.go.kr/wk/a/b/1200/retriveDtlEmpSrchList.do?basicSetupYn=&careerTo=&keywordJobCd=&occupation=&seqNo=&cloDateEndtParam=&payGbn=&templateInfo=&rot2WorkYn=&shsyWorkSecd=&srcKeywordParam=%EC%9A%94%EC%96%91%EB%B3%B4%ED%98%B8%EC%82%AC&resultCnt=10&keywordJobCont=&cert=&moreButtonYn=Y&minPay=&codeDepth2Info=11000¤tPageNo=1&eventNo=&mode=&major=&resrDutyExcYn=&eodwYn=&sortField=DATE&staArea=&sortOrderBy=DESC&keyword=%EC%9A%94%EC%96%91%EB%B3%B4%ED%98%B8%EC%82%AC&termSearchGbn=all&carrEssYns=&benefitSrchAndOr=O&disableEmpHopeGbn=&actServExcYn=&keywordStaAreaNm=&maxPay=&emailApplyYn=&codeDepth1Info=11000&keywordEtcYn=®DateStdtParam={yesterday}&publDutyExcYn=&keywordJobCdSeqNo=&viewType=&exJobsCd=&templateDepthNmInfo=®ion=&employGbn=&empTpGbcd=&computerPreferential=&infaYn=&cloDateStdtParam=&siteClcd=WORK&searchMode=Y&birthFromYY=&indArea=&careerTypes=&subEmpHopeYn=&tlmgYn=&academicGbn=&templateDepthNoInfo=&foriegn=&entryRoute=&mealOfferClcd=&basicSetupYnChk=&station=&holidayGbn=&srcKeyword=%EC%9A%94%EC%96%91%EB%B3%B4%ED%98%B8%EC%82%AC&academicGbnoEdu=noEdu&enterPriseGbn=all&cloTermSearchGbn=all&birthToYY=&keywordWantedTitle=&stationNm=&benefitGbn=¬SrcKeywordParam=&keywordFlag=¬SrcKeyword=&essCertChk=&depth2SelCode=&keywordBusiNm=&preferentialGbn=&rot3WorkYn=®DateEndtParam={yesterday}&pfMatterPreferential=&pageIndex={pageIndex}&termContractMmcnt=&careerFrom=&laborHrShortYn=#scrollLoc" - - private const val JOB_POSTING_COUNT_PER_PAGE = 50 - - private lateinit var driver: WebDriver - - private val postings = mutableListOf() - - // 에러 카운트를 저장할 맵 - private val errorCountMap = mutableMapOf() - - private fun initializeDriver() { - try { - val service = ChromeDriverService.Builder() - .usingDriverExecutable(File(System.getenv("CHROMEDRIVER_BIN"))) - .build() - - val options = ChromeOptions().apply { - addArguments("--headless") - addArguments("--no-sandbox") - addArguments("--disable-dev-shm-usage") - addArguments("--disable-gpu") - 
addArguments("window-size=1920x1080") - addArguments("--disable-software-rasterizer") - addArguments("--ignore-ssl-errors=yes") - addArguments("--ignore-certificate-errors") - - setBinary(System.getenv("CHROME_BIN")) - } - - driver = ChromeDriver(service, options) - } catch (e: Exception) { - logError("initializeDriver", e) - } - } - - fun run(): List? { - try { - initializeDriver() - } catch (e: Exception) { - logger.error { e.toString() } - logError("run", e) - } - - logger.info { "=====초기화 완료, 크롤링 작업 시작" } - - val formatter = DateTimeFormatter.ofPattern("yyyyMMdd") - val yesterday = LocalDate.now().format(formatter) - val crawlingUrl = CRAWLING_TARGET_URL_FORMAT - .replace("{yesterday}", yesterday) - .replace("{pageIndex}", "1") - - driver.get(crawlingUrl) - - logger.info { "=====크롤링 url: $crawlingUrl" } - - val wait = WebDriverWait(driver, Duration.ofSeconds(10)) - wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//*[@id=\"mForm\"]/div[2]/div/div[1]/div[1]/span/span"))) - - val jobPostingCountText = - driver.findElement(By.xpath("//*[@id=\"mForm\"]/div[2]/div/div[1]/div[1]/span/span")).text - - logger.info { "=====크롤링 대상 공고 수: $jobPostingCountText" } - - val jobPostingCount = Integer.parseInt(jobPostingCountText.replace(",", "")) - - if (jobPostingCount == 0) { - driver.quit() - logger.info { "=====크롤링 할 공고가 없어 미리 종료합니다." 
} - return null - } - - val pageCount = jobPostingCount / JOB_POSTING_COUNT_PER_PAGE - - logger.warn { "===== 크롤링 페이지 수 " + pageCount } - - for (i in 1..pageCount) { - if (i >= 2) { - val updatedCrawlingUrl = crawlingUrl - .replace("{yesterday}", yesterday) - .replace(Regex("pageIndex=\\d+"), "pageIndex=${i}") - driver.get(updatedCrawlingUrl) - } - - wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector("#list1"))) - - crawlPosts(1, JOB_POSTING_COUNT_PER_PAGE, postings) - } - - val lastPageJobPostingCount = jobPostingCount % JOB_POSTING_COUNT_PER_PAGE - - if (lastPageJobPostingCount > 0) { - val updateCrawlingUrl = crawlingUrl - .replace("{yesterday}", yesterday) - .replace(Regex("pageIndex=\\d+"), "pageIndex=${pageCount + 1}") - driver.get(updateCrawlingUrl) - - wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector("#list1"))) - - crawlPosts(1, lastPageJobPostingCount, postings) - } - - driver.quit() - return postings - } - - private fun logError(method: String, e: Exception) { - logger.error(e) { "Error occurred in $method: ${e.message}" } - errorCountMap[method] = errorCountMap.getOrDefault(method, 0) + 1 - } - - private fun crawlPosts( - start: Int, - end: Int, - postings: MutableList, - ) { - for (i in start..end) { - try { - val originalWindow = driver.windowHandle - - val element = driver.findElement(By.xpath("//*[@id=\"list$i\"]/td[2]/a")) - element.click() - - if (handleAlertIfPresent()) { - driver.navigate().back() - continue - } - - val wait = WebDriverWait(driver, Duration.ofSeconds(5)) - wait.until(ExpectedConditions.numberOfWindowsToBe(2)) - - val allWindows = driver.windowHandles - - for (windowHandle in allWindows) { - if (windowHandle != originalWindow) { - driver.switchTo().window(windowHandle) - break - } - } - - val crawledJobPostingDto = CrawledJobPostingDto( - title = getTitle(), - content = getContent(), - clientAddress = getClientAddress(), - createdAt = getCreatedAt(), - payInfo = getPayInfo(), - workTime 
= getWorkTime(), - workSchedule = getWorkSchedule(), - applyDeadline = getApplyDeadline(), - recruitmentProcess = getRecruitmentProcess(), - applyMethod = getApplyMethod(), - requiredDocument = getRequiredDocument(), - centerName = getCenterName(), - centerAddress = getCenterAddress(), - directUrl = driver.currentUrl, - ) - - postings.add(crawledJobPostingDto) - - driver.close() - driver.switchTo().window(originalWindow) - } catch (e: Exception) { - logError("=== 에러 원인은..", e) - } - } - } - - private fun handleAlertIfPresent(): Boolean { - return try { - val alert: Alert = driver.switchTo().alert() - alert.accept() // 알림창이 있을 경우 수락 - true // 알림창이 있었음을 표시 - } catch (e: NoAlertPresentException) { - false // 알림창이 없었음을 표시 - } - } - - private fun getClientAddress(): String { - val xpaths = listOf( - "//*[@id=\"contents\"]/section/div/div[3]/div[15]/div/table/tbody/tr[4]/td/p[2]", - "//*[@id=\"contents\"]/section/div/div[3]/div[17]/div/table/tbody/tr[4]/td/p[2]", - "//*[@id=\"contents\"]/section/div/div[3]/div[16]/div/table/tbody/tr[4]/td/p[2]" - ) - - for (xpath in xpaths) { - try { - val address = driver.findElement(By.xpath(xpath)).text - return address.replace("지도보기", "").trim() - } catch (e: Exception) { - logError("getClientAddress", e) - } - } - - throw NoSuchElementException("클라이언트 주소 크롤링 에러") - } - - private fun getRequiredDocument(): String { - return try { - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[7]/table/tbody/tr/td[4]")).text - } catch (e: Exception) { - logError("getRequiredDocument", e) - throw e - } - } - - private fun getApplyMethod(): String { - return try { - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[7]/table/tbody/tr/td[3]")).text - } catch (e: Exception) { - logError("getApplyMethod", e) - throw e - } - } - - private fun getRecruitmentProcess(): String { - return try { - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[7]/table/tbody/tr/td[2]")).text - } catch 
(e: Exception) { - logError("getRecruitmentProcess", e) - throw e - } - } - - private fun getApplyDeadline(): String { - return try { - val applyDeadline = - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[7]/table/tbody/tr/td[1]")).text - - if (applyDeadline.contains("채용시까지")) { - LocalDate.now().plusDays(15).format(DateTimeFormatter.ofPattern("yyyyMMdd")) - } else { - applyDeadline - } - } catch (e: Exception) { - logError("getApplyDeadline", e) - throw e - } - } - - private fun getWorkSchedule(): String { - return try { - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[6]/table/tbody/tr/td[3]")).text - } catch (e: Exception) { - logError("getWorkSchedule", e) - throw e - } - } - - private fun getWorkTime(): String { - return try { - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[6]/table/tbody/tr/td[2]")).text - .replace("(근무시간) ", "") - .substringBefore("주 소정근로시간").trim() - } catch (e: Exception) { - logError("getWorkTime", e) - throw e - } - } - - private fun getPayInfo(): String { - return try { - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div/ul/li[2]/span")).text - } catch (e: Exception) { - logError("getPayInfo", e) - throw e - } - } - - private fun getCenterName(): String { - return try { - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[2]/div[1]/div[2]/div[2]/ul/li[1]/div")).text - } catch (e: Exception) { - logError("getCenterName", e) - throw e - } - } - - private fun getCreatedAt(): String { - val xpaths = listOf( - "//*[@id=\"contents\"]/section/div/div[3]/div[16]/table/tbody/tr/td[1]", - "//*[@id=\"contents\"]/section/div/div[3]/div[18]/table/tbody/tr/td[1]", - "//*[@id=\"contents\"]/section/div/div[3]/div[17]/table/tbody/tr/td[1]" - ) - - for (xpath in xpaths) { - try { - return driver.findElement(By.xpath(xpath)).text - } catch (e: Exception) { - logError("getCreatedAt", e) - } - } 
- - throw NoSuchElementException("CreatedAt element not found using any of the provided XPaths") - } - - private fun getCenterAddress(): String { - val xpaths = listOf( - "//*[@id=\"contents\"]/section/div/div[3]/div[15]/div/table/tbody/tr[1]/td", - "//*[@id=\"contents\"]/section/div/div[3]/div[17]/div/table/tbody/tr[1]/td", - "//*[@id=\"contents\"]/section/div/div[3]/div[16]/div/table/tbody/tr[4]/td/p[2]" - ) - - for (xpath in xpaths) { - try { - val address = driver.findElement(By.xpath(xpath)).text - return address.replace("지도보기", "").trim().replace(Regex("\\(\\d{5}\\)"), "").trim() - } catch (e: Exception) { - logError("getCenterAddress", e) - } - } - - throw NoSuchElementException("Center address not found using any of the provided XPaths") - } - - private fun getContent(): String { - return try { - driver.findElement(By.xpath("//*[@id=\"contents\"]/section/div/div[3]/div[3]/table/tbody/tr/td")).text - } catch (e: Exception) { - logError("getContent", e) - throw e - } - } - - private fun getTitle(): String { - return try { - val em = driver.findElement(By.cssSelector(".left")) - em.findElement(By.cssSelector(".tit-area .tit")).text - } catch (e: Exception) { - logError("getTitle", e) - throw e - } - } - - // 에러 집계를 출력하는 메서드 추가 - fun printErrorSummary() { - logger.error { "===== 에러 집계 =====" } - errorCountMap.forEach { (method, count) -> - logger.error { "$method: $count errors" } - } - } -} diff --git a/idle-domain/src/main/resources/db/migration/V10__alter_table_carer_add_column_location.sql b/idle-domain/src/main/resources/db/migration/V10__alter_table_carer_add_column_location.sql deleted file mode 100644 index 9d49ec00..00000000 --- a/idle-domain/src/main/resources/db/migration/V10__alter_table_carer_add_column_location.sql +++ /dev/null @@ -1,4 +0,0 @@ --- V10__alter_table_carer_add_column_location.sql - -ALTER TABLE carer -ADD COLUMN location POINT; diff --git 
a/idle-domain/src/main/resources/db/migration/V11__alter_table_notification_add_notification_type.sql b/idle-domain/src/main/resources/db/migration/V11__alter_table_notification_add_notification_type.sql deleted file mode 100644 index cbcd8c8c..00000000 --- a/idle-domain/src/main/resources/db/migration/V11__alter_table_notification_add_notification_type.sql +++ /dev/null @@ -1,4 +0,0 @@ --- V11__alter_table_notification_add_notification_type.sql - -ALTER TABLE notification - MODIFY COLUMN notification_type ENUM('APPLICANT', 'NEW_JOB_POSTING', 'CENTER_AUTHENTICATION') diff --git a/idle-domain/src/main/resources/db/migration/V1__init.sql b/idle-domain/src/main/resources/db/migration/V1__init.sql index e69de29b..588bb9bf 100644 --- a/idle-domain/src/main/resources/db/migration/V1__init.sql +++ b/idle-domain/src/main/resources/db/migration/V1__init.sql @@ -0,0 +1,215 @@ +-- V1__init.sql + +-- Create tables +create table applys ( + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + carer_id BINARY(16) NOT NULL, + id BINARY(16) NOT NULL, + job_posting_id BINARY(16) NOT NULL, + apply_method_type ENUM ('CALLING','APP') NOT NULL, + entity_status VARCHAR(255), + PRIMARY KEY (id) +) engine=InnoDB; + +create table carer ( + birth_year INT NOT NULL, + experience_year INT, + latitude DECIMAL(14, 10) NOT NULL, + longitude DECIMAL(14, 10) NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + introduce TEXT, + lot_number_address VARCHAR(255) NOT NULL, + name VARCHAR(255) NOT NULL, + phone_number VARCHAR(255) NOT NULL, + profile_image_url VARCHAR(255), + road_name_address VARCHAR(255) NOT NULL, + speciality TEXT, + carer_account_status VARCHAR(20) NOT NULL, + entity_status VARCHAR(255), + gender VARCHAR(255) NOT NULL, + job_search_status VARCHAR(20) NOT NULL, + location POINT SRID 4326 NOT NULL, + 
PRIMARY KEY (id) +) engine=InnoDB; + +create table center ( + latitude DECIMAL(14, 10) NOT NULL, + longitude DECIMAL(14, 10) NOT NULL, + id BINARY(16) NOT NULL, + business_registration_number VARCHAR(255) NOT NULL, + center_name VARCHAR(255) NOT NULL, + detailed_address VARCHAR(255) NOT NULL, + introduce TEXT, + lot_number_address VARCHAR(255) NOT NULL, + office_number VARCHAR(255) NOT NULL, + profile_image_url VARCHAR(255), + road_name_address VARCHAR(255) NOT NULL, + PRIMARY KEY (id) +) engine=InnoDB; + +create table center_manager ( + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + center_business_registration_number VARCHAR(255) NOT NULL, + identifier VARCHAR(255) NOT NULL, + name VARCHAR(255) NOT NULL, + password VARCHAR(255) NOT NULL, + phone_number VARCHAR(255) NOT NULL, + entity_status VARCHAR(255), + status VARCHAR(255) NOT NULL, + PRIMARY KEY (id) +) engine=InnoDB; + +create table chat_message ( + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + room_id BINARY(16), + sender_id BINARY(16), + contents JSON NOT NULL, + entity_status VARCHAR(255), + sender_type ENUM ('USER','SYSTEM'), + PRIMARY KEY (id) +) ENGINE=InnoDB; + +create table chat_room ( + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + receiver_id BINARY(16) NOT NULL, + sender_id BINARY(16) NOT NULL, + entity_status VARCHAR(255), + PRIMARY KEY (id) +) engine=InnoDB; + +create table crawled_job_posting ( + created_at DATE DEFAULT(CURRENT_DATE), + apply_deadline VARCHAR(255), + apply_method VARCHAR(255), + center_address VARCHAR(255), + center_name VARCHAR(255), + client_address VARCHAR(255), + content TEXT, + direct_url TEXT, + pay_info VARCHAR(255), + recruitment_process VARCHAR(255), + 
required_document VARCHAR(255), + title VARCHAR(255), + work_schedule VARCHAR(255), + work_time VARCHAR(255), + entity_status VARCHAR(255), + location POINT SRID 4326, + id BINARY(16) NOT NULL, + PRIMARY KEY (id) +) engine=InnoDB; + +create table deleted_user_info ( + deleted_at datetime(6) NOT NULL, + id BINARY(16) NOT NULL, + phone_number VARCHAR(255) NOT NULL, + reason VARCHAR(255) NOT NULL, + role ENUM ('CENTER','CARER') NOT NULL, + PRIMARY KEY (id) +) engine=InnoDB; + +create table device_token ( + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + user_id BINARY(16), + device_token VARCHAR(255), + entity_status VARCHAR(255), + user_type VARCHAR(255), + PRIMARY KEY (id) +) engine=InnoDB; + +create table job_posting ( + apply_deadline date, + birth_year INT NOT NULL, + care_level INT NOT NULL, + is_bowel_assistance bit NOT NULL, + is_experience_preferred bit NOT NULL, + is_meal_assistance bit NOT NULL, + is_walking_assistance bit NOT NULL, + pay_amount int NOT NULL, + weight int, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + center_id BINARY(16) NOT NULL, + id BINARY(16) NOT NULL, + client_name VARCHAR(255) NOT NULL, + disease TEXT, + end_time VARCHAR(255) NOT NULL, + extra_requirement TEXT, + lot_number_address VARCHAR(255) NOT NULL, + road_name_address VARCHAR(255) NOT NULL, + start_time VARCHAR(255) NOT NULL, + apply_deadline_type ENUM ('LIMITED','UNLIMITED') NOT NULL, + entity_status VARCHAR(255), + gender ENUM ('MAN','WOMAN') NOT NULL, + job_posting_status ENUM ('IN_PROGRESS','COMPLETED') NOT NULL, + location POINT SRID 4326 NOT NULL, + mental_status ENUM ('NORMAL','EARLY_STAGE','OVER_MIDDLE_STAGE') NOT NULL, + pay_type ENUM ('HOURLY','WEEKLY','MONTHLY') NOT NULL, + PRIMARY KEY (id) +) engine=InnoDB; + +create table job_posting_apply_method ( + created_at TIMESTAMP DEFAULT 
CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + job_posting_id BINARY(16) NOT NULL, + apply_method ENUM ('CALLING','APP') NOT NULL, + entity_status VARCHAR(255), + PRIMARY KEY (id) +) engine=InnoDB; + +create table job_posting_favorite ( + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + carer_id BINARY(16) NOT NULL, + id BINARY(16) NOT NULL, + job_posting_id BINARY(16) NOT NULL, + entity_status VARCHAR(255), + job_posting_type ENUM ('WORKNET','CAREMEET') NOT NULL, + PRIMARY KEY (id) +) engine=InnoDB; + +create table job_posting_life_assistance ( + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + job_posting_id BINARY(16) NOT NULL, + entity_status VARCHAR(255), + life_assistance ENUM ('CLEANING','LAUNDRY','WALKING','HEALTH','TALKING','NONE') NOT NULL, + PRIMARY KEY (id) +) engine=InnoDB; + +create table job_posting_weekday ( + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + job_posting_id BINARY(16) NOT NULL, + entity_status VARCHAR(255), + weekday ENUM ('MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY','SUNDAY') NOT NULL, + PRIMARY KEY (id) +) engine=InnoDB; + +create table notification ( + is_read VARCHAR(255), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + id BINARY(16) NOT NULL, + receiver_id BINARY(16), + body VARCHAR(255), + image_url VARCHAR(255), + notification_details_json json, + title VARCHAR(255), + entity_status VARCHAR(255), + notification_type ENUM ('APPLICANT','CENTER_AUTHENTICATION','NEW_JOB_POSTING'), + PRIMARY KEY (id) +) engine=InnoDB; \ No newline at end of file diff --git 
a/idle-domain/src/main/resources/db/migration/V2__add_index.sql b/idle-domain/src/main/resources/db/migration/V2__add_index.sql new file mode 100644 index 00000000..0fe23005 --- /dev/null +++ b/idle-domain/src/main/resources/db/migration/V2__add_index.sql @@ -0,0 +1,14 @@ +-- V2__add_index.sql + +-- Create index for chat room +CREATE INDEX idx_sender_id ON chat_room(sender_id); +CREATE INDEX idx_receiver_id ON chat_room(receiver_id); + +CREATE INDEX idx_receiver_id ON notification(receiver_id); +CREATE INDEX idx_created_at ON notification(created_at); + +CREATE INDEX idx_room_id ON chat_message(room_id); +CREATE INDEX idx_sender_id ON chat_message(sender_id); + +CREATE UNIQUE INDEX idx_phone_number ON center_manager(phone_number); +CREATE UNIQUE INDEX idx_phone_number ON carer(phone_number); \ No newline at end of file diff --git a/idle-domain/src/main/resources/db/migration/V2__create_table_chat_room.sql b/idle-domain/src/main/resources/db/migration/V2__create_table_chat_room.sql deleted file mode 100644 index e3249c94..00000000 --- a/idle-domain/src/main/resources/db/migration/V2__create_table_chat_room.sql +++ /dev/null @@ -1,13 +0,0 @@ --- V2__create_table_chat_room.sql - --- Create table for chat room -CREATE TABLE chat_room ( - id BINARY(16) PRIMARY KEY, - sender_id BINARY(16) NOT NULL, - receiver_id BINARY(16) NOT NULL, - entity_status VARCHAR(50) NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - INDEX (sender_id), - INDEX (receiver_id) -) diff --git a/idle-domain/src/main/resources/db/migration/V3__batch_schema.sql b/idle-domain/src/main/resources/db/migration/V3__batch_schema.sql new file mode 100644 index 00000000..9fc5093e --- /dev/null +++ b/idle-domain/src/main/resources/db/migration/V3__batch_schema.sql @@ -0,0 +1,99 @@ +-- V3__batch_schema.sql + +-- Create batch schema tables +CREATE TABLE BATCH_JOB_INSTANCE ( + JOB_INSTANCE_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT , + JOB_NAME 
VARCHAR(100) NOT NULL, + JOB_KEY VARCHAR(32) NOT NULL, + constraint JOB_INST_UN unique (JOB_NAME, JOB_KEY) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION ( + JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT , + JOB_INSTANCE_ID BIGINT NOT NULL, + CREATE_TIME DATETIME(6) NOT NULL, + START_TIME DATETIME(6) DEFAULT NULL , + END_TIME DATETIME(6) DEFAULT NULL , + STATUS VARCHAR(10) , + EXIT_CODE VARCHAR(2500) , + EXIT_MESSAGE VARCHAR(2500) , + LAST_UPDATED DATETIME(6), + constraint JOB_INST_EXEC_FK foreign key (JOB_INSTANCE_ID) + references BATCH_JOB_INSTANCE(JOB_INSTANCE_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION_PARAMS ( + JOB_EXECUTION_ID BIGINT NOT NULL , + PARAMETER_NAME VARCHAR(100) NOT NULL , + PARAMETER_TYPE VARCHAR(100) NOT NULL , + PARAMETER_VALUE VARCHAR(2500) , + IDENTIFYING CHAR(1) NOT NULL , + constraint JOB_EXEC_PARAMS_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_STEP_EXECUTION ( + STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT NOT NULL, + STEP_NAME VARCHAR(100) NOT NULL, + JOB_EXECUTION_ID BIGINT NOT NULL, + CREATE_TIME DATETIME(6) NOT NULL, + START_TIME DATETIME(6) DEFAULT NULL , + END_TIME DATETIME(6) DEFAULT NULL , + STATUS VARCHAR(10) , + COMMIT_COUNT BIGINT , + READ_COUNT BIGINT , + FILTER_COUNT BIGINT , + WRITE_COUNT BIGINT , + READ_SKIP_COUNT BIGINT , + WRITE_SKIP_COUNT BIGINT , + PROCESS_SKIP_COUNT BIGINT , + ROLLBACK_COUNT BIGINT , + EXIT_CODE VARCHAR(2500) , + EXIT_MESSAGE VARCHAR(2500) , + LAST_UPDATED DATETIME(6), + constraint JOB_EXEC_STEP_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_STEP_EXECUTION_CONTEXT ( + STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY, + SHORT_CONTEXT VARCHAR(2500) NOT NULL, + SERIALIZED_CONTEXT TEXT , + constraint STEP_EXEC_CTX_FK foreign key (STEP_EXECUTION_ID) + references 
BATCH_STEP_EXECUTION(STEP_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION_CONTEXT ( + JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY, + SHORT_CONTEXT VARCHAR(2500) NOT NULL, + SERIALIZED_CONTEXT TEXT , + constraint JOB_EXEC_CTX_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_STEP_EXECUTION_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_STEP_EXECUTION_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_STEP_EXECUTION_SEQ); + +CREATE TABLE BATCH_JOB_EXECUTION_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_JOB_EXECUTION_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_JOB_EXECUTION_SEQ); + +CREATE TABLE BATCH_JOB_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_JOB_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_JOB_SEQ); \ No newline at end of file diff --git a/idle-domain/src/main/resources/db/migration/V3__create_table_chat_message.sql b/idle-domain/src/main/resources/db/migration/V3__create_table_chat_message.sql deleted file mode 100644 index 434134a8..00000000 --- a/idle-domain/src/main/resources/db/migration/V3__create_table_chat_message.sql +++ /dev/null @@ -1,19 +0,0 @@ --- V3__create_table_chat_message.sql - --- Create table for chat message -CREATE TABLE chat_message ( - id BINARY(16) PRIMARY KEY, - room_id BINARY(16) NOT NULL, - sender_id BINARY(16) NOT NULL, - sender_type ENUM('USER', 'SYSTEM') NOT NULL, - contents JSON NOT NULL, - entity_status VARCHAR(50) NOT NULL, - created_at 
TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - INDEX (room_id), - INDEX (sender_id), - INDEX (created_at) -) ENGINE = InnoDB - DEFAULT CHARSET = utf8mb4 - COLLATE = utf8mb4_unicode_ci; - diff --git a/idle-domain/src/main/resources/db/migration/V4__alter_table_notification_add_column_device_token.sql b/idle-domain/src/main/resources/db/migration/V4__alter_table_notification_add_column_device_token.sql deleted file mode 100644 index 3ba2e265..00000000 --- a/idle-domain/src/main/resources/db/migration/V4__alter_table_notification_add_column_device_token.sql +++ /dev/null @@ -1,4 +0,0 @@ --- V4__alter_table_notification_add_column_device_token.sql - --- Add secondary index on deviceToken and userId columns -CREATE INDEX idx_user_id ON notification(user_id); diff --git a/idle-domain/src/main/resources/db/migration/V5__create_index_receiver_id_in_notification_table.sql b/idle-domain/src/main/resources/db/migration/V5__create_index_receiver_id_in_notification_table.sql deleted file mode 100644 index e67edf0c..00000000 --- a/idle-domain/src/main/resources/db/migration/V5__create_index_receiver_id_in_notification_table.sql +++ /dev/null @@ -1,3 +0,0 @@ --- V5__create_index_receiver_id_in_notification_table.sql - -CREATE INDEX idx_receiver_id ON notification(receiver_id); diff --git a/idle-domain/src/main/resources/db/migration/V6__alter_table_carer_modify_column_type.sql b/idle-domain/src/main/resources/db/migration/V6__alter_table_carer_modify_column_type.sql deleted file mode 100644 index 09ffddd1..00000000 --- a/idle-domain/src/main/resources/db/migration/V6__alter_table_carer_modify_column_type.sql +++ /dev/null @@ -1,5 +0,0 @@ --- V6__alter_table_carer_modify_column_type.sql - -ALTER TABLE carer -MODIFY COLUMN introduce TEXT, -MODIFY COLUMN speciality TEXT diff --git a/idle-domain/src/main/resources/db/migration/V7__alter_table_center_modify_column_type.sql 
b/idle-domain/src/main/resources/db/migration/V7__alter_table_center_modify_column_type.sql deleted file mode 100644 index 7bec374c..00000000 --- a/idle-domain/src/main/resources/db/migration/V7__alter_table_center_modify_column_type.sql +++ /dev/null @@ -1,4 +0,0 @@ --- V7__alter_table_center_modify_column_type.sql - -ALTER TABLE center -MODIFY COLUMN introduce TEXT; diff --git a/idle-domain/src/main/resources/db/migration/V8__alter_table_center_manager_add_index.sql b/idle-domain/src/main/resources/db/migration/V8__alter_table_center_manager_add_index.sql deleted file mode 100644 index 2d78b207..00000000 --- a/idle-domain/src/main/resources/db/migration/V8__alter_table_center_manager_add_index.sql +++ /dev/null @@ -1,4 +0,0 @@ --- V8__alter_table_center_manager_add_index.sql - -ALTER TABLE center_manager -ADD UNIQUE INDEX idx_phone_number (phone_number); diff --git a/idle-domain/src/main/resources/db/migration/V9__alter_table_carer_add_index.sql b/idle-domain/src/main/resources/db/migration/V9__alter_table_carer_add_index.sql deleted file mode 100644 index 2223232c..00000000 --- a/idle-domain/src/main/resources/db/migration/V9__alter_table_carer_add_index.sql +++ /dev/null @@ -1,4 +0,0 @@ --- V9__alter_table_carer_add_index.sql - -ALTER TABLE carer -ADD UNIQUE INDEX idx_phone_number (phone_number); diff --git a/idle-infrastructure/client/src/main/kotlin/com/swm/idle/infrastructure/client/geocode/service/GeoCodeService.kt b/idle-infrastructure/client/src/main/kotlin/com/swm/idle/infrastructure/client/geocode/service/GeoCodeService.kt index b5b5fc06..e2824bb0 100644 --- a/idle-infrastructure/client/src/main/kotlin/com/swm/idle/infrastructure/client/geocode/service/GeoCodeService.kt +++ b/idle-infrastructure/client/src/main/kotlin/com/swm/idle/infrastructure/client/geocode/service/GeoCodeService.kt @@ -4,6 +4,7 @@ import com.swm.idle.infrastructure.client.geocode.dto.GeoCodeSearchResultRespons import 
com.swm.idle.infrastructure.client.geocode.exception.GeoCodeException import com.swm.idle.infrastructure.client.geocode.properties.GeoCodeProperties import com.swm.idle.infrastructure.client.geocode.util.GeoCodeClient +import jakarta.annotation.PostConstruct import org.springframework.stereotype.Service import java.net.URI import java.net.URLEncoder @@ -15,6 +16,40 @@ class GeoCodeService( private val geoCodeClient: GeoCodeClient, private val geoCodeProperties: GeoCodeProperties, ) { + companion object { + private lateinit var geoCodeClient: GeoCodeClient + private lateinit var geoCodeProperties: GeoCodeProperties + + fun initialize(client: GeoCodeClient, properties: GeoCodeProperties) { + geoCodeClient = client + geoCodeProperties = properties + } + + fun search(address: String): GeoCodeSearchResultResponse { + val uri = generateSearchUri(address) + try { + return geoCodeClient.send( + uri, + geoCodeProperties.clientId, + geoCodeProperties.clientSecret, + ) + } catch (e: Exception) { + throw GeoCodeException.ResultNotFound() + } + } + + private fun generateSearchUri(address: String): URI { + val baseUrl = geoCodeProperties.baseUrl + val encodedAddress: String = URLEncoder.encode(address, StandardCharsets.UTF_8) + val uriString = "$baseUrl?query=$encodedAddress" + return URI.create(uriString) + } + } + + @PostConstruct + fun init() { + initialize(geoCodeClient, geoCodeProperties) + } fun search(address: String): GeoCodeSearchResultResponse { val uri = generateSearchUri(address) diff --git a/idle-presentation/src/main/kotlin/com/swm/idle/presentation/batch/api/BatchApi.kt b/idle-presentation/src/main/kotlin/com/swm/idle/presentation/batch/api/BatchApi.kt new file mode 100644 index 00000000..7107dd6c --- /dev/null +++ b/idle-presentation/src/main/kotlin/com/swm/idle/presentation/batch/api/BatchApi.kt @@ -0,0 +1,20 @@ +package com.swm.idle.presentation.batch.api + +import io.swagger.v3.oas.annotations.Hidden +import io.swagger.v3.oas.annotations.Operation +import 
io.swagger.v3.oas.annotations.tags.Tag +import org.springframework.http.HttpStatus +import org.springframework.web.bind.annotation.GetMapping +import org.springframework.web.bind.annotation.RequestMapping +import org.springframework.web.bind.annotation.ResponseStatus + +@Tag(name = "Applys - Batch", description = "배치 API") +@RequestMapping("/api/v2/batch", produces = ["application/json;charset=utf-8"]) +interface BatchApi { + + @Hidden + @Operation(summary = "배치 엔트포인트 실행 API") + @GetMapping + @ResponseStatus(HttpStatus.OK) + fun launchBatch() +} \ No newline at end of file diff --git a/idle-presentation/src/main/kotlin/com/swm/idle/presentation/batch/controller/BatchController.kt b/idle-presentation/src/main/kotlin/com/swm/idle/presentation/batch/controller/BatchController.kt new file mode 100644 index 00000000..f7d01d7d --- /dev/null +++ b/idle-presentation/src/main/kotlin/com/swm/idle/presentation/batch/controller/BatchController.kt @@ -0,0 +1,12 @@ +package com.swm.idle.presentation.batch.controller + +import com.swm.idle.batch.common.launcher.CrawlingJobLauncher +import com.swm.idle.presentation.batch.api.BatchApi +import org.springframework.web.bind.annotation.RestController + +@RestController +class BatchController( + private val jobLauncher: CrawlingJobLauncher, +) : BatchApi { + override fun launchBatch() = jobLauncher.jobStart() +} \ No newline at end of file