diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/01-create-function-get-json-value.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/01-create-function-get-json-value.sql new file mode 100644 index 00000000..0e9e4bc7 --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/01-create-function-get-json-value.sql @@ -0,0 +1,14 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-get-json-value-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP FUNCTION IF EXISTS GetJsonValue// + +CREATE FUNCTION GetJsonValue(json_data TEXT, json_path VARCHAR(255)) +RETURNS TEXT DETERMINISTIC +BEGIN +DECLARE result TEXT; +-- 문자열 값에서 따옴표를 제거하기 위해 JSON_UNQUOTE 사용 +SET result = JSON_UNQUOTE(JSON_EXTRACT(json_data, json_path)); + -- 존재하지 않는 경로에 대해 NULL대신 빈문자열 반환 + RETURN COALESCE(result, ''); +END// \ No newline at end of file diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/02-create-function-safe-to-double.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/02-create-function-safe-to-double.sql new file mode 100644 index 00000000..ea5ba81a --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/02-create-function-safe-to-double.sql @@ -0,0 +1,14 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-function-safe-to-double-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP FUNCTION IF EXISTS SafeToDouble// + +CREATE FUNCTION SafeToDouble(str VARCHAR(255)) +RETURNS DOUBLE DETERMINISTIC +BEGIN +DECLARE result DOUBLE DEFAULT NULL; +IF str REGEXP '^[0-9]+([.][0-9]+)?$' THEN +SET result = CAST(str AS DOUBLE); +END IF; + RETURN result; +END// \ No newline at end of file diff --git 
a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/03-create-function-safe-to-long.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/03-create-function-safe-to-long.sql new file mode 100644 index 00000000..ad9fee8b --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/function/03-create-function-safe-to-long.sql @@ -0,0 +1,14 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-function-safe-to-long-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP FUNCTION IF EXISTS SafeToLong// + +CREATE FUNCTION SafeToLong(str VARCHAR(255)) +RETURNS BIGINT DETERMINISTIC +BEGIN +DECLARE result BIGINT DEFAULT 0; + IF str REGEXP '^[0-9]+$' THEN + SET result = CAST(str AS SIGNED); + END IF; + RETURN result; +END// \ No newline at end of file diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/01-create-procedure-migrate-naver-raw-article.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/01-create-procedure-migrate-naver-raw-article.sql new file mode 100644 index 00000000..f8c1f986 --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/01-create-procedure-migrate-naver-raw-article.sql @@ -0,0 +1,135 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-procedure-migrate-naver-raw-article-v1 runOnChange:true endDelimiter:// dbms:mariadb +DROP PROCEDURE IF EXISTS MigrateNaverRawArticle// + +CREATE PROCEDURE MigrateNaverRawArticle(IN article_id BIGINT) +BEGIN + DECLARE raw_data TEXT; + DECLARE article_no VARCHAR(255); + DECLARE region_code VARCHAR(255); + DECLARE trad_tp_nm VARCHAR(50); + DECLARE category_val VARCHAR(20); + DECLARE price_val BIGINT DEFAULT 0; + DECLARE deposit_val BIGINT DEFAULT 0; + DECLARE monthly_rent_val BIGINT DEFAULT 0; + DECLARE error_msg TEXT; + DECLARE cortar_no_val 
BIGINT; + + -- 매물이 존재여부 확인 + DECLARE article_exists INT DEFAULT 0; + + -- 예외처리 + DECLARE CONTINUE HANDLER FOR SQLEXCEPTION + BEGIN + GET DIAGNOSTICS CONDITION 1 error_msg = MESSAGE_TEXT; + -- 에러로그 기록 + UPDATE naver_raw_articles + SET migration_status = 'FAILED', + migration_error = error_msg, + migrated_at = NOW() + WHERE id = article_id; + END; + + -- 로우 데이터 셋 (로컬 변수가 동일 이름의 컬럼을 가리므로 별칭으로 컬럼을 한정) + SELECT nra.raw_data, nra.article_id, nra.cortar_no INTO raw_data, article_no, cortar_no_val + FROM naver_raw_articles nra + WHERE nra.id = article_id; + + -- 지역 코드 셋 + SET region_code = CAST(cortar_no_val AS CHAR); + + -- 추출타입 셋 카테고리 셋 + SET trad_tp_nm = GetJsonValue(raw_data, '$.tradTpNm'); + + CASE trad_tp_nm + WHEN '매매' THEN + SET category_val = 'SALE'; + SET price_val = SafeToLong(GetJsonValue(raw_data, '$.prc')); + SET deposit_val = NULL; + SET monthly_rent_val = NULL; + WHEN '전세' THEN + SET category_val = 'DEPOSIT'; + SET price_val = NULL; + SET deposit_val = SafeToLong(GetJsonValue(raw_data, '$.prc')); + SET monthly_rent_val = NULL; + WHEN '월세' THEN + SET category_val = 'MONTHLY'; + SET price_val = NULL; + SET deposit_val = SafeToLong(GetJsonValue(raw_data, '$.prc')); + SET monthly_rent_val = SafeToLong(GetJsonValue(raw_data, '$.rentPrc')); + ELSE + SET category_val = 'SALE'; + SET price_val = 0; + SET deposit_val = NULL; + SET monthly_rent_val = NULL; + END CASE; + + -- 이미 존재하는지 체크 + SELECT COUNT(*) INTO article_exists + FROM property_articles + WHERE property_articles.article_id = article_no; + + IF article_exists > 0 THEN + -- 기존기록 업데이트 + UPDATE property_articles + SET region_code = region_code, + category = category_val, + building_name = GetJsonValue(raw_data, '$.atclNm'), + description = GetJsonValue(raw_data, '$.atclFetrDesc'), + building_type = GetJsonValue(raw_data, '$.rletTpNm'), + price = price_val, + deposit = deposit_val, + monthly_rent = monthly_rent_val, + longitude = SafeToDouble(GetJsonValue(raw_data, '$.lng')), + latitude = SafeToDouble(GetJsonValue(raw_data, '$.lat')), + supply_area = 
SafeToDouble(GetJsonValue(raw_data, '$.spc1')), + exclusive_area = SafeToDouble(GetJsonValue(raw_data, '$.spc2')), + platform = 'NAVER', + updated_at = NOW() + WHERE property_articles.article_id = article_no; + ELSE + -- 새기록 삽입 + INSERT INTO property_articles ( + article_id, + region_code, + category, + building_name, + description, + building_type, + price, + deposit, + monthly_rent, + longitude, + latitude, + supply_area, + exclusive_area, + platform, + created_at, + updated_at + ) VALUES ( + article_no, + region_code, + category_val, + GetJsonValue(raw_data, '$.atclNm'), + GetJsonValue(raw_data, '$.atclFetrDesc'), + GetJsonValue(raw_data, '$.rletTpNm'), + price_val, + deposit_val, + monthly_rent_val, + SafeToDouble(GetJsonValue(raw_data, '$.lng')), + SafeToDouble(GetJsonValue(raw_data, '$.lat')), + SafeToDouble(GetJsonValue(raw_data, '$.spc1')), + SafeToDouble(GetJsonValue(raw_data, '$.spc2')), + 'NAVER', + NOW(), + NOW() + ); + END IF; + + -- 완료로 표시 + UPDATE naver_raw_articles + SET migration_status = 'COMPLETED', + migration_error = NULL, + migrated_at = NOW() + WHERE id = article_id; +END// diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/auto/01-create-trigger-after-insert.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/auto/01-create-trigger-after-insert.sql new file mode 100644 index 00000000..8d66955a --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/auto/01-create-trigger-after-insert.sql @@ -0,0 +1,14 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-trigger-after-insert-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP TRIGGER IF EXISTS after_naver_raw_article_insert// + +CREATE TRIGGER after_naver_raw_article_insert +AFTER INSERT ON naver_raw_articles +FOR EACH ROW +BEGIN +-- PENDING 상태이거나 NULL인 경우에만 마이그레이션 호출 +IF NEW.migration_status = 'PENDING' OR NEW.migration_status IS NULL THEN +CALL 
MigrateNaverRawArticle(NEW.id); +END IF; +END// diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/auto/02-create-trigger-after-update.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/auto/02-create-trigger-after-update.sql new file mode 100644 index 00000000..9d172baf --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/auto/02-create-trigger-after-update.sql @@ -0,0 +1,14 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-trigger-after-update-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP TRIGGER IF EXISTS after_naver_raw_article_update// + +CREATE TRIGGER after_naver_raw_article_update +AFTER UPDATE ON naver_raw_articles +FOR EACH ROW + BEGIN + -- 펜딩으로 변경시 트리거 + IF NEW.migration_status = 'PENDING' AND (OLD.migration_status != 'PENDING' OR OLD.migration_status IS NULL) THEN + CALL MigrateNaverRawArticle(NEW.id); + END IF; +END// diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/01-create-procedure-migrate-all-pending-articles.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/01-create-procedure-migrate-all-pending-articles.sql new file mode 100644 index 00000000..cc137256 --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/01-create-procedure-migrate-all-pending-articles.sql @@ -0,0 +1,24 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-procedure-migrate-all-pending-articles-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP PROCEDURE IF EXISTS MigrateAllPendingArticles// + +CREATE PROCEDURE MigrateAllPendingArticles() +BEGIN +DECLARE done INT DEFAULT FALSE; +DECLARE article_id BIGINT; +DECLARE cur CURSOR FOR SELECT id FROM naver_raw_articles WHERE migration_status = 'PENDING'; +DECLARE CONTINUE HANDLER FOR NOT FOUND 
SET done = TRUE; + OPEN cur; + read_loop: LOOP + FETCH cur INTO article_id; + IF done THEN + LEAVE read_loop; + END IF; + CALL MigrateNaverRawArticle(article_id); + -- 데이터베이스 과부하 방지를 위한 지연 + DO SLEEP(0.01); + END LOOP; + + CLOSE cur; +END// diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/02-create-procedure-migrate-articles-by-region.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/02-create-procedure-migrate-articles-by-region.sql new file mode 100644 index 00000000..ef9c6f28 --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/02-create-procedure-migrate-articles-by-region.sql @@ -0,0 +1,29 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-procedure-migrate-articles-by-region-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP PROCEDURE IF EXISTS MigrateArticlesByRegion// + +CREATE PROCEDURE MigrateArticlesByRegion(IN region_code BIGINT) +BEGIN +DECLARE done INT DEFAULT FALSE; +DECLARE article_id BIGINT; +DECLARE cur CURSOR FOR SELECT id FROM naver_raw_articles +WHERE cortar_no = region_code +AND migration_status = 'PENDING'; +DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE; + OPEN cur; + + read_loop: LOOP + FETCH cur INTO article_id; + IF done THEN + LEAVE read_loop; + END IF; + + CALL MigrateNaverRawArticle(article_id); + + -- 과부하 방지를 위한 지연 + DO SLEEP(0.01); + END LOOP; + + CLOSE cur; +END// \ No newline at end of file diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/03-create-procedure-retry-failed-migrations.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/03-create-procedure-retry-failed-migrations.sql new file mode 100644 index 00000000..c220b05c --- /dev/null +++ 
b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/procedures/manual/03-create-procedure-retry-failed-migrations.sql @@ -0,0 +1,16 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-procedure-retry-failed-migrations-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP PROCEDURE IF EXISTS RetryFailedMigrations// + +CREATE PROCEDURE RetryFailedMigrations() +BEGIN +-- 실패한 함수들 펜딩으로 변경 +UPDATE naver_raw_articles +SET migration_status = 'PENDING', +migration_error = NULL, +migrated_at = NULL +WHERE migration_status = 'FAILED'; +-- 재실행 + CALL MigrateAllPendingArticles(); +END// \ No newline at end of file diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/tables/01-create-naver-raw-articles.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/tables/01-create-naver-raw-articles.sql new file mode 100644 index 00000000..e69de29b diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/view/01-create-view-migration-statistics.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/view/01-create-view-migration-statistics.sql new file mode 100644 index 00000000..6c4804d0 --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/view/01-create-view-migration-statistics.sql @@ -0,0 +1,12 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-view-migration-statistics-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP VIEW IF EXISTS migration_statistics// + +CREATE OR REPLACE VIEW migration_statistics AS +SELECT + COUNT(*) as total_count, + SUM(CASE WHEN migration_status = 'PENDING' THEN 1 ELSE 0 END) as pending_count, + SUM(CASE WHEN migration_status = 'COMPLETED' THEN 1 ELSE 0 END) as completed_count, + SUM(CASE WHEN migration_status = 'FAILED' THEN 1 ELSE 0 END) as failed_count +FROM naver_raw_articles// \ No newline at end of file diff --git 
a/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/view/02-create-view-migration-statistics-by-region.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/view/02-create-view-migration-statistics-by-region.sql new file mode 100644 index 00000000..216858d6 --- /dev/null +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/naverMigration/view/02-create-view-migration-statistics-by-region.sql @@ -0,0 +1,14 @@ +--liquibase formatted sql +--changeset jungwoo_shin:create-view-migration-statistics-by-region-v1 runOnChange:true endDelimiter:// dbms:mariadb + +DROP VIEW IF EXISTS migration_statistics_by_region// + +CREATE OR REPLACE VIEW migration_statistics_by_region AS +SELECT + cortar_no, + COUNT(*) as total_count, + SUM(CASE WHEN migration_status = 'PENDING' THEN 1 ELSE 0 END) as pending_count, + SUM(CASE WHEN migration_status = 'COMPLETED' THEN 1 ELSE 0 END) as completed_count, + SUM(CASE WHEN migration_status = 'FAILED' THEN 1 ELSE 0 END) as failed_count +FROM naver_raw_articles +GROUP BY cortar_no// \ No newline at end of file diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/01-create-regions-table.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/01-create-regions-table.sql similarity index 82% rename from crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/01-create-regions-table.sql rename to crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/01-create-regions-table.sql index 2dbd7bb4..c5068d7a 100644 --- a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/01-create-regions-table.sql +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/01-create-regions-table.sql @@ -1,5 +1,5 @@ --liquibase formatted sql ---changeset jungwoo_shin:create-regions-table dbms:mariadb +--changeset jungwoo_shin:create-regions-table-v1 
runOnChange:true dbms:mariadb CREATE TABLE IF NOT EXISTS regions ( id BIGINT AUTO_INCREMENT PRIMARY KEY, diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/02-create-migrations-table.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/02-create-migrations-table.sql similarity index 63% rename from crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/02-create-migrations-table.sql rename to crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/02-create-migrations-table.sql index b5da330d..d0631912 100644 --- a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/02-create-migrations-table.sql +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/02-create-migrations-table.sql @@ -1,12 +1,10 @@ --liquibase formatted sql ---changeset jungwoo_shin:create-migrations-table dbms:mariadb +--changeset jungwoo_shin:create-migrations-table-v1 runOnChange:true dbms:mariadb CREATE TABLE IF NOT EXISTS migrations ( id BIGINT AUTO_INCREMENT PRIMARY KEY, cortar_no BIGINT NOT NULL UNIQUE, naver_status VARCHAR(20), naver_last_migrated_at DATETIME, - zigbang_status VARCHAR(20), - zigbang_last_migrated_at DATETIME, error_log TEXT ); \ No newline at end of file diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/03-create-crawls-table.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/03-create-crawls-table.sql similarity index 64% rename from crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/03-create-crawls-table.sql rename to crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/03-create-crawls-table.sql index 9c43db60..f5323422 100644 --- a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/tables/03-create-crawls-table.sql +++ 
b/crawlerserver/common/src/main/resources/db/changelog/changes/region/tables/03-create-crawls-table.sql @@ -1,12 +1,10 @@ --liquibase formatted sql ---changeset jungwoo_shin:create-crawls-table dbms:mariadb +--changeset jungwoo_shin:create-crawls-table-v1 runOnChange:true dbms:mariadb CREATE TABLE IF NOT EXISTS crawls ( id BIGINT AUTO_INCREMENT PRIMARY KEY, cortar_no BIGINT NOT NULL UNIQUE, naver_status VARCHAR(20), naver_last_crawled_at DATETIME, - zigbang_status VARCHAR(20), - zigbang_last_crawled_at DATETIME, error_log TEXT ); \ No newline at end of file diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/02-create-after-region-insert-trigger.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/region/triggers/02-create-after-region-insert-trigger.sql similarity index 71% rename from crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/02-create-after-region-insert-trigger.sql rename to crawlerserver/common/src/main/resources/db/changelog/changes/region/triggers/02-create-after-region-insert-trigger.sql index c9140237..3f083b34 100644 --- a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/02-create-after-region-insert-trigger.sql +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/region/triggers/02-create-after-region-insert-trigger.sql @@ -1,5 +1,7 @@ --liquibase formatted sql ---changeset jungwoo_shin:create-after-region-insert-trigger runOnChange="true" endDelimiter:// dbms:mariadb +--changeset jungwoo_shin:create-after-region-insert-trigger-v1 runOnChange="true" endDelimiter:// dbms:mariadb + +DROP TRIGGER IF EXISTS after_region_insert// CREATE TRIGGER after_region_insert AFTER INSERT ON regions diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/03-create-after-region-update-trigger.sql 
b/crawlerserver/common/src/main/resources/db/changelog/changes/region/triggers/03-create-after-region-update-trigger.sql similarity index 55% rename from crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/03-create-after-region-update-trigger.sql rename to crawlerserver/common/src/main/resources/db/changelog/changes/region/triggers/03-create-after-region-update-trigger.sql index 5a1bd0a9..660957ea 100644 --- a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/03-create-after-region-update-trigger.sql +++ b/crawlerserver/common/src/main/resources/db/changelog/changes/region/triggers/03-create-after-region-update-trigger.sql @@ -1,15 +1,20 @@ --liquibase formatted sql ---changeset jungwoo_shin:create-after-region-update-trigger runOnChange="true" endDelimiter:// dbms:mariadb +--changeset jungwoo_shin:create-after-region-update-trigger-v1 runOnChange="true" endDelimiter:// dbms:mariadb + +DROP TRIGGER IF EXISTS after_region_update// CREATE TRIGGER after_region_update AFTER UPDATE ON regions FOR EACH ROW -region_update:BEGIN +BEGIN IF NEW.level = 3 THEN INSERT INTO migrations (cortar_no, naver_status, zigbang_status) VALUES (NEW.cortar_no, 'NEW', 'NEW') - ON DUPLICATE KEY UPDATE naver_status = 'NEW', zigbang_status = 'NEW', - naver_last_migrated_at = NULL, zigbang_last_migrated_at = NULL; + ON DUPLICATE KEY UPDATE + naver_status = 'NEW', + zigbang_status = 'NEW', + naver_last_migrated_at = NULL, + zigbang_last_migrated_at = NULL; ELSEIF OLD.level = 3 AND NEW.level != 3 THEN DELETE FROM migrations WHERE cortar_no = OLD.cortar_no; END IF; @@ -17,8 +22,11 @@ region_update:BEGIN IF NEW.level = 3 THEN INSERT INTO crawls (cortar_no, naver_status, zigbang_status) VALUES (NEW.cortar_no, 'NEW', 'NEW') - ON DUPLICATE KEY UPDATE naver_status = 'NEW', zigbang_status = 'NEW', - naver_last_crawled_at = NULL, zigbang_last_crawled_at = NULL; + ON DUPLICATE KEY UPDATE + naver_status = 'NEW', + zigbang_status = 'NEW', + 
naver_last_crawled_at = NULL, + zigbang_last_crawled_at = NULL; ELSEIF OLD.level = 3 AND NEW.level != 3 THEN DELETE FROM crawls WHERE cortar_no = OLD.cortar_no; END IF; diff --git a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/01-drop-trigger.sql b/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/01-drop-trigger.sql deleted file mode 100644 index 2db86e90..00000000 --- a/crawlerserver/common/src/main/resources/db/changelog/changes/v1/region/triggers/01-drop-trigger.sql +++ /dev/null @@ -1,5 +0,0 @@ ---liquibase formatted sql ---changeset jungwoo_shin:drop-triggers runOnChange="true" dbms:mariadb - -DROP TRIGGER IF EXISTS after_region_insert; -DROP TRIGGER IF EXISTS after_region_update; \ No newline at end of file diff --git a/crawlerserver/common/src/main/resources/db/changelog/db.changelog-master.yaml b/crawlerserver/common/src/main/resources/db/changelog/db.changelog-master.yaml index b1d8fc7e..496f9a7a 100644 --- a/crawlerserver/common/src/main/resources/db/changelog/db.changelog-master.yaml +++ b/crawlerserver/common/src/main/resources/db/changelog/db.changelog-master.yaml @@ -1,20 +1,66 @@ databaseChangeLog: + ## 실제 DB 테이블 생성 (리퀴베이스는 JPA 테이블 생성전에 실행) - include: - file: changes/v1/region/tables/01-create-regions-table.sql + file: changes/region/tables/01-create-regions-table.sql relativeToChangelogFile: true - include: - file: changes/v1/region/tables/02-create-migrations-table.sql + file: changes/region/tables/02-create-migrations-table.sql relativeToChangelogFile: true - include: - file: changes/v1/region/tables/03-create-crawls-table.sql + file: changes/region/tables/03-create-crawls-table.sql relativeToChangelogFile: true + + ## Json 파싱 함수 + - include: + file: changes/naverMigration/function/01-create-function-get-json-value.sql + relativeToChangelogFile: true + - include: + file: changes/naverMigration/function/02-create-function-safe-to-double.sql + relativeToChangelogFile: true + - 
include: + file: changes/naverMigration/function/03-create-function-safe-to-long.sql + relativeToChangelogFile: true + + ## 매물 마이그레이션 함수 + - include: + file: changes/naverMigration/procedures/01-create-procedure-migrate-naver-raw-article.sql + relativeToChangelogFile: true + +# ## 자동 인서트 트리거 +# - include: +# file: db/changelog/changes/01-create-trigger-after-insert.sql +# - include: +# file: db/changelog/changes/06-create-trigger-after-update.sql +# + ## 수동 트리거 함수 - include: - file: changes/v1/region/triggers/01-drop-trigger.sql + file: changes/naverMigration/procedures/manual/01-create-procedure-migrate-all-pending-articles.sql relativeToChangelogFile: true - include: - file: changes/v1/region/triggers/02-create-after-region-insert-trigger.sql + file: changes/naverMigration/procedures/manual/02-create-procedure-migrate-articles-by-region.sql relativeToChangelogFile: true - include: - file: changes/v1/region/triggers/03-create-after-region-update-trigger.sql + file: changes/naverMigration/procedures/manual/03-create-procedure-retry-failed-migrations.sql relativeToChangelogFile: true + ## 지역코드 크롤링 자동 트리거 +# - include: +# file: changes/v1/region/triggers/01-drop-trigger.sql +# relativeToChangelogFile: true + - include: + file: changes/region/triggers/02-create-after-region-insert-trigger.sql + relativeToChangelogFile: true + - include: + file: changes/region/triggers/03-create-after-region-update-trigger.sql + relativeToChangelogFile: true + + + ## 상태확인 뷰 + - include: + file: changes/naverMigration/view/01-create-view-migration-statistics.sql + relativeToChangelogFile: true + - include: + file: changes/naverMigration/view/02-create-view-migration-statistics-by-region.sql + relativeToChangelogFile: true + + diff --git a/crawlerserver/controller/src/main/java/com/zipline/controller/migration/NaverMigrationController.java b/crawlerserver/controller/src/main/java/com/zipline/controller/migration/NaverMigrationController.java index dc5e4a53..28927674 100644 --- 
a/crawlerserver/controller/src/main/java/com/zipline/controller/migration/NaverMigrationController.java +++ b/crawlerserver/controller/src/main/java/com/zipline/controller/migration/NaverMigrationController.java @@ -31,6 +31,13 @@ public ResponseEntity> migrateRegion(@PathVariable return ResponseEntity.ok(response); } + @GetMapping("/retry/") + public ResponseEntity> retryFailedMigration() { + TaskResponseDto result = migrationService.retryFailedMigrations(); + ApiResponse response = ApiResponse.ok("실패한 네이버 마이그레이션 재시도",result); + return ResponseEntity.ok(response); + } + @GetMapping("/status/{taskType}") public ResponseEntity> getTaskStatus(@PathVariable TaskType taskType) { TaskResponseDto result = migrationService.getTaskStatus(taskType); diff --git a/crawlerserver/domain/src/main/java/com/zipline/domain/entity/zigbang/ZigBangArticle.java b/crawlerserver/domain/src/main/java/com/zipline/domain/entity/zigbang/ZigBangArticle.java index b336091a..7edc138d 100644 --- a/crawlerserver/domain/src/main/java/com/zipline/domain/entity/zigbang/ZigBangArticle.java +++ b/crawlerserver/domain/src/main/java/com/zipline/domain/entity/zigbang/ZigBangArticle.java @@ -37,6 +37,9 @@ public class ZigBangArticle { @Column(name = "created_at", nullable = false) private LocalDateTime createdAt; + @Column(name = "updated_at") + private LocalDateTime updatedAt; + @Column(name = "migration_status") @Enumerated(EnumType.STRING) private MigrationStatus migrationStatus; @@ -47,7 +50,6 @@ public class ZigBangArticle { @Column(name = "migrated_at") private LocalDateTime migratedAt; - public ZigBangArticle create(String articleId, String geohash, PropertyCategory category, String rawData) { this.articleId = articleId; this.geohash = geohash; @@ -56,4 +58,12 @@ public ZigBangArticle create(String articleId, String geohash, PropertyCategory this.createdAt = LocalDateTime.now(); return this; } + + public ZigBangArticle update(String geohash, PropertyCategory category, String rawData) { + 
this.geohash = geohash; + this.category = category; + this.rawData = rawData; + this.updatedAt = LocalDateTime.now(); + return this; + } } \ No newline at end of file diff --git a/crawlerserver/infrastructure/src/main/java/com/zipline/infrastructure/naver/NaverRawArticleRepository.java b/crawlerserver/infrastructure/src/main/java/com/zipline/infrastructure/naver/NaverRawArticleRepository.java index 51670824..b288adcb 100644 --- a/crawlerserver/infrastructure/src/main/java/com/zipline/infrastructure/naver/NaverRawArticleRepository.java +++ b/crawlerserver/infrastructure/src/main/java/com/zipline/infrastructure/naver/NaverRawArticleRepository.java @@ -9,6 +9,7 @@ import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; +import org.springframework.data.jpa.repository.query.Procedure; import org.springframework.data.repository.query.Param; import org.springframework.stereotype.Repository; import org.springframework.transaction.annotation.Transactional; @@ -58,9 +59,6 @@ public interface NaverRawArticleRepository extends JpaRepository findByCreatedAtAfter(LocalDateTime date); - /** - * 특정 마이그레이션 상태의 데이터 수 조회 - */ long countByMigrationStatus(MigrationStatus status); + + @Procedure(name = "MigrateAllPendingArticles") + void migrateAllPendingArticles(); + + @Procedure(name = "MigrateArticlesByRegion") + void migrateArticlesByRegion(@Param("region_code") Long regionCode); + + @Procedure(name = "RetryFailedMigrations") + void retryFailedMigrations(); + } diff --git a/crawlerserver/infrastructure/src/main/java/com/zipline/infrastructure/zigbang/ZigBangArticleRepository.java b/crawlerserver/infrastructure/src/main/java/com/zipline/infrastructure/zigbang/ZigBangArticleRepository.java index a1eb0fde..c45c0788 100644 --- a/crawlerserver/infrastructure/src/main/java/com/zipline/infrastructure/zigbang/ZigBangArticleRepository.java +++ 
b/crawlerserver/infrastructure/src/main/java/com/zipline/infrastructure/zigbang/ZigBangArticleRepository.java @@ -4,6 +4,9 @@ import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.stereotype.Repository; +import java.util.List; + @Repository public interface ZigBangArticleRepository extends JpaRepository { + List findAllByArticleIdIn(List articleIds); } diff --git a/crawlerserver/service/src/main/java/com/zipline/service/migration/NaverMigrationService.java b/crawlerserver/service/src/main/java/com/zipline/service/migration/NaverMigrationService.java index a0e8bf54..b334cb80 100644 --- a/crawlerserver/service/src/main/java/com/zipline/service/migration/NaverMigrationService.java +++ b/crawlerserver/service/src/main/java/com/zipline/service/migration/NaverMigrationService.java @@ -5,6 +5,7 @@ public interface NaverMigrationService { TaskResponseDto startFullMigration(); - TaskResponseDto migrateRegion(Long cortarNo); + TaskResponseDto retryFailedMigrations(); + TaskResponseDto migrateRegion(Long cortarNo); TaskResponseDto getTaskStatus(TaskType TaskType); } diff --git a/crawlerserver/service/src/main/java/com/zipline/service/migration/NaverMigrationServiceImpl.java b/crawlerserver/service/src/main/java/com/zipline/service/migration/NaverMigrationServiceImpl.java index c7d331ad..c303460f 100644 --- a/crawlerserver/service/src/main/java/com/zipline/service/migration/NaverMigrationServiceImpl.java +++ b/crawlerserver/service/src/main/java/com/zipline/service/migration/NaverMigrationServiceImpl.java @@ -1,166 +1,104 @@ package com.zipline.service.migration; -import java.util.List; import java.util.concurrent.CompletableFuture; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.zipline.domain.entity.enums.MigrationStatus; -import com.zipline.domain.entity.enums.Platform; -import com.zipline.domain.entity.migration.Migration; -import 
com.zipline.domain.entity.naver.NaverRawArticle; -import com.zipline.domain.entity.publicitem.PropertyArticle; import com.zipline.global.exception.migration.MigrationException; import com.zipline.global.exception.migration.errorcode.MigrationErrorCode; import com.zipline.global.exception.task.TaskException; import com.zipline.global.exception.task.errorcode.TaskErrorCode; -import com.zipline.infrastructure.migration.MigrationRepository; import com.zipline.service.task.Task; +import com.zipline.service.task.TaskDefinition; +import com.zipline.service.task.TaskExecutionHandler; import com.zipline.service.task.TaskManager; import com.zipline.service.task.dto.TaskResponseDto; import com.zipline.service.task.enums.TaskType; import com.zipline.infrastructure.naver.NaverRawArticleRepository; -import com.zipline.infrastructure.publicItem.PropertyArticleRepository; import org.springframework.core.task.TaskExecutor; -import org.springframework.data.domain.Page; -import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.transaction.annotation.Transactional; @Slf4j @Service @RequiredArgsConstructor public class NaverMigrationServiceImpl implements NaverMigrationService { - private final TaskManager taskManager; - private final TaskExecutor taskExecutor; - private final MigrationRepository migrationRepository; + private final TaskManager taskManager; private final NaverRawArticleRepository naverRawArticleRepository; - private final PropertyArticleRepository propertyArticleRepository; - private final ObjectMapper objectMapper; - private static final int BATCH_SIZE = 100; + private final TaskExecutionHandler taskExecutionHandler; @Override public TaskResponseDto startFullMigration() { - if (taskManager.isTaskRunning(TaskType.MIGRATION)){ - throw new TaskException(TaskErrorCode.TASK_ALREADY_RUNNING);} - Task task = 
taskManager.createTask(TaskType.MIGRATION); - try { - CompletableFuture.runAsync(() -> { - executeFullMigrationAsync(task); - }, taskExecutor); - } catch (Exception e) { - log.error("마이그레이션 작업 실행중 오류 발생: {}", e.getMessage(), e); - taskManager.removeTask(TaskType.MIGRATION); - } - taskManager.removeTask(TaskType.MIGRATION); - return TaskResponseDto.fromTask(task); + return taskExecutionHandler.execute( + TaskDefinition.of( + TaskType.MIGRATION, + "전체 지역 마이그레이션", + () -> executeFullMigrationAsync()) + ); } - private void executeFullMigrationAsync(Task task) { - log.info("=== 네이버 원본 매물 데이터 마이그레이션 작업 시작 ==="); - List allRegionCodes = migrationRepository.findAllCortarNos(); - log.info("총 {} 개 지역에 대한 마이그레이션 시작", allRegionCodes.size()); - for (Long cortarNo : allRegionCodes) { - try { - executeRegionMigrationAsync(task, cortarNo); - log.info("지역 {} 마이그레이션 완료", cortarNo); - } catch (Exception e) { - String errorMessage = String.format("지역 %s 마이그레이션 중 오류 발생: %s", cortarNo, e.getMessage()); - log.error("지역 {} 마이그레이션 중 오류 발생: {}", cortarNo, e.getMessage()); - } - } - log.info("=== 네이버 원본 매물 데이터 마이그레이션 작업 완료 ==="); + @Override + public TaskResponseDto migrateRegion(Long regionId) { + return taskExecutionHandler.execute( + TaskDefinition.of( + TaskType.MIGRATION, + "특정 지역 마이그레이션", + () -> executeRegionMigrationAsync(regionId)) + ); } @Override - public TaskResponseDto migrateRegion(Long regionId) { - if (taskManager.isTaskRunning(TaskType.MIGRATION)){ - log.error("마이그레이션 작업이 이미 실행 중입니다."); - throw new TaskException(TaskErrorCode.TASK_ALREADY_RUNNING); - } - Task task = taskManager.createTask(TaskType.MIGRATION, regionId); + public TaskResponseDto retryFailedMigrations() { + return taskExecutionHandler.execute( + TaskDefinition.of( + TaskType.MIGRATION, + "실패한 마이그레이션 재시도", + () -> executeRetryFailedMigrationsAsync()) + ); + } + + @Override + public TaskResponseDto getTaskStatus(TaskType taskName) { + Task task = taskManager.getTaskByType(taskName); + return 
TaskResponseDto.fromTask(task); + } + + private void executeFullMigrationAsync() { + log.info("=== 전체 지역 마이그레이션 시작 (저장 프로시저 호출) ==="); try { - CompletableFuture.runAsync(() -> { - executeRegionMigrationAsync(task, regionId); - }, taskExecutor); + naverRawArticleRepository.migrateAllPendingArticles(); + log.info("전체 마이그레이션 성공적으로 완료"); } catch (Exception e) { - log.error("지역 {} 마이그레이션 작업 중 오류 발생: {}", regionId, e.getMessage(), e); - // 추후 실패작업 재실행 구현시 삭제 + log.error("전체 마이그레이션 실패: {}", e.getMessage(), e); + throw new MigrationException(MigrationErrorCode.MIGRATION_FAILED); + } finally { taskManager.removeTask(TaskType.MIGRATION); } - taskManager.removeTask(TaskType.MIGRATION); - return TaskResponseDto.fromTask(task); } - private void executeRegionMigrationAsync(Task task, Long regionId) { - log.info("지역 코드 {} 원본 매물 데이터 마이그레이션 시작", regionId); - boolean hasMoreData = true; - int pageNumber = 0; - int failedCount = 0; - - while (hasMoreData) { - PageRequest pageRequest = PageRequest.of(0, BATCH_SIZE); - Page pendingArticle = naverRawArticleRepository.findByCortarNoAndMigrationStatus(regionId, MigrationStatus.PENDING, pageRequest); - int batchSize = pendingArticle.getSize(); - - for (NaverRawArticle rawArticle : pendingArticle) { - try { - migrateRawArticle(rawArticle); - } catch (Exception e) { - failedCount++; - String errorMessage = String.format("지역 코드 %s 마이그레이션 중 오류 발생: 실패 카운트{} message: {}", regionId, failedCount, e.getMessage(), e); - Migration migration = migrationRepository.findByCortarNo(regionId); - migration.errorWithLog(Platform.NAVER, errorMessage, 1000, MigrationStatus.FAILED); - migrationRepository.save(migration); - log.error(errorMessage, e); - } - } - pageNumber++; - hasMoreData = pendingArticle.hasNext(); - // 메모리 관리를 위해 주기적으로 GC 힌트 - if (pageNumber % 10 == 0) { - System.gc(); - } - } - if (failedCount > 0) { - log.error("{}개 지역 마이그레이션 중 오류 발생", failedCount); + private void executeRegionMigrationAsync(Long regionId) { + log.info("지역 코드 {} 마이그레이션 시작 
(저장 프로시저 호출)", regionId); + try { + naverRawArticleRepository.migrateArticlesByRegion(regionId); + log.info("지역 코드 {} 마이그레이션 완료", regionId); + } catch (Exception e) { + log.error("지역 코드 {} 마이그레이션 실패: {}", regionId, e.getMessage(), e); throw new MigrationException(MigrationErrorCode.MIGRATION_FAILED); + } finally { + taskManager.removeTask(TaskType.MIGRATION); } - log.info("지역 코드 {} 원본 매물 데이터 마이그레이션 완료", regionId); } - @Transactional - private void migrateRawArticle(NaverRawArticle rawArticle) { - log.info("네이버 원본 매물 데이터 {} 마이그레이션 시작", rawArticle.getArticleId()); + private void executeRetryFailedMigrationsAsync() { + log.info("실패한 마이그레이션 재시도 시작"); try { - JsonNode articleNode = objectMapper.readTree(rawArticle.getRawData()); - String articleId = articleNode.path("atclNo").asText(); - - // 기존 데이터가 있다면 삭제 (ID 재사용 대비) - propertyArticleRepository.deleteByArticleId(articleId); - - // 새롭게 생성 - PropertyArticle newArticle = PropertyArticle.createFromNaverRawArticle(articleNode, String.valueOf(rawArticle.getCortarNo())); - propertyArticleRepository.save(newArticle); - - // 상태 업데이트 - rawArticle.updateMigrationStatus(MigrationStatus.COMPLETED); - naverRawArticleRepository.save(rawArticle); - - log.info("네이버 원본 매물 데이터 {} 마이그레이션 완료", rawArticle.getArticleId()); - + naverRawArticleRepository.retryFailedMigrations(); + log.info("실패한 마이그레이션 재시도 완료"); } catch (Exception e) { - log.error("네이버 원본 매물 데이터 {} 마이그레이션 중 오류 발생", rawArticle.getArticleId(), e); + log.error("실패한 마이그레이션 재시도 실패: {}", e.getMessage(), e); throw new MigrationException(MigrationErrorCode.MIGRATION_FAILED); + } finally { + taskManager.removeTask(TaskType.MIGRATION); } } - - @Override - public TaskResponseDto getTaskStatus(TaskType taskName) { - Task task = taskManager.getTaskByType(taskName); - return TaskResponseDto.fromTask(task); - } } \ No newline at end of file diff --git a/crawlerserver/service/src/main/java/com/zipline/service/zigbang/crawler/DefaultZigBangArticleCrawler.java 
b/crawlerserver/service/src/main/java/com/zipline/service/zigbang/crawler/DefaultZigBangArticleCrawler.java index 042e5344..9adb743a 100644 --- a/crawlerserver/service/src/main/java/com/zipline/service/zigbang/crawler/DefaultZigBangArticleCrawler.java +++ b/crawlerserver/service/src/main/java/com/zipline/service/zigbang/crawler/DefaultZigBangArticleCrawler.java @@ -26,10 +26,6 @@ public void executeCrawl(Fetcher fetcher) { for (PropertyCategory category : PropertyCategory.supportedCategories()) { for (String geohash : geohashes) { try { - if (isAlreadyCompleted(geohash, category)) { - log.info("이미 완료된 geohash: {}, category: {}", geohash, category); - continue; - } List itemIds = fetchItemIds(fetcher, geohash, category); if (!itemIds.isEmpty()) { fetchAndSaveItemDetails(fetcher, geohash, category, itemIds); diff --git a/crawlerserver/service/src/main/java/com/zipline/service/zigbang/crawler/SharedZigbangCrawler.java b/crawlerserver/service/src/main/java/com/zipline/service/zigbang/crawler/SharedZigbangCrawler.java index a9b62038..7ba58fbe 100644 --- a/crawlerserver/service/src/main/java/com/zipline/service/zigbang/crawler/SharedZigbangCrawler.java +++ b/crawlerserver/service/src/main/java/com/zipline/service/zigbang/crawler/SharedZigbangCrawler.java @@ -16,6 +16,7 @@ import lombok.extern.slf4j.Slf4j; import java.util.*; +import java.util.stream.Collectors; @Slf4j public abstract class SharedZigbangCrawler implements ZigBangArticleCrawler { @@ -37,6 +38,9 @@ public abstract class SharedZigbangCrawler implements ZigBangArticleCrawler { private static final double LAT_STEP = 0.05; private static final double LON_STEP = 0.05; + // 한번에 요청할 아이템 개수 + private static final int ITEMS_PER_REQUEST = 14; + protected final ObjectMapper objectMapper; protected final ZigBangCrawlRepository crawlRepo; protected final ZigBangArticleRepository articleRepo; @@ -101,40 +105,27 @@ protected void updateCrawlComplete(String geohash, PropertyCategory category) { protected void 
updateCrawlError(String geohash, PropertyCategory category, String message) { ZigBangCrawl crawl = crawlRepo.findById(ZigBangCrawl.buildId(geohash, category)) .orElse(ZigBangCrawl.create(geohash, category)); - crawlRepo.save(crawl.errorWithLog(message, 1000, CrawlStatus.FAILED)); } - protected void randomDelay() { - RandomSleepUtil.sleep(); - } - protected void fetchAndSaveItemDetails(Fetcher fetcher, String geohash, PropertyCategory category, List itemIds) throws Exception { log.info("직방 매물 상세 정보 수집 시작 - geohash: {}, category: {}, 총 건수: {}", geohash, category, itemIds.size()); FetchConfigDTO config = FetchConfigDTO.zigbangPostConfig(); RandomSleepUtil randomSleepUtil = new RandomSleepUtil(); - // 14= 잘라서보내는 요청크기 - for (int i = 0; i < itemIds.size(); i += 14) { - int end = Math.min(i + 14, itemIds.size()); + for (int i = 0; i < itemIds.size(); i += ITEMS_PER_REQUEST) { + int end = Math.min(i + ITEMS_PER_REQUEST, itemIds.size()); List subList = itemIds.subList(i, end); String body = buildJsonBody(subList); log.info("바디 확인" + body); String response = fetcher.fetchPost(ITEM_DETAIL_POST_URL, body, config); if (response != null && !response.isEmpty()) { saveRawArticles(geohash, category, response); - }else if (response.contains("400")) { - log.warn("지오해시 {} / 카테고리 {} 요청 실패 400", geohash, category); - randomSleepUtil.sleepShort(); - }else if (response.contains("307")) { - log.warn("지오해시 {} / 카테고리 {} 리다이렉트 발생 307", geohash, category); - randomSleepUtil.sleepShort(); - }else if (response==null || response.isEmpty()) { + }else{ log.warn("지오해시 {} / 카테고리 {} 요청 실패 → 빈 리스트로 대체", geohash, category); - randomSleepUtil.sleepShort(); } - randomDelay(); + RandomSleepUtil.sleep(); } log.info("직방 매물 상세 정보 수집 완료 - geohash: {}, category: {}", geohash, category); } @@ -178,7 +169,7 @@ protected List fetchItemIds(Fetcher fetcher, String geohash, PropertyCateg log.warn("지오해시 {} / 카테고리 {} 요청 실패 → 빈 리스트로 대체", geohash, category); return Collections.emptyList(); } - randomDelay(); + 
RandomSleepUtil.sleep(); } log.info("직방 매물 ID 수집 완료 - geohash: {}, category: {}, 총 건수: {}", geohash, category, itemIds.size()); @@ -191,16 +182,42 @@ protected void saveRawArticles(String geohash, PropertyCategory category, String JsonNode itemsNode = rootNode.get("items"); if (itemsNode != null && itemsNode.isArray()) { + List idsToFetch = new ArrayList<>(); + Map nodeMap = new HashMap<>(); + for (JsonNode node : itemsNode) { - Long id = node.get("item_id").asLong(); - ZigBangArticle article = new ZigBangArticle(); - article.create(id.toString(), geohash, category, node.toString()); - articleRepo.save(article); + String id = node.get("item_id").asText(); + idsToFetch.add(id); + nodeMap.put(id, node); } - } + List existingArticles = articleRepo.findAllByArticleIdIn(idsToFetch); + Set existingIds = existingArticles.stream() + .map(ZigBangArticle::getArticleId) + .collect(Collectors.toSet()); + + List articlesToSave = new ArrayList<>(); + for (String id : idsToFetch) { + JsonNode node = nodeMap.get(id); + String dataString = node.toString(); + + ZigBangArticle article; + + if (existingIds.contains(id)) { + article = existingArticles.stream() + .filter(a -> a.getArticleId().equals(id)) + .findFirst() + .orElseThrow(); + article.update(geohash, category, dataString); + } else { + article = new ZigBangArticle(); + article.create(id, geohash, category, dataString); + } + articlesToSave.add(article); + } + articleRepo.saveAll(articlesToSave); + } log.info("직방 매물 상세데이터 저장 완료 - geohash: {}, item_count: {}", geohash, itemsNode.size()); - } catch (Exception e) { log.error("직방 매물 저장 실패 - geohash: {}, 오류: {}", geohash, e.getMessage()); } diff --git a/db/dev/crawler/reset_checksum.sql b/db/dev/crawler/reset_checksum.sql new file mode 100644 index 00000000..df789f86 --- /dev/null +++ b/db/dev/crawler/reset_checksum.sql @@ -0,0 +1,3 @@ +SET SQL_SAFE_UPDATES = 0; +DELETE FROM ziplinedb.DATABASECHANGELOG; +SET SQL_SAFE_UPDATES = 1; \ No newline at end of file diff --git
a/docs/manual/Liquebase/Procedure&Function.md b/docs/manual/Liquebase/Procedure&Function.md new file mode 100644 index 00000000..9def2bc9 --- /dev/null +++ b/docs/manual/Liquebase/Procedure&Function.md @@ -0,0 +1,73 @@ +# Liquibase 프로시저 함수 유지보수 가이드 + +### 1. changeset과 id의 중요성 + +- `changeset` 태그에 있는 `author:id` 조합은 Liquibase에서 고유한 식별자로 작동합니다. +- 동일한 `author:id`를 가진 changeset은 한 번만 실행됩니다. +- 따라서 **함수를 수정할 때는 새로운 id를 사용**해야 합니다. + +### 2. 유지보수를 위한 접근 방법 + +#### 방법 1: DROP-CREATE 방식 +```sql +--changeset jungwoo_shin:create-trigger-after-update-v1 endDelimiter:// dbms:mariadb +DROP FUNCTION IF EXISTS MyFunction// +CREATE FUNCTION MyFunction... + +--changeset jungwoo_shin:create-trigger-after-update-v2 endDelimiter:// dbms:mariadb +DROP FUNCTION IF EXISTS MyFunction// +CREATE FUNCTION MyFunction... -- 수정된 버전 +``` + +#### 방법 2: runOnChange 속성 사용 +```sql +--changeset jungwoo_shin:create-trigger-after-update-v1 runOnChange:true endDelimiter:// dbms:mariadb +DROP FUNCTION IF EXISTS MyFunction// +CREATE FUNCTION MyFunction... +``` +- `runOnChange:true`를 사용하면 changeset의 내용이 변경되었을 때만 재실행됩니다. +- 이는 함수 내용이 변경되었을 때만 업데이트 되므로 더 효율적입니다. + +#### 방법 3: runAlways 속성 사용(현재 사용중인 방법) +```sql +--changeset jungwoo_shin:create-function runAlways:true dbms:mariadb +DROP FUNCTION IF EXISTS MyFunction; +CREATE FUNCTION MyFunction... +``` +- `runAlways:true`를 사용하면 매번 Liquibase가 실행될 때마다 해당 changeset이 실행됩니다. +- 개발 환경에서는 유용할 수 있지만, 프로덕션 환경에서는 주의해서 사용해야 합니다. + +### 3. 버전 관리 전략 + +함수나 프로시저를 업데이트할 때는 다음과 같은 버전 관리 전략을 고려할 수 있습니다: + +(현재 사용중인 방법) +1. **버전 번호를 id에 포함시키기**: + ```sql + --changeset jungwoo_shin:function-safe-to-double-v1 dbms:mariadb + --changeset jungwoo_shin:function-safe-to-double-v2 dbms:mariadb + ``` + +2. **날짜를 id에 포함시키기**: + ```sql + --changeset jungwoo_shin:function-safe-to-double-20250513 dbms:mariadb + ``` + +### 4.
최종 권장 방식 +```sql +--liquibase formatted sql +--changeset jungwoo_shin:function-safe-to-double-v1 dbms:mariadb runOnChange:true +DROP FUNCTION IF EXISTS SafeToDouble; + +CREATE FUNCTION SafeToDouble(str VARCHAR(255)) +RETURNS DOUBLE DETERMINISTIC +BEGIN + -- 함수 내용 + -- (구현부 생략) +END; +``` + +이렇게 설정하면: +1. 함수 변경 시 자동으로 업데이트됩니다 +2. 변경이 없으면 실행되지 않습니다 +3. 버전 히스토리가 명확히 추적됩니다 \ No newline at end of file diff --git a/docs/manual/Liquebase/Table.md b/docs/manual/Liquebase/Table.md new file mode 100644 index 00000000..3d649936 --- /dev/null +++ b/docs/manual/Liquebase/Table.md @@ -0,0 +1,124 @@ +# Liquibase에서의 테이블 관리 가이드 + +## 테이블 생성 및 변경 관리 + +### 1. 테이블 생성 방식 + +테이블 생성은 `DROP TABLE`을 사용하지 않고 `CREATE TABLE IF NOT EXISTS`를 사용하는 것이 안전합니다: + +```sql +--changeset author:create-table-v1 runOnChange:true dbms:mariadb +CREATE TABLE IF NOT EXISTS table_name ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + /* 기타 컬럼 */ +); +``` + +### 2. 테이블 수정 방식 + +테이블 구조를 변경할 때는 `ALTER TABLE` 명령을 사용합니다: + +#### 2.1 컬럼 추가 +```sql +--changeset author:add-column-v1 runOnChange:false dbms:mariadb +ALTER TABLE table_name +ADD COLUMN IF NOT EXISTS new_column VARCHAR(100); +``` + +#### 2.2 컬럼 수정 +```sql +--changeset author:modify-column-v1 runOnChange:false dbms:mariadb +ALTER TABLE table_name +MODIFY COLUMN existing_column VARCHAR(200) NOT NULL; +``` + +#### 2.3 컬럼 삭제 (주의 필요) +```sql +--changeset author:drop-column-v1 runOnChange:false dbms:mariadb +ALTER TABLE table_name +DROP COLUMN old_column; +``` + +### 3. 인덱스 관리 + +인덱스도 데이터 보존을 위해 안전하게 관리해야 합니다: + +```sql +--changeset author:add-index-v1 runOnChange:false dbms:mariadb +CREATE INDEX IF NOT EXISTS idx_column_name ON table_name (column_name); +``` + +## 새로운 테이블 버전 관리 전략 + +### 전략 1: 작은 변경 사항은 ALTER TABLE 사용 +- 컬럼 추가/수정/삭제와 같은 작은 변경사항은 `ALTER TABLE` 사용 + +### 전략 2: 대규모 변경은 임시 테이블 사용 +대규모 구조 변경이 필요할 경우: + +```sql +--changeset author:table-restructure-v1 runOnChange:false dbms:mariadb +-- 1. 임시 테이블 생성 +CREATE TABLE table_name_new ( + /* 새로운 구조 */ +); + +-- 2.
데이터 복사 +INSERT INTO table_name_new (/*컬럼들*/) +SELECT /*변환된 컬럼들*/ FROM table_name; + +-- 3. 테이블 교체 +RENAME TABLE table_name TO table_name_old, + table_name_new TO table_name; + +-- 4. 이전 테이블 삭제 (백업 후) +-- DROP TABLE table_name_old; +``` + +## 데이터 마이그레이션 관리 + +### 데이터 초기 로드 +```sql +--changeset author:data-load-v1 runOnChange:false dbms:mariadb +INSERT INTO table_name (col1, col2) +VALUES ('value1', 'value2'), + ('value3', 'value4'); +``` + +### 조건부 데이터 변경 +```sql +--changeset author:update-data-v1 runOnChange:false dbms:mariadb +UPDATE table_name +SET col1 = '새값' +WHERE col2 = '조건값'; +``` + +## Liquibase 속성 사용 팁 + +### runOnChange vs. runAlways + +- **runOnChange:true** - 스크립트 내용이 변경된 경우에만 실행됨 + - 테이블 생성에 적합: `CREATE TABLE IF NOT EXISTS` + +- **runAlways:false** (기본값) - 한 번만 실행됨 + - 데이터 변경에 적합: `ALTER TABLE`, `INSERT`, `UPDATE` + +### context 속성 활용 + +특정 환경에서만 실행되도록 설정: + +```sql +--changeset author:id context:dev,test +``` + +이렇게 하면 개발/테스트 환경에서만 실행되는 변경사항을 정의할 수 있습니다. + +## 테이블 변경 모범 사례 + +1. **스키마 변경은 별도 파일로 관리**: 각 변경사항을 개별 SQL 파일로 관리 +2. **의미 있는 changeset ID 사용**: `create-user-table`, `add-email-column-to-user` 등 +3. **주석 활용**: 복잡한 변경의 이유를 주석으로 기록 +4. **트랜잭션 고려**: 대규모 데이터 변경은 트랜잭션 관리 필요 +5. **롤백 계획 수립**: 변경사항 실패 시 롤백 방법 미리 준비 + +이 가이드를 따르면 데이터 손실 없이 안전하게 테이블을 관리할 수 있습니다. \ No newline at end of file