"""Management command: reconcile NaturalPerson status with an Excel sheet.

The sheet must contain the columns 姓名 (name), 学号 (student id) and
状态 (status).  Every change is applied inside a single database
transaction; a diff report is exported and the operator must confirm
before the transaction is committed (unless ``--no-confirm`` is given).
"""
import os
import sys
import traceback

import pandas as pd
from django.core.management.base import BaseCommand
from django.db import transaction

from app.models import NaturalPerson, User

# Columns the input sheet must provide.
REQUIRED_COLUMNS = ('姓名', '学号', '状态')


def _clean_cell(value):
    """Return a spreadsheet cell as stripped text; missing cells become ''.

    pandas represents an empty cell as NaN, and ``str(nan)`` is the
    non-empty string ``'nan'``, so missing values must be detected before
    the conversion to text.
    """
    if not isinstance(value, str) and pd.isna(value):
        return ''
    return str(value).strip()


class DryRunRollback(Exception):
    """Raised inside the transaction to roll it back when the operator declines."""


class Command(BaseCommand):
    help = '检查并修正数据库人员状态,支持Diff导出与事务回滚'

    def add_arguments(self, parser):
        """Register the Excel path argument and the ``--no-confirm`` flag."""
        parser.add_argument('excel_path', type=str,
                            help='Excel文件的路径(需包含列:姓名、学号、状态)')
        parser.add_argument(
            '--no-confirm',
            action='store_true',
            help='无需确认直接执行'
        )

    def handle(self, *args, **options):
        """Compare the sheet with the database and apply confirmed changes.

        All updates happen inside one ``transaction.atomic()`` block.  The
        transaction is rolled back when the operator does not type ``YES``
        or when any unexpected error occurs (the process then exits with
        status 1).
        """
        excel_path = options['excel_path']
        skip_confirm = options['no_confirm']

        # Maps the status text used in the sheet to the model's status codes.
        status_map = {
            "在校": NaturalPerson.GraduateStatus.UNDERGRADUATED,
            "毕业": NaturalPerson.GraduateStatus.GRADUATED,
        }

        df = self._load_sheet(excel_path)
        if df is None:
            return

        stats = {
            "total": len(df),
            "processed": 0,
            "updated": 0,
            "not_found": 0,
            "unknown_status": 0,
            "unchanged": 0,
            "skipped": 0,
        }

        self.stdout.write(self.style.NOTICE(
            f">>> 正在读取 {len(df)} 条数据,准备计算差异..."))
        self.stdout.write(self.style.NOTICE(">>> 数据库连接已建立,事务开启中..."))

        try:
            with transaction.atomic():
                diff_records = self._apply_rows(df, status_map, stats)
                self._print_summary(stats)

                if not diff_records:
                    unresolved = (stats["not_found"]
                                  + stats["unknown_status"]
                                  + stats["skipped"])
                    if unresolved:
                        # Nothing to change, but some rows were never
                        # compared, so "all consistent" would be misleading.
                        self.stdout.write(self.style.WARNING(
                            f"没有可应用的变更,但有 {unresolved} 行未能核对,请查看上方日志。"))
                    else:
                        self.stdout.write(
                            self.style.SUCCESS("🎉 所有数据与数据库一致,无需变更。"))
                    # Nothing was written, so committing here is a no-op.
                    return

                self._export_and_preview(diff_records)

                if skip_confirm:
                    self.stdout.write(self.style.WARNING(
                        "\n>>> 参数指定跳过确认,正在提交事务..."))
                else:
                    self.stdout.write(self.style.ERROR(
                        "\n⚠️ 警告:该操作将修改线上数据库!"))
                    confirm = input("请输入 'YES' (大写) 确认提交,输入其他任意内容回滚: ")

                    if confirm != 'YES':
                        # Leaving the atomic block via an exception is what
                        # triggers the rollback.
                        raise DryRunRollback("用户取消操作")

        except DryRunRollback:
            self.stdout.write(self.style.SUCCESS(
                "\n🛑 操作已取消,数据库状态已回滚,未发生任何变更。"))

        except Exception as e:
            # Any other failure: show the stack, report on stderr, exit 1.
            # The atomic block has already rolled back at this point.
            traceback.print_exc()
            self.stderr.write(self.style.ERROR(f"\n❌ 发生程序错误: {e}"))
            self.stderr.write(self.style.ERROR("事务已自动回滚。"))
            sys.exit(1)

        else:
            # Reached only when the atomic block ran to its end, i.e. the
            # changes were confirmed (or confirmation was skipped).
            self.stdout.write(self.style.SUCCESS(
                f"\n✅ 事务提交成功!已更新 {stats['updated']} 条人员状态。"))

    def _load_sheet(self, excel_path):
        """Read the Excel file; return the DataFrame, or None after reporting an error."""
        if not os.path.exists(excel_path):
            self.stderr.write(self.style.ERROR(f"文件不存在: {excel_path}"))
            return None

        try:
            # Read student ids as text so they are not parsed as numbers.
            df = pd.read_excel(excel_path, dtype={'学号': str})
        except Exception as e:
            self.stderr.write(f"读取 Excel 失败: {e}")
            return None

        # Fail fast on a malformed sheet instead of skipping every row.
        missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            self.stderr.write(self.style.ERROR(
                f"Excel 缺少必要的列: {', '.join(missing)}"))
            return None

        return df

    def _apply_rows(self, df, status_map, stats):
        """Apply every status change found in ``df``; return the diff records.

        Must be called inside an open transaction: rows are saved
        immediately and only become permanent when the caller commits.
        ``stats`` is updated in place.
        """
        diff_records = []

        # Row numbers start at 2: row 1 of the sheet is the header.
        for row_no, (_, row) in enumerate(df.iterrows(), start=2):
            stats["processed"] += 1

            name = _clean_cell(row['姓名'])
            stu_id = _clean_cell(row['学号'])
            excel_status_str = _clean_cell(row['状态'])

            if not name or not stu_id:
                self.stdout.write(self.style.WARNING(
                    f" [跳过] 第{row_no}行数据不完整"))
                stats["skipped"] += 1
                continue

            # Look the person up by student id; the name is only cross-checked.
            person = NaturalPerson.objects.filter(
                person_id__username=stu_id).first()

            if not person:
                self.stdout.write(self.style.WARNING(
                    f" [缺失] 第{row_no}行: 学号 {stu_id} 未找到用户"))
                stats["not_found"] += 1
                continue

            if person.name != name:
                self.stdout.write(self.style.WARNING(
                    f" [疑义] 第{row_no}行: 学号 {stu_id} 数据库名为 '{person.name}',Excel名为 '{name}'。以学号为准继续。"))

            old_status_code = person.status
            old_status_str = person.get_status_display()

            target_code = status_map.get(excel_status_str)
            if target_code is None:
                self.stdout.write(self.style.ERROR(
                    f" [未知状态] 第{row_no}行: '{excel_status_str}' 不在映射表中"))
                stats["unknown_status"] += 1
                continue

            if old_status_code == target_code:
                stats["unchanged"] += 1
                continue

            # Written to the database now, but still uncommitted.
            person.status = target_code
            person.save(update_fields=['status'])
            stats["updated"] += 1

            diff_records.append({
                "行号": row_no,
                "学号": stu_id,
                "姓名": person.name,
                "原状态": old_status_str,
                "新状态": excel_status_str,
            })

        return diff_records

    def _print_summary(self, stats):
        """Print the per-category row counts collected while applying rows."""
        self.stdout.write("\n" + "="*50)
        self.stdout.write(" 处理统计:")
        self.stdout.write(f" 总行数: {stats['total']}")
        self.stdout.write(f" 需更新: {stats['updated']} (差异)")
        self.stdout.write(f" 无变化: {stats['unchanged']}")
        self.stdout.write(f" 未找到: {stats['not_found']}")
        self.stdout.write(f" 未知状态: {stats['unknown_status']}")
        self.stdout.write(f" 跳过: {stats['skipped']}")
        self.stdout.write("="*50)

    def _export_and_preview(self, diff_records):
        """Write the diff report to raw_data/ and print the first ten changes."""
        output_dir = "raw_data"
        os.makedirs(output_dir, exist_ok=True)

        diff_file = os.path.join(output_dir, "status_diff_report.xlsx")
        pd.DataFrame(diff_records).to_excel(diff_file, index=False)

        self.stdout.write(self.style.SUCCESS(
            f"\n📄 详细差异表已生成: {diff_file}"))
        self.stdout.write(self.style.SUCCESS(
            "请务必检查 Excel 中的变更是否符合预期!"))

        # Use self.stdout (not print) so redirected command output
        # also captures the preview.
        self.stdout.write("\n变更预览 (Top 10):")
        self.stdout.write(
            f"{'姓名':<10} | {'学号':<12} | {'原状态':<10} -> {'新状态'}")
        self.stdout.write("-" * 50)
        for rec in diff_records[:10]:
            self.stdout.write(
                f"{rec['姓名']:<10} | {rec['学号']:<12} | {rec['原状态']:<10} -> {rec['新状态']}")

        if len(diff_records) > 10:
            self.stdout.write(f"... 以及其他 {len(diff_records)-10} 条 ...")