"""Recruitment test-data generator.

Builds random recruitment records (project, position, applicant and
interviewer fields) and writes them to Excel workbooks.

openpyxl is imported lazily inside ``create_excel`` so that ``main`` can
detect a missing installation and install it before the first use.
"""

import importlib
import random
import string
import subprocess
import sys
from datetime import datetime, timedelta
from pathlib import Path

# Data configuration
RECRUIT_COUNT = 2000  # Number of records generated per file

# Directory that receives the generated workbooks
OUTPUT_DIR = "doc/test-data/recruitment"

# Recruitment project names
RECRUIT_NAMES = [
    "2025春季校园招聘", "2025秋季校园招聘", "2025社会招聘", "2025技术专项招聘",
    "2025管培生招聘", "2025实习生招聘", "2025高端人才引进", "2025春季研发岗招聘",
    "2025夏季校园招聘", "2025冬季校园招聘", "2025春季销售岗招聘", "2025秋季市场岗招聘",
    "2025春季运营岗招聘", "2025秋季产品岗招聘", "2025春季客服岗招聘", "2025秋季人事岗招聘",
]

# Position names
POSITION_NAMES = [
    "Java开发工程师", "Python开发工程师", "前端开发工程师", "后端开发工程师",
    "全栈工程师", "算法工程师", "数据分析师", "产品经理",
    "UI设计师", "测试工程师", "运维工程师", "架构师",
    "软件工程师", "系统分析师", "数据库管理员", "网络工程师",
    "移动端开发工程师", "嵌入式开发工程师", "大数据工程师", "人工智能工程师",
]

# Position categories
POSITION_CATEGORIES = [
    "技术类", "产品类", "设计类", "运营类", "市场类", "销售类",
    "客服类", "人事类", "财务类", "行政类", "管理类", "研发类",
]

# Position description templates
POSITION_DESCS = [
    "负责公司核心业务系统的设计和开发,要求熟悉相关技术栈,具备良好的编码规范和团队协作能力。",
    "参与产品需求分析和技术方案设计,负责模块开发和维护,优化系统性能,保障系统稳定性。",
    "负责系统架构设计和技术选型,解决技术难题,指导团队成员开发,推动技术创新。",
    "负责数据采集、清洗、分析和可视化,为业务决策提供数据支持,优化业务流程。",
    "负责产品规划、需求分析和产品设计,协调研发、测试、运营等团队,推动产品落地。",
    "负责用户界面设计和用户体验优化,与产品经理和开发团队协作,确保设计还原度。",
    "负责系统测试和质量保障,编写测试用例,执行测试,跟踪缺陷,保障产品质量。",
    "负责系统运维和监控,保障系统稳定运行,优化系统性能,处理故障和应急响应。",
]

# Common surnames and given names
SURNAMES = [
    "王", "李", "张", "刘", "陈", "杨", "黄", "赵", "周", "吴",
    "徐", "孙", "马", "朱", "胡", "郭", "何", "高", "林", "罗",
]
GIVEN_NAMES = [
    "伟", "芳", "娜", "敏", "静", "丽", "强", "磊", "军", "洋",
    "勇", "艳", "杰", "娟", "涛", "明", "超", "秀英", "华", "英",
]

# Education levels
EDUCATIONS = ["本科", "硕士", "博士", "大专", "高中"]

# Universities
UNIVERSITIES = [
    "清华大学", "北京大学", "复旦大学", "上海交通大学", "浙江大学",
    "中国科学技术大学", "南京大学", "中山大学", "华中科技大学", "哈尔滨工业大学",
    "西安交通大学", "北京理工大学", "中国人民大学", "北京航空航天大学", "同济大学",
    "南开大学", "天津大学", "东南大学", "武汉大学", "厦门大学",
    "山东大学", "四川大学", "吉林大学", "中南大学", "华南理工大学",
    "西北工业大学", "华东师范大学", "北京师范大学", "重庆大学",
]

# Majors
MAJORS = [
    "计算机科学与技术", "软件工程", "人工智能", "数据科学与大数据技术", "物联网工程",
    "电子信息工程", "通信工程", "自动化", "电气工程及其自动化", "机械工程",
    "材料科学与工程", "化学工程与工艺", "生物工程", "环境工程", "土木工程",
    "数学与应用数学", "统计学", "物理学", "化学", "生物学",
    "工商管理", "市场营销", "会计学", "金融学", "国际经济与贸易",
    "人力资源管理", "公共事业管理", "行政管理", "法学", "汉语言文学",
    "英语", "日语", "新闻传播学", "广告学", "艺术设计",
]

# Admission statuses
ADMIT_STATUSES = ["录用", "未录用", "放弃"]

# Interviewers as (name, employee id) pairs
INTERVIEWERS = [
    ("张伟", "INT001"), ("李芳", "INT002"), ("王磊", "INT003"), ("刘娜", "INT004"),
    ("陈军", "INT005"), ("杨静", "INT006"), ("黄勇", "INT007"), ("赵丽", "INT008"),
    ("周涛", "INT009"), ("吴明", "INT010"), ("徐超", "INT011"), ("孙杰", "INT012"),
    ("马娟", "INT013"), ("朱华", "INT014"), ("胡英", "INT015"), ("郭强", "INT016"),
]

# Per-digit weights and remainder-to-check-character table used for the
# 18th character of the generated ID numbers.
_ID_WEIGHTS = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
_ID_CHECK_CODES = "10X98765432"


def generate_chinese_name():
    """Return a random name: one surname plus one or two GIVEN_NAMES entries.

    Each branch is taken with roughly equal probability. Entries of
    GIVEN_NAMES may themselves be longer than one character.
    """
    surname = random.choice(SURNAMES)
    if random.random() > 0.5:
        given_name = random.choice(GIVEN_NAMES) + random.choice(GIVEN_NAMES)
    else:
        given_name = random.choice(GIVEN_NAMES)
    return surname + given_name


def generate_id_number():
    """Return a random 18-character ID number with a matching check character.

    Layout: 6-digit area code, 8-digit birth date (1990-2005), 3-digit
    sequence code, 1 check character. The area code is drawn from a plain
    numeric range, so it is not guaranteed to be an assigned region code.
    """
    area_code = f"{random.randint(110000, 659001):06d}"

    birth_year = random.randint(1990, 2005)
    birth_month = random.randint(1, 12)
    # Day is capped at 28 so the date exists in every month.
    birth_day = random.randint(1, 28)
    birth_date = f"{birth_year}{birth_month:02d}{birth_day:02d}"

    sequence_code = f"{random.randint(1, 999):03d}"

    id_17 = area_code + birth_date + sequence_code
    total = sum(int(digit) * weight for digit, weight in zip(id_17, _ID_WEIGHTS))
    return id_17 + _ID_CHECK_CODES[total % 11]


def generate_graduation_date():
    """Return a random graduation year-month between 2020 and 2030 as YYYYMM."""
    year = random.randint(2020, 2030)
    month = random.randint(1, 12)
    return f"{year}{month:02d}"


def generate_recruitment_data(start_index, count=None):
    """Generate recruitment test records.

    Args:
        start_index: Sequence number used for the first record's project id;
            subsequent records count up from it.
        count: Number of records to generate. Defaults to RECRUIT_COUNT.

    Returns:
        A list of rows; each row is a 16-item list ordered like the header
        row written by ``create_excel``.
    """
    if count is None:
        count = RECRUIT_COUNT

    # Evaluate the date once so every id in the batch shares the same prefix,
    # even if generation runs across midnight.
    date_part = datetime.now().strftime('%Y%m%d')

    data = []
    for i in range(start_index, start_index + count):
        recruit_id = f"REC{date_part}{i:06d}"

        # About half of the records get a second interviewer. sample() keeps
        # the two interviewers distinct.
        if random.random() > 0.5:
            first, second = random.sample(INTERVIEWERS, 2)
            interviewer1_name, interviewer1_id = first
            interviewer2_name, interviewer2_id = second
        else:
            interviewer1_name, interviewer1_id = random.choice(INTERVIEWERS)
            interviewer2_name = ""
            interviewer2_id = ""

        data.append([
            recruit_id,                          # recruitment project id
            random.choice(RECRUIT_NAMES),        # recruitment project name
            random.choice(POSITION_NAMES),       # position name
            random.choice(POSITION_CATEGORIES),  # position category
            random.choice(POSITION_DESCS),       # position description
            generate_chinese_name(),             # applicant name
            random.choice(EDUCATIONS),           # applicant education
            generate_id_number(),                # applicant id number
            random.choice(UNIVERSITIES),         # applicant university
            random.choice(MAJORS),               # applicant major
            generate_graduation_date(),          # applicant graduation YYYYMM
            random.choice(ADMIT_STATUSES),       # admission status
            interviewer1_name,                   # interviewer 1 name
            interviewer1_id,                     # interviewer 1 employee id
            interviewer2_name,                   # interviewer 2 name
            interviewer2_id,                     # interviewer 2 employee id
        ])

    return data


def create_excel(data, filename):
    """Write the records to a styled Excel workbook.

    Args:
        data: Rows as produced by ``generate_recruitment_data``.
        filename: Destination path of the .xlsx file. Missing parent
            directories are created.
    """
    # Imported here rather than at module level so main() can install
    # openpyxl first when it is missing.
    from openpyxl import Workbook
    from openpyxl.styles import Alignment, Font, PatternFill
    from openpyxl.utils import get_column_letter

    wb = Workbook()
    ws = wb.active
    ws.title = "招聘信息"

    headers = [
        "招聘项目编号", "招聘项目名称", "职位名称", "职位类别", "职位描述",
        "应聘人员姓名", "应聘人员学历", "应聘人员证件号码", "应聘人员毕业院校",
        "应聘人员专业", "应聘人员毕业年月", "录用情况",
        "面试官1姓名", "面试官1工号", "面试官2姓名", "面试官2工号",
    ]
    ws.append(headers)
    for row_data in data:
        ws.append(row_data)

    # Header row: white bold text on a blue fill.
    header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    header_font = Font(bold=True, color="FFFFFF")
    for cell in ws[1]:
        cell.fill = header_fill
        cell.font = header_font

    column_widths = [20, 20, 20, 15, 30, 15, 15, 20, 20, 15, 15, 10, 15, 15, 15, 15]
    for col_num, width in enumerate(column_widths, 1):
        ws.column_dimensions[get_column_letter(col_num)].width = width

    # One shared alignment object centres and wraps every cell, header included.
    centered = Alignment(horizontal="center", vertical="center", wrap_text=True)
    for row in ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
        for cell in row:
            cell.alignment = centered

    # wb.save() does not create directories, so make sure the target exists.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    wb.save(filename)
    print(f"✓ 已生成文件: {filename}")
    print(f" 数据行数: {len(data)}")


def _ensure_openpyxl():
    """Install openpyxl with pip when it cannot be imported."""
    try:
        import openpyxl  # noqa: F401
    except ImportError:
        print("✗ 未安装openpyxl库,正在安装...")
        # Use the running interpreter's pip so the package lands in the
        # environment this script executes in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "openpyxl"])
        # Let the import system see the freshly installed package.
        importlib.invalidate_caches()
        print("✓ openpyxl库安装成功")


def main():
    """Generate two workbooks of RECRUIT_COUNT records each and print a summary."""
    file_count = 2

    print("=" * 70)
    print("招聘信息测试数据生成器")
    print("=" * 70)

    _ensure_openpyxl()

    print("\n配置信息:")
    print(f" - 生成数据量: {RECRUIT_COUNT} 条/文件")
    print(f" - 生成文件数: {file_count} 个")
    print(f" - 总数据量: {RECRUIT_COUNT * file_count} 条")

    print("\n开始生成数据...")

    generated = []  # (filename, row count) per written file
    for file_no in range(1, file_count + 1):
        print(f"\n正在生成第{file_no}个文件...")
        # Continue the id sequence where the previous file stopped.
        start_index = (file_no - 1) * RECRUIT_COUNT + 1
        data = generate_recruitment_data(start_index)
        filename = f"{OUTPUT_DIR}/recruitment_test_data_{RECRUIT_COUNT}_{file_no}.xlsx"
        create_excel(data, filename)
        generated.append((filename, len(data)))

    print("\n" + "=" * 70)
    print("✓ 所有文件生成完成!")
    print("=" * 70)
    print("\n生成的文件:")
    for file_no, (filename, _) in enumerate(generated, 1):
        print(f" {file_no}. {filename}")
    print("\n数据统计:")
    print(f" - 总数据量: {RECRUIT_COUNT * file_count} 条")
    for file_no, (_, row_count) in enumerate(generated, 1):
        print(f" - 文件{file_no}: {row_count} 条")


if __name__ == "__main__":
    main()