- 添加异步导入服务接口和实现 - 创建导入失败记录VO类 - 添加导入设计文档和测试数据生成脚本 - 支持大批量招聘数据的异步处理 Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
272 lines
11 KiB
Python
272 lines
11 KiB
Python
"""
|
|
Recruitment test data generator.

Generates recruitment records that satisfy the validation rules and saves them to Excel files.
|
|
"""
|
|
|
|
import random
|
|
import string
|
|
from datetime import datetime, timedelta
|
|
from openpyxl import Workbook
|
|
from openpyxl.styles import Font, Alignment, PatternFill
|
|
|
|
# ---------------------------------------------------------------------------
# Data configuration
# ---------------------------------------------------------------------------

# Number of rows generated per output file.
RECRUIT_COUNT = 2000

# Recruitment project names.
RECRUIT_NAMES = [
    "2025春季校园招聘",
    "2025秋季校园招聘",
    "2025社会招聘",
    "2025技术专项招聘",
    "2025管培生招聘",
    "2025实习生招聘",
    "2025高端人才引进",
    "2025春季研发岗招聘",
    "2025夏季校园招聘",
    "2025冬季校园招聘",
    "2025春季销售岗招聘",
    "2025秋季市场岗招聘",
    "2025春季运营岗招聘",
    "2025秋季产品岗招聘",
    "2025春季客服岗招聘",
    "2025秋季人事岗招聘",
]

# Position titles.
POSITION_NAMES = [
    "Java开发工程师",
    "Python开发工程师",
    "前端开发工程师",
    "后端开发工程师",
    "全栈工程师",
    "算法工程师",
    "数据分析师",
    "产品经理",
    "UI设计师",
    "测试工程师",
    "运维工程师",
    "架构师",
    "软件工程师",
    "系统分析师",
    "数据库管理员",
    "网络工程师",
    "移动端开发工程师",
    "嵌入式开发工程师",
    "大数据工程师",
    "人工智能工程师",
]

# Position categories.
POSITION_CATEGORIES = [
    "技术类",
    "产品类",
    "设计类",
    "运营类",
    "市场类",
    "销售类",
    "客服类",
    "人事类",
    "财务类",
    "行政类",
    "管理类",
    "研发类",
]

# Position description templates.
POSITION_DESCS = [
    "负责公司核心业务系统的设计和开发,要求熟悉相关技术栈,具备良好的编码规范和团队协作能力。",
    "参与产品需求分析和技术方案设计,负责模块开发和维护,优化系统性能,保障系统稳定性。",
    "负责系统架构设计和技术选型,解决技术难题,指导团队成员开发,推动技术创新。",
    "负责数据采集、清洗、分析和可视化,为业务决策提供数据支持,优化业务流程。",
    "负责产品规划、需求分析和产品设计,协调研发、测试、运营等团队,推动产品落地。",
    "负责用户界面设计和用户体验优化,与产品经理和开发团队协作,确保设计还原度。",
    "负责系统测试和质量保障,编写测试用例,执行测试,跟踪缺陷,保障产品质量。",
    "负责系统运维和监控,保障系统稳定运行,优化系统性能,处理故障和应急响应。",
]

# Common surnames (each one is a single character) and given-name parts.
SURNAMES = list("王李张刘陈杨黄赵周吴徐孙马朱胡郭何高林罗")
GIVEN_NAMES = [
    "伟", "芳", "娜", "敏", "静",
    "丽", "强", "磊", "军", "洋",
    "勇", "艳", "杰", "娟", "涛",
    "明", "超", "秀英", "华", "英",
]

# Education levels.
EDUCATIONS = ["本科", "硕士", "博士", "大专", "高中"]

# Universities the applicant may have graduated from.
UNIVERSITIES = [
    "清华大学",
    "北京大学",
    "复旦大学",
    "上海交通大学",
    "浙江大学",
    "中国科学技术大学",
    "南京大学",
    "中山大学",
    "华中科技大学",
    "哈尔滨工业大学",
    "西安交通大学",
    "北京理工大学",
    "中国人民大学",
    "北京航空航天大学",
    "同济大学",
    "南开大学",
    "天津大学",
    "东南大学",
    "武汉大学",
    "厦门大学",
    "山东大学",
    "四川大学",
    "吉林大学",
    "中南大学",
    "华南理工大学",
    "西北工业大学",
    "华东师范大学",
    "北京师范大学",
    "重庆大学",
]

# Majors.
MAJORS = [
    "计算机科学与技术",
    "软件工程",
    "人工智能",
    "数据科学与大数据技术",
    "物联网工程",
    "电子信息工程",
    "通信工程",
    "自动化",
    "电气工程及其自动化",
    "机械工程",
    "材料科学与工程",
    "化学工程与工艺",
    "生物工程",
    "环境工程",
    "土木工程",
    "数学与应用数学",
    "统计学",
    "物理学",
    "化学",
    "生物学",
    "工商管理",
    "市场营销",
    "会计学",
    "金融学",
    "国际经济与贸易",
    "人力资源管理",
    "公共事业管理",
    "行政管理",
    "法学",
    "汉语言文学",
    "英语",
    "日语",
    "新闻传播学",
    "广告学",
    "艺术设计",
]

# Admission outcomes.
ADMIT_STATUSES = ["录用", "未录用", "放弃"]

# Interviewers as (name, employee ID) pairs.
INTERVIEWERS = [
    ("张伟", "INT001"),
    ("李芳", "INT002"),
    ("王磊", "INT003"),
    ("刘娜", "INT004"),
    ("陈军", "INT005"),
    ("杨静", "INT006"),
    ("黄勇", "INT007"),
    ("赵丽", "INT008"),
    ("周涛", "INT009"),
    ("吴明", "INT010"),
    ("徐超", "INT011"),
    ("孙杰", "INT012"),
    ("马娟", "INT013"),
    ("朱华", "INT014"),
    ("胡英", "INT015"),
    ("郭强", "INT016"),
]
|
|
|
|
|
|
def generate_chinese_name():
    """Return a random Chinese full name.

    The name is one entry of ``SURNAMES`` followed by a given name made of
    either one or two entries of ``GIVEN_NAMES``.
    """
    family = random.choice(SURNAMES)
    # A draw above 0.5 selects a two-part given name, otherwise a single part.
    part_count = 2 if random.random() > 0.5 else 1
    given = "".join(random.choice(GIVEN_NAMES) for _ in range(part_count))
    return family + given
|
|
|
|
|
|
def generate_id_number():
    """Return a random 18-character ID number string.

    Layout: 6-digit area code, 8-digit birth date (YYYYMMDD), 3-digit
    sequence code, then one check character computed from the first 17
    digits.
    """
    # Area code: any integer in [110000, 659001].
    region = random.randint(110000, 659001)
    # Birth date in 1990-2005; the day never exceeds 28, so it exists in
    # every month.
    year = random.randint(1990, 2005)
    month = random.randint(1, 12)
    day = random.randint(1, 28)
    # Sequence code in [1, 999], zero-padded to three digits below.
    serial = random.randint(1, 999)

    first_17 = f"{region:06d}{year}{month:02d}{day:02d}{serial:03d}"

    # The weighted digit sum modulo 11 indexes the check-character table.
    factors = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    check_table = "10X98765432"
    weighted_sum = sum(
        int(digit) * factor for digit, factor in zip(first_17, factors)
    )
    return first_17 + check_table[weighted_sum % 11]
|
|
|
|
|
|
def generate_graduation_date():
    """Return a random graduation year-month as a ``YYYYMM`` string.

    The year is drawn from 2020-2030 and the month from 1-12, both
    inclusive; the month is zero-padded to two digits.
    """
    grad_year = random.randint(2020, 2030)
    grad_month = random.randint(1, 12)
    return f"{grad_year}{grad_month:02d}"
|
|
|
|
|
|
def generate_recruitment_data(start_index, count=None):
    """Generate rows of recruitment test data.

    Args:
        start_index: Sequence number embedded in the first row's project ID;
            each following row uses the next integer.
        count: Number of rows to generate. Defaults to ``RECRUIT_COUNT``.

    Returns:
        A list of 16-element lists. The element order matches the header
        row written by ``create_excel``.
    """
    if count is None:
        count = RECRUIT_COUNT

    # Read the date once so every ID in the batch shares the same prefix,
    # even when generation happens to run across midnight.
    date_prefix = datetime.now().strftime('%Y%m%d')

    data = []
    for i in range(start_index, start_index + count):
        # Project ID: "REC" + YYYYMMDD + zero-padded 6-digit sequence number.
        recruit_id = f"REC{date_prefix}{i:06d}"

        # About half of the rows get two interviewers, the rest only one.
        if random.random() > 0.5:
            # sample() picks two *different* interviewers; two independent
            # choice() calls could select the same person twice.
            (interviewer1_name, interviewer1_id), (interviewer2_name, interviewer2_id) = (
                random.sample(INTERVIEWERS, 2)
            )
        else:
            interviewer1_name, interviewer1_id = random.choice(INTERVIEWERS)
            interviewer2_name = ""
            interviewer2_id = ""

        data.append([
            recruit_id,                           # project ID
            random.choice(RECRUIT_NAMES),         # project name
            random.choice(POSITION_NAMES),        # position name
            random.choice(POSITION_CATEGORIES),   # position category
            random.choice(POSITION_DESCS),        # position description
            generate_chinese_name(),              # applicant name
            random.choice(EDUCATIONS),            # applicant education
            generate_id_number(),                 # applicant ID number
            random.choice(UNIVERSITIES),          # applicant university
            random.choice(MAJORS),                # applicant major
            generate_graduation_date(),           # applicant graduation YYYYMM
            random.choice(ADMIT_STATUSES),        # admission outcome
            interviewer1_name,                    # interviewer 1 name
            interviewer1_id,                      # interviewer 1 employee ID
            interviewer2_name,                    # interviewer 2 name
            interviewer2_id,                      # interviewer 2 employee ID
        ])

    return data
|
|
|
|
|
|
def create_excel(data, filename):
    """Write recruitment rows to a styled ``.xlsx`` workbook.

    The sheet gets a fixed 16-column header row (white bold text on a blue
    fill), fixed column widths, and centered, wrapped alignment on every
    cell. A short summary is printed after saving.

    Args:
        data: Sized sequence of rows; each row is a sequence of cell values
            in header order.
        filename: Destination path of the workbook. Missing parent
            directories are created before saving.
    """
    # Imported locally so this function carries its own extra dependencies.
    from pathlib import Path

    from openpyxl.utils import get_column_letter

    wb = Workbook()
    ws = wb.active
    ws.title = "招聘信息"

    headers = [
        "招聘项目编号", "招聘项目名称", "职位名称", "职位类别", "职位描述",
        "应聘人员姓名", "应聘人员学历", "应聘人员证件号码", "应聘人员毕业院校",
        "应聘人员专业", "应聘人员毕业年月", "录用情况",
        "面试官1姓名", "面试官1工号", "面试官2姓名", "面试官2工号",
    ]

    # Header row first, then one worksheet row per data row.
    ws.append(headers)
    for row_data in data:
        ws.append(row_data)

    # Header styling. Alignment is not set here because the loop over all
    # cells below applies the same alignment to the header row as well.
    header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    header_font = Font(bold=True, color="FFFFFF")
    for col_num in range(1, len(headers) + 1):
        cell = ws.cell(row=1, column=col_num)
        cell.fill = header_fill
        cell.font = header_font

    # Column widths. get_column_letter() stays correct past column 26,
    # unlike computing the letter with chr(64 + n).
    column_widths = [20, 20, 20, 15, 30, 15, 15, 20, 20, 15, 15, 10, 15, 15, 15, 15]
    for col_num, width in enumerate(column_widths, 1):
        ws.column_dimensions[get_column_letter(col_num)].width = width

    # Center and wrap every cell; one Alignment instance is reused rather
    # than building a new object for each cell.
    centered = Alignment(horizontal="center", vertical="center", wrap_text=True)
    for row in ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
        for cell in row:
            cell.alignment = centered

    # Saving fails with FileNotFoundError if the target directory is
    # missing, so make sure it exists first.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    wb.save(filename)

    print(f"✓ 已生成文件: {filename}")
    print(f" 数据行数: {len(data)}")
|
|
|
|
|
|
def main():
    """Generate the recruitment test-data workbooks and print a summary.

    Two files are written, each with ``RECRUIT_COUNT`` rows. Project-ID
    sequence numbers start at 1 in the first file and continue without a
    gap in the second.
    """
    # There is no runtime check for openpyxl here: this module imports it
    # at the top of the file, so a missing package raises ImportError
    # before main() can run and an in-function check could never trigger.
    file_count = 2
    total_rows = RECRUIT_COUNT * file_count
    banner = "=" * 70

    print(banner)
    print("招聘信息测试数据生成器")
    print(banner)

    print("\n配置信息:")
    print(f" - 生成数据量: {RECRUIT_COUNT} 条/文件")
    print(f" - 生成文件数: {file_count} 个")
    print(f" - 总数据量: {total_rows} 条")

    print("\n开始生成数据...")

    # (filename, row count) for each file written, in order.
    outputs = []
    for file_no in range(1, file_count + 1):
        print(f"\n正在生成第{file_no}个文件...")
        # Each file continues the sequence where the previous one stopped.
        start_index = (file_no - 1) * RECRUIT_COUNT + 1
        rows = generate_recruitment_data(start_index)
        # The row count in the name follows RECRUIT_COUNT so the file name
        # stays truthful when the configuration changes.
        filename = (
            "doc/test-data/recruitment/"
            f"recruitment_test_data_{RECRUIT_COUNT}_{file_no}.xlsx"
        )
        create_excel(rows, filename)
        outputs.append((filename, len(rows)))

    print("\n" + banner)
    print("✓ 所有文件生成完成!")
    print(banner)

    print("\n生成的文件:")
    for file_no, (filename, _) in enumerate(outputs, 1):
        print(f" {file_no}. {filename}")

    print("\n数据统计:")
    print(f" - 总数据量: {total_rows} 条")
    for file_no, (_, row_count) in enumerate(outputs, 1):
        print(f" - 文件{file_no}: {row_count} 条")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|