Files
ccdi/assets/implementation/scripts/generate_recruitment_test_data.py
2026-03-03 16:14:16 +08:00

272 lines
11 KiB
Python

"""
招聘信息测试数据生成器
生成符合校验规则的招聘信息测试数据并保存到Excel文件
"""
import random
import string
from datetime import datetime, timedelta
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill
# 数据配置
RECRUIT_COUNT = 2000 # 生成数据条数
# 招聘项目名称列表
RECRUIT_NAMES = [
"2025春季校园招聘", "2025秋季校园招聘", "2025社会招聘", "2025技术专项招聘",
"2025管培生招聘", "2025实习生招聘", "2025高端人才引进", "2025春季研发岗招聘",
"2025夏季校园招聘", "2025冬季校园招聘", "2025春季销售岗招聘", "2025秋季市场岗招聘",
"2025春季运营岗招聘", "2025秋季产品岗招聘", "2025春季客服岗招聘", "2025秋季人事岗招聘"
]
# 职位名称列表
POSITION_NAMES = [
"Java开发工程师", "Python开发工程师", "前端开发工程师", "后端开发工程师",
"全栈工程师", "算法工程师", "数据分析师", "产品经理",
"UI设计师", "测试工程师", "运维工程师", "架构师",
"软件工程师", "系统分析师", "数据库管理员", "网络工程师",
"移动端开发工程师", "嵌入式开发工程师", "大数据工程师", "人工智能工程师"
]
# 职位类别
POSITION_CATEGORIES = [
"技术类", "产品类", "设计类", "运营类",
"市场类", "销售类", "客服类", "人事类",
"财务类", "行政类", "管理类", "研发类"
]
# 职位描述模板
POSITION_DESCS = [
"负责公司核心业务系统的设计和开发,要求熟悉相关技术栈,具备良好的编码规范和团队协作能力。",
"参与产品需求分析和技术方案设计,负责模块开发和维护,优化系统性能,保障系统稳定性。",
"负责系统架构设计和技术选型,解决技术难题,指导团队成员开发,推动技术创新。",
"负责数据采集、清洗、分析和可视化,为业务决策提供数据支持,优化业务流程。",
"负责产品规划、需求分析和产品设计,协调研发、测试、运营等团队,推动产品落地。",
"负责用户界面设计和用户体验优化,与产品经理和开发团队协作,确保设计还原度。",
"负责系统测试和质量保障,编写测试用例,执行测试,跟踪缺陷,保障产品质量。",
"负责系统运维和监控,保障系统稳定运行,优化系统性能,处理故障和应急响应。"
]
# 常见姓氏和名字
SURNAMES = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""]
GIVEN_NAMES = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "秀英", "", ""]
# 学历列表
EDUCATIONS = ["本科", "硕士", "博士", "大专", "高中"]
# 毕业院校列表
UNIVERSITIES = [
"清华大学", "北京大学", "复旦大学", "上海交通大学", "浙江大学", "中国科学技术大学",
"南京大学", "中山大学", "华中科技大学", "哈尔滨工业大学", "西安交通大学", "北京理工大学",
"中国人民大学", "北京航空航天大学", "同济大学", "南开大学", "天津大学", "东南大学",
"武汉大学", "厦门大学", "山东大学", "四川大学", "吉林大学", "中南大学",
"华南理工大学", "西北工业大学", "华东师范大学", "北京师范大学", "重庆大学"
]
# 专业列表
MAJORS = [
"计算机科学与技术", "软件工程", "人工智能", "数据科学与大数据技术", "物联网工程",
"电子信息工程", "通信工程", "自动化", "电气工程及其自动化", "机械工程",
"材料科学与工程", "化学工程与工艺", "生物工程", "环境工程", "土木工程",
"数学与应用数学", "统计学", "物理学", "化学", "生物学",
"工商管理", "市场营销", "会计学", "金融学", "国际经济与贸易",
"人力资源管理", "公共事业管理", "行政管理", "法学", "汉语言文学",
"英语", "日语", "新闻传播学", "广告学", "艺术设计"
]
# 录用状态
ADMIT_STATUSES = ["录用", "未录用", "放弃"]
# 面试官姓名和工号
INTERVIEWERS = [
("张伟", "INT001"), ("李芳", "INT002"), ("王磊", "INT003"), ("刘娜", "INT004"),
("陈军", "INT005"), ("杨静", "INT006"), ("黄勇", "INT007"), ("赵丽", "INT008"),
("周涛", "INT009"), ("吴明", "INT010"), ("徐超", "INT011"), ("孙杰", "INT012"),
("马娟", "INT013"), ("朱华", "INT014"), ("胡英", "INT015"), ("郭强", "INT016")
]
def generate_chinese_name():
"""生成中文姓名"""
surname = random.choice(SURNAMES)
# 50%概率双字名,50%概率单字名
if random.random() > 0.5:
given_name = random.choice(GIVEN_NAMES) + random.choice(GIVEN_NAMES)
else:
given_name = random.choice(GIVEN_NAMES)
return surname + given_name
def generate_id_number():
"""生成18位身份证号码"""
# 地区码(前6位)
area_code = f"{random.randint(110000, 659001):06d}"
# 出生日期(8位) - 生成1990-2005年的出生日期
birth_year = random.randint(1990, 2005)
birth_month = f"{random.randint(1, 12):02d}"
birth_day = f"{random.randint(1, 28):02d}"
birth_date = f"{birth_year}{birth_month}{birth_day}"
# 顺序码(3位)
sequence_code = f"{random.randint(1, 999):03d}"
# 前17位
id_17 = area_code + birth_date + sequence_code
# 计算校验码(最后1位)
weights = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
check_codes = ['1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2']
total = sum(int(id_17[i]) * weights[i] for i in range(17))
check_code = check_codes[total % 11]
return id_17 + check_code
def generate_graduation_date():
"""生成毕业年月(YYYYMM格式)"""
# 生成2020-2030年之间的毕业年月
year = random.randint(2020, 2030)
month = f"{random.randint(1, 12):02d}"
return f"{year}{month}"
def generate_recruitment_data(start_index):
"""生成招聘测试数据"""
data = []
for i in range(start_index, start_index + RECRUIT_COUNT):
# 生成招聘项目编号
recruit_id = f"REC{datetime.now().strftime('%Y%m%d')}{i:06d}"
# 选择面试官(50%概率有两个面试官,50%概率只有一个)
if random.random() > 0.5:
interviewer1_name, interviewer1_id = random.choice(INTERVIEWERS)
interviewer2_name, interviewer2_id = random.choice(INTERVIEWERS)
else:
interviewer1_name, interviewer1_id = random.choice(INTERVIEWERS)
interviewer2_name = ""
interviewer2_id = ""
row_data = [
recruit_id, # 招聘项目编号
random.choice(RECRUIT_NAMES), # 招聘项目名称
random.choice(POSITION_NAMES), # 职位名称
random.choice(POSITION_CATEGORIES), # 职位类别
random.choice(POSITION_DESCS), # 职位描述
generate_chinese_name(), # 应聘人员姓名
random.choice(EDUCATIONS), # 应聘人员学历
generate_id_number(), # 应聘人员证件号码
random.choice(UNIVERSITIES), # 应聘人员毕业院校
random.choice(MAJORS), # 应聘人员专业
generate_graduation_date(), # 应聘人员毕业年月
random.choice(ADMIT_STATUSES), # 录用情况
interviewer1_name, # 面试官1姓名
interviewer1_id, # 面试官1工号
interviewer2_name, # 面试官2姓名
interviewer2_id # 面试官2工号
]
data.append(row_data)
return data
def create_excel(data, filename):
"""创建Excel文件"""
wb = Workbook()
ws = wb.active
ws.title = "招聘信息"
# 表头
headers = [
"招聘项目编号", "招聘项目名称", "职位名称", "职位类别", "职位描述",
"应聘人员姓名", "应聘人员学历", "应聘人员证件号码", "应聘人员毕业院校",
"应聘人员专业", "应聘人员毕业年月", "录用情况",
"面试官1姓名", "面试官1工号", "面试官2姓名", "面试官2工号"
]
# 写入表头
ws.append(headers)
# 设置表头样式
header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
header_font = Font(bold=True, color="FFFFFF")
for col_num, header in enumerate(headers, 1):
cell = ws.cell(row=1, column=col_num)
cell.fill = header_fill
cell.font = header_font
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
# 写入数据
for row_data in data:
ws.append(row_data)
# 设置列宽
column_widths = [20, 20, 20, 15, 30, 15, 15, 20, 20, 15, 15, 10, 15, 15, 15, 15]
for col_num, width in enumerate(column_widths, 1):
ws.column_dimensions[chr(64 + col_num)].width = width
# 设置所有单元格居中对齐
for row in ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
for cell in row:
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
# 保存文件
wb.save(filename)
print(f"✓ 已生成文件: {filename}")
print(f" 数据行数: {len(data)}")
def main():
"""主函数"""
print("=" * 70)
print("招聘信息测试数据生成器")
print("=" * 70)
# 检查是否安装了openpyxl
try:
import openpyxl
except ImportError:
print("✗ 未安装openpyxl库,正在安装...")
import subprocess
subprocess.check_call(["pip", "install", "openpyxl"])
print("✓ openpyxl库安装成功")
print(f"\n配置信息:")
print(f" - 生成数据量: {RECRUIT_COUNT} 条/文件")
print(f" - 生成文件数: 2 个")
print(f" - 总数据量: {RECRUIT_COUNT * 2}")
print(f"\n开始生成数据...")
# 生成第一个文件
print(f"\n正在生成第1个文件...")
data1 = generate_recruitment_data(1)
filename1 = "doc/test-data/recruitment/recruitment_test_data_2000_1.xlsx"
create_excel(data1, filename1)
# 生成第二个文件
print(f"\n正在生成第2个文件...")
data2 = generate_recruitment_data(RECRUIT_COUNT + 1)
filename2 = "doc/test-data/recruitment/recruitment_test_data_2000_2.xlsx"
create_excel(data2, filename2)
print("\n" + "=" * 70)
print("✓ 所有文件生成完成!")
print("=" * 70)
print(f"\n生成的文件:")
print(f" 1. {filename1}")
print(f" 2. {filename2}")
print(f"\n数据统计:")
print(f" - 总数据量: {RECRUIT_COUNT * 2}")
print(f" - 文件1: {len(data1)}")
print(f" - 文件2: {len(data2)}")
if __name__ == "__main__":
main()