Files
ccdi/assets/implementation/scripts/generate_recruitment_test_data.py

272 lines
11 KiB
Python
Raw Normal View History

"""
招聘信息测试数据生成器
生成符合校验规则的招聘信息测试数据并保存到Excel文件
"""
import random
import string
from datetime import datetime, timedelta
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill
# 数据配置
RECRUIT_COUNT = 2000 # 生成数据条数
# 招聘项目名称列表
RECRUIT_NAMES = [
"2025春季校园招聘", "2025秋季校园招聘", "2025社会招聘", "2025技术专项招聘",
"2025管培生招聘", "2025实习生招聘", "2025高端人才引进", "2025春季研发岗招聘",
"2025夏季校园招聘", "2025冬季校园招聘", "2025春季销售岗招聘", "2025秋季市场岗招聘",
"2025春季运营岗招聘", "2025秋季产品岗招聘", "2025春季客服岗招聘", "2025秋季人事岗招聘"
]
# 职位名称列表
POSITION_NAMES = [
"Java开发工程师", "Python开发工程师", "前端开发工程师", "后端开发工程师",
"全栈工程师", "算法工程师", "数据分析师", "产品经理",
"UI设计师", "测试工程师", "运维工程师", "架构师",
"软件工程师", "系统分析师", "数据库管理员", "网络工程师",
"移动端开发工程师", "嵌入式开发工程师", "大数据工程师", "人工智能工程师"
]
# 职位类别
POSITION_CATEGORIES = [
"技术类", "产品类", "设计类", "运营类",
"市场类", "销售类", "客服类", "人事类",
"财务类", "行政类", "管理类", "研发类"
]
# 职位描述模板
POSITION_DESCS = [
"负责公司核心业务系统的设计和开发,要求熟悉相关技术栈,具备良好的编码规范和团队协作能力。",
"参与产品需求分析和技术方案设计,负责模块开发和维护,优化系统性能,保障系统稳定性。",
"负责系统架构设计和技术选型,解决技术难题,指导团队成员开发,推动技术创新。",
"负责数据采集、清洗、分析和可视化,为业务决策提供数据支持,优化业务流程。",
"负责产品规划、需求分析和产品设计,协调研发、测试、运营等团队,推动产品落地。",
"负责用户界面设计和用户体验优化,与产品经理和开发团队协作,确保设计还原度。",
"负责系统测试和质量保障,编写测试用例,执行测试,跟踪缺陷,保障产品质量。",
"负责系统运维和监控,保障系统稳定运行,优化系统性能,处理故障和应急响应。"
]
# 常见姓氏和名字
SURNAMES = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""]
GIVEN_NAMES = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "秀英", "", ""]
# 学历列表
EDUCATIONS = ["本科", "硕士", "博士", "大专", "高中"]
# 毕业院校列表
UNIVERSITIES = [
"清华大学", "北京大学", "复旦大学", "上海交通大学", "浙江大学", "中国科学技术大学",
"南京大学", "中山大学", "华中科技大学", "哈尔滨工业大学", "西安交通大学", "北京理工大学",
"中国人民大学", "北京航空航天大学", "同济大学", "南开大学", "天津大学", "东南大学",
"武汉大学", "厦门大学", "山东大学", "四川大学", "吉林大学", "中南大学",
"华南理工大学", "西北工业大学", "华东师范大学", "北京师范大学", "重庆大学"
]
# 专业列表
MAJORS = [
"计算机科学与技术", "软件工程", "人工智能", "数据科学与大数据技术", "物联网工程",
"电子信息工程", "通信工程", "自动化", "电气工程及其自动化", "机械工程",
"材料科学与工程", "化学工程与工艺", "生物工程", "环境工程", "土木工程",
"数学与应用数学", "统计学", "物理学", "化学", "生物学",
"工商管理", "市场营销", "会计学", "金融学", "国际经济与贸易",
"人力资源管理", "公共事业管理", "行政管理", "法学", "汉语言文学",
"英语", "日语", "新闻传播学", "广告学", "艺术设计"
]
# 录用状态
ADMIT_STATUSES = ["录用", "未录用", "放弃"]
# 面试官姓名和工号
INTERVIEWERS = [
("张伟", "INT001"), ("李芳", "INT002"), ("王磊", "INT003"), ("刘娜", "INT004"),
("陈军", "INT005"), ("杨静", "INT006"), ("黄勇", "INT007"), ("赵丽", "INT008"),
("周涛", "INT009"), ("吴明", "INT010"), ("徐超", "INT011"), ("孙杰", "INT012"),
("马娟", "INT013"), ("朱华", "INT014"), ("胡英", "INT015"), ("郭强", "INT016")
]
def generate_chinese_name():
"""生成中文姓名"""
surname = random.choice(SURNAMES)
# 50%概率双字名,50%概率单字名
if random.random() > 0.5:
given_name = random.choice(GIVEN_NAMES) + random.choice(GIVEN_NAMES)
else:
given_name = random.choice(GIVEN_NAMES)
return surname + given_name
def generate_id_number():
"""生成18位身份证号码"""
# 地区码(前6位)
area_code = f"{random.randint(110000, 659001):06d}"
# 出生日期(8位) - 生成1990-2005年的出生日期
birth_year = random.randint(1990, 2005)
birth_month = f"{random.randint(1, 12):02d}"
birth_day = f"{random.randint(1, 28):02d}"
birth_date = f"{birth_year}{birth_month}{birth_day}"
# 顺序码(3位)
sequence_code = f"{random.randint(1, 999):03d}"
# 前17位
id_17 = area_code + birth_date + sequence_code
# 计算校验码(最后1位)
weights = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
check_codes = ['1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2']
total = sum(int(id_17[i]) * weights[i] for i in range(17))
check_code = check_codes[total % 11]
return id_17 + check_code
def generate_graduation_date():
"""生成毕业年月(YYYYMM格式)"""
# 生成2020-2030年之间的毕业年月
year = random.randint(2020, 2030)
month = f"{random.randint(1, 12):02d}"
return f"{year}{month}"
def generate_recruitment_data(start_index):
"""生成招聘测试数据"""
data = []
for i in range(start_index, start_index + RECRUIT_COUNT):
# 生成招聘项目编号
recruit_id = f"REC{datetime.now().strftime('%Y%m%d')}{i:06d}"
# 选择面试官(50%概率有两个面试官,50%概率只有一个)
if random.random() > 0.5:
interviewer1_name, interviewer1_id = random.choice(INTERVIEWERS)
interviewer2_name, interviewer2_id = random.choice(INTERVIEWERS)
else:
interviewer1_name, interviewer1_id = random.choice(INTERVIEWERS)
interviewer2_name = ""
interviewer2_id = ""
row_data = [
recruit_id, # 招聘项目编号
random.choice(RECRUIT_NAMES), # 招聘项目名称
random.choice(POSITION_NAMES), # 职位名称
random.choice(POSITION_CATEGORIES), # 职位类别
random.choice(POSITION_DESCS), # 职位描述
generate_chinese_name(), # 应聘人员姓名
random.choice(EDUCATIONS), # 应聘人员学历
generate_id_number(), # 应聘人员证件号码
random.choice(UNIVERSITIES), # 应聘人员毕业院校
random.choice(MAJORS), # 应聘人员专业
generate_graduation_date(), # 应聘人员毕业年月
random.choice(ADMIT_STATUSES), # 录用情况
interviewer1_name, # 面试官1姓名
interviewer1_id, # 面试官1工号
interviewer2_name, # 面试官2姓名
interviewer2_id # 面试官2工号
]
data.append(row_data)
return data
def create_excel(data, filename):
"""创建Excel文件"""
wb = Workbook()
ws = wb.active
ws.title = "招聘信息"
# 表头
headers = [
"招聘项目编号", "招聘项目名称", "职位名称", "职位类别", "职位描述",
"应聘人员姓名", "应聘人员学历", "应聘人员证件号码", "应聘人员毕业院校",
"应聘人员专业", "应聘人员毕业年月", "录用情况",
"面试官1姓名", "面试官1工号", "面试官2姓名", "面试官2工号"
]
# 写入表头
ws.append(headers)
# 设置表头样式
header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
header_font = Font(bold=True, color="FFFFFF")
for col_num, header in enumerate(headers, 1):
cell = ws.cell(row=1, column=col_num)
cell.fill = header_fill
cell.font = header_font
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
# 写入数据
for row_data in data:
ws.append(row_data)
# 设置列宽
column_widths = [20, 20, 20, 15, 30, 15, 15, 20, 20, 15, 15, 10, 15, 15, 15, 15]
for col_num, width in enumerate(column_widths, 1):
ws.column_dimensions[chr(64 + col_num)].width = width
# 设置所有单元格居中对齐
for row in ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
for cell in row:
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
# 保存文件
wb.save(filename)
print(f"✓ 已生成文件: {filename}")
print(f" 数据行数: {len(data)}")
def main():
"""主函数"""
print("=" * 70)
print("招聘信息测试数据生成器")
print("=" * 70)
# 检查是否安装了openpyxl
try:
import openpyxl
except ImportError:
print("✗ 未安装openpyxl库,正在安装...")
import subprocess
subprocess.check_call(["pip", "install", "openpyxl"])
print("✓ openpyxl库安装成功")
print(f"\n配置信息:")
print(f" - 生成数据量: {RECRUIT_COUNT} 条/文件")
print(f" - 生成文件数: 2 个")
print(f" - 总数据量: {RECRUIT_COUNT * 2}")
print(f"\n开始生成数据...")
# 生成第一个文件
print(f"\n正在生成第1个文件...")
data1 = generate_recruitment_data(1)
filename1 = "doc/test-data/recruitment/recruitment_test_data_2000_1.xlsx"
create_excel(data1, filename1)
# 生成第二个文件
print(f"\n正在生成第2个文件...")
data2 = generate_recruitment_data(RECRUIT_COUNT + 1)
filename2 = "doc/test-data/recruitment/recruitment_test_data_2000_2.xlsx"
create_excel(data2, filename2)
print("\n" + "=" * 70)
print("✓ 所有文件生成完成!")
print("=" * 70)
print(f"\n生成的文件:")
print(f" 1. {filename1}")
print(f" 2. {filename2}")
print(f"\n数据统计:")
print(f" - 总数据量: {RECRUIT_COUNT * 2}")
print(f" - 文件1: {len(data1)}")
print(f" - 文件2: {len(data2)}")
if __name__ == "__main__":
main()