修改目录
This commit is contained in:
193
assets/implementation/scripts/generate_test_data.py
Normal file
193
assets/implementation/scripts/generate_test_data.py
Normal file
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
根据模板文件生成1000条个人中介黑名单测试数据
|
||||
"""
|
||||
|
||||
import openpyxl
|
||||
from openpyxl.styles import Font, PatternFill, Alignment
|
||||
import random
|
||||
from datetime import datetime
|
||||
|
||||
# ---------------------------------------------------------------------------
# Configuration: template workbook to read, workbook to write, and the number
# of data rows to generate.
# ---------------------------------------------------------------------------
TEMPLATE_FILE = "doc/个人中介黑名单模板_1769667622015.xlsx"
OUTPUT_FILE = "doc/个人中介黑名单测试数据_1000条_第2批.xlsx"
ROW_COUNT = 1000

# Pools of surnames and given-name entries used to assemble random names.
SURNAMES = [
    '王', '李', '张', '刘', '陈', '杨', '黄', '赵', '周', '吴',
    '徐', '孙', '马', '朱', '胡', '郭', '何', '高', '林', '罗',
]
GIVEN_NAMES = [
    '伟', '芳', '娜', '敏', '静', '丽', '强', '磊', '军', '洋',
    '勇', '艳', '杰', '娟', '涛', '明', '超', '秀英', '霞', '平',
    '刚', '桂英', '玉兰', '萍', '毅', '浩', '宇', '轩', '然', '梓',
]

# Person types.
INDIV_TYPES = [
    '中介',
    '职业背债人',
    '房产中介',
    '贷款中介',
    '其他',
]

# Person sub-types.
INDIV_SUB_TYPES = [
    '本人',
    '配偶',
    '父亲',
    '母亲',
    '儿子',
    '女儿',
]

# Genders.
GENDERS = ['男', '女']

# Certificate types; generate_id_number() formats numbers per type.
CERT_TYPES = [
    '身份证',
    '护照',
    '军官证',
    '其他',
]

# Relationship labels for the associated-person column.
RELATIONS = [
    '配偶',
    '父母',
    '子女',
    '兄弟姐妹',
    '同事',
    '朋友',
    '合伙人',
    '其他',
]

# Company names.
COMPANIES = [
    '中原地产',
    '链家地产',
    '我爱我家',
    '21世纪不动产',
    'Q房网',
    '安居客',
    '房天下',
    '麦田房产',
    '鑫置地产',
    '嘉业地产',
]

# Job positions.
POSITIONS = [
    '经纪人',
    '高级经纪人',
    '店长',
    '区域经理',
    '业务员',
    '顾问',
    '总监',
    '助理',
    '专员',
]

# City name -> list of districts in that city, used to build addresses.
CITIES = {
    '北京': ['朝阳区', '海淀区', '东城区', '西城区', '丰台区', '通州区'],
    '上海': ['浦东新区', '黄浦区', '徐汇区', '长宁区', '静安区', '普陀区'],
    '广州': ['天河区', '越秀区', '海珠区', '荔湾区', '白云区', '番禺区'],
    '深圳': ['福田区', '南山区', '罗湖区', '宝安区', '龙岗区', '盐田区'],
    '杭州': ['西湖区', '上城区', '下城区', '江干区', '拱墅区', '滨江区'],
    '成都': ['武侯区', '锦江区', '青羊区', '金牛区', '成华区', '高新区'],
    '武汉': ['武昌区', '江岸区', '江汉区', '硚口区', '汉阳区', '洪山区'],
    '南京': ['玄武区', '秦淮区', '建邺区', '鼓楼区', '浦口区', '栖霞区'],
}
|
||||
|
||||
|
||||
def generate_id_number(cert_type):
    """Return a random certificate number formatted for ``cert_type``.

    * ``'身份证'``: an 18-character ID number made of a 6-digit number drawn
      from 110000-659000, a birth date (year 1960-2000, month 1-12, day 1-28),
      a 3-digit sequence (100-999) and one trailing check character.
    * ``'护照'``: ``E`` followed by 8 digits.
    * ``'军官证'``: ``军字第`` + 7 digits + ``号``.
    * anything else: ``QT`` followed by 9 digits.
    """
    if cert_type == '护照':
        return "E" + str(random.randint(10000000, 99999999))
    if cert_type == '军官证':
        return "军字第" + str(random.randint(1000000, 9999999)) + "号"
    if cert_type != '身份证':
        return "QT" + str(random.randint(100000000, 999999999))

    # Draw the components in a fixed order so a seeded run is reproducible.
    region = random.randint(110000, 659000)
    year = random.randint(1960, 2000)
    month = random.randint(1, 12)
    day = random.randint(1, 28)
    serial = random.randint(100, 999)
    body = f"{region}{year}{month:02d}{day:02d}{serial}"

    # Check character: weighted sum of the 17 body digits, reduced modulo 11,
    # used as an index into the check-character table.
    digit_weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    check_chars = "10X98765432"
    weighted_sum = sum(
        int(digit) * weight for digit, weight in zip(body, digit_weights)
    )
    return body + check_chars[weighted_sum % 11]
|
||||
|
||||
|
||||
def generate_phone():
    """Return a random 11-digit mobile number as a string.

    The number is a 3-digit prefix (130-139, 150-153, 155-159 or 180-189)
    followed by a random 8-digit integer in 10000000-99999999.
    """
    # Same 29 prefixes, in the same order, as an explicit literal list would
    # hold; 154 is intentionally absent.
    prefix_pool = (
        *range(130, 140),
        *range(150, 154),
        *range(155, 160),
        *range(180, 190),
    )
    prefix = str(random.choice(prefix_pool))
    subscriber = random.randint(10000000, 99999999)
    return prefix + str(subscriber)
|
||||
|
||||
|
||||
def generate_wechat():
    """Return a random WeChat ID of the form ``wx_`` plus eight digits."""
    numeric_part = random.randint(10000000, 99999999)
    return "wx_" + str(numeric_part)
|
||||
|
||||
|
||||
def generate_address():
    """Return a random contact address string.

    A city and one of its districts are taken from ``CITIES``; a street name,
    house number (1-999), building label and room number (101-2606) are then
    appended, giving ``<city><district><street><number>号<building><room>室``.
    """
    street_names = ('中山路', '解放路', '人民路', '建设路', '文化路', '和平路', '友谊路', '光明路')
    building_labels = ('A座', 'B座', '1号楼', '2号楼', '东苑', '西苑', '南区', '北区')

    city = random.choice(list(CITIES))
    district = random.choice(CITIES[city])
    # The list literal is evaluated left to right, which keeps the order of
    # random draws: street, house number, building, room.
    parts = [
        city,
        district,
        random.choice(street_names),
        f"{random.randint(1, 999)}号",
        random.choice(building_labels),
        f"{random.randint(101, 2606)}室",
    ]
    return "".join(parts)
|
||||
|
||||
|
||||
def generate_name():
    """Return a random full name.

    A surname from ``SURNAMES`` is followed by one entry from ``GIVEN_NAMES``
    (roughly 70% of calls) or two entries (roughly 30%). Some entries are
    themselves two characters long, so the result is 2 to 5 characters.
    """
    family_name = random.choice(SURNAMES)
    # random() > 0.3 selects the single-entry form.
    entry_count = 1 if random.random() > 0.3 else 2
    given_name = "".join(random.choice(GIVEN_NAMES) for _ in range(entry_count))
    return family_name + given_name
|
||||
|
||||
|
||||
def main():
    """Fill the template workbook with ``ROW_COUNT`` random rows and save it.

    Steps:
      1. Load ``TEMPLATE_FILE`` and read the header row (row 1) of the active
         sheet, remembering each non-empty header together with its real
         column index.
      2. Blank every existing cell below the header row.
      3. Write ``ROW_COUNT`` generated rows starting at row 2; only columns
         whose header matches a generated field are filled.
      4. Save to ``OUTPUT_FILE`` and print a summary plus a short preview.

    Errors are reported on stdout (with a traceback for unexpected ones)
    instead of being raised; the function always returns ``None``.
    """
    print(f"正在读取模板文件: {TEMPLATE_FILE}")

    try:
        # Load inside its own handler so that a FileNotFoundError raised later
        # (for example when saving into a missing directory) is not
        # misreported as a missing template.
        try:
            wb = openpyxl.load_workbook(TEMPLATE_FILE)
        except FileNotFoundError:
            print(f"✗ 错误:找不到模板文件 {TEMPLATE_FILE}")
            print("请确保模板文件存在于正确的路径")
            return

        ws = wb.active

        # Keep the true column index next to each header. Renumbering the
        # non-empty headers 1..n would shift data left whenever the template
        # has a blank cell inside the header row.
        header_columns = [
            (col_idx, cell.value)
            for col_idx, cell in enumerate(ws[1], start=1)
            if cell.value
        ]
        headers = [title for _, title in header_columns]

        print(f"模板表头: {headers}")
        print(f"开始生成 {ROW_COUNT} 条测试数据...")

        # Clear every data row below the header.
        for row in range(2, ws.max_row + 1):
            for col in range(1, ws.max_column + 1):
                ws.cell(row=row, column=col).value = None

        # Generate the data rows; worksheet row i holds record number i - 1.
        for i in range(2, ROW_COUNT + 2):
            record_no = i - 1
            indiv_type = random.choice(INDIV_TYPES)
            gender = random.choice(GENDERS)
            cert_type = random.choice(CERT_TYPES)

            # Field values keyed by the header text they belong under.
            row_data = {
                '姓名': generate_name(),
                '证件号码': generate_id_number(cert_type),
                '人员类型': indiv_type,
                '人员子类型': random.choice(INDIV_SUB_TYPES),
                '性别': gender,
                '证件类型': cert_type,
                '手机号': generate_phone(),
                '微信号': generate_wechat(),
                '联系地址': generate_address(),
                '所在公司': random.choice(COMPANIES),
                '职位': random.choice(POSITIONS),
                # Associated-person ID is filled in roughly 20% of rows.
                '关联人员ID': str(random.randint(1000, 9999)) if random.random() > 0.8 else '',
                # Relationship is filled in roughly 50% of rows.
                '关联关系': random.choice(RELATIONS) if random.random() > 0.5 else '',
                '备注': f'测试数据{record_no}',
            }

            # Headers with no generated field are left empty.
            for col_idx, header in header_columns:
                if header in row_data:
                    ws.cell(row=i, column=col_idx, value=row_data[header])

            if record_no % 100 == 0:
                print(f"已生成 {record_no} 条数据...")

        # Save the workbook.
        print(f"\n正在保存文件到: {OUTPUT_FILE}")
        wb.save(OUTPUT_FILE)

        print(f"✓ 成功生成 {ROW_COUNT} 条测试数据")
        print(f"✓ 文件已保存至: {OUTPUT_FILE}")
        print(f"✓ 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

        # Preview: up to the first 3 records, first 6 header columns each.
        print("\n前3条数据示例:")
        print("-" * 100)
        for i in range(2, min(ROW_COUNT, 3) + 2):
            preview = []
            for col_idx, header in header_columns[:6]:
                val = ws.cell(row=i, column=col_idx).value
                text = "" if val is None else str(val)
                preview.append(f"{header}:{text}")
            print(f"第{i-1}行: {', '.join(preview)}")

    except Exception as e:
        # Top-level boundary of the script: report the error and its traceback.
        print(f"✗ 错误:{str(e)}")
        import traceback
        traceback.print_exc()
|
||||
|
||||
|
||||
# Script entry point: generate the workbook only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user