# Files
# ccdi/doc/test-data/intermediary/generate-test-data-1000.py
# 2026-02-09 00:13:32 +08:00
#
# 164 lines
# 8.5 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pandas as pd
import random
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment
# Path of the template workbook. It is only defined here; nothing in the
# visible part of this script reads it.
template_file = 'doc/test-data/intermediary/person_1770542031351.xlsx'
# Path of the workbook this script writes.
output_file = 'doc/test-data/intermediary/intermediary_test_data_1000.xlsx'

# Vocabularies used by the random data generators.
#
# NOTE(review): the three name lists previously contained only empty strings,
# so every generated name was '' even though the name column is marked
# required ('姓名*'). The original characters could not be recovered; the
# values below are common surnames / given names chosen as replacements --
# confirm or swap in the intended lists.
last_names = ['王', '李', '张', '刘', '陈', '杨', '黄', '赵', '吴', '周', '徐', '孙', '马', '朱', '胡', '郭', '何', '林', '高', '罗']
first_names_male = ['伟', '强', '磊', '军', '洋', '勇', '杰', '涛', '明', '超', '刚', '平', '辉', '鹏', '华', '飞', '鑫', '波', '斌', '毅']
first_names_female = ['芳', '娜', '敏', '静', '丽', '艳', '娟', '霞', '燕', '玲', '丹', '萍', '红', '雪', '琳', '婷', '慧', '玉兰', '桂英', '秀英']
person_types = ['中介']
person_sub_types = ['本人', '配偶', '子女', '父母', '其他']
genders = ['M', 'F', 'O']
id_types = ['身份证', '护照', '台胞证', '港澳通行证']
companies = ['房屋租赁公司', '房产经纪公司', '投资咨询公司', '置业咨询公司', '不动产咨询公司', '物业管理公司', '资产评估公司', '土地评估公司', '地产代理公司', '房产咨询公司']
# A trailing None lets a random pick leave the position empty.
positions = ['区域经理', '店长', '高级经纪人', '房产经纪人', '销售经理', '置业顾问', '物业顾问', '评估师', '业务员', '总监', '主管', None]
# Two None entries make "no relation" more likely than any single relation.
relation_types = ['配偶', '子女', '父母', '兄弟姐妹', None, None]
provinces = ['北京市', '上海市', '广东省', '江苏省', '浙江省', '四川省', '河南省', '福建省', '湖北省', '湖南省']
districts = ['海淀区', '朝阳区', '天河区', '浦东新区', '西湖区', '黄浦区', '静安区', '徐汇区', '福田区', '罗湖区']
# The two entries that were empty are restored as '路' and '巷', the street
# words that appear in the contact_address values of existing_data_samples.
streets = ['路', '大街', '大道', '街道', '巷', '广场', '大厦', '花园']
buildings = ['1号楼', '2号楼', '3号楼', '4号楼', '5号楼', '6号楼', '7号楼', '8号楼', 'A座', 'B座']
# Existing-data samples (in the format obtained from the database).
# Each row tuple lists its values in the order given by _SAMPLE_FIELDS.
_SAMPLE_FIELDS = (
    'name', 'person_type', 'person_sub_type', 'gender', 'id_type',
    'person_id', 'mobile', 'wechat_no', 'contact_address', 'company',
    'social_credit_code', 'position', 'related_num_id', 'relation_type',
)
_SAMPLE_ROWS = (
    ('林玉兰', '中介', '本人', 'F', '护照', '45273944', '18080309834', 'wx_rt54d59p',
     '福建省黄浦区巷4号', '房屋租赁公司', '911981352496905281', '区域经理', 'ID92351', None),
    ('刘平', '中介', '本人', 'F', '台胞证', '38639164', '19360856434', None,
     '四川省海淀区路3号', '房产经纪公司', '918316437629447909', None, None, None),
    ('何娜', '中介', '本人', 'O', '港澳通行证', '83433341', '18229577387', 'wx_8ikozqjx',
     '河南省天河区巷4号', '房产经纪公司', '918315578905616368', '店长', None, '父母'),
    ('王毅', '中介', '本人', 'M', '台胞证', '76369869', '17892993806', None,
     '江苏省西湖区街道1号', '投资咨询公司', None, '高级经纪人', 'ID61198', None),
    ('李桂英', '中介', '配偶', 'F', '护照', '75874216', '15648713336', 'wx_5n0e926w',
     '浙江省海淀区大道2号', '投资咨询公司', None, '店长', None, None),
)
existing_data_samples = [dict(zip(_SAMPLE_FIELDS, row)) for row in _SAMPLE_ROWS]
def generate_name(gender):
    """Return a random name: one surname followed by one given name.

    The given name is drawn from ``first_names_male`` when ``gender`` is
    ``'M'``; every other value draws from ``first_names_female``.
    """
    surname = random.choice(last_names)
    if gender == 'M':
        given_pool = first_names_male
    else:
        given_pool = first_names_female
    return surname + random.choice(given_pool)
def generate_mobile():
    """Return a random 11-digit string shaped like a mobile number.

    Layout: the literal "1", a second digit from {3, 5, 7, 8, 9}, one
    random digit, then an 8-digit number between 10000000 and 99999999.
    """
    second_digit = random.choice([3, 5, 7, 8, 9])
    third_digit = random.randint(0, 9)
    tail = random.randint(10000000, 99999999)
    return "1" + str(second_digit) + str(third_digit) + str(tail)
def generate_wechat():
    """Return a random WeChat-style id: ``wx_`` plus 8 lowercase letters/digits."""
    alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
    suffix = ''.join(random.choices(alphabet, k=8))
    return 'wx_' + suffix
def generate_person_id(id_type):
    """Return a random document number for the given document type.

    Args:
        id_type: Document type label. ``'身份证'`` (resident ID card) yields an
            18-character number; any other value yields an 8-digit number.

    Returns:
        The document number as a string.

    For ID cards the layout is: 6-digit area code + 4-digit birth year +
    2-digit month + 2-digit day + 3-digit sequence code + 1 check character.
    The check character is computed with the MOD 11-2 scheme of GB 11643
    (previously it was chosen at random, so most generated numbers failed
    checksum validation).
    """
    if id_type != '身份证':
        return str(random.randint(10000000, 99999999))

    area_code = random.randint(110000, 659999)
    birth_year = random.randint(1960, 2000)
    birth_month = random.randint(1, 12)
    # Days are capped at 28 so every year/month combination is a valid date.
    birth_day = random.randint(1, 28)
    sequence_code = random.randint(0, 999)
    body = f"{area_code}{birth_year}{birth_month:02d}{birth_day:02d}{sequence_code:03d}"

    # GB 11643 check character: weighted digit sum modulo 11, mapped through
    # the fixed table below (remainder 2 maps to 'X').
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    total = sum(int(digit) * weight for digit, weight in zip(body, weights))
    check_code = '10X98765432'[total % 11]
    return body + check_code
def generate_social_credit_code():
    """Return a random 18-digit string shaped like a unified social credit code.

    The result is the prefix ``"91"`` followed by 16 random decimal digits,
    matching the 18-character length of the codes in the existing sample data.
    (The previous implementation appended 18 digits to the prefix and so
    produced 20-character values.)

    NOTE(review): the trailing character is not a computed check character;
    the value is only length- and prefix-correct.
    """
    return "91" + ''.join(random.choices('0123456789', k=16))
def generate_address():
    """Return a random contact address string.

    The address is province + district + street word + a house number from
    1 to 100 + the suffix '号', which is the shape of the contact_address
    values in existing_data_samples (e.g. '四川省海淀区路3号'). The suffix
    was previously missing, so generated addresses ended in a bare number.
    """
    province = random.choice(provinces)
    district = random.choice(districts)
    street = random.choice(streets)
    house_number = random.randint(1, 100)
    return f"{province}{district}{street}{house_number}号"
def generate_related_num_id():
    """Return a random related-person id: ``ID`` plus a 5-digit number."""
    number = random.randint(10000, 99999)
    return "ID" + str(number)
def generate_row(index, is_existing):
    """Build one spreadsheet row as a dict keyed by column header.

    Args:
        index: Row index. Only used when ``is_existing`` is true, to pick a
            sample (``index`` modulo the number of samples).
        is_existing: When true, copy one of ``existing_data_samples``;
            otherwise generate a fresh random row.

    Returns:
        A dict with the 15 column headers as keys, in column order. The
        remark column ('备注') is always ``None``.
    """
    if is_existing:
        # Cycle through the fixed samples so any index maps to one of them.
        sample = existing_data_samples[index % len(existing_data_samples)]
        column_to_field = (
            ('姓名*', 'name'),
            ('人员类型', 'person_type'),
            ('人员子类型', 'person_sub_type'),
            ('性别', 'gender'),
            ('证件类型', 'id_type'),
            ('证件号码*', 'person_id'),
            ('手机号码', 'mobile'),
            ('微信号', 'wechat_no'),
            ('联系地址', 'contact_address'),
            ('所在公司', 'company'),
            ('企业统一信用码', 'social_credit_code'),
            ('职位', 'position'),
            ('关联人员ID', 'related_num_id'),
            ('关系类型', 'relation_type'),
        )
        copied = {column: sample[field] for column, field in column_to_field}
        copied['备注'] = None
        return copied

    # Random row. Values are drawn in column order so the sequence of calls
    # into the random module stays fixed.
    gender = random.choice(genders)
    sub_type = random.choice(person_sub_types)
    id_type = random.choice(id_types)
    name = generate_name(gender)
    person_id = generate_person_id(id_type)
    mobile = generate_mobile()
    # Optional fields: a value is always generated, then kept only if the
    # random pick lands on it rather than on one of the None entries.
    wechat = random.choice([generate_wechat(), None, None])
    address = generate_address()
    company = random.choice(companies)
    credit_code = random.choice([generate_social_credit_code(), None, None])
    position = random.choice(positions)
    related_id = random.choice([generate_related_num_id(), None, None, None])
    relation = random.choice(relation_types)
    return {
        '姓名*': name,
        '人员类型': '中介',
        '人员子类型': sub_type,
        '性别': gender,
        '证件类型': id_type,
        '证件号码*': person_id,
        '手机号码': mobile,
        '微信号': wechat,
        '联系地址': address,
        '所在公司': company,
        '企业统一信用码': credit_code,
        '职位': position,
        '关联人员ID': related_id,
        '关系类型': relation,
        '备注': None,
    }
# Generate 1000 rows: indices 0-499 copy the existing samples, 500-999 are random.
data = [generate_row(i, i < 500) for i in range(1000)]

# Build the DataFrame and write it to Excel.
df = pd.DataFrame(data)
df.to_excel(output_file, index=False, engine='openpyxl')

# Re-open the written workbook to apply formatting.
wb = load_workbook(output_file)
ws = wb.active

# Column widths, keyed by column letter.
column_widths = {
    'A': 15, 'B': 12, 'C': 12, 'D': 8, 'E': 12,
    'F': 20, 'G': 15, 'H': 15, 'I': 30, 'J': 20,
    'K': 20, 'L': 12, 'M': 15, 'N': 12, 'O': 20,
}
for letter, width in column_widths.items():
    ws.column_dimensions[letter].width = width

# Header row style: grey fill, bold text, centred.
header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid')
header_font = Font(bold=True)
header_alignment = Alignment(horizontal='center', vertical='center')
for cell in ws[1]:
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = header_alignment

# Freeze the first row so the header stays visible while scrolling.
ws.freeze_panes = 'A2'
wb.save(output_file)

print(f'成功生成1000条测试数据到: {output_file}')
print('- 500条现有数据前500行')
print('- 500条新数据后500行')