# File listing metadata: 164 lines, 8.5 KiB, Python
import pandas as pd
|
||
import random
|
||
from openpyxl import load_workbook
|
||
from openpyxl.styles import Font, PatternFill, Alignment
|
||
|
||
# Workbook paths. `template_file` names the import template; `output_file`
# is where the generated test workbook is written.
# NOTE(review): `template_file` does not appear to be read anywhere in the
# visible part of this script - confirm whether it is still needed.
template_file = "doc/test-data/intermediary/person_1770542031351.xlsx"
output_file = "doc/test-data/intermediary/intermediary_test_data_1000.xlsx"
|
||
|
||
# Value pools used by the random data generators.

# Surnames and single-character given names.
last_names = [
    '王', '李', '张', '刘', '陈', '杨', '赵', '黄', '周', '吴',
    '徐', '孙', '胡', '朱', '高', '林', '何', '郭', '马', '罗',
]
first_names_male = [
    '伟', '强', '磊', '洋', '勇', '军', '杰', '涛', '超', '明',
    '刚', '平', '辉', '鹏', '华', '飞', '鑫', '波', '斌', '宇',
]
first_names_female = [
    '芳', '娜', '敏', '静', '丽', '娟', '燕', '艳', '玲', '婷',
    '慧', '君', '萍', '颖', '琳', '雪', '梅', '兰', '红', '霞',
]

# Categorical fields of a person record.
person_types = ['中介']
person_sub_types = ['本人', '配偶', '子女', '父母', '其他']
genders = ['M', 'F', 'O']
id_types = ['身份证', '护照', '台胞证', '港澳通行证']

# Employer-related fields. A None entry means the field may be left empty;
# repeating None raises the chance of an empty value.
companies = [
    '房屋租赁公司', '房产经纪公司', '投资咨询公司', '置业咨询公司',
    '不动产咨询公司', '物业管理公司', '资产评估公司', '土地评估公司',
    '地产代理公司', '房产咨询公司',
]
positions = [
    '区域经理', '店长', '高级经纪人', '房产经纪人', '销售经理', '置业顾问',
    '物业顾问', '评估师', '业务员', '总监', '主管', None,
]
relation_types = ['配偶', '子女', '父母', '兄弟姐妹', None, None]

# Building blocks for contact addresses.
provinces = [
    '北京市', '上海市', '广东省', '江苏省', '浙江省',
    '四川省', '河南省', '福建省', '湖北省', '湖南省',
]
districts = [
    '海淀区', '朝阳区', '天河区', '浦东新区', '西湖区',
    '黄浦区', '静安区', '徐汇区', '福田区', '罗湖区',
]
streets = ['路', '大街', '大道', '街道', '巷', '广场', '大厦', '花园']
buildings = [
    '1号楼', '2号楼', '3号楼', '4号楼', '5号楼',
    '6号楼', '7号楼', '8号楼', 'A座', 'B座',
]
|
||
|
||
# Sample records in the shape of rows already stored in the database
# (per the original comment). They are replayed to produce "existing" rows.
existing_data_samples = [
    {
        'name': '林玉兰',
        'person_type': '中介',
        'person_sub_type': '本人',
        'gender': 'F',
        'id_type': '护照',
        'person_id': '45273944',
        'mobile': '18080309834',
        'wechat_no': 'wx_rt54d59p',
        'contact_address': '福建省黄浦区巷4号',
        'company': '房屋租赁公司',
        'social_credit_code': '911981352496905281',
        'position': '区域经理',
        'related_num_id': 'ID92351',
        'relation_type': None,
    },
    {
        'name': '刘平',
        'person_type': '中介',
        'person_sub_type': '本人',
        'gender': 'F',
        'id_type': '台胞证',
        'person_id': '38639164',
        'mobile': '19360856434',
        'wechat_no': None,
        'contact_address': '四川省海淀区路3号',
        'company': '房产经纪公司',
        'social_credit_code': '918316437629447909',
        'position': None,
        'related_num_id': None,
        'relation_type': None,
    },
    {
        'name': '何娜',
        'person_type': '中介',
        'person_sub_type': '本人',
        'gender': 'O',
        'id_type': '港澳通行证',
        'person_id': '83433341',
        'mobile': '18229577387',
        'wechat_no': 'wx_8ikozqjx',
        'contact_address': '河南省天河区巷4号',
        'company': '房产经纪公司',
        'social_credit_code': '918315578905616368',
        'position': '店长',
        'related_num_id': None,
        'relation_type': '父母',
    },
    {
        'name': '王毅',
        'person_type': '中介',
        'person_sub_type': '本人',
        'gender': 'M',
        'id_type': '台胞证',
        'person_id': '76369869',
        'mobile': '17892993806',
        'wechat_no': None,
        'contact_address': '江苏省西湖区街道1号',
        'company': '投资咨询公司',
        'social_credit_code': None,
        'position': '高级经纪人',
        'related_num_id': 'ID61198',
        'relation_type': None,
    },
    {
        'name': '李桂英',
        'person_type': '中介',
        'person_sub_type': '配偶',
        'gender': 'F',
        'id_type': '护照',
        'person_id': '75874216',
        'mobile': '15648713336',
        'wechat_no': 'wx_5n0e926w',
        'contact_address': '浙江省海淀区大道2号',
        'company': '投资咨询公司',
        'social_credit_code': None,
        'position': '店长',
        'related_num_id': None,
        'relation_type': None,
    },
]
|
||
|
||
def generate_name(gender):
    """Return a random surname followed by one random given-name entry.

    Only ``'M'`` selects the male given-name pool; every other value of
    *gender* draws from the female pool.
    """
    if gender == 'M':
        given_pool = first_names_male
    else:
        given_pool = first_names_female

    # The surname is drawn first, then the given name.
    surname = random.choice(last_names)
    given = random.choice(given_pool)
    return surname + given
|
||
|
||
def generate_mobile():
    """Return a random 11-digit, mobile-style number as a string.

    Layout: a leading ``1``, a second digit drawn from 3/5/7/8/9, one more
    random digit, then an eight-digit tail that never starts with zero.
    """
    second_digit = random.choice([3, 5, 7, 8, 9])
    third_digit = random.randint(0, 9)
    tail = random.randint(10000000, 99999999)
    return "1" + str(second_digit) + str(third_digit) + str(tail)
|
||
|
||
def generate_wechat():
    """Return a random WeChat-style handle: ``wx_`` plus eight characters.

    The eight characters are drawn, with replacement, from lowercase ASCII
    letters and digits.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
    suffix = ''.join(random.choices(alphabet, k=8))
    return 'wx_' + suffix
|
||
|
||
def generate_person_id(id_type):
    """Return a random identity-document number for *id_type*.

    For ``'身份证'`` (resident ID card) the result is an 18-character string:
    6-digit area code + 4-digit birth year + 2-digit month + 2-digit day +
    3-digit sequence code + 1 check character. The check character is
    computed with the MOD 11-2 scheme used for resident ID numbers, so the
    generated value passes checksum validation.

    For every other *id_type* the result is a random 8-digit string.
    """
    if id_type != '身份证':
        return str(random.randint(10000000, 99999999))

    # NOTE: the area code is a random number in range, not a real
    # administrative division code.
    area_code = random.randint(110000, 659999)
    birth_year = random.randint(1960, 2000)
    birth_month = random.randint(1, 12)
    # Days are capped at 28 so that every month/day combination is valid.
    birth_day = random.randint(1, 28)
    sequence_code = random.randint(0, 999)

    body = (
        f"{area_code}{birth_year}{birth_month:02d}"
        f"{birth_day:02d}{sequence_code:03d}"
    )

    # MOD 11-2 check character: weighted sum of the 17 body digits, reduced
    # modulo 11 and mapped through the fixed lookup string below.
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    check_chars = '10X98765432'
    weighted_sum = sum(int(digit) * weight for digit, weight in zip(body, weights))
    return body + check_chars[weighted_sum % 11]
|
||
|
||
def generate_social_credit_code():
    """Return a random 18-digit unified-social-credit-style code.

    Layout: the fixed prefix ``91``, one random digit, then a 15-digit
    number that never starts with zero - 18 characters in total, the same
    length as the sample codes in this file.

    The digits are random; no check character is computed.
    """
    third_digit = random.randint(0, 9)
    # 15-digit tail: a 17-digit tail would yield a 20-character code, which
    # is longer than a unified social credit code (18 characters).
    tail = random.randint(10 ** 14, 10 ** 15 - 1)
    return f"91{third_digit}{tail}"
|
||
|
||
def generate_address():
    """Return a random contact address string.

    The address concatenates one random entry each from ``provinces``,
    ``districts`` and ``streets``, then a house number from 1 to 100 and
    the suffix ``号``.
    """
    # List items are evaluated left to right, so the random draws happen in
    # the order province, district, street, house number.
    parts = [
        random.choice(provinces),
        random.choice(districts),
        random.choice(streets),
        str(random.randint(1, 100)),
        '号',
    ]
    return ''.join(parts)
|
||
|
||
def generate_related_num_id():
    """Return a random related-person identifier: ``ID`` plus five digits."""
    number = random.randint(10000, 99999)
    return 'ID' + str(number)
|
||
|
||
def generate_row(index, is_existing):
    """Build one spreadsheet row as a dict keyed by column header.

    Args:
        index: Row number. For existing rows it selects a sample record,
            wrapping around with modulo; it is ignored for new rows.
        is_existing: When true, copy a record from
            ``existing_data_samples``; otherwise generate random values.

    Returns:
        A dict with the 15 column headers in sheet order. ``'备注'`` is
        always ``None``.
    """
    if not is_existing:
        sex = random.choice(genders)
        sub_type = random.choice(person_sub_types)
        doc_type = random.choice(id_types)

        # Fields are filled in column order so the sequence of random draws
        # stays fixed. Optional fields pick between one generated value and
        # several None entries, which makes an empty value the likelier result.
        fresh = {}
        fresh['姓名*'] = generate_name(sex)
        fresh['人员类型'] = '中介'
        fresh['人员子类型'] = sub_type
        fresh['性别'] = sex
        fresh['证件类型'] = doc_type
        fresh['证件号码*'] = generate_person_id(doc_type)
        fresh['手机号码'] = generate_mobile()
        fresh['微信号'] = random.choice([generate_wechat(), None, None])
        fresh['联系地址'] = generate_address()
        fresh['所在公司'] = random.choice(companies)
        fresh['企业统一信用码'] = random.choice(
            [generate_social_credit_code(), None, None]
        )
        fresh['职位'] = random.choice(positions)
        fresh['关联人员ID'] = random.choice(
            [generate_related_num_id(), None, None, None]
        )
        fresh['关系类型'] = random.choice(relation_types)
        fresh['备注'] = None
        return fresh

    # Existing row: cycle through the sample records.
    record = existing_data_samples[index % len(existing_data_samples)]
    column_to_field = (
        ('姓名*', 'name'),
        ('人员类型', 'person_type'),
        ('人员子类型', 'person_sub_type'),
        ('性别', 'gender'),
        ('证件类型', 'id_type'),
        ('证件号码*', 'person_id'),
        ('手机号码', 'mobile'),
        ('微信号', 'wechat_no'),
        ('联系地址', 'contact_address'),
        ('所在公司', 'company'),
        ('企业统一信用码', 'social_credit_code'),
        ('职位', 'position'),
        ('关联人员ID', 'related_num_id'),
        ('关系类型', 'relation_type'),
    )
    copied = {header: record[field] for header, field in column_to_field}
    copied['备注'] = None
    return copied
|
||
|
||
# Generate 1000 rows: indices 0-499 are copies of the existing samples,
# indices 500-999 are freshly randomised records.
data = [generate_row(i, i < 500) for i in range(1000)]

# Load the rows into a DataFrame and write it to the output workbook
# without the index column.
df = pd.DataFrame(data)
df.to_excel(output_file, index=False, engine='openpyxl')
|
||
|
||
# Re-open the workbook that was just written so it can be styled.
wb = load_workbook(output_file)
ws = wb.active

# Column widths keyed by column letter (A through O).
column_widths = {
    'A': 15,
    'B': 12,
    'C': 12,
    'D': 8,
    'E': 12,
    'F': 20,
    'G': 15,
    'H': 15,
    'I': 30,
    'J': 20,
    'K': 20,
    'L': 12,
    'M': 15,
    'N': 12,
    'O': 20,
}
for column_letter, width in column_widths.items():
    ws.column_dimensions[column_letter].width = width

# Header row: bold text on a light-grey solid fill, centred both ways.
header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid')
header_font = Font(bold=True)
for cell in ws[1]:
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = Alignment(horizontal='center', vertical='center')

# Freeze the first row so the header stays visible while scrolling.
ws.freeze_panes = 'A2'

wb.save(output_file)

# Report what was written.
for message in (
    f'成功生成1000条测试数据到: {output_file}',
    '- 500条现有数据(前500行)',
    '- 500条新数据(后500行)',
):
    print(message)
|