# doc/test-data/intermediary/generate_1000_intermediary_data.py

import random
import string
from datetime import datetime
from pathlib import Path

import pandas as pd

# Common surnames and given names
surnames = [
    '王', '李', '张', '刘', '陈', '杨', '黄', '赵', '周', '吴',
    '徐', '孙', '马', '胡', '朱', '郭', '何', '罗', '高', '林',
]
given_names = [
    '伟', '芳', '娜', '敏', '静', '丽', '强', '磊', '军', '洋',
    '勇', '艳', '杰', '娟', '涛', '明', '超', '秀英', '霞', '平',
    '刚', '桂英', '玉兰', '萍', '毅', '浩', '宇', '轩', '然', '凯',
]

# Person classification values
person_types = [
    '中介',
    '职业背债人',
    '房产中介',
]
person_sub_types = [
    '本人',
    '配偶',
    '子女',
    '其他',
]
genders = [
    'M',
    'F',
    'O',
]
id_types = [
    '身份证',
    '护照',
    '港澳通行证',
    '台胞证',
    '军官证',
]
relation_types = [
    '配偶',
    '子女',
    '父母',
    '兄弟姐妹',
    '其他',
]

# Address fragments; one entry from each list is concatenated to form an
# address (the `cities` entries are district-level names, and the `districts`
# entries are street/building suffixes).
provinces = [
    '北京市', '上海市', '广东省', '浙江省', '江苏省',
    '四川省', '湖北省', '河南省', '山东省', '福建省',
]
cities = [
    '朝阳区', '海淀区', '浦东新区', '黄浦区', '天河区',
    '福田区', '西湖区', '滨江区', '鼓楼区', '玄武区',
]
districts = [
    '街道1号', '大道2号', '路3号', '巷4号',
    '小区5栋', '花园6号', '广场7号', '大厦8号楼',
]

# Company names and job positions
companies = [
    '房产中介有限公司', '置业咨询公司', '房产经纪公司', '地产代理公司',
    '不动产咨询公司', '房屋租赁公司', '物业管理公司', '投资咨询公司',
]
positions = [
    '房产经纪人', '销售经理', '业务员', '置业顾问',
    '店长', '区域经理', '高级经纪人', '项目经理',
]

# Generate a resident ID card number (test data only)
def generate_id_card():
    """Return a random 18-character resident ID number for test data.

    Layout: 6-digit area code, 8-digit birth date (YYYYMMDD), 3-digit
    sequence code, and 1 check character.

    The area code is an arbitrary number in [110000, 650000] and is not
    guaranteed to be a real administrative division. The check character is
    computed with the ISO 7064 MOD 11-2 scheme used by GB 11643-1999, so the
    generated number passes checksum validation (a random digit would fail
    it roughly nine times out of ten).

    Returns:
        str: 17 digits followed by a check character in ``0-9`` or ``X``.
    """
    # Area code (first 6 digits)
    area_code = f"{random.randint(110000, 650000):06d}"
    # Birth date (8 digits); the day is capped at 28 so every year/month
    # combination yields a real calendar date.
    birth_year = random.randint(1960, 2000)
    birth_month = f"{random.randint(1, 12):02d}"
    birth_day = f"{random.randint(1, 28):02d}"
    birth_date = f"{birth_year}{birth_month}{birth_day}"
    # Sequence code (3 digits)
    sequence = f"{random.randint(1, 999):03d}"

    body = f"{area_code}{birth_date}{sequence}"

    # Check character: weighted sum of the 17 body digits modulo 11, mapped
    # through the standard lookup string (remainder 2 maps to 'X').
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    check_chars = "10X98765432"
    weighted_sum = sum(int(digit) * weight for digit, weight in zip(body, weights))
    check_code = check_chars[weighted_sum % 11]
    return f"{body}{check_code}"

# Generate a mobile phone number
def generate_phone():
    """Return a random 11-digit mobile number as a string.

    The number starts with ``1``, its second digit is one of 3, 5, 7, 8 or 9,
    and the remaining nine digits are each drawn from 0-9.
    """
    prefix = "1" + random.choice(['3', '5', '7', '8', '9'])
    tail_digits = []
    for _ in range(9):
        tail_digits.append(str(random.randint(0, 9)))
    return prefix + "".join(tail_digits)

# Generate a unified credit code
def generate_credit_code():
    """Return an 18-character string: the prefix ``91`` plus 16 random digits."""
    random_digits = (random.randint(0, 9) for _ in range(16))
    return "91" + "".join(map(str, random_digits))

# Generate a WeChat ID
def generate_wechat():
    """Return ``wx_`` followed by 8 random lowercase letters or digits."""
    alphabet = string.ascii_lowercase + string.digits
    suffix = "".join(random.choice(alphabet) for _ in range(8))
    return "wx_" + suffix

# Generate a single record
def generate_person(index):
    """Build one random person record as a dict keyed by column name.

    Args:
        index: Record number; it only appears in the optional '备注' text.

    Returns:
        dict: Fifteen string fields in a fixed key order. Name, type, sub-type,
        gender, ID type/number, phone and address are always filled. The
        remaining fields are each filled independently when a fresh
        ``random.random()`` draw exceeds that field's threshold, and are the
        empty string otherwise.
    """
    # Given-name pools: the first is used for gender 'M', the second for
    # every other gender value.
    male_given = ('伟', '强', '磊', '军', '勇', '杰', '涛', '明', '超', '毅', '浩', '宇', '轩')
    other_given = ('芳', '娜', '敏', '静', '丽', '艳', '娟', '秀英', '霞', '平', '桂英', '玉兰', '萍')

    def optional(threshold, produce):
        # Draw first; the value is only produced when the draw exceeds the
        # threshold, so skipped fields consume no extra random numbers.
        return produce() if random.random() > threshold else ''

    person_type = random.choice(person_types)
    gender = random.choice(genders)

    given_pool = male_given if gender == 'M' else other_given
    name = random.choice(surnames) + random.choice(given_pool)

    id_type = random.choice(id_types)
    if id_type == '身份证':
        id_number = generate_id_card()
    else:
        # Every other document type gets a plain 8-digit number.
        id_number = f"{random.randint(10000000, 99999999):08d}"

    # Fields are filled in key order so the random draws happen in a fixed
    # sequence and the resulting column order is stable.
    record = {
        '姓名': name,
        '人员类型': person_type,
    }
    record['人员子类型'] = random.choice(person_sub_types)
    record['性别'] = gender
    record['证件类型'] = id_type
    record['证件号码'] = id_number
    record['手机号码'] = generate_phone()
    record['微信号'] = optional(0.3, generate_wechat)
    record['联系地址'] = "".join(
        random.choice(pool) for pool in (provinces, cities, districts)
    )
    record['所在公司'] = optional(0.2, lambda: random.choice(companies))
    record['企业统一信用码'] = optional(0.5, generate_credit_code)
    record['职位'] = optional(0.3, lambda: random.choice(positions))
    record['关联人员ID'] = optional(0.6, lambda: f"ID{random.randint(10000, 99999)}")
    record['关系类型'] = optional(0.6, lambda: random.choice(relation_types))
    record['备注'] = optional(0.5, lambda: f'测试数据{index}')
    return record

# Generate two independent batches of 1000 personal intermediary blacklist
# records and save each batch as its own Excel workbook.
#
# The workbooks are written to the directory that contains this script rather
# than to a hard-coded absolute Windows path, so the generator works from any
# checkout location and on any operating system.
output_dir = Path(__file__).resolve().parent

# First batch of 1000 records
print("正在生成1000条个人中介黑名单数据...")
data = [generate_person(i) for i in range(1, 1001)]
df = pd.DataFrame(data)

# Save the first workbook (index=False omits the DataFrame row index column)
output1 = str(output_dir / '个人中介黑名单测试数据_1000条_第1批.xlsx')
df.to_excel(output1, index=False)
print(f"已生成第一个文件: {output1}")

# Second batch of 1000 records, drawn independently of the first
print("正在生成第二批1000条个人中介黑名单数据...")
data2 = [generate_person(i) for i in range(1, 1001)]
df2 = pd.DataFrame(data2)

# Save the second workbook
output2 = str(output_dir / '个人中介黑名单测试数据_1000条_第2批.xlsx')
df2.to_excel(output2, index=False)
print(f"已生成第二个文件: {output2}")

# Summary
print("\n生成完成！")
print(f"文件1: {output1}")
print(f"文件2: {output2}")
print("\n每个文件包含1000条测试数据")