Files
ccdi/doc/test-data/generate_org_data.py
2026-01-29 22:03:42 +08:00

193 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import openpyxl
from openpyxl import Workbook
import random
from datetime import datetime, timedelta
# Organization name prefixes (city names).
org_prefixes = [
    "北京", "上海", "广州", "深圳", "杭州",
    "成都", "重庆", "武汉", "西安", "南京",
    "天津", "苏州", "长沙", "郑州", "东莞",
    "青岛", "沈阳", "宁波", "厦门", "佛山",
]

# Organization type keywords (middle part of a generated name).
org_types = [
    "投资咨询",
    "资产管理",
    "证券投资",
    "基金管理",
    "股权投资",
    "财富管理",
    "金融信息服务",
    "商务咨询",
    "企业咨询",
    "投资顾问",
]

# Organization name suffixes.
org_suffixes = [
    "有限公司",
    "股份有限公司",
    "集团",
    "企业",
    "事务所",
]

# Entity types.
entity_types = [
    "企业",
    "事业单位",
    "社会组织",
]

# Corporate nature.
corp_natures = [
    "有限责任公司",
    "股份有限公司",
    "国有独资",
    "集体企业",
    "私营企业",
    "中外合资",
    "外商独资",
    "港澳台合资",
]

# Industry classification (top level).
industry_classes = [
    "金融业",
    "商务服务业",
    "科学研究和技术服务业",
]

# Industry (detailed).
industries = [
    "货币金融服务",
    "资本市场服务",
    "保险业",
    "其他金融业",
    "企业管理服务",
    "法律服务",
    "咨询与调查",
    "广告业",
    "研究和试验发展",
    "专业技术服务业",
    "科技推广和应用服务业",
]

# Identity document types of the legal representative.
id_types = [
    "身份证",
    "护照",
    "其他",
]
# Character set of the unified social credit code (GB 32100-2015): digits plus
# upper-case letters without I, O, S, V and Z. A character's index in this
# string is its numeric value in the checksum.
_CREDIT_CODE_CHARSET = "0123456789ABCDEFGHJKLMNPQRTUWXY"

# Positional weights for the first 17 characters (3**i mod 31).
_CREDIT_CODE_WEIGHTS = (1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28)


def _credit_code_check_char(body):
    """Return the GB 32100-2015 check character for a 17-character code body."""
    total = sum(
        _CREDIT_CODE_CHARSET.index(char) * weight
        for char, weight in zip(body, _CREDIT_CODE_WEIGHTS)
    )
    # A result of 31 wraps around to value 0.
    return _CREDIT_CODE_CHARSET[(31 - total % 31) % 31]


def generate_credit_code():
    """Generate a random 18-character unified social credit code.

    Layout: 2-character prefix (registration authority + organization
    category) + 6-digit area code + 9-digit organization code + 1 check
    character computed over the first 17 characters.

    Returns:
        str: an 18-character code. The first 17 characters are digits; the
        check character may be a digit or an upper-case letter.
    """
    # "9" = registered with the market-regulation authority, "1" = enterprise.
    prefix = "91"
    # Random value in the original range; not guaranteed to be a real
    # administrative division code.
    area_code = f"{random.randint(110000, 659900):06d}"
    org_code = "".join(str(random.randint(0, 9)) for _ in range(9))
    body = f"{prefix}{area_code}{org_code}"
    return body + _credit_code_check_char(body)
def generate_person_name():
    """Generate a random Chinese personal name (e.g. for a legal representative).

    The name is one surname, one given-name entry and an optional trailing
    character, each drawn independently from a fixed pool.

    Returns:
        str: a non-empty name of 2 to 4 characters.
    """
    # NOTE(review): these pools were repopulated with common surnames and
    # given-name characters so the result is never empty; replace them if
    # specific names are required.
    surnames = [
        "王", "李", "张", "刘", "陈", "杨", "黄", "赵", "吴", "周",
        "徐", "孙", "马", "朱", "胡", "郭", "何", "高", "林", "罗",
    ]
    names1 = [
        "伟", "芳", "娜", "敏", "静", "丽", "强", "磊", "军", "洋",
        "勇", "艳", "杰", "娟", "涛", "明", "超", "秀英", "华", "平",
    ]
    # Empty entries keep short (surname + one entry) names in the mix.
    names2 = ["", "", "", "", "华", "明", "国", "文", "平", "红"]
    return random.choice(surnames) + random.choice(names1) + random.choice(names2)
def generate_id_card():
    """Generate a random 18-character resident ID number with a valid checksum.

    Layout: 6-digit area code + 8-digit birth date (YYYYMMDD) + 3-digit
    sequence number + 1 check character. The check character follows
    ISO 7064 MOD 11-2 (as used by GB 11643-1999) and may be "X".

    Returns:
        str: an 18-character ID number.
    """
    # Random value in a fixed range; not guaranteed to be a real
    # administrative division code.
    area_code = f"{random.randint(110000, 659900):06d}"
    year = random.randint(1960, 1995)
    month = random.randint(1, 12)
    # Day is capped at 28 so the date is valid for every month.
    day = random.randint(1, 28)
    sequence = random.randint(1, 999)
    body = f"{area_code}{year}{month:02d}{day:02d}{sequence:03d}"

    # Weighted sum of the first 17 digits, reduced mod 11, indexes the
    # check-character table.
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    remainder = sum(int(digit) * weight for digit, weight in zip(body, weights)) % 11
    return body + "10X98765432"[remainder]
def generate_address():
    """Generate a random registered address.

    Returns:
        str: district + street + a number from 1 to 999 + building type,
        concatenated without separators.
    """
    district_pool = [
        "朝阳区", "海淀区", "西城区", "东城区", "丰台区",
        "浦东新区", "黄浦区", "静安区", "徐汇区", "天河区",
        "福田区", "南山区", "罗湖区", "西湖区", "江干区",
    ]
    street_pool = [
        "建设路", "人民路", "解放路", "和平路", "文化路",
        "科技路", "创新路", "发展路", "创业路", "工业路",
    ]
    building_pool = ["大厦", "中心", "广场", "写字楼", "科技园"]

    # Draw the parts in the order they appear in the address.
    district = random.choice(district_pool)
    street = random.choice(street_pool)
    number = random.randint(1, 999)
    building = random.choice(building_pool)
    return "".join((district, street, str(number), building))
def generate_establish_date():
    """Generate a random establishment date.

    Returns:
        str: a date formatted as YYYY-MM-DD, between 2000-01-01 and
        2024-12-31 inclusive.
    """
    earliest = datetime(2000, 1, 1)
    span_days = (datetime(2024, 12, 31) - earliest).days
    offset = timedelta(days=random.randint(0, span_days))
    return f"{earliest + offset:%Y-%m-%d}"
def generate_shareholder():
    """Generate a random shareholder name.

    With equal probability the shareholder is either a company (city prefix +
    business keyword + "有限公司") or a person name from
    generate_person_name(), optionally followed by "(自然人)".

    Returns:
        str: the shareholder name.
    """
    kind = random.choice(("company", "person"))
    if kind == "company":
        city = random.choice(org_prefixes)
        keyword = random.choice(['投资', '资本', '控股', '集团'])
        return f"{city}{keyword}有限公司"

    person = generate_person_name()
    marker = random.choice(["", "(自然人)"])
    return person + marker
def generate_remark():
    """Generate a random remark.

    Half of the candidate entries are empty strings, so about half of the
    results carry no remark.

    Returns:
        str: an empty string or one of the four status remarks.
    """
    flagged = ["重点监控", "已整改", "存在风险", "待核查"]
    # One blank candidate per flagged remark.
    candidates = [""] * len(flagged) + flagged
    return random.choice(candidates)
def generate_org_data(index):
    """Build one random organization record.

    Args:
        index: value stored under the "id" key.

    Returns:
        dict: keys "id", "orgName", "creditCode", "entityType", "corpNature",
        "industryClass", "industry", "establishDate", "regAddress",
        "legalRep", "legalRepIdType", "legalRepIdNo", "shareholder1" to
        "shareholder5" and "remark". Unused shareholder slots and about half
        of the "corpNature" values are empty strings.
    """
    slot_count = 5

    # Between 1 and 5 real shareholders; the remaining slots are blank.
    holder_total = random.randint(1, slot_count)
    holders = [generate_shareholder() for _ in range(holder_total)]
    holders.extend([""] * (slot_count - holder_total))
    holder1, holder2, holder3, holder4, holder5 = holders

    # Only the "身份证" type gets an 18-character ID number; other types get
    # a letter followed by five digits.
    id_type = random.choice(id_types)
    if id_type == "身份证":
        id_number = generate_id_card()
    else:
        letter = random.choice(['A', 'B', 'C'])
        id_number = f"{letter}{random.randint(10000, 99999)}"

    name_prefix = random.choice(org_prefixes)
    name_type = random.choice(org_types)
    name_suffix = random.choice(org_suffixes)
    credit_code = generate_credit_code()
    entity_type = random.choice(entity_types)

    # Corporate nature is filled in for roughly half of the records.
    if random.choice([True, False]):
        corp_nature = random.choice(corp_natures)
    else:
        corp_nature = ""

    industry_class = random.choice(industry_classes)
    industry = random.choice(industries)
    establish_date = generate_establish_date()
    reg_address = generate_address()
    legal_rep = generate_person_name()
    remark = generate_remark()

    return {
        "id": index,
        "orgName": f"{name_prefix}{name_type}{name_suffix}",
        "creditCode": credit_code,
        "entityType": entity_type,
        "corpNature": corp_nature,
        "industryClass": industry_class,
        "industry": industry,
        "establishDate": establish_date,
        "regAddress": reg_address,
        "legalRep": legal_rep,
        "legalRepIdType": id_type,
        "legalRepIdNo": id_number,
        "shareholder1": holder1,
        "shareholder2": holder2,
        "shareholder3": holder3,
        "shareholder4": holder4,
        "shareholder5": holder5,
        "remark": remark,
    }
def generate_org_test_data(filename, count=1000, start_id=1,
                           template_path="机构中介黑名单模板_1769674571626.xlsx"):
    """Generate organization test records and save them to an Excel file.

    The header row is copied from the first row of the template workbook's
    active sheet. One generated record is then written per row, starting at
    row 2.

    Args:
        filename: path of the .xlsx file to write.
        count: number of records to generate.
        start_id: id passed to generate_org_data() for the first record;
            later records use consecutive ids.
        template_path: path of the template workbook that supplies the
            header row. Defaults to the template file name used by this
            script, resolved against the current working directory.

    Raises:
        Whatever openpyxl.load_workbook() raises when the template cannot be
        opened (e.g. FileNotFoundError).
    """
    # Record keys in template column order: organization name, unified social
    # credit code, entity type, corporate nature, industry classification,
    # industry, establishment date, registered address, legal representative,
    # legal representative ID type, legal representative ID number,
    # shareholders 1-5, remark. The "id" key is not written.
    column_fields = (
        "orgName",
        "creditCode",
        "entityType",
        "corpNature",
        "industryClass",
        "industry",
        "establishDate",
        "regAddress",
        "legalRep",
        "legalRepIdType",
        "legalRepIdNo",
        "shareholder1",
        "shareholder2",
        "shareholder3",
        "shareholder4",
        "shareholder5",
        "remark",
    )

    # Read the header row from the template.
    template_wb = openpyxl.load_workbook(template_path)
    template_ws = template_wb.active

    # Create the output workbook.
    wb = Workbook()
    ws = wb.active
    ws.title = "机构中介黑名单"

    # Copy the header cells, keeping their original column positions.
    for cell in template_ws[1]:
        ws.cell(row=1, column=cell.column, value=cell.value)

    # Generate and write one record per row; data starts below the header.
    for offset in range(count):
        record = generate_org_data(start_id + offset)
        row_idx = offset + 2
        for col_idx, field in enumerate(column_fields, start=1):
            ws.cell(row=row_idx, column=col_idx, value=record[field])

    wb.save(filename)
    print(f"已生成文件: {filename}")
if __name__ == "__main__":
    print("开始生成机构中介黑名单测试数据...")
    # Two batches of 1000 records each, with consecutive id ranges.
    batches = (
        ("机构中介黑名单测试数据_1000条.xlsx", 1),
        ("机构中介黑名单测试数据_1000条_第2批.xlsx", 1001),
    )
    for output_name, first_id in batches:
        generate_org_test_data(output_name, 1000, first_id)
    print("完成!")