# File listing metadata: 193 lines, 7.9 KiB, Python.
import random
from datetime import datetime, timedelta

import openpyxl
from openpyxl import Workbook

# Sample-value pools. Every generator in this file picks from these lists
# with random.choice, so an entry listed once is as likely as any other.

# City names placed at the front of an organisation name.
org_prefixes = [
    "北京", "上海", "广州", "深圳", "杭州",
    "成都", "重庆", "武汉", "西安", "南京",
    "天津", "苏州", "长沙", "郑州", "东莞",
    "青岛", "沈阳", "宁波", "厦门", "佛山",
]

# Business-type keywords placed in the middle of an organisation name.
org_types = [
    "投资咨询",
    "资产管理",
    "证券投资",
    "基金管理",
    "股权投资",
    "财富管理",
    "金融信息服务",
    "商务咨询",
    "企业咨询",
    "投资顾问",
]

# Legal-form suffixes placed at the end of an organisation name.
org_suffixes = [
    "有限公司",
    "股份有限公司",
    "集团",
    "企业",
    "事务所",
]

# Values for the "entity type" column.
entity_types = [
    "企业",
    "事业单位",
    "社会组织",
]

# Values for the "enterprise nature" column.
corp_natures = [
    "有限责任公司",
    "股份有限公司",
    "国有独资",
    "集体企业",
    "私营企业",
    "中外合资",
    "外商独资",
    "港澳台合资",
]

# Values for the "industry classification" column.
industry_classes = [
    "金融业",
    "商务服务业",
    "科学研究和技术服务业",
]

# Values for the "industry" column.
industries = [
    "货币金融服务",
    "资本市场服务",
    "保险业",
    "其他金融业",
    "企业管理服务",
    "法律服务",
    "咨询与调查",
    "广告业",
    "研究和试验发展",
    "专业技术服务业",
    "科技推广和应用服务业",
]

# Values for the legal representative's "ID document type" column.
id_types = [
    "身份证",
    "护照",
    "其他",
]


def generate_credit_code():
    """Return a random 18-character unified social credit code.

    Layout, following GB 32100-2015:

    * 2 characters - registration-authority code + entity-category code.
      Fixed to "91" here (market-regulation authority / enterprise).
    * 6 digits - administrative-division code. Drawn uniformly from
      110000..659900, so it is not guaranteed to be an assigned code.
    * 9 digits - organisation code, each digit random.
    * 1 character - check character computed from the first 17 characters
      (weighted sum modulo 31), so the result passes checksum validation.

    Returns:
        str: the 18-character code; the last character may be a letter.
    """
    # Alphabet of the standard: digits plus upper-case letters without
    # I, O, S, V and Z. A character's value is its index in this string.
    charset = "0123456789ABCDEFGHJKLMNPQRTUWXY"
    # Position weights W_i = 3**(i-1) mod 31 for i = 1..17.
    weights = (1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28)

    prefix = "91"
    area_code = f"{random.randint(110000, 659900):06d}"
    org_code = "".join(str(random.randint(0, 9)) for _ in range(9))
    body = f"{prefix}{area_code}{org_code}"

    weighted_sum = sum(charset.index(ch) * w for ch, w in zip(body, weights))
    # The outer "% 31" maps a computed value of 31 back to 0.
    check_char = charset[(31 - weighted_sum % 31) % 31]
    return body + check_char


def generate_person_name():
    """Return a random Chinese personal name.

    The name is one surname, one entry from the first given-name pool and
    one entry from the second pool. The second pool contains an empty
    string and the first pool contains one two-character entry, so the
    result is two to four characters long.
    """
    family_pool = (
        "王", "李", "张", "刘", "陈", "杨", "黄", "赵", "周", "吴",
        "徐", "孙", "马", "胡", "朱", "郭", "何", "罗", "高", "林",
    )
    given_first_pool = (
        "伟", "芳", "娜", "敏", "静", "丽", "强", "磊", "军", "洋",
        "勇", "艳", "杰", "娟", "涛", "明", "超", "秀英", "霞", "平",
    )
    given_second_pool = ("", "刚", "英", "华", "文", "平", "建", "国", "志", "海")

    # One draw per pool, in surname -> first -> second order.
    pools = (family_pool, given_first_pool, given_second_pool)
    return "".join(random.choice(pool) for pool in pools)


def generate_id_card():
    """Return a random 18-character resident identity card number.

    Layout: administrative-division code (6) + birth date YYYYMMDD (8) +
    sequence number (3) + check character (1).

    The check character is computed from the first 17 digits with the
    ISO 7064 MOD 11-2 scheme used by GB 11643-1999, so the number passes
    checksum validation. It can be the letter "X".

    Returns:
        str: the 18-character identity number.
    """
    # Per-position weights for the first 17 digits.
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    # Check character indexed by (weighted sum mod 11).
    check_chars = "10X98765432"

    # Uniform in 110000..659900; not guaranteed to be an assigned code.
    area_code = f"{random.randint(110000, 659900):06d}"
    year = random.randint(1960, 1995)
    month = random.randint(1, 12)
    # Capped at 28 so every year/month combination is a real calendar date.
    day = random.randint(1, 28)
    sequence = random.randint(1, 999)

    body = f"{area_code}{year:04d}{month:02d}{day:02d}{sequence:03d}"
    weighted_sum = sum(int(digit) * w for digit, w in zip(body, weights))
    return body + check_chars[weighted_sum % 11]


def generate_address():
    """Return a random registered-address string.

    The address is a district, a street, a house number from 1 to 999
    followed by "号", and a building type, concatenated without
    separators.
    """
    district_pool = (
        "朝阳区", "海淀区", "西城区", "东城区", "丰台区",
        "浦东新区", "黄浦区", "静安区", "徐汇区", "天河区",
        "福田区", "南山区", "罗湖区", "西湖区", "江干区",
    )
    street_pool = (
        "建设路", "人民路", "解放路", "和平路", "文化路",
        "科技路", "创新路", "发展路", "创业路", "工业路",
    )
    building_pool = ("大厦", "中心", "广场", "写字楼", "科技园")

    # Draws happen in the same order the parts appear in the result.
    district = random.choice(district_pool)
    street = random.choice(street_pool)
    house_number = random.randint(1, 999)
    building = random.choice(building_pool)
    return "".join((district, street, str(house_number), "号", building))


def generate_establish_date():
    """Return a random establishment date as a "YYYY-MM-DD" string.

    The date is uniform over 2000-01-01 .. 2024-12-31, both inclusive.
    """
    earliest = datetime(2000, 1, 1)
    latest = datetime(2024, 12, 31)
    span_in_days = (latest - earliest).days

    # randint is inclusive at both ends, so the last day can be drawn.
    offset = timedelta(days=random.randint(0, span_in_days))
    return f"{earliest + offset:%Y-%m-%d}"


def generate_shareholder():
    """Return a random shareholder name.

    With equal probability the result is either a company name (a city
    from org_prefixes, one of four keywords, and the suffix "有限公司") or
    a personal name from generate_person_name(), optionally followed by
    the marker "(自然人)".
    """

    def _company_holder():
        city = random.choice(org_prefixes)
        keyword = random.choice(['投资', '资本', '控股', '集团'])
        return f"{city}{keyword}有限公司"

    def _individual_holder():
        person = generate_person_name()
        marker = random.choice(["", "(自然人)"])
        return person + marker

    # Pick the kind of shareholder first, then build it.
    builder = random.choice((_company_holder, _individual_holder))
    return builder()


def generate_remark():
    """Return a random remark; half of the time it is the empty string.

    The pool holds four empty strings and four distinct notes, and one
    entry is drawn uniformly.
    """
    notes = ["重点监控", "已整改", "存在风险", "待核查"]
    pool = [""] * 4 + notes
    return random.choice(pool)


def generate_org_data(index):
    """Build one random organisation record.

    Args:
        index: value stored unchanged under the "id" key.

    Returns:
        dict: keys, in insertion order, are "id", "orgName", "creditCode",
        "entityType", "corpNature", "industryClass", "industry",
        "establishDate", "regAddress", "legalRep", "legalRepIdType",
        "legalRepIdNo", "shareholder1" .. "shareholder5" and "remark".
        "corpNature" is empty about half of the time, and shareholder
        slots that were not drawn hold "".
    """
    # Draw one to five shareholders, then right-pad with "" to five slots.
    drawn = random.randint(1, 5)
    holders = [generate_shareholder() for _ in range(drawn)]
    holders.extend([""] * (5 - len(holders)))

    # Only the "身份证" type gets an identity-card number; any other type
    # gets a letter A/B/C followed by a five-digit number.
    id_type = random.choice(id_types)
    if id_type == "身份证":
        id_number = generate_id_card()
    else:
        id_number = f"{random.choice(['A', 'B', 'C'])}{random.randint(10000, 99999)}"

    record = {"id": index}
    record["orgName"] = "".join(
        random.choice(pool) for pool in (org_prefixes, org_types, org_suffixes)
    )
    record["creditCode"] = generate_credit_code()
    record["entityType"] = random.choice(entity_types)

    # Coin flip first; a nature is drawn only when the flip comes up True.
    if random.choice([True, False]):
        record["corpNature"] = random.choice(corp_natures)
    else:
        record["corpNature"] = ""

    record["industryClass"] = random.choice(industry_classes)
    record["industry"] = random.choice(industries)
    record["establishDate"] = generate_establish_date()
    record["regAddress"] = generate_address()
    record["legalRep"] = generate_person_name()
    record["legalRepIdType"] = id_type
    record["legalRepIdNo"] = id_number
    record.update(
        (f"shareholder{slot}", holder)
        for slot, holder in enumerate(holders, start=1)
    )
    record["remark"] = generate_remark()
    return record


def generate_org_test_data(
    filename,
    count=1000,
    start_id=1,
    *,
    template_path="机构中介黑名单模板_1769674571626.xlsx",
):
    """Generate random organisation rows and save them to an Excel file.

    Row 1 of the output is copied, values only, from row 1 of the
    template's active sheet. Rows 2 onward each hold one record from
    generate_org_data(). The record's "id" value is not written to the
    sheet.

    Args:
        filename: path of the .xlsx file to write.
        count: number of data rows to generate.
        start_id: id passed to generate_org_data() for the first row; it
            increases by one per row.
        template_path: workbook whose first row supplies the header. The
            default is a relative path, so it resolves against the
            current working directory.

    Raises:
        Whatever openpyxl.load_workbook raises when the template cannot
        be opened, and whatever Workbook.save raises when the output
        cannot be written; neither is caught here.
    """
    # Record key for each data column, left to right: organisation name,
    # unified social credit code, entity type, enterprise nature, industry
    # classification, industry, establishment date, registered address,
    # legal representative, their ID type, their ID number,
    # shareholders 1-5, remark.
    column_keys = (
        "orgName",
        "creditCode",
        "entityType",
        "corpNature",
        "industryClass",
        "industry",
        "establishDate",
        "regAddress",
        "legalRep",
        "legalRepIdType",
        "legalRepIdNo",
        "shareholder1",
        "shareholder2",
        "shareholder3",
        "shareholder4",
        "shareholder5",
        "remark",
    )

    template_wb = openpyxl.load_workbook(template_path)
    template_ws = template_wb.active

    wb = Workbook()
    ws = wb.active
    ws.title = "机构中介黑名单"

    # Copy header values; cell styles are not carried over.
    for cell in template_ws[1]:
        ws.cell(row=1, column=cell.column, value=cell.value)

    # Data starts on row 2, directly under the header.
    for offset in range(count):
        record = generate_org_data(start_id + offset)
        for col_idx, key in enumerate(column_keys, start=1):
            ws.cell(row=offset + 2, column=col_idx, value=record[key])

    wb.save(filename)
    print(f"已生成文件: {filename}")


if __name__ == "__main__":
    # Two batches of 1000 rows each; the second continues the id sequence
    # where the first ended.
    print("开始生成机构中介黑名单测试数据...")
    for batch_file, first_id in (
        ("机构中介黑名单测试数据_1000条.xlsx", 1),
        ("机构中介黑名单测试数据_1000条_第2批.xlsx", 1001),
    ):
        generate_org_test_data(batch_file, 1000, first_id)
    print("完成!")