"""Generate random test workbooks for the institution/intermediary blacklist.

Each generated workbook copies its header row from a template workbook and
then appends randomly generated institution records underneath it.
"""

import random
from datetime import datetime, timedelta

import openpyxl
from openpyxl import Workbook

# Institution name prefixes (city names)
org_prefixes = [
    "北京", "上海", "广州", "深圳", "杭州", "成都", "重庆", "武汉", "西安", "南京",
    "天津", "苏州", "长沙", "郑州", "东莞", "青岛", "沈阳", "宁波", "厦门", "佛山",
]

# Institution type keywords
org_types = [
    "投资咨询", "资产管理", "证券投资", "基金管理", "股权投资",
    "财富管理", "金融信息服务", "商务咨询", "企业咨询", "投资顾问",
]

# Institution name suffixes
org_suffixes = ["有限公司", "股份有限公司", "集团", "企业", "事务所"]

# Entity types
entity_types = ["企业", "事业单位", "社会组织"]

# Corporate natures
corp_natures = [
    "有限责任公司", "股份有限公司", "国有独资", "集体企业",
    "私营企业", "中外合资", "外商独资", "港澳台合资",
]

# Industry classifications
industry_classes = ["金融业", "商务服务业", "科学研究和技术服务业"]

# Industries
industries = [
    "货币金融服务", "资本市场服务", "保险业", "其他金融业",
    "企业管理服务", "法律服务", "咨询与调查", "广告业",
    "研究和试验发展", "专业技术服务业", "科技推广和应用服务业",
]

# Identity document types
id_types = ["身份证", "护照", "其他"]

# Alphabet and position weights used by the unified social credit code
# check character (GB 32100-2015): 31 symbols, the letters I, O, S, V, Z
# are not used.
_CREDIT_CODE_CHARSET = "0123456789ABCDEFGHJKLMNPQRTUWXY"
_CREDIT_CODE_WEIGHTS = (1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28)

# First two characters of the credit code (registering authority + entity
# category), keyed by the entity types this script generates.
_CREDIT_CODE_PREFIXES = {"企业": "91", "事业单位": "12", "社会组织": "51"}

# Position weights and check-character table for 18-digit resident ID
# numbers (MOD 11-2).
_ID_CARD_WEIGHTS = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
_ID_CARD_CHECK_CHARS = "10X98765432"

# Template workbook whose first row supplies the header of every output file.
_TEMPLATE_PATH = "机构中介黑名单模板_1769674571626.xlsx"

# Record keys in template column order: institution name, unified social
# credit code, entity type, corporate nature, industry classification,
# industry, establishment date, registered address, legal representative,
# legal representative ID type, legal representative ID number,
# shareholders 1-5, remark.
_DATA_COLUMNS = (
    "orgName", "creditCode", "entityType", "corpNature", "industryClass",
    "industry", "establishDate", "regAddress", "legalRep", "legalRepIdType",
    "legalRepIdNo", "shareholder1", "shareholder2", "shareholder3",
    "shareholder4", "shareholder5", "remark",
)


def _random_area_code():
    """Return a random 6-digit administrative area code as a string."""
    return f"{random.randint(110000, 659900):06d}"


def _credit_check_char(body):
    """Return the check character for the first 17 characters of a credit code."""
    total = sum(
        _CREDIT_CODE_CHARSET.index(char) * weight
        for char, weight in zip(body, _CREDIT_CODE_WEIGHTS)
    )
    # The outer modulo maps a result of 31 back to index 0.
    return _CREDIT_CODE_CHARSET[(31 - total % 31) % 31]


def _id_card_check_char(body):
    """Return the check character for the first 17 digits of a resident ID."""
    total = sum(int(digit) * weight for digit, weight in zip(body, _ID_CARD_WEIGHTS))
    return _ID_CARD_CHECK_CHARS[total % 11]


def generate_credit_code(entity_type=None):
    """Generate a random 18-character unified social credit code.

    Layout: 2-character authority/category prefix + 6-digit area code +
    9-digit organisation code + 1 check character. The original version
    omitted the prefix and therefore produced only 16 characters.

    Args:
        entity_type: Optional entry of ``entity_types``; selects a matching
            prefix. When omitted or unknown, a prefix is chosen at random.

    Returns:
        The 18-character code as a string.
    """
    prefix = _CREDIT_CODE_PREFIXES.get(entity_type)
    if prefix is None:
        prefix = random.choice(list(_CREDIT_CODE_PREFIXES.values()))
    org_code = "".join(random.choices("0123456789", k=9))
    body = f"{prefix}{_random_area_code()}{org_code}"
    return body + _credit_check_char(body)


def generate_person_name():
    """Generate a random person name: surname + given name + optional extra character."""
    surnames = ["王", "李", "张", "刘", "陈", "杨", "黄", "赵", "周", "吴",
                "徐", "孙", "马", "胡", "朱", "郭", "何", "罗", "高", "林"]
    names1 = ["伟", "芳", "娜", "敏", "静", "丽", "强", "磊", "军", "洋",
              "勇", "艳", "杰", "娟", "涛", "明", "超", "秀英", "霞", "平"]
    # The empty entry lets some names end after the first given-name part.
    names2 = ["", "刚", "英", "华", "文", "平", "建", "国", "志", "海"]
    return random.choice(surnames) + random.choice(names1) + random.choice(names2)


def generate_id_card():
    """Generate a random 18-character resident ID number.

    Layout: 6-digit area code + 8-digit birth date (1960-1995) +
    3-digit sequence number + 1 check character (may be ``X``).
    """
    year = random.randint(1960, 1995)
    month = random.randint(1, 12)
    # Days are capped at 28 so every month/day combination is a real date.
    day = random.randint(1, 28)
    sequence = random.randint(1, 999)
    body = f"{_random_area_code()}{year}{month:02d}{day:02d}{sequence:03d}"
    return body + _id_card_check_char(body)


def generate_address():
    """Generate a random registered address: district + street + number + building."""
    districts = ["朝阳区", "海淀区", "西城区", "东城区", "丰台区",
                 "浦东新区", "黄浦区", "静安区", "徐汇区", "天河区",
                 "福田区", "南山区", "罗湖区", "西湖区", "江干区"]
    streets = ["建设路", "人民路", "解放路", "和平路", "文化路",
               "科技路", "创新路", "发展路", "创业路", "工业路"]
    buildings = ["大厦", "中心", "广场", "写字楼", "科技园"]
    return (
        f"{random.choice(districts)}{random.choice(streets)}"
        f"{random.randint(1, 999)}号{random.choice(buildings)}"
    )


def generate_establish_date():
    """Return a random date between 2000-01-01 and 2024-12-31 as ``YYYY-MM-DD``."""
    start_date = datetime(2000, 1, 1)
    end_date = datetime(2024, 12, 31)
    offset_days = random.randint(0, (end_date - start_date).days)
    return (start_date + timedelta(days=offset_days)).strftime("%Y-%m-%d")


def generate_shareholder():
    """Generate a shareholder name: either a company or a natural person."""
    def company():
        suffix = random.choice(["投资", "资本", "控股", "集团"])
        return f"{random.choice(org_prefixes)}{suffix}有限公司"

    def person():
        return generate_person_name() + random.choice(["", "(自然人)"])

    return random.choice([company, person])()


def generate_remark():
    """Return a random remark; half of the candidates are empty strings."""
    remarks = ["", "", "", "", "重点监控", "已整改", "存在风险", "待核查"]
    return random.choice(remarks)


def generate_org_data(index):
    """Generate one institution record.

    Args:
        index: Value stored under the ``id`` key of the record.

    Returns:
        A dict with the keys ``id``, ``orgName``, ``creditCode``,
        ``entityType``, ``corpNature``, ``industryClass``, ``industry``,
        ``establishDate``, ``regAddress``, ``legalRep``, ``legalRepIdType``,
        ``legalRepIdNo``, ``shareholder1`` .. ``shareholder5`` and ``remark``.
    """
    # 1-5 real shareholders, padded with empty strings to exactly 5 slots.
    shareholders = [generate_shareholder() for _ in range(random.randint(1, 5))]
    shareholders += [""] * (5 - len(shareholders))

    id_type = random.choice(id_types)
    if id_type == "身份证":
        id_number = generate_id_card()
    else:
        # Non-ID-card documents get a letter followed by five digits.
        id_number = f"{random.choice(['A', 'B', 'C'])}{random.randint(10000, 99999)}"

    # Chosen up front so the credit code prefix matches the entity type.
    entity_type = random.choice(entity_types)

    return {
        "id": index,
        "orgName": f"{random.choice(org_prefixes)}{random.choice(org_types)}{random.choice(org_suffixes)}",
        "creditCode": generate_credit_code(entity_type),
        "entityType": entity_type,
        # Corporate nature is left blank for roughly half of the records.
        "corpNature": random.choice(corp_natures) if random.choice([True, False]) else "",
        "industryClass": random.choice(industry_classes),
        "industry": random.choice(industries),
        "establishDate": generate_establish_date(),
        "regAddress": generate_address(),
        "legalRep": generate_person_name(),
        "legalRepIdType": id_type,
        "legalRepIdNo": id_number,
        "shareholder1": shareholders[0],
        "shareholder2": shareholders[1],
        "shareholder3": shareholders[2],
        "shareholder4": shareholders[3],
        "shareholder5": shareholders[4],
        "remark": generate_remark(),
    }


def generate_org_test_data(filename, count=1000, start_id=1, template_path=_TEMPLATE_PATH):
    """Generate random institution records and save them to an Excel file.

    Args:
        filename: Path of the workbook to write.
        count: Number of data rows to generate.
        start_id: ``id`` of the first generated record; later records count up.
        template_path: Workbook whose first row is copied as the header row.
    """
    # Read the template to obtain the header row.
    template_wb = openpyxl.load_workbook(template_path)
    template_ws = template_wb.active

    wb = Workbook()
    ws = wb.active
    ws.title = "机构中介黑名单"

    # Copy header values only (no cell styles).
    for cell in template_ws[1]:
        ws.cell(row=1, column=cell.column, value=cell.value)

    # Data rows start on row 2, directly below the header. The record's
    # ``id`` is generated but has no column in the sheet.
    for row_idx in range(2, count + 2):
        record = generate_org_data(start_id + row_idx - 2)
        for col_idx, key in enumerate(_DATA_COLUMNS, start=1):
            ws.cell(row=row_idx, column=col_idx, value=record[key])

    wb.save(filename)
    print(f"已生成文件: {filename}")


def main():
    """Generate two batches of 1000 records with consecutive ids."""
    print("开始生成机构中介黑名单测试数据...")
    generate_org_test_data("机构中介黑名单测试数据_1000条.xlsx", 1000, 1)
    generate_org_test_data("机构中介黑名单测试数据_1000条_第2批.xlsx", 1000, 1001)
    print("完成!")


if __name__ == "__main__":
    main()