# Source file: ccdi/doc/test-data/intermediary/convert-all-to-idcard.py
# Last modified: 2026-02-09 00:13:32 +08:00 (152 lines, 4.8 KiB, Python)
#
# Note from the repository viewer: this file contains Unicode characters
# that might be confused with other characters. If that is intentional,
# the warning can safely be ignored.
import pandas as pd
import random
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment
def calculate_id_check_code(id_17):
    """Return the check character for the first 17 digits of an ID number.

    Follows the weighted-sum scheme of GB 11643-1999: each of the 17 leading
    digits is multiplied by a fixed positional weight, the products are
    summed, and the sum modulo 11 selects the check character from a lookup
    table.

    Args:
        id_17: String whose first 17 characters are the ASCII digits of the
            ID body. Anything after position 17 is ignored, so a full
            18-character ID may be passed as well.

    Returns:
        A one-character string, one of ``'0'``-``'9'`` or ``'X'``.

    Raises:
        ValueError: If fewer than 17 characters are supplied, or if any of
            the first 17 characters is not an ASCII digit.
    """
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    # Indexed by (weighted sum % 11).
    check_codes = "10X98765432"

    body = id_17[:17]
    if len(body) != 17:
        raise ValueError(
            f"expected at least 17 digits, got {len(body)} character(s)"
        )
    # int() would also accept non-ASCII digits (e.g. full-width forms), so
    # the allowed characters are checked explicitly before converting.
    if not all(ch in "0123456789" for ch in body):
        raise ValueError("the first 17 characters must all be ASCII digits")

    weighted_sum = sum(int(ch) * weight for ch, weight in zip(body, weights))
    return check_codes[weighted_sum % 11]
def generate_valid_person_id():
    """Build a random 18-character ID number with a matching check character.

    The first 17 digits are assembled from randomly drawn parts: a 6-digit
    area code in 110000-659999, a birth year in 1960-2000, a zero-padded
    month, a zero-padded day and a 3-digit sequence number. The check
    character is then computed by ``calculate_id_check_code`` and appended.

    Returns:
        The 18-character ID number as a string.
    """
    region = random.randint(110000, 659999)
    year = random.randint(1960, 2000)
    month = random.randint(1, 12)
    # The day is capped at 28 so it exists in every month of every year.
    day = random.randint(1, 28)
    serial = random.randint(0, 999)

    first_17 = "{}{}{:02d}{:02d}{:03d}".format(region, year, month, day, serial)
    return first_17 + calculate_id_check_code(first_17)
def validate_id_check_code(person_id):
    """Check whether an ID number carries the correct check character.

    Args:
        person_id: The ID number to check. It is converted with ``str()``
            first, so non-string values are accepted.

    Returns:
        ``True`` when the value is exactly 18 characters long, its first 17
        characters are ASCII digits, and its last character (compared
        case-insensitively, so ``'x'`` matches ``'X'``) equals the check
        character computed from the first 17. ``False`` otherwise.
    """
    text = str(person_id)
    if len(text) != 18:
        return False

    body, check_char = text[:17], text[17]
    # A non-digit in the body is simply an invalid ID. Without this guard the
    # int() conversion inside calculate_id_check_code raises ValueError and
    # aborts the caller's whole validation loop.
    if not all(ch in "0123456789" for ch in body):
        return False

    return calculate_id_check_code(body) == check_char.upper()
# Convert every record in the workbook to the ID-card document type, give
# each converted record a freshly generated ID number, re-apply the sheet
# formatting, and finally re-read the file to verify the result.

# Input and output are the same path, so the workbook is rewritten in place.
input_file = 'doc/test-data/intermediary/intermediary_test_data_1000_valid.xlsx'
output_file = 'doc/test-data/intermediary/intermediary_test_data_1000_valid.xlsx'

id_type_col = '证件类型'
id_number_col = '证件号码*'
id_card_type = '身份证'

print(f"正在读取文件: {input_file}")
# Force the ID-number column to text so it stays a string column and the
# generated string IDs can be assigned without a dtype mismatch.
df = pd.read_excel(input_file, dtype={id_number_col: str})
print(f"总行数: {len(df)}\n")

# Report the distribution of document types before conversion.
print("=== 原始证件类型分布 ===")
for id_type, count in df[id_type_col].value_counts().items():
    print(f"{id_type}: {count}")

# Select every record whose document type is not already an ID card.
non_id_mask = df[id_type_col] != id_card_type
non_id_count = non_id_mask.sum()
id_card_count = (~non_id_mask).sum()
print(f"\n需要转换的证件数量: {non_id_count}")
print(f"现有身份证数量: {id_card_count}条(保持不变)")

# Keep an in-memory copy of the ID numbers that are left untouched.
existing_id_cards = df.loc[~non_id_mask, id_number_col].copy()
print(f"\n已备份 {len(existing_id_cards)} 条现有身份证号码")

# Track numbers already in use so a generated ID never duplicates an
# existing one or one generated earlier in this run.
used_ids = set(existing_id_cards.dropna().astype(str))

# Switch the document type and assign a newly generated ID number.
print("\n正在转换证件类型并生成身份证号码...")
updated_count = 0
for idx in df.index[non_id_mask]:
    df.loc[idx, id_type_col] = id_card_type

    new_id = generate_valid_person_id()
    while new_id in used_ids:
        new_id = generate_valid_person_id()
    used_ids.add(new_id)
    df.loc[idx, id_number_col] = new_id

    updated_count += 1
    # Progress is reported every 100 records and once more at the end.
    if (updated_count % 100 == 0) or (updated_count == non_id_count):
        print(f"已处理 {updated_count}/{non_id_count}")

# Save to Excel.
df.to_excel(output_file, index=False, engine='openpyxl')

# Re-open the saved workbook to apply formatting.
wb = load_workbook(output_file)
ws = wb.active

# Column widths, keyed by column letter.
column_widths = {
    'A': 15,
    'B': 12,
    'C': 12,
    'D': 8,
    'E': 12,
    'F': 20,
    'G': 15,
    'H': 15,
    'I': 30,
    'J': 20,
    'K': 20,
    'L': 12,
    'M': 15,
    'N': 12,
    'O': 20,
}
for column_letter, width in column_widths.items():
    ws.column_dimensions[column_letter].width = width

# Header row style: grey fill, bold font, centred text.
header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid')
header_font = Font(bold=True)
header_alignment = Alignment(horizontal='center', vertical='center')
for cell in ws[1]:
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = header_alignment

# Freeze the header row.
ws.freeze_panes = 'A2'
wb.save(output_file)

# Final verification: re-read the file that was just written.
print("\n正在进行最终验证...")
df_verify = pd.read_excel(output_file, dtype={id_number_col: str})

# Every record must now have the ID-card document type.
all_id_card = (df_verify[id_type_col] == id_card_type).all()
print(f"所有证件类型均为身份证: {'✅ 是' if all_id_card else '❌ 否'}")

# Every ID number must carry a correct check character.
invalid_ids = [
    person_id
    for person_id in df_verify[id_number_col]
    if not validate_id_check_code(str(person_id))
]
invalid_count = len(invalid_ids)
all_valid = invalid_count == 0
# Only the first five failures are listed to keep the output short.
for person_id in invalid_ids[:5]:
    print(f"❌ 错误: {person_id}")

print("\n身份证号码验证:")
print(f"总数: {len(df_verify)}")
print(f"校验通过: {len(df_verify) - invalid_count}条 ✅")
if invalid_count > 0:
    print(f"校验失败: {invalid_count}条 ❌")

print("\n=== 更新完成 ===")
print(f"文件: {output_file}")
print(f"转换证件数量: {updated_count}")
print(f"保持不变: {len(existing_id_cards)}")
print(f"总记录数: {len(df_verify)}")

# The closing summary reflects the actual verification outcome and the real
# record count instead of always announcing success for 1000 records.
if all_id_card:
    print(f"\n✅ 所有{len(df_verify)}条记录现在都使用身份证类型")
else:
    print("\n❌ 仍有记录的证件类型不是身份证")
if all_valid:
    print("✅ 所有身份证号码已通过GB 11643-1999标准校验")
else:
    print(f"❌ 有{invalid_count}条身份证号码未通过GB 11643-1999标准校验")