Files
ccdi/doc/test-data/test-data/intermediary/fix-id-cards.py
2026-02-09 14:28:25 +08:00

144 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pandas as pd
import random
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment
def calculate_id_check_code(id_17):
    """
    Compute the check character for a resident ID number (GB 11643-1999).

    Each of the first 17 characters is converted with ``int()``, multiplied
    by a fixed positional weight, and the products are summed. The sum
    modulo 11 selects the check character from a fixed lookup table.

    Args:
        id_17: Indexable sequence holding the first 17 digits of the ID.
            Only positions 0-16 are read.

    Returns:
        A one-character string: a digit or ``'X'``.

    Raises:
        IndexError: If ``id_17`` has fewer than 17 items.
        ValueError: If an item cannot be converted by ``int()``.
    """
    position_weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    # Position in this string == (weighted sum mod 11).
    remainder_to_code = "10X98765432"

    total = 0
    for position, weight in enumerate(position_weights):
        total += int(id_17[position]) * weight
    return remainder_to_code[total % 11]
def generate_valid_person_id():
    """
    Build a random 18-character ID number whose check character is valid.

    Layout of the result: 6-digit area code, 4-digit birth year, 2-digit
    month, 2-digit day, 3-digit sequence code, then the check character
    returned by ``calculate_id_check_code`` for those 17 digits.

    Returns:
        The generated 18-character ID string.
    """
    # Draw the fields in this exact order so that a seeded ``random`` module
    # produces the same sequence of IDs.
    region = random.randint(110000, 659999)
    year = random.randint(1960, 2000)
    month = random.randint(1, 12)
    day = random.randint(1, 28)  # days 1-28 exist in every month
    serial = random.randint(0, 999)

    body = "{}{}{:02d}{:02d}{:03d}".format(region, year, month, day, serial)
    return body + calculate_id_check_code(body)
def validate_id_check_code(person_id):
    """
    Check whether an ID number carries the correct check character.

    Malformed input is reported as invalid instead of raising, so callers
    can run this over arbitrary spreadsheet cells without guarding each
    call.

    Args:
        person_id: Candidate ID number.

    Returns:
        True when ``person_id`` is an 18-character string whose first 17
        characters are ASCII digits and whose last character (compared
        case-insensitively) equals the check character computed by
        ``calculate_id_check_code``. False otherwise.
    """
    if not isinstance(person_id, str) or len(person_id) != 18:
        return False

    id_17 = person_id[:17]
    check_code = person_id[17].upper()

    # Reject a malformed body up front: the checksum helper calls int() on
    # every character and would raise ValueError on anything non-numeric,
    # aborting the caller's validation loop.
    if any(ch not in "0123456789" for ch in id_17):
        return False
    # The check character can only ever be a digit or 'X'.
    if check_code not in "0123456789X":
        return False

    return calculate_id_check_code(id_17) == check_code
# Workbook to repair. Input and output are the same path, so the file is
# rewritten in place.
input_file = 'doc/test-data/intermediary/intermediary_test_data_1000_valid.xlsx'
output_file = 'doc/test-data/intermediary/intermediary_test_data_1000_valid.xlsx'

# Header names / values the script looks up in the workbook.
ID_TYPE_COLUMN = '证件类型'
ID_NUMBER_COLUMN = '证件号码*'
ID_CARD_TYPE = '身份证'

# Display width applied to each worksheet column.
COLUMN_WIDTHS = {
    'A': 15, 'B': 12, 'C': 12, 'D': 8, 'E': 12,
    'F': 20, 'G': 15, 'H': 15, 'I': 30, 'J': 20,
    'K': 20, 'L': 12, 'M': 15, 'N': 12, 'O': 20,
}


def _format_workbook(path):
    """Apply column widths, header styling and a frozen header row to *path*."""
    wb = load_workbook(path)
    ws = wb.active

    for column_letter, width in COLUMN_WIDTHS.items():
        ws.column_dimensions[column_letter].width = width

    # Header row: grey fill, bold, centred.
    header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid')
    header_font = Font(bold=True)
    header_alignment = Alignment(horizontal='center', vertical='center')
    for cell in ws[1]:
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = header_alignment

    # Keep the header row visible while scrolling.
    ws.freeze_panes = 'A2'
    wb.save(path)


def main():
    """
    Regenerate every ID-card number in the workbook and verify the result.

    Steps: read the workbook, report how many existing ID-card numbers fail
    the checksum, replace ALL ID-card numbers with freshly generated unique
    ones, write and format the workbook, then re-read it and validate every
    ID-card number again.

    Raises:
        SystemExit: With status 1 if any ID fails the final verification.
    """
    print(f"正在读取文件: {input_file}")
    # Force the ID column to text so numeric-looking IDs are never inferred
    # as numbers and string assignment into the column is always valid.
    df = pd.read_excel(input_file, dtype={ID_NUMBER_COLUMN: str})
    print(f"总行数: {len(df)}")

    # Locate all rows whose document type is "ID card".
    id_card_mask = df[ID_TYPE_COLUMN] == ID_CARD_TYPE
    id_card_count = int(id_card_mask.sum())
    print(f"\n找到 {id_card_count} 条身份证记录")

    # Validate the existing numbers. The result is informational only:
    # every ID-card number is regenerated below regardless.
    print("\n正在验证现有身份证校验码...")
    invalid_indices = [
        idx
        for idx, value in df.loc[id_card_mask, ID_NUMBER_COLUMN].items()
        if not validate_id_check_code(str(value))
    ]
    invalid_count = len(invalid_indices)
    print(f"校验正确: {id_card_count - invalid_count}")
    print(f"校验错误: {invalid_count}")
    if invalid_count > 0:
        print(f"\n需要重新生成 {invalid_count} 条身份证号码")

    # Regenerate all ID-card numbers, skipping the (unlikely) duplicates so
    # no two rows share the same generated ID.
    print("\n正在重新生成所有身份证号码...")
    new_ids = []
    issued = set()
    while len(new_ids) < id_card_count:
        candidate = generate_valid_person_id()
        if candidate in issued:
            continue
        issued.add(candidate)
        new_ids.append(candidate)
        if len(new_ids) % 50 == 0 or len(new_ids) == id_card_count:
            print(f"已更新 {len(new_ids)}/{id_card_count}")
    updated_count = len(new_ids)
    if new_ids:
        df.loc[id_card_mask, ID_NUMBER_COLUMN] = new_ids

    # Save, then re-open with openpyxl to apply presentation formatting.
    df.to_excel(output_file, index=False, engine='openpyxl')
    _format_workbook(output_file)

    # Final verification against what was actually written to disk.
    print("\n正在进行最终验证...")
    df_verify = pd.read_excel(output_file, dtype={ID_NUMBER_COLUMN: str})
    id_cards = df_verify.loc[df_verify[ID_TYPE_COLUMN] == ID_CARD_TYPE, ID_NUMBER_COLUMN]
    failed_ids = [
        person_id for person_id in id_cards
        if not validate_id_check_code(str(person_id))
    ]
    for person_id in failed_ids:
        print(f"❌ 错误: {person_id}")
    if not failed_ids:
        print(f"✅ 所有 {len(id_cards)} 条身份证号码校验通过!")
    else:
        print("❌ 存在校验失败的身份证号码")

    print("\n=== 更新完成 ===")
    print(f"文件: {output_file}")
    print(f"更新身份证数量: {updated_count}")
    print("其他证件类型保持不变")

    # Signal failure to the shell / CI when the written file is not clean.
    if failed_ids:
        raise SystemExit(1)


if __name__ == "__main__":
    main()