152 lines
4.8 KiB
Python
152 lines
4.8 KiB
Python
|
|
import pandas as pd
|
|||
|
|
import random
|
|||
|
|
from openpyxl import load_workbook
|
|||
|
|
from openpyxl.styles import Font, PatternFill, Alignment
|
|||
|
|
|
|||
|
|
def calculate_id_check_code(id_17):
    """
    Compute the check character of a resident ID number (GB 11643-1999).

    Args:
        id_17: Indexable sequence holding the first 17 characters of the ID
            number. Each of the first 17 items is converted with ``int()``;
            anything after position 17 is ignored.

    Returns:
        The check character as a one-character string (a digit or ``'X'``).

    Raises:
        IndexError: If ``id_17`` has fewer than 17 items.
        ValueError: If one of the first 17 items cannot be converted by ``int()``.
    """
    position_weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    # Check characters indexed by (weighted sum mod 11).
    remainder_to_code = "10X98765432"

    total = 0
    for position, weight in enumerate(position_weights):
        total += int(id_17[position]) * weight

    return remainder_to_code[total % 11]
|
|||
|
|
|
|||
|
|
def generate_valid_person_id():
    """
    Build a random 18-character ID number whose check character is valid.

    The number is assembled from a random 6-digit area code in
    110000-659999, a birth date with year 1960-2000, month 1-12 and
    day 1-28, a 3-digit sequence code, and the check character computed by
    ``calculate_id_check_code``.

    Returns:
        The 18-character ID number as a string.
    """
    # The draw order below is kept fixed so seeded runs stay reproducible.
    region = random.randint(110000, 659999)
    year = random.randint(1960, 2000)
    month = random.randint(1, 12)
    day = random.randint(1, 28)
    serial = random.randint(0, 999)

    body = f"{region}{year}{month:02d}{day:02d}{serial:03d}"
    return body + calculate_id_check_code(body)
|
|||
|
|
|
|||
|
|
def validate_id_check_code(person_id):
    """
    Check whether an ID number carries the correct check character.

    Args:
        person_id: The ID number; it is converted with ``str()`` before
            being checked, so non-string values are accepted.

    Returns:
        True when the value is exactly 18 characters long, its first 17
        characters are ASCII digits, and its last character (compared
        case-insensitively) equals the check character computed from the
        first 17. False in every other case.
    """
    text = str(person_id)
    if len(text) != 18:
        return False

    body, check_char = text[:17], text[17]
    # Reject non-digit bodies here: the int() conversion inside
    # calculate_id_check_code would otherwise raise ValueError instead of
    # letting this validator report the number as invalid.
    if not (body.isascii() and body.isdigit()):
        return False

    return calculate_id_check_code(body) == check_char.upper()
|
|||
|
|
|
|||
|
|
# Convert every non-ID-card record in the workbook to the resident ID card
# type with a freshly generated number, rewrite and format the workbook, then
# re-read it and verify the result.

# The workbook is updated in place: input and output are the same path.
input_file = 'doc/test-data/intermediary/intermediary_test_data_1000_valid.xlsx'
output_file = 'doc/test-data/intermediary/intermediary_test_data_1000_valid.xlsx'

print(f"正在读取文件: {input_file}")
df = pd.read_excel(input_file)

print(f"总行数: {len(df)}\n")

# Report the document-type distribution before conversion.
print("=== 原始证件类型分布 ===")
for id_type, count in df['证件类型'].value_counts().items():
    print(f"{id_type}: {count}条")

# Select every record whose document type is not the resident ID card.
non_id_mask = df['证件类型'] != '身份证'
non_id_count = non_id_mask.sum()
id_card_count = (~non_id_mask).sum()

print(f"\n需要转换的证件数量: {non_id_count}条")
print(f"现有身份证数量: {id_card_count}条(保持不变)")

# Keep a copy of the ID numbers that are left untouched (used for the summary).
existing_id_cards = df.loc[~non_id_mask, '证件号码*'].copy()
print(f"\n已备份 {len(existing_id_cards)} 条现有身份证号码")

# The generated numbers are strings. If the column was read with a numeric
# dtype, writing strings into it is an incompatible-dtype assignment in
# pandas, so make the column object-typed first (values are unchanged).
df['证件号码*'] = df['证件号码*'].astype(object)

# Switch the document type and generate a new ID number for each record.
print("\n正在转换证件类型并生成身份证号码...")
updated_count = 0

for idx in df[non_id_mask].index:
    df.loc[idx, '证件类型'] = '身份证'
    df.loc[idx, '证件号码*'] = generate_valid_person_id()
    updated_count += 1

    # Progress report every 100 records and once more at the end.
    if (updated_count % 100 == 0) or (updated_count == non_id_count):
        print(f"已处理 {updated_count}/{non_id_count} 条")

# Save to Excel.
df.to_excel(output_file, index=False, engine='openpyxl')

# Re-open the saved workbook to apply formatting.
wb = load_workbook(output_file)
ws = wb.active

# Column widths, keyed by column letter.
column_widths = {
    'A': 15,
    'B': 12,
    'C': 12,
    'D': 8,
    'E': 12,
    'F': 20,
    'G': 15,
    'H': 15,
    'I': 30,
    'J': 20,
    'K': 20,
    'L': 12,
    'M': 15,
    'N': 12,
    'O': 20,
}
for column_letter, column_width in column_widths.items():
    ws.column_dimensions[column_letter].width = column_width

# Header row style: grey fill, bold, centered.
header_fill = PatternFill(start_color='D3D3D3', end_color='D3D3D3', fill_type='solid')
header_font = Font(bold=True)
header_alignment = Alignment(horizontal='center', vertical='center')

for cell in ws[1]:
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = header_alignment

# Freeze the header row.
ws.freeze_panes = 'A2'

wb.save(output_file)

# Final verification against the file that was actually written.
print("\n正在进行最终验证...")
df_verify = pd.read_excel(output_file)

# Every record must now be of the resident ID card type.
all_id_card = (df_verify['证件类型'] == '身份证').all()
print(f"所有证件类型均为身份证: {'✅ 是' if all_id_card else '❌ 否'}")

# Every ID number must pass the check-character validation; only the first
# five failures are printed to keep the output short.
invalid_count = 0
for person_id in df_verify['证件号码*']:
    if not validate_id_check_code(str(person_id)):
        invalid_count += 1
        if invalid_count <= 5:
            print(f"❌ 错误: {person_id}")
all_valid = invalid_count == 0

print("\n身份证号码验证:")
print(f"总数: {len(df_verify)}条")
print(f"校验通过: {len(df_verify) - invalid_count}条 ✅")
if not all_valid:
    print(f"校验失败: {invalid_count}条 ❌")

print("\n=== 更新完成 ===")
print(f"文件: {output_file}")
print(f"转换证件数量: {updated_count}条")
print(f"保持不变: {len(existing_id_cards)}条")
print(f"总记录数: {len(df_verify)}条")

# Only claim success when the verification above actually succeeded, and
# report the real record count instead of a hard-coded 1000.
if all_id_card:
    print(f"\n✅ 所有{len(df_verify)}条记录现在都使用身份证类型")
else:
    print("\n❌ 仍有记录未使用身份证类型")

if all_valid:
    print("✅ 所有身份证号码已通过GB 11643-1999标准校验")
else:
    print(f"❌ 有{invalid_count}条身份证号码未通过GB 11643-1999标准校验")
|