新增征信员工HTML样本并改造Mock解析
This commit is contained in:
@@ -3,11 +3,13 @@ from typing import Optional
|
||||
from fastapi import APIRouter, File, Form, UploadFile
|
||||
|
||||
from services.credit_debug_service import CreditDebugService
|
||||
from services.credit_html_identity_service import CreditHtmlIdentityService
|
||||
from services.credit_payload_service import CreditPayloadService
|
||||
|
||||
router = APIRouter()
|
||||
payload_service = CreditPayloadService("config/credit_feature_schema.json")
|
||||
debug_service = CreditDebugService("config/credit_response_examples.json")
|
||||
identity_service = CreditHtmlIdentityService()
|
||||
|
||||
|
||||
@router.post("/xfeature-mngs/conversation/htmlEval")
|
||||
@@ -24,10 +26,13 @@ async def html_eval(
|
||||
if error_response:
|
||||
return error_response
|
||||
|
||||
html_content = await file.read()
|
||||
subject_identity = identity_service.extract_identity(html_content)
|
||||
payload = payload_service.generate_payload(
|
||||
model=model,
|
||||
h_type=hType,
|
||||
filename=file.filename or "credit.html",
|
||||
subject_identity=subject_identity,
|
||||
)
|
||||
return debug_service.build_success_response(payload)
|
||||
|
||||
|
||||
37
lsfx-mock-server/services/credit_html_identity_service.py
Normal file
37
lsfx-mock-server/services/credit_html_identity_service.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import html
|
||||
import re
|
||||
from typing import Dict, Union
|
||||
|
||||
|
||||
class CreditHtmlIdentityService:
    """Extract the staff name and ID-card number from a credit sample HTML page.

    The identity is carried by two ``<meta>`` tags named ``ccdi-staff-name``
    and ``ccdi-staff-id-card``. Tags are located with the standard-library
    HTML parser, so attribute order, quoting style, extra attributes and
    letter case do not affect the lookup.
    """

    _STAFF_NAME_META = "ccdi-staff-name"
    _STAFF_ID_CARD_META = "ccdi-staff-id-card"

    def extract_identity(self, html_content: Union[str, bytes]) -> Dict[str, str]:
        """Return the identity fields found in *html_content*.

        Args:
            html_content: The HTML document, as text or as UTF-8 bytes.

        Returns:
            A dict that may contain ``staff_name`` and ``staff_id_card``.
            A key is omitted when its meta tag is missing or its content is
            blank, so an unrelated page yields an empty dict.
        """
        text = self._normalize_html(html_content)
        candidates = {
            "staff_name": self._extract_meta_content(text, self._STAFF_NAME_META),
            "staff_id_card": self._extract_meta_content(
                text, self._STAFF_ID_CARD_META
            ),
        }
        return {key: value for key, value in candidates.items() if value}

    def _extract_meta_content(self, text: str, meta_name: str) -> str:
        """Return the stripped ``content`` of the first matching ``<meta>`` tag.

        Args:
            text: The HTML document as text.
            meta_name: Value of the ``name`` attribute to look for; compared
                case-insensitively.

        Returns:
            The entity-decoded, whitespace-stripped content of the first
            ``<meta>`` tag with that name and a non-empty ``content``
            attribute, or ``""`` when there is none.
        """
        # Imported here so the module-level import block needs no change.
        from html.parser import HTMLParser

        wanted = meta_name.lower()
        found = []

        class _MetaFinder(HTMLParser):
            def handle_starttag(self, tag, attrs):
                # The parser lower-cases tag and attribute names and decodes
                # character references inside attribute values.
                if found or tag != "meta":
                    return
                attributes: Dict[str, str] = {}
                for key, value in attrs:
                    # Keep the first occurrence of a duplicated attribute;
                    # valueless attributes arrive as None.
                    attributes.setdefault(key, value or "")
                if attributes.get("name", "").lower() != wanted:
                    return
                content = attributes.get("content", "")
                if content:
                    found.append(content)

        finder = _MetaFinder()
        finder.feed(text)
        finder.close()
        return found[0].strip() if found else ""

    @staticmethod
    def _normalize_html(html_content: Union[str, bytes]) -> str:
        """Return *html_content* as text, decoding bytes as UTF-8.

        Undecodable bytes are dropped rather than raising, so a partly
        corrupted upload still gets a best-effort lookup.
        """
        if isinstance(html_content, bytes):
            return html_content.decode("utf-8", errors="ignore")
        return html_content
|
||||
@@ -2,7 +2,7 @@ import json
|
||||
import random
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
|
||||
class CreditPayloadService:
|
||||
@@ -12,8 +12,15 @@ class CreditPayloadService:
|
||||
self.schema_path = schema_path
|
||||
self.schema = self._load_schema()
|
||||
|
||||
def generate_payload(self, model: str, h_type: str, filename: str) -> dict:
|
||||
rng = random.Random(self._build_seed(model, h_type, filename))
|
||||
def generate_payload(
|
||||
self,
|
||||
model: str,
|
||||
h_type: str,
|
||||
filename: str,
|
||||
subject_identity: Optional[Dict[str, str]] = None,
|
||||
) -> dict:
|
||||
identity = subject_identity or {}
|
||||
rng = random.Random(self._build_seed(model, h_type, filename, identity))
|
||||
payload = {
|
||||
"lx_header": {},
|
||||
"lx_debt": {},
|
||||
@@ -24,7 +31,7 @@ class CreditPayloadService:
|
||||
domain = item["domain"]
|
||||
field = item["field"]
|
||||
field_type = item["type"]
|
||||
payload[domain][field] = self._generate_value(field, field_type, item, rng)
|
||||
payload[domain][field] = self._generate_value(field, field_type, item, rng, identity)
|
||||
|
||||
return payload
|
||||
|
||||
@@ -35,8 +42,17 @@ class CreditPayloadService:
|
||||
return json.loads(schema_file.read_text(encoding="utf-8"))
|
||||
|
||||
@staticmethod
|
||||
def _build_seed(model: str, h_type: str, filename: str) -> str:
|
||||
return f"{model}|{h_type}|{filename}"
|
||||
def _build_seed(
|
||||
model: str,
|
||||
h_type: str,
|
||||
filename: str,
|
||||
subject_identity: Dict[str, str],
|
||||
) -> str:
|
||||
return (
|
||||
f"{model}|{h_type}|{filename}|"
|
||||
f"{subject_identity.get('staff_name', '')}|"
|
||||
f"{subject_identity.get('staff_id_card', '')}"
|
||||
)
|
||||
|
||||
def _generate_value(
|
||||
self,
|
||||
@@ -44,9 +60,10 @@ class CreditPayloadService:
|
||||
field_type: str,
|
||||
item: dict,
|
||||
rng: random.Random,
|
||||
subject_identity: Dict[str, str],
|
||||
) -> str:
|
||||
if field_type == "string":
|
||||
return self._generate_string(field, rng)
|
||||
return self._generate_string(field, rng, subject_identity)
|
||||
if field_type == "amount":
|
||||
return f"{rng.uniform(0, 500000):.2f}"
|
||||
if field_type == "count":
|
||||
@@ -55,7 +72,16 @@ class CreditPayloadService:
|
||||
return rng.choice(item["options"])
|
||||
raise ValueError(f"Unsupported field type: {field_type}")
|
||||
|
||||
def _generate_string(self, field: str, rng: random.Random) -> str:
|
||||
def _generate_string(
|
||||
self,
|
||||
field: str,
|
||||
rng: random.Random,
|
||||
subject_identity: Dict[str, str],
|
||||
) -> str:
|
||||
if field == "query_cert_no" and subject_identity.get("staff_id_card"):
|
||||
return subject_identity["staff_id_card"]
|
||||
if field == "query_cust_name" and subject_identity.get("staff_name"):
|
||||
return subject_identity["staff_name"]
|
||||
if field == "query_cert_no":
|
||||
return self._generate_cert_no(rng)
|
||||
if field == "query_cust_name":
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
import html
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
|
||||
class StaffCreditHtmlExportService:
    """Export staff names and ID-card numbers as credit-parsing sample HTML files."""

    def __init__(self, staff_identity_repository):
        """Store the repository used to load staff identities.

        Args:
            staff_identity_repository: Object exposing
                ``select_active_staff_identities()``, which returns an
                iterable of dicts with ``staff_name`` and ``staff_id_card``.
        """
        self.staff_identity_repository = staff_identity_repository

    def export(self, output_dir: Path) -> List[Path]:
        """Write one sample HTML file per staff member into *output_dir*.

        Existing ``*.html`` files in the directory are removed first, but
        only after the staff list has been loaded, so a repository failure
        leaves the previous export intact.

        Args:
            output_dir: Target directory; created if it does not exist.

        Returns:
            The paths of the generated files, in repository order.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # Load before deleting: if this raises, the old samples survive.
        staff_list = self.staff_identity_repository.select_active_staff_identities()
        self._clear_existing_html(output_path)

        generated_files: List[Path] = []
        for index, staff in enumerate(staff_list, start=1):
            staff_name = staff["staff_name"]
            staff_id_card = staff["staff_id_card"]
            file_path = output_path / self._build_filename(
                index, staff_name, staff_id_card
            )
            file_path.write_text(
                self.build_html(staff_name, staff_id_card),
                encoding="utf-8",
            )
            generated_files.append(file_path)
        return generated_files

    def _clear_existing_html(self, output_dir: Path) -> None:
        """Delete every regular ``*.html`` file directly inside *output_dir*."""
        for html_file in output_dir.glob("*.html"):
            # A directory that happens to end in ".html" cannot be unlinked.
            if html_file.is_file():
                html_file.unlink()

    def _build_filename(self, index: int, staff_name: str, staff_id_card: str) -> str:
        """Build ``<index>_<name>_<last four ID characters>.html``.

        Both the name and the ID-card suffix are sanitised so neither can
        introduce a path separator into the filename.
        """
        safe_name = self._sanitize_filename(staff_name)
        safe_suffix = self._sanitize_filename(staff_id_card[-4:])
        return f"{index:04d}_{safe_name}_{safe_suffix}.html"

    @staticmethod
    def _sanitize_filename(name: str) -> str:
        """Replace runs of filesystem-reserved characters with ``_``.

        Returns ``"unknown"`` when nothing is left after stripping whitespace.
        """
        sanitized = re.sub(r'[\\/:*?"<>|]+', "_", name).strip()
        return sanitized or "unknown"

    @staticmethod
    def build_html(staff_name: str, staff_id_card: str) -> str:
        """Render the sample page for one staff member.

        The identity is written both into ``<meta>`` tags and into the
        visible body. Both values are HTML-escaped, quotes included, so they
        are safe inside attribute values.
        """
        escaped_name = html.escape(staff_name, quote=True)
        escaped_id_card = html.escape(staff_id_card, quote=True)
        return f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="ccdi-staff-name" content="{escaped_name}" />
<meta name="ccdi-staff-id-card" content="{escaped_id_card}" />
<title>征信解析员工样本</title>
</head>
<body>
<main>
<h1>征信解析员工样本</h1>
<p>姓名:{escaped_name}</p>
<p>身份证号:{escaped_id_card}</p>
</main>
</body>
</html>
"""
|
||||
@@ -85,3 +85,27 @@ class StaffIdentityRepository:
|
||||
"staff_id_card": staff["staff_id_card"],
|
||||
"family_id_cards": family_id_cards,
|
||||
}
|
||||
|
||||
def select_active_staff_identities(self) -> List[Dict[str, str]]:
    """Load every staff identity that can be used for credit sample export.

    The query keeps rows of ``ccdi_base_staff`` whose name and ID card are
    non-blank and whose ``status`` equals ``'0'``, ordered by ``staff_id``.

    Returns:
        One dict per row with the keys ``staff_name`` and ``staff_id_card``.
    """
    query = (
        "\n"
        "SELECT s.name AS staff_name, s.id_card AS staff_id_card\n"
        "FROM ccdi_base_staff s\n"
        "WHERE COALESCE(TRIM(s.name), '') <> ''\n"
        "AND COALESCE(TRIM(s.id_card), '') <> ''\n"
        "AND s.status = '0'\n"
        "ORDER BY s.staff_id\n"
    )
    with self._connect() as conn, conn.cursor() as cur:
        cur.execute(query)
        records = cur.fetchall()

    # Copy only the two identity columns out of each fetched row.
    identities = []
    for record in records:
        identities.append(
            {
                "staff_name": record["staff_name"],
                "staff_id_card": record["staff_id_card"],
            }
        )
    return identities
|
||||
|
||||
@@ -87,4 +87,22 @@ def sample_inner_flow_request():
|
||||
@pytest.fixture
def sample_credit_html_file():
    """Sample credit HTML upload that carries the staff identity meta tags.

    Returns:
        A ``(filename, content_bytes, content_type)`` tuple whose content is
        a UTF-8 encoded page containing the ``ccdi-staff-name`` and
        ``ccdi-staff-id-card`` meta tags.
    """
    # Named ``document`` rather than ``html`` to avoid shadowing the stdlib module.
    document = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="ccdi-staff-name" content="测试员工" />
<meta name="ccdi-staff-id-card" content="320101199001010030" />
<title>征信解析员工样本</title>
</head>
<body>
<main>
<h1>征信解析员工样本</h1>
<p>姓名:测试员工</p>
<p>身份证号:320101199001010030</p>
</main>
</body>
</html>
"""
    return ("credit.html", document.encode("utf-8"), "text/html")
|
||||
|
||||
@@ -10,6 +10,8 @@ def test_html_eval_should_return_credit_payload(client, sample_credit_html_file)
|
||||
assert data["status_code"] == "0"
|
||||
assert data["message"] == "成功"
|
||||
assert "lx_header" in data["payload"]
|
||||
assert data["payload"]["lx_header"]["query_cust_name"] == "测试员工"
|
||||
assert data["payload"]["lx_header"]["query_cert_no"] == "320101199001010030"
|
||||
|
||||
|
||||
def test_html_eval_should_return_err_99999_for_missing_model(client, sample_credit_html_file):
|
||||
|
||||
19
lsfx-mock-server/tests/test_credit_html_identity_service.py
Normal file
19
lsfx-mock-server/tests/test_credit_html_identity_service.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from services.credit_html_identity_service import CreditHtmlIdentityService
|
||||
|
||||
|
||||
def test_extract_identity_should_read_generated_html_meta_fields():
    """Both identity meta tags of an export-style page are read back."""
    sample_page = (
        "\n"
        "<html>\n"
        "<head>\n"
        '<meta name="ccdi-staff-name" content="王五" />\n'
        '<meta name="ccdi-staff-id-card" content="330101198801010011" />\n'
        "</head>\n"
        "<body></body>\n"
        "</html>\n"
    )

    extracted = CreditHtmlIdentityService().extract_identity(sample_page)

    assert extracted["staff_name"] == "王五"
    assert extracted["staff_id_card"] == "330101198801010011"
|
||||
@@ -3,22 +3,30 @@ from services.credit_payload_service import CreditPayloadService
|
||||
|
||||
def test_generate_payload_should_be_stable_for_same_input():
    """Identical inputs give identical payloads that echo the supplied identity."""
    service = CreditPayloadService("config/credit_feature_schema.json")
    request = dict(
        model="LXCUSTALL",
        h_type="PERSON",
        filename="credit-report-a.html",
        subject_identity={
            "staff_name": "张三",
            "staff_id_card": "330101198801010011",
        },
    )

    # Two calls with exactly the same arguments.
    payload1 = service.generate_payload(**request)
    payload2 = service.generate_payload(**request)

    assert payload1 == payload2
    assert set(payload1.keys()) == {"lx_header", "lx_debt", "lx_publictype"}
    assert len(payload1["lx_debt"]) == 21
    assert len(payload1["lx_publictype"]) == 6

    header = payload1["lx_header"]
    assert header["query_cust_name"] == "张三"
    assert header["query_cert_no"] == "330101198801010011"
|
||||
|
||||
|
||||
def test_generate_payload_should_use_schema_type_rules():
|
||||
@@ -28,6 +36,10 @@ def test_generate_payload_should_use_schema_type_rules():
|
||||
model="LXCUSTALL",
|
||||
h_type="ENTERPRISE",
|
||||
filename="credit-report-b.html",
|
||||
subject_identity={
|
||||
"staff_name": "李四",
|
||||
"staff_id_card": "330101199001010022",
|
||||
},
|
||||
)
|
||||
|
||||
assert payload["lx_debt"]["uncle_bank_house_state"] in {"正常", "逾期", "不良"}
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
from services.staff_credit_html_export_service import StaffCreditHtmlExportService
|
||||
|
||||
|
||||
class FakeStaffRepository:
    """In-memory stand-in for the staff repository that returns two fixed records."""

    def select_active_staff_identities(self):
        """Return the two hard-coded staff identities as dicts."""
        records = (
            ("张三", "110101199001010011"),
            ("李四", "110101199202023456"),
        )
        return [
            {"staff_name": staff_name, "staff_id_card": staff_id_card}
            for staff_name, staff_id_card in records
        ]
|
||||
|
||||
|
||||
def test_export_should_write_one_html_per_staff(tmp_path):
    """Export writes one correctly named and populated HTML file per staff record."""
    exporter = StaffCreditHtmlExportService(FakeStaffRepository())

    written = exporter.export(tmp_path)

    assert len(written) == 2
    first_file, second_file = written[0], written[1]
    first_html = first_file.read_text(encoding="utf-8")
    second_html = second_file.read_text(encoding="utf-8")

    # File names are "<index>_<name>_<last four ID digits>.html".
    assert first_file.name == "0001_张三_0011.html"
    assert second_file.name == "0002_李四_3456.html"
    assert 'meta name="ccdi-staff-name" content="张三"' in first_html
    assert 'meta name="ccdi-staff-id-card" content="110101199001010011"' in first_html
    assert "姓名:李四" in second_html
    assert "身份证号:110101199202023456" in second_html
|
||||
Reference in New Issue
Block a user