From 071c02192d8d79778b8b7b4d9c7841146abaf943 Mon Sep 17 00:00:00 2001 From: wkc <978997012@qq.com> Date: Wed, 25 Mar 2026 10:28:08 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dall=E6=A8=A1=E5=BC=8F?= =?UTF-8?q?=E6=9C=88=E5=9B=BA=E5=AE=9A=E6=94=B6=E5=85=A5=E8=A7=84=E5=88=99?= =?UTF-8?q?=E5=91=BD=E4=B8=AD=E9=9A=94=E7=A6=BB=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...-lsfx-mock-all-hit-sql-alignment-record.md | 101 ++++++++++++++++-- lsfx-mock-server/services/file_service.py | 75 +++++++++++++ lsfx-mock-server/tests/test_file_service.py | 50 +++++++++ .../tests/test_statement_service.py | 52 +++++++++ 4 files changed, 268 insertions(+), 10 deletions(-) diff --git a/docs/reports/implementation/2026-03-25-lsfx-mock-all-hit-sql-alignment-record.md b/docs/reports/implementation/2026-03-25-lsfx-mock-all-hit-sql-alignment-record.md index dc188c4d..9b66eec2 100644 --- a/docs/reports/implementation/2026-03-25-lsfx-mock-all-hit-sql-alignment-record.md +++ b/docs/reports/implementation/2026-03-25-lsfx-mock-all-hit-sql-alignment-record.md @@ -81,7 +81,51 @@ - all 模式下噪声收敛为安全支出噪声 - 接口链路能返回 5 条目标规则对应的关键样本 -## 4. 验证命令 +## 4. 二次修正 + +### 4.1 发现的问题 + +在真实后端联调时,前 4 条规则已经命中,但 `MONTHLY_FIXED_INCOME` 仍未命中。 + +排查结论: + +- 问题不在 SQL,也不在 `7200 > 5000` 阈值 +- 根因是 `all` 模式下每个 `logId` 都复制整套规则样本 +- 同一员工文件内同时混入了: + - `FIXED_COUNTERPARTY_TRANSFER` 的季度转入 + - `SUSPICIOUS_INCOME_KEYWORD` 的大额收入 + - 多条大额交易/现金存款/结汇类正向入账样本 +- 这些样本都会被 `MONTHLY_FIXED_INCOME` 的 SQL 汇总到员工月收入中,导致月数和波动率口径被污染 + +### 4.2 修正方案 + +修改文件: + +- [file_service.py](/Users/wkc/Desktop/ccdi/ccdi/lsfx-mock-server/services/file_service.py) +- [test_file_service.py](/Users/wkc/Desktop/ccdi/ccdi/lsfx-mock-server/tests/test_file_service.py) +- [test_statement_service.py](/Users/wkc/Desktop/ccdi/ccdi/lsfx-mock-server/tests/test_statement_service.py) + +调整内容: + +- 新增项目内 `all` 模式规则重平衡逻辑 +- 当同一项目只有 1 个文件时,保留原始 all 模式规则计划,不额外覆盖 +- 当同一项目存在 2 个及以上文件时: + - 第 1 个文件切换为 `MONTHLY_FIXED_INCOME` 隔离档 + - 从隔离档中剔除会污染非工资月流入聚合的正向入账规则: + - `SINGLE_LARGE_INCOME` + - `CUMULATIVE_INCOME` + - `ANNUAL_TURNOVER` + - `LARGE_CASH_DEPOSIT` + - `FREQUENT_CASH_DEPOSIT` + - `SUSPICIOUS_INCOME_KEYWORD` + - `FOREX_SELL_AMT` + - `FIXED_COUNTERPARTY_TRANSFER` + - 其余文件继续承载完整 all 模式规则计划 +- 新增两条红绿测试: + - all 模式多文件下必须出现“月固定收入隔离档” + - 隔离档生成的员工非工资月收入必须稳定为 6 个月 `7200.0` + +## 5. 验证命令 执行命令: @@ -92,10 +136,52 @@ python3 -m pytest tests/test_file_service.py tests/test_statement_service.py tes 验证结果: -- `49 passed` +- `51 passed` - `0 failed` -## 5. 前端影响 +## 6. 真实链路复验 + +复验环境: + +- 后端:`http://localhost:62318` +- Mock:`python3 main.py --rule-hit-mode all` +- 复验项目:`project_id = 54` + +复验步骤: + +1. 调用 `/ccdi/project` 新建项目 `all命中复验-20260325-102625` +2. 调用 `/ccdi/file-upload/pull-bank-info` 拉取 3 个身份证号的流水 +3. 等待 `ccdi_file_upload_record` 全部进入 `parsed_success` +4. 确认 `ccdi_bank_tag_task` 最新任务状态为 `SUCCESS` +5. 查询 `ccdi_bank_statement_tag_result` 核验 5 条目标规则 + +复验结果: + +- `SPECIAL_AMOUNT_TRANSACTION`:`3` 条 +- `MONTHLY_FIXED_INCOME`:`1` 条 +- `SUSPICIOUS_INCOME_KEYWORD`:`2` 条 +- `FIXED_COUNTERPARTY_TRANSFER`:`2` 条 +- `LOW_INCOME_RELATIVE_LARGE_TRANSACTION`:`3` 条 + +关键命中样例: + +- `MONTHLY_FIXED_INCOME` + - `近12个月有 6 个月固定收入超过阈值,月均收入 7200.00 元` +- `SPECIAL_AMOUNT_TRANSACTION` + - `与非配偶/子女交易出现特殊金额 1314.00 元` +- `SUSPICIOUS_INCOME_KEYWORD` + - `摘要命中收入关键词,摘要“劳务费发放”` +- `FIXED_COUNTERPARTY_TRANSFER` + - `固定对手“兰溪零工服务有限公司”在 4 个季度累计转入位于区间 [3000, 15000] 元` +- `LOW_INCOME_RELATIVE_LARGE_TRANSACTION` + - `低收入关系人累计交易 ... 元,命中关系人数 1 人` + +结论: + +- 这 5 条目标规则在真实后端链路中均已命中 +- `MONTHLY_FIXED_INCOME` 的遗漏问题已通过 all 模式多文件重平衡修复 + +## 7. 前端影响 本次未修改 `ruoyi-ui`。 @@ -105,11 +191,6 @@ python3 -m pytest tests/test_file_service.py tests/test_statement_service.py tes - 本轮实现仅影响 Mock 样本和关联表基线 - 前端无需适配代码 -## 6. 未完成项 +## 8. 未完成项 -本次完成了 Mock 侧单元测试和接口级集成测试,但未在本记录中执行真实后端联调与数据库结果核验: - -- 未实际启动本地后端并拉取流水 -- 未实际查询 `ccdi_bank_statement_tag_result` - -如果后续需要闭环到项目库结果,应再补一次真实联调记录。 +本次范围内无新增未完成项。 diff --git a/lsfx-mock-server/services/file_service.py b/lsfx-mock-server/services/file_service.py index 9216e95b..7f4a769a 100644 --- a/lsfx-mock-server/services/file_service.py +++ b/lsfx-mock-server/services/file_service.py @@ -48,6 +48,21 @@ PHASE2_BASELINE_RULE_CODES = [ "SUPPLIER_CONCENTRATION", ] +MONTHLY_FIXED_INCOME_ISOLATED_LARGE_TRANSACTION_RULE_CODES = { + "SINGLE_LARGE_INCOME", + "CUMULATIVE_INCOME", + "ANNUAL_TURNOVER", + "LARGE_CASH_DEPOSIT", + "FREQUENT_CASH_DEPOSIT", +} +MONTHLY_FIXED_INCOME_ISOLATED_PHASE1_RULE_CODES = { + "SUSPICIOUS_INCOME_KEYWORD", + "FOREX_SELL_AMT", +} +MONTHLY_FIXED_INCOME_ISOLATED_PHASE2_RULE_CODES = { + "FIXED_COUNTERPARTY_TRANSFER", +} + RULE_CONFLICT_GROUPS = [] ALL_MODE_STATEMENT_BASELINE_RULE_CODES = { "LOW_INCOME_RELATIVE_LARGE_TRANSACTION", @@ -209,6 +224,28 @@ class FileService: "phase2_baseline_hit_rules": list(PHASE2_BASELINE_RULE_CODES), } + def _build_monthly_fixed_income_isolated_rule_hit_plan(self) -> dict: + """为月固定收入准备不受正向流入污染的 all 模式计划。""" + full_plan = self._build_all_compatible_rule_hit_plan() + return { + "large_transaction_hit_rules": [ + rule_code + for rule_code in full_plan["large_transaction_hit_rules"] + if rule_code not in MONTHLY_FIXED_INCOME_ISOLATED_LARGE_TRANSACTION_RULE_CODES + ], + "phase1_hit_rules": [ + rule_code + for rule_code in full_plan["phase1_hit_rules"] + if rule_code not in MONTHLY_FIXED_INCOME_ISOLATED_PHASE1_RULE_CODES + ], + "phase2_statement_hit_rules": [ + rule_code + for rule_code in full_plan["phase2_statement_hit_rules"] + if rule_code not in MONTHLY_FIXED_INCOME_ISOLATED_PHASE2_RULE_CODES + ], + "phase2_baseline_hit_rules": list(full_plan["phase2_baseline_hit_rules"]), + } + def _apply_conflict_groups(self, rule_plan: dict) -> dict: """按显式互斥组裁剪规则计划,同组仅保留固定优先级的首个规则。""" resolved_plan = {plan_key: list(rule_codes) for plan_key, rule_codes in rule_plan.items()} @@ -235,6 +272,42 @@ class FileService: return self._apply_conflict_groups(self._build_all_compatible_rule_hit_plan()) return self._build_subset_rule_hit_plan(log_id) + def _apply_rule_hit_plan_to_record(self, file_record: FileRecord, rule_hit_plan: dict) -> None: + """将规则命中计划回填到指定文件记录。""" + file_record.large_transaction_hit_rules = list( + rule_hit_plan.get("large_transaction_hit_rules", []) + ) + file_record.phase1_hit_rules = list(rule_hit_plan.get("phase1_hit_rules", [])) + file_record.phase2_statement_hit_rules = list( + rule_hit_plan.get("phase2_statement_hit_rules", []) + ) + file_record.phase2_baseline_hit_rules = list( + rule_hit_plan.get("phase2_baseline_hit_rules", []) + ) + + def _rebalance_all_mode_group_rule_plans(self, group_id: int) -> None: + """同项目存在多文件时,隔离月固定收入样本,避免被其他正向流入规则污染。""" + if settings.RULE_HIT_MODE != "all": + return + + group_records = [ + record for record in self.file_records.values() + if record.group_id == group_id + ] + if not group_records: + return + + full_plan = self._apply_conflict_groups(self._build_all_compatible_rule_hit_plan()) + if len(group_records) == 1: + return + + monthly_safe_plan = self._apply_conflict_groups( + self._build_monthly_fixed_income_isolated_rule_hit_plan() + ) + self._apply_rule_hit_plan_to_record(group_records[0], monthly_safe_plan) + for record in group_records[1:]: + self._apply_rule_hit_plan_to_record(record, full_plan) + def _create_file_record( self, *, @@ -375,6 +448,7 @@ class FileService: # 存储记录 self.file_records[log_id] = file_record + self._rebalance_all_mode_group_rule_plans(group_id) self._apply_phase2_baselines(file_record) # 添加后台任务(延迟解析) @@ -705,6 +779,7 @@ class FileService: ) self.file_records[log_id] = file_record + self._rebalance_all_mode_group_rule_plans(group_id) self._apply_phase2_baselines(file_record) # 返回成功的响应,包含logId数组 diff --git a/lsfx-mock-server/tests/test_file_service.py b/lsfx-mock-server/tests/test_file_service.py index 4fdb3f6f..26510cde 100644 --- a/lsfx-mock-server/tests/test_file_service.py +++ b/lsfx-mock-server/tests/test_file_service.py @@ -228,6 +228,56 @@ def test_build_rule_hit_plan_should_keep_sql_aligned_target_rules_in_all_mode(mo assert "FIXED_COUNTERPARTY_TRANSFER" in plan["phase2_statement_hit_rules"] +def test_fetch_inner_flow_should_rebalance_all_mode_records_for_monthly_fixed_income(monkeypatch): + monkeypatch.setattr("services.file_service.settings.RULE_HIT_MODE", "all") + service = FileService(staff_identity_repository=FakeStaffIdentityRepository()) + + monkeypatch.setattr( + service, + "_generate_primary_binding", + lambda: ("全命中主体", "6222000099990001"), + ) + + request = { + "groupId": 1001, + "dataChannelCode": "test_code", + "requestDateId": 20240101, + "dataStartDateId": 20240101, + "dataEndDateId": 20240131, + "uploadUserId": 902001, + } + + first_log_id = service.fetch_inner_flow( + { + **request, + "customerNo": "customer_monthly_safe_1", + } + )["data"][0] + second_log_id = service.fetch_inner_flow( + { + **request, + "customerNo": "customer_monthly_safe_2", + } + )["data"][0] + + first_record = service.file_records[first_log_id] + second_record = service.file_records[second_log_id] + + assert "MONTHLY_FIXED_INCOME" in first_record.phase2_statement_hit_rules + assert "FIXED_COUNTERPARTY_TRANSFER" not in first_record.phase2_statement_hit_rules + assert "SUSPICIOUS_INCOME_KEYWORD" not in first_record.phase1_hit_rules + assert "FOREX_SELL_AMT" not in first_record.phase1_hit_rules + assert "SINGLE_LARGE_INCOME" not in first_record.large_transaction_hit_rules + assert "CUMULATIVE_INCOME" not in first_record.large_transaction_hit_rules + assert "ANNUAL_TURNOVER" not in first_record.large_transaction_hit_rules + assert "LARGE_CASH_DEPOSIT" not in first_record.large_transaction_hit_rules + assert "FREQUENT_CASH_DEPOSIT" not in first_record.large_transaction_hit_rules + + assert "FIXED_COUNTERPARTY_TRANSFER" in second_record.phase2_statement_hit_rules + assert "SUSPICIOUS_INCOME_KEYWORD" in second_record.phase1_hit_rules + assert "SINGLE_LARGE_INCOME" in second_record.large_transaction_hit_rules + + def test_build_rule_hit_plan_should_not_include_placeholder_rules_in_all_mode(monkeypatch): monkeypatch.setattr("services.file_service.settings.RULE_HIT_MODE", "all") service = FileService(staff_identity_repository=FakeStaffIdentityRepository()) diff --git a/lsfx-mock-server/tests/test_statement_service.py b/lsfx-mock-server/tests/test_statement_service.py index 63eed1cb..b3c76899 100644 --- a/lsfx-mock-server/tests/test_statement_service.py +++ b/lsfx-mock-server/tests/test_statement_service.py @@ -258,6 +258,58 @@ def test_generate_statements_should_stay_within_single_employee_scope_per_log_id assert {item["cretNo"] for item in statements}.issubset(allowed_id_cards) +def test_all_mode_monthly_fixed_income_log_should_keep_monthly_income_stable(monkeypatch): + monkeypatch.setattr("services.file_service.settings.RULE_HIT_MODE", "all") + file_service = FileService(staff_identity_repository=FakeStaffIdentityRepository()) + service = StatementService(file_service=file_service) + + monkeypatch.setattr( + file_service, + "_generate_primary_binding", + lambda: ("全命中主体", "6222000012345678"), + ) + + request = { + "groupId": 1001, + "dataChannelCode": "test_code", + "requestDateId": 20240101, + "dataStartDateId": 20240101, + "dataEndDateId": 20240131, + "uploadUserId": 902001, + } + + first_log_id = file_service.fetch_inner_flow( + {**request, "customerNo": "customer_monthly_income_1"} + )["data"][0] + file_service.fetch_inner_flow( + {**request, "customerNo": "customer_monthly_income_2"} + ) + record = file_service.file_records[first_log_id] + + statements = service._generate_statements(group_id=1001, log_id=first_log_id, count=200) + + monthly_amounts = defaultdict(float) + for item in statements: + if item["cretNo"] != record.staff_id_card: + continue + if item["crAmount"] <= 0: + continue + if not item["customerName"]: + continue + if item["leName"] == item["customerName"]: + continue + if item["customerName"] == "浙江兰溪农村商业银行股份有限公司" and ( + any(keyword in item["userMemo"] for keyword in ("代发", "工资", "奖金", "薪酬", "薪金", "补贴", "薪", "年终奖", "年金", "加班费", "劳务费", "劳务外包", "提成", "劳务派遣", "绩效", "酬劳", "PAYROLL", "SALA", "CPF", "directors", "批量代付")) + or any(keyword in item["cashType"] for keyword in ("代发", "工资", "劳务费")) + ): + continue + monthly_amounts[item["trxDate"][:7]] += item["crAmount"] + + assert monthly_amounts + assert len(monthly_amounts) == 6 + assert set(monthly_amounts.values()) == {7200.0} + + def test_generate_statements_should_only_use_recognizable_identity_cards(): """命中样本和随机噪声都只能使用现库可识别的身份证号。""" service = StatementService()