Merge branch 'dev' into codex/bank-tag-real-rule-phase2-backend

This commit is contained in:
wkc
2026-03-20 16:28:47 +08:00
21 changed files with 2178 additions and 233 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@@ -328,7 +328,15 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
fur.file_name AS originalFileName,
fur.upload_time AS uploadTime
FROM ccdi_bank_statement bs
LEFT JOIN ccdi_file_upload_record fur ON fur.log_id = bs.batch_id AND fur.project_id = bs.project_id
LEFT JOIN (
SELECT latest_record.project_id, latest_record.log_id, latest_record.file_name, latest_record.upload_time
FROM ccdi_file_upload_record latest_record
INNER JOIN (
SELECT project_id, log_id, MAX(id) AS max_id
FROM ccdi_file_upload_record
GROUP BY project_id, log_id
) latest_meta ON latest_meta.max_id = latest_record.id
) fur ON fur.log_id = bs.batch_id AND fur.project_id = bs.project_id
WHERE bs.bank_statement_id = #{bankStatementId}
</select>

View File

@@ -121,7 +121,10 @@ class CcdiBankStatementMapperXmlTest {
String xml = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
assertTrue(
xml.contains("LEFT JOIN ccdi_file_upload_record fur ON fur.log_id = bs.batch_id AND fur.project_id = bs.project_id"),
xml.contains("LEFT JOIN (")
&& xml.contains("SELECT latest_record.project_id, latest_record.log_id, latest_record.file_name, latest_record.upload_time")
&& xml.contains("MAX(id) AS max_id")
&& xml.contains("fur.log_id = bs.batch_id AND fur.project_id = bs.project_id"),
xml
);
assertTrue(xml.contains("fur.file_name AS originalFileName"), xml);

View File

@@ -0,0 +1,48 @@
package com.ruoyi.ccdi.project.sql;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertAll;
import static org.junit.jupiter.api.Assertions.assertTrue;
class CcdiBankTagRuleSqlMetadataTest {

    /**
     * Guards that the phase-1 bank-tag rule metadata stays aligned between the
     * initialization script and the incremental migration script: the same
     * rule-code/indicator-code pairings and the same real-rule remark texts must
     * appear in both files.
     */
    @Test
    void phase1MetadataSql_shouldAlignInitAndMigrationScripts() throws IOException {
        String initScript = readProjectFile("sql", "2026-03-16-bank-tagging.sql");
        String migrationScript =
            readProjectFile("sql", "migration", "2026-03-20-sync-bank-tag-phase1-rule-metadata.sql");
        assertPhase1Metadata(initScript);
        assertPhase1Metadata(migrationScript);
    }

    // Runs every metadata check inside one assertAll so a single execution
    // reports all mismatches at once instead of stopping at the first failure.
    private void assertPhase1Metadata(String sqlContent) {
        assertAll(
            () -> assertCodesPresent(sqlContent, "'FOREX_BUY_AMT'", "'SINGLE_PURCHASE_AMOUNT'",
                "FOREX_BUY_AMT 应使用 SINGLE_PURCHASE_AMOUNT"),
            () -> assertCodesPresent(sqlContent, "'FOREX_SELL_AMT'", "'SINGLE_SETTLEMENT_AMOUNT'",
                "FOREX_SELL_AMT 应使用 SINGLE_SETTLEMENT_AMOUNT"),
            () -> assertCodesPresent(sqlContent, "'LARGE_STOCK_TRADING'", "'STOCK_TFR_LARGE'",
                "LARGE_STOCK_TRADING 应使用 STOCK_TFR_LARGE"),
            () -> assertRemarkPresent(sqlContent, "真实规则:识别单笔购汇金额超过阈值的流水",
                "应同步 FOREX_BUY_AMT 的真实规则说明"),
            () -> assertRemarkPresent(sqlContent, "真实规则:识别单笔结汇金额超过阈值的流水",
                "应同步 FOREX_SELL_AMT 的真实规则说明"),
            () -> assertRemarkPresent(sqlContent, "真实规则:识别单笔三方资管交易金额超过阈值的流水",
                "应同步 LARGE_STOCK_TRADING 的真实规则说明")
        );
    }

    // Asserts that both the quoted rule code and its expected indicator code occur in the script.
    private void assertCodesPresent(String sqlContent, String ruleCode, String indicatorCode, String message) {
        assertTrue(sqlContent.contains(ruleCode) && sqlContent.contains(indicatorCode), message);
    }

    // Asserts that the given real-rule remark text occurs in the script.
    private void assertRemarkPresent(String sqlContent, String remark, String message) {
        assertTrue(sqlContent.contains(remark), message);
    }

    // Reads a file addressed relative to the repository root; ".." is used because
    // the test presumably executes from the module directory — TODO confirm working dir.
    private String readProjectFile(String... parts) throws IOException {
        return Files.readString(Path.of("..", parts), StandardCharsets.UTF_8);
    }
}

View File

@@ -0,0 +1,38 @@
# 新增模型打标验证计划实施记录
## 修改目标
- 将已确认的“新增模型打标完整验证”文档归档到项目现有测试计划目录
- 基于确认后的验证计划补充一份可直接执行的实施计划
- 保持本次工作只覆盖验证方案与执行步骤,不混入修复内容
## 修改范围
- `docs/tests/plans/2026-03-20-bank-tag-new-model-validation-test-plan.md`
- `docs/tests/plans/2026-03-20-bank-tag-new-model-validation-execution-plan.md`
## 修改内容
### 1. 调整验证计划归档路径
- 将原先放在 `docs/superpowers/specs/` 下的验证设计文档迁移到 `docs/tests/plans/`
- 文件名调整为 `2026-03-20-bank-tag-new-model-validation-test-plan.md`
- 标题与正文表述从“设计”收敛为“验证计划”语境,和目录职责保持一致
### 2. 新增执行计划
- 新增 `docs/tests/plans/2026-03-20-bank-tag-new-model-validation-execution-plan.md`
- 按“环境与基线 -> Mock 自动化 -> 主工程自动化 -> 数据库核验 -> 接口端到端 -> 进程清理”的顺序拆解任务
- 为每个阶段补充了明确的执行命令、预期结果、停点条件和文档沉淀要求
## 目录选择理由
- `docs/tests/plans/` 当前已用于承载测试计划类文档
- 本次文档内容核心是验证目标、执行步骤、通过标准和失败停点
- 相比 `docs/plans/misc/`,该目录与本次任务的语义更一致
## 结果
- 验证计划路径已修正为项目目录下的测试计划位置
- 执行计划已补齐,后续可直接进入验证执行阶段
- 本次改动未触碰业务代码,也未启动前后端或 Mock 服务进程

View File

@@ -0,0 +1,55 @@
# 新增模型打标完整验证实施记录
## 验证目标
- 对 2026-03-20 新加入的模型打标改动执行完整验证。
- 验证范围覆盖 Mock 随机命中、第一期真实规则、数据库事实与接口端到端结果。
- 本次执行仅输出验证过程与结论,不进入修复实现。
## 验证范围
- In Scope:
- `lsfx-mock-server` 随机命中规则计划、样本装配、缓存稳定性相关验证。
- `ccdi-project` 第一期真实规则参数映射、真实 SQL、Service 分发、风险人数刷新相关验证。
- 数据库基线与规则元数据核验。
- 接口端到端调用与结果核验。
- Out of Scope:
- 第二期占位规则。
- 新增规则、补丁逻辑、兼容性处理。
- 任何修复动作。
## 执行阶段
- 阶段 1:环境与范围确认
- 阶段 2:Mock 随机命中自动化回归
- 阶段 3:主工程第一期真实规则自动化回归
- 阶段 4:数据库基线与规则元数据核验
- 阶段 5:接口端到端验证与环境清理
## 目标项目
- 本次端到端验证选用 `project_id=47`
- `project_name=测试03191`
- `lsfx_project_id=1002`
- `config_type=custom`
## 产物路径
- 执行计划:`docs/tests/plans/2026-03-20-bank-tag-new-model-validation-execution-plan.md`
- 验证计划:`docs/tests/plans/2026-03-20-bank-tag-new-model-validation-test-plan.md`
- 实施记录:`docs/reports/implementation/2026-03-20-bank-tag-new-model-validation-record.md`
- 验证记录:`docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md`
## 执行说明
- 验证过程中若任一层失败,立即停在对应层记录证据,不继续给出“验证通过”结论。
- 本次执行基于当前本地开发环境,不额外引入修复或扩展范围。
## 当前进展
- 2026-03-20 15:21:54 CST 完成阶段 1:已对齐验证范围、读取来源实施记录、选定 `project_id=47`,并创建实施记录与验证记录骨架。
- 2026-03-20 15:21:54 CST 完成阶段 2:`lsfx-mock-server` 聚焦回归与全量回归全部通过,确认规则命中计划、样本装配、缓存稳定性与集成链路未回退。
- 2026-03-20 15:23:10 CST 完成阶段 3:`ccdi-project` 第一期真实规则目标测试全部 `BUILD SUCCESS`,规则映射、真实 SQL、规则分发与风险人数刷新链路保持通过。
- 2026-03-20 15:24 左右执行阶段 4:采购基线脚本成功重跑,`LSFXMOCKPUR001` 基线记录存在且金额满足门槛;但第一期规则元数据查询发现 `indicator_code` 与既有实施记录不一致,判定为“数据基线异常”,按计划停在数据库核验层,不继续执行接口端到端验证。
- 2026-03-20 15:41:06 CST 完成问题修复与复验:
- 已新增第一期规则元数据 SQL 校验测试与增量修复脚本。
- 已将修复脚本落库,确认 `FOREX_BUY_AMT`、`FOREX_SELL_AMT`、`LARGE_STOCK_TRADING` 的 `indicator_code` 与 9 条一期真实规则的 `remark` 均已对齐。
- 已完成项目 `47` 的拉取本行信息、手动重算、任务轮询、命中结果查询与流水详情接口复验。
- Mock 与后端验证进程均已关闭。
- 2026-03-20 16:01 左右完成补充复验:
- 重新启动 Mock 与后端服务,复跑项目 `47` 的登录、拉取本行信息、手动重算、任务轮询与详情接口链路。
- 自动任务 `id=39` 与手动任务 `id=40` 均执行成功,`hit_count=3636`、`success_rule_count=33`、`failed_rule_count=0`。
- 针对之前出现 `selectOne()` 重复结果异常的样例 `bank_statement_id=67279`,详情接口已返回 `code=200`,并正确带出 `GAMBLING_SENSITIVE_KEYWORD` 命中标签与原始文件名。

View File

@@ -0,0 +1,32 @@
# 第一期银行流水规则元数据修复实施记录
## 问题背景
- 2026-03-20 新增模型打标完整验证在数据库核验阶段发现:
- `FOREX_BUY_AMT.indicator_code` 仍为 `FOREX_BUY_AMT`
- `FOREX_SELL_AMT.indicator_code` 仍为 `FOREX_SELL_AMT`
- `LARGE_STOCK_TRADING.indicator_code` 为 `NULL`
- 同时,第一期已落地真实规则的 `remark` 仍停留在“占位规则待补充真实SQL”。
## 根因分析
- 主初始化脚本 [`sql/2026-03-16-bank-tagging.sql`](/Users/wkc/Desktop/ccdi/ccdi/sql/2026-03-16-bank-tagging.sql) 已包含第一期真实规则的正确元数据。
- 老增量脚本 [`sql/migration/2026-03-18-sync-bank-tag-uppercase-and-rules.sql`](/Users/wkc/Desktop/ccdi/ccdi/sql/migration/2026-03-18-sync-bank-tag-uppercase-and-rules.sql) 仍写入旧的占位元数据。
- 已执行过 2026-03-18 增量脚本、但未补后续迁移的环境,会停留在旧的 `indicator_code` 与 `remark` 状态。
## 本次修改
- 新增 SQL 资产校验测试 [`ccdi-project/src/test/java/com/ruoyi/ccdi/project/sql/CcdiBankTagRuleSqlMetadataTest.java`](/Users/wkc/Desktop/ccdi/ccdi/ccdi-project/src/test/java/com/ruoyi/ccdi/project/sql/CcdiBankTagRuleSqlMetadataTest.java)
- 先以缺失迁移脚本的红灯方式固定问题。
- 约束初始化脚本与增量脚本必须同时对齐:
- `FOREX_BUY_AMT -> SINGLE_PURCHASE_AMOUNT`
- `FOREX_SELL_AMT -> SINGLE_SETTLEMENT_AMOUNT`
- `LARGE_STOCK_TRADING -> STOCK_TFR_LARGE`
- 三条规则真实说明文案保持一致。
- 新增增量脚本 [`sql/migration/2026-03-20-sync-bank-tag-phase1-rule-metadata.sql`](/Users/wkc/Desktop/ccdi/ccdi/sql/migration/2026-03-20-sync-bank-tag-phase1-rule-metadata.sql)
- 使用 `INSERT ... ON DUPLICATE KEY UPDATE` 同步第一期 9 条真实规则元数据。
- 修复三条规则的 `indicator_code`
- 同步 9 条规则的真实规则 `remark`
- 将增量脚本通过 `bin/mysql_utf8_exec.sh` 落到当前验证数据库。
## 实施结果
- 规则元数据已对齐到第一期真实规则状态。
- 新增 SQL 校验测试可在仓库层拦住“只改初始化脚本、遗漏增量脚本”的回归。
- 修复后重新完成接口链路复验,项目 `47` 的自动拉取、手动重算、命中结果查询与详情接口均已通过。

View File

@@ -0,0 +1,43 @@
# LSFX Mock 随机命中规则后端实施记录
## 修改范围
- `lsfx-mock-server/services/file_service.py`
- `lsfx-mock-server/services/statement_rule_samples.py`
- `lsfx-mock-server/services/statement_service.py`
- `lsfx-mock-server/tests/test_file_service.py`
- `lsfx-mock-server/tests/test_statement_service.py`
- `lsfx-mock-server/tests/integration/test_full_workflow.py`
- `sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql`
## 规则命中计划生成方式
- 在 `FileService` 中新增 `LARGE_TRANSACTION_RULE_CODES` 与 `PHASE1_RULE_CODES` 两组规则池。
- 新增 `_build_rule_hit_plan(log_id)`,使用 `random.Random(f"rule-plan:{log_id}")` 生成稳定随机源。
- 通过 `_pick_rule_subset()` 从两组规则池内分别稳定选出 `2-4` 条规则,并保留规则池原始顺序。
- 在 `upload_file()` 与 `fetch_inner_flow()` 创建 `FileRecord` 时同步写入:
- `large_transaction_hit_rules`
- `phase1_hit_rules`
## 样本模块按规则子集装配
- 将原有“大额交易全量样本”拆成按规则代码独立调用的 builder。
- 新增 `LARGE_TRANSACTION_BUILDERS` 与 `PHASE1_RULE_BUILDERS` 两组映射,覆盖:
- 大额交易 8 条规则
- 第一期可由银行流水构造的 8 条规则
- 提供统一入口 `build_seed_statements_for_rule_plan(...)`,仅按 `rule_plan` 中被选中的规则拼装最小命中样本,不再默认返回全量命中样本。
- `build_large_transaction_seed_statements(...)` 保留为兼容测试入口,但内部已改为走新的规则映射。
## StatementService 接通方式
- `StatementService._generate_statements()` 改为优先读取 `FileRecord` 中保存的命中计划。
- 若存在真实 `FileRecord`,则复用其主体、账号、员工及亲属身份证范围,并把命中计划传给 `build_seed_statements_for_rule_plan(...)`
- 命中样本与随机噪声流水继续统一走 `_assign_statement_ids()` 分配稳定 ID。
- 首次生成后仍缓存固定 `200` 条流水;同一 `logId` 重复分页读取保持结果稳定。
## LARGE_PURCHASE_TRANSACTION 单独补数据库基线原因
- `LARGE_PURCHASE_TRANSACTION` 的真实命中来源是 `ccdi_purchase_transaction`,不依赖 `ccdi_bank_statement`
- 为避免伪造银行流水造成业务链路偏移,本次不把该规则塞进 Mock 流水样本。
- 通过新增 `sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql`,只补一条最小采购记录 `LSFXMOCKPUR001`
- 脚本通过 `ccdi_base_staff` 选取一条真实员工主数据作为 `applicant_id/applicant_name` 来源,`actual_amount=186000.00`,满足真实 SQL 的 `>100000` 命中门槛。
## 实施结果
- `FileService -> StatementService -> 缓存分页` 主链路保持不变。
- 大额交易规则与第一期新增规则均已支持“按 `logId` 稳定随机命中一部分”。
- `LARGE_PURCHASE_TRANSACTION` 已通过独立数据库基线补齐联调数据来源。

View File

@@ -0,0 +1,28 @@
# Mock 服务随机 logId 实施记录
## 问题背景
- 2026-03-20 联调过程中,`lsfx-mock-server` 的 `logId` 仍使用进程内递增方式分配。
- 仓库文档与接口预期要求 Mock 返回随机 `logId`,避免联调时对顺序值形成隐式依赖。
## 根因分析
- [`lsfx-mock-server/services/file_service.py`](/Users/wkc/Desktop/ccdi/ccdi/lsfx-mock-server/services/file_service.py) 中,`upload_file()` 与 `fetch_inner_flow()` 都直接通过 `self.log_counter += 1` 生成 `logId`。
- 现有测试只覆盖了 `logId` 落在 `10000-99999` 区间内,没有约束“冲突时需要重试并避让已有记录”。
## 本次修改
- 在 [`lsfx-mock-server/tests/test_file_service.py`](/Users/wkc/Desktop/ccdi/ccdi/lsfx-mock-server/tests/test_file_service.py) 先新增红灯测试 `test_generate_log_id_should_retry_when_random_value_conflicts`
- 固定随机值第一次命中已存在 `logId` 时必须重试。
- 同步把行内流水测试中的旧递增断言改为随机区间断言。
- 在 [`lsfx-mock-server/services/file_service.py`](/Users/wkc/Desktop/ccdi/ccdi/lsfx-mock-server/services/file_service.py) 新增统一 `_generate_log_id()`
- 在 `10000-99999` 区间内随机生成。
- 若命中 `file_records` 中已存在的 `logId`,则继续重试直到拿到未占用值。
- `upload_file()` 与 `fetch_inner_flow()` 均切换为调用该方法。
## 验证结果
- `python3 -m pytest lsfx-mock-server/tests/test_file_service.py -k "fetch_inner_flow_persists_primary_binding_record or generate_log_id_should_retry_when_random_value_conflicts" -v`
- 结果:`2 passed`
- `python3 -m pytest lsfx-mock-server/tests/test_file_service.py lsfx-mock-server/tests/test_statement_service.py lsfx-mock-server/tests/test_api.py lsfx-mock-server/tests/integration/test_full_workflow.py -v`
- 结果:`39 passed, 20 warnings`
## 实施结果
- Mock 服务的新建上传记录与行内流水记录已改为随机 `logId`
- 同一 `logId` 下的规则命中计划、流水样本与上传状态复用逻辑保持不变。

View File

@@ -0,0 +1,521 @@
# 新增模型打标完整验证 Implementation Plan
> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** 对 2026-03-20 新加入的模型打标改动执行一轮完整验证,覆盖 Mock 随机命中、第一期真实规则、数据库事实和接口端到端结果。
**Architecture:** 这次工作不是改代码,而是按“环境与基线 -> 自动化回归 -> 数据库核验 -> 接口链路 -> 文档沉淀”五层顺序执行。只要某一层失败,就停在该层记录证据,不继续给出“验证通过”的结论,也不进入修复。
**Tech Stack:** Bash, Python 3, FastAPI, Java 21, Spring Boot 3, Maven, pytest, MySQL, curl, jq
---
## File Structure
- `docs/tests/plans/2026-03-20-bank-tag-new-model-validation-test-plan.md`: 已确认的验证计划,执行时必须严格对齐范围与停点。
- `docs/tests/plans/2026-03-20-bank-tag-new-model-validation-execution-plan.md`: 本执行计划,负责把验证计划拆成可执行步骤。
- `docs/reports/implementation/2026-03-20-bank-tag-new-model-validation-record.md`: 本次实施记录,记录执行内容与范围。
- `docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md`: 本次验证记录记录命令、SQL、接口结果和结论。
- `docs/reports/implementation/2026-03-20-lsfx-mock-random-hit-rule-backend-record.md`: Mock 随机命中改动的既有实施记录。
- `docs/tests/records/2026-03-20-lsfx-mock-random-hit-rule-backend-verification.md`: Mock 随机命中改动的既有验证记录。
- `docs/reports/implementation/2026-03-20-bank-tag-real-rule-phase1-backend-record.md`: 第一期真实规则改动的既有实施记录。
- `docs/tests/records/2026-03-20-bank-tag-real-rule-phase1-backend-verification.md`: 第一期真实规则改动的既有验证记录。
- `ruoyi-admin/src/main/resources/application-dev.yml`: 读取数据库连接与后端本地配置。
- `sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql`: `LARGE_PURCHASE_TRANSACTION` 采购基线脚本。
- `lsfx-mock-server/tests/test_file_service.py`: Mock 规则命中计划测试。
- `lsfx-mock-server/tests/test_statement_service.py`: Mock 样本装配与缓存稳定性测试。
- `lsfx-mock-server/tests/integration/test_full_workflow.py`: Mock 端到端链路测试。
- `ccdi-project/src/test/java/com/ruoyi/ccdi/project/service/impl/BankTagRuleConfigResolverTest.java`: 规则参数映射测试。
- `ccdi-project/src/test/java/com/ruoyi/ccdi/project/mapper/CcdiBankTagAnalysisMapperXmlTest.java`: 真实 SQL 结构测试。
- `ccdi-project/src/test/java/com/ruoyi/ccdi/project/service/impl/CcdiBankTagServiceImplTest.java`: 规则分发测试。
- `ccdi-project/src/test/java/com/ruoyi/ccdi/project/service/impl/CcdiBankTagServiceRiskCountRefreshTest.java`: 风险人数刷新回归测试。
- `ccdi-project/src/main/java/com/ruoyi/ccdi/project/controller/CcdiFileUploadController.java`: 拉取本行信息接口。
- `ccdi-project/src/main/java/com/ruoyi/ccdi/project/controller/CcdiBankTagController.java`: 手动重算接口。
- `ccdi-project/src/main/java/com/ruoyi/ccdi/project/controller/CcdiBankStatementController.java`: 流水详情接口。
- `bin/restart_java_backend.sh`: 后端 Jar 启停脚本。
### Task 1: 锁定执行环境、目标项目和记录文档
**Files:**
- Create: `docs/reports/implementation/2026-03-20-bank-tag-new-model-validation-record.md`
- Create: `docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md`
- Reference: `docs/tests/plans/2026-03-20-bank-tag-new-model-validation-test-plan.md`
- Reference: `ruoyi-admin/src/main/resources/application-dev.yml`
- Reference: `docs/reports/implementation/2026-03-20-lsfx-mock-random-hit-rule-backend-record.md`
- Reference: `docs/reports/implementation/2026-03-20-bank-tag-real-rule-phase1-backend-record.md`
- [ ] **Step 1: 阅读既有计划与两份来源实施记录**
Run:
```bash
sed -n '1,220p' docs/tests/plans/2026-03-20-bank-tag-new-model-validation-test-plan.md
sed -n '1,220p' docs/reports/implementation/2026-03-20-lsfx-mock-random-hit-rule-backend-record.md
sed -n '1,220p' docs/reports/implementation/2026-03-20-bank-tag-real-rule-phase1-backend-record.md
```
Expected:
- 明确本次只验证双线已落地内容
- 不把第二期规则和修复方案带进执行范围
- [ ] **Step 2: 从数据库中选出本次端到端验证使用的项目**
Run:
```bash
python3 - <<'PY'
from pathlib import Path
import pymysql, re
text = Path('ruoyi-admin/src/main/resources/application-dev.yml').read_text(encoding='utf-8')
match = re.search(r"url:\s*jdbc:mysql://(?P<host>[^:/?#]+):(?P<port>\d+)/(?P<db>[^?\n]+).*?\n\s*username:\s*(?P<user>[^\n]+)\n\s*password:\s*(?P<pwd>[^\n]+)", text, re.S)
conn = pymysql.connect(
host=match.group('host'),
port=int(match.group('port')),
user=match.group('user').strip(),
password=match.group('pwd').strip(),
database=match.group('db').strip(),
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
with conn, conn.cursor() as cursor:
cursor.execute("""
SELECT project_id, project_name, lsfx_project_id, config_type
FROM ccdi_project
WHERE del_flag = '0'
AND lsfx_project_id IS NOT NULL
ORDER BY update_time DESC, project_id DESC
LIMIT 10
""")
for row in cursor.fetchall():
print(row)
PY
```
Expected:
- 至少找到 1 个可用于 LSFX 联调的项目
- 在实施记录和验证记录中记下最终采用的 `project_id`
- [ ] **Step 3: 创建本次实施记录与验证记录骨架**
在两个文档中先写入以下固定章节:
- 实施记录:验证目标、范围、执行阶段、产物路径
- 验证记录:执行命令、数据库核验、接口验证、结论、环境清理
- [ ] **Step 4: Commit**
```bash
git add docs/reports/implementation/2026-03-20-bank-tag-new-model-validation-record.md docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md docs/tests/plans/2026-03-20-bank-tag-new-model-validation-execution-plan.md
git commit -m "补充新增模型打标验证执行计划"
```
### Task 2: 先完成 Mock 随机命中自动化回归
**Files:**
- Reference: `lsfx-mock-server/tests/test_file_service.py`
- Reference: `lsfx-mock-server/tests/test_statement_service.py`
- Reference: `lsfx-mock-server/tests/integration/test_full_workflow.py`
- Modify: `docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md`
- Modify: `docs/reports/implementation/2026-03-20-bank-tag-new-model-validation-record.md`
- [ ] **Step 1: 跑 Mock 聚焦回归**
Run:
```bash
cd lsfx-mock-server
python3 -m pytest tests/test_file_service.py -k "rule_hit_plan or persist_rule_hit_plan" -v
python3 -m pytest tests/test_statement_service.py -k "rule_plan_should_only_include or withdraw_cnt_samples" -v
python3 -m pytest tests/test_statement_service.py -k "follow_rule_hit_plan or fixed_total_count_200 or cached_result" -v
python3 -m pytest tests/integration/test_full_workflow.py -k "same_rule_subset or share_same_primary_binding" -v
```
Expected:
- 全部 `PASS`
- 能证明规则命中计划、样本装配和缓存稳定性未回退
- [ ] **Step 2: 跑 Mock 全量回归**
Run:
```bash
cd lsfx-mock-server
python3 -m pytest tests/test_file_service.py tests/test_statement_service.py tests/test_api.py tests/integration/test_full_workflow.py -v
```
Expected:
- `PASS`
- 若失败,停止后续阶段并把失败用例和首个错误栈写入验证记录
- [ ] **Step 3: 将结果写入验证记录**
记录:
- 实际执行时间
- `passed / failed / warnings` 摘要
- 若存在 warning只说明是否为既有 warning不做修复
### Task 3: 完成主工程第一期真实规则自动化回归
**Files:**
- Reference: `ccdi-project/src/test/java/com/ruoyi/ccdi/project/service/impl/BankTagRuleConfigResolverTest.java`
- Reference: `ccdi-project/src/test/java/com/ruoyi/ccdi/project/mapper/CcdiBankTagAnalysisMapperXmlTest.java`
- Reference: `ccdi-project/src/test/java/com/ruoyi/ccdi/project/service/impl/CcdiBankTagServiceImplTest.java`
- Reference: `ccdi-project/src/test/java/com/ruoyi/ccdi/project/service/impl/CcdiBankTagServiceRiskCountRefreshTest.java`
- Modify: `docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md`
- [ ] **Step 1: 逐步跑第一期目标测试**
Run:
```bash
mvn test -pl ccdi-project -Dtest=BankTagRuleConfigResolverTest
mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest
mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest,CcdiBankTagServiceImplTest
mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest,BankTagRuleConfigResolverTest,CcdiBankTagServiceImplTest,CcdiBankTagServiceRiskCountRefreshTest
```
Expected:
- 全部 `BUILD SUCCESS`
- 若任一命令失败,停止后续阶段并把失败命令、失败类名和错误摘要写入验证记录
- [ ] **Step 2: 在验证记录中沉淀主工程自动化结果**
记录:
- 每条命令的执行结果
- 规则映射、真实 SQL、对象分发、风险人数刷新是否都保持通过
### Task 4: 做数据库基线和规则元数据核验
**Files:**
- Reference: `sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql`
- Reference: `sql/2026-03-16-bank-tagging.sql`
- Modify: `docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md`
- Modify: `docs/reports/implementation/2026-03-20-bank-tag-new-model-validation-record.md`
- [ ] **Step 1: 幂等执行采购基线脚本**
Run:
```bash
bin/mysql_utf8_exec.sh sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql
```
Expected:
- 脚本执行成功
- 无中文乱码、无 SQL 报错
- [ ] **Step 2: 校验采购基线记录存在**
Run:
```bash
python3 - <<'PY'
from pathlib import Path
import pymysql, re
text = Path('ruoyi-admin/src/main/resources/application-dev.yml').read_text(encoding='utf-8')
match = re.search(r"url:\s*jdbc:mysql://(?P<host>[^:/?#]+):(?P<port>\d+)/(?P<db>[^?\n]+).*?\n\s*username:\s*(?P<user>[^\n]+)\n\s*password:\s*(?P<pwd>[^\n]+)", text, re.S)
conn = pymysql.connect(
host=match.group('host'),
port=int(match.group('port')),
user=match.group('user').strip(),
password=match.group('pwd').strip(),
database=match.group('db').strip(),
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
with conn, conn.cursor() as cursor:
cursor.execute("""
SELECT purchase_id, actual_amount, supplier_name
FROM ccdi_purchase_transaction
WHERE purchase_id = 'LSFXMOCKPUR001'
AND actual_amount > 100000
""")
print(cursor.fetchone())
PY
```
Expected:
- 返回 `LSFXMOCKPUR001`
- `actual_amount` 大于 `100000`
- [ ] **Step 3: 校验第一期规则元数据保持大写且可识别**
Run:
```bash
python3 - <<'PY'
from pathlib import Path
import pymysql, re
TARGET_RULES = (
'GAMBLING_SENSITIVE_KEYWORD','SPECIAL_AMOUNT_TRANSACTION','SUSPICIOUS_INCOME_KEYWORD',
'FOREX_BUY_AMT','FOREX_SELL_AMT','LARGE_PURCHASE_TRANSACTION',
'STOCK_TFR_LARGE','WITHDRAW_CNT','LARGE_STOCK_TRADING'
)
text = Path('ruoyi-admin/src/main/resources/application-dev.yml').read_text(encoding='utf-8')
match = re.search(r"url:\s*jdbc:mysql://(?P<host>[^:/?#]+):(?P<port>\d+)/(?P<db>[^?\n]+).*?\n\s*username:\s*(?P<user>[^\n]+)\n\s*password:\s*(?P<pwd>[^\n]+)", text, re.S)
conn = pymysql.connect(
host=match.group('host'),
port=int(match.group('port')),
user=match.group('user').strip(),
password=match.group('pwd').strip(),
database=match.group('db').strip(),
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
sql = f"""
SELECT model_code, rule_code, indicator_code
FROM ccdi_bank_tag_rule
WHERE rule_code IN ({','.join(['%s'] * len(TARGET_RULES))})
ORDER BY model_code, sort_order, rule_code
"""
with conn, conn.cursor() as cursor:
cursor.execute(sql, TARGET_RULES)
for row in cursor.fetchall():
print(row)
PY
```
Expected:
- 所有目标规则均能查到
- `rule_code``indicator_code` 继续保持全大写风格
- [ ] **Step 4: 将 SQL 与结果摘要写入验证记录**
记录:
- 执行的 SQL 或脚本命令
- 返回结果摘要
- 若数据缺失,明确归类为“数据基线异常”
### Task 5: 执行接口端到端验证并清理进程
**Files:**
- Reference: `ccdi-project/src/main/java/com/ruoyi/ccdi/project/controller/CcdiFileUploadController.java`
- Reference: `ccdi-project/src/main/java/com/ruoyi/ccdi/project/controller/CcdiBankTagController.java`
- Reference: `ccdi-project/src/main/java/com/ruoyi/ccdi/project/controller/CcdiBankStatementController.java`
- Modify: `docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md`
- Modify: `docs/reports/implementation/2026-03-20-bank-tag-new-model-validation-record.md`
- [ ] **Step 1: 启动 Mock 服务**
Run:
```bash
mkdir -p logs
cd lsfx-mock-server
nohup python3 main.py > ../logs/lsfx-mock-validation.log 2>&1 &
echo $! > ../logs/lsfx-mock-validation.pid
cd ..
```
Expected:
- `logs/lsfx-mock-validation.pid` 已生成
- `http://localhost:8000/docs` 可访问
- [ ] **Step 2: 启动后端 Jar 服务**
Run:
```bash
./bin/restart_java_backend.sh stop
nohup ./bin/restart_java_backend.sh start > logs/backend-validation.log 2>&1 &
echo $! > logs/backend-validation.pid
```
Expected:
- 后端监听在 `http://localhost:62318`
- 若启动失败,先看 `logs/backend-console.log`,记录失败后停止执行
- [ ] **Step 3: 登录并取 token**
Run:
```bash
curl -s http://localhost:62318/login/test \
-H 'Content-Type: application/json' \
-d '{"username":"admin","password":"admin123"}' | tee /tmp/bank-tag-login.json
jq -r '.token' /tmp/bank-tag-login.json
```
Expected:
- 返回非空 token
- [ ] **Step 4: 触发拉取本行信息链路生成新的 Mock 流水**
先从数据库挑 1-3 个真实身份证号,再调用接口:
```bash
python3 - <<'PY' >/tmp/bank-tag-id-cards.json
from pathlib import Path
import json, pymysql, re
text = Path('ruoyi-admin/src/main/resources/application-dev.yml').read_text(encoding='utf-8')
match = re.search(r"url:\s*jdbc:mysql://(?P<host>[^:/?#]+):(?P<port>\d+)/(?P<db>[^?\n]+).*?\n\s*username:\s*(?P<user>[^\n]+)\n\s*password:\s*(?P<pwd>[^\n]+)", text, re.S)
conn = pymysql.connect(
host=match.group('host'),
port=int(match.group('port')),
user=match.group('user').strip(),
password=match.group('pwd').strip(),
database=match.group('db').strip(),
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
with conn, conn.cursor() as cursor:
cursor.execute("""
SELECT DISTINCT id_card
FROM ccdi_base_staff
WHERE del_flag = '0'
AND id_card IS NOT NULL
AND id_card <> ''
ORDER BY id ASC
LIMIT 3
""")
print(json.dumps([row['id_card'] for row in cursor.fetchall()], ensure_ascii=False))
PY
TOKEN=$(jq -r '.token' /tmp/bank-tag-login.json)
PROJECT_ID=<把 Task 1 选定的 project_id 填到这里>
curl -s http://localhost:62318/ccdi/file-upload/pull-bank-info \
-H "Authorization: Bearer $TOKEN" \
-H 'Content-Type: application/json' \
-d "{\"projectId\":${PROJECT_ID},\"idCards\":$(cat /tmp/bank-tag-id-cards.json),\"startDate\":\"2026-03-01\",\"endDate\":\"2026-03-20\"}"
```
Expected:
- 返回 `拉取任务已提交`
- 说明 Mock 随机命中链路已被主工程实际调用
- [ ] **Step 5: 触发整项目手动重算**
Run:
```bash
TOKEN=$(jq -r '.token' /tmp/bank-tag-login.json)
curl -s http://localhost:62318/ccdi/project/tags/rebuild \
-H "Authorization: Bearer $TOKEN" \
-H 'Content-Type: application/json' \
-d "{\"projectId\":${PROJECT_ID},\"modelCode\":null}"
```
Expected:
- 返回 `{"code":200,...}`
- 手动重算任务成功提交
- [ ] **Step 6: 轮询数据库确认重算任务成功,并查出一条新增模型命中记录**
Run:
```bash
python3 - <<'PY'
from pathlib import Path
import pymysql, re, time
PROJECT_ID = int("<把 Task 1 选定的 project_id 填到这里>")
TARGET_RULES = (
'GAMBLING_SENSITIVE_KEYWORD','SPECIAL_AMOUNT_TRANSACTION','SUSPICIOUS_INCOME_KEYWORD',
'FOREX_BUY_AMT','FOREX_SELL_AMT','LARGE_PURCHASE_TRANSACTION',
'STOCK_TFR_LARGE','WITHDRAW_CNT','LARGE_STOCK_TRADING'
)
text = Path('ruoyi-admin/src/main/resources/application-dev.yml').read_text(encoding='utf-8')
match = re.search(r"url:\s*jdbc:mysql://(?P<host>[^:/?#]+):(?P<port>\d+)/(?P<db>[^?\n]+).*?\n\s*username:\s*(?P<user>[^\n]+)\n\s*password:\s*(?P<pwd>[^\n]+)", text, re.S)
conn = pymysql.connect(
host=match.group('host'),
port=int(match.group('port')),
user=match.group('user').strip(),
password=match.group('pwd').strip(),
database=match.group('db').strip(),
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
with conn, conn.cursor() as cursor:
task = None
for _ in range(30):
cursor.execute("""
SELECT id, status, model_code, hit_count, success_rule_count, failed_rule_count
FROM ccdi_bank_tag_task
WHERE project_id = %s
ORDER BY id DESC
LIMIT 1
""", (PROJECT_ID,))
task = cursor.fetchone()
print(task)
if task and task['status'] == 'SUCCESS':
break
time.sleep(2)
cursor.execute(f"""
SELECT id, rule_code, bank_statement_id, object_key, reason_detail
FROM ccdi_bank_statement_tag_result
WHERE project_id = %s
AND rule_code IN ({','.join(['%s'] * len(TARGET_RULES))})
ORDER BY id DESC
LIMIT 10
""", (PROJECT_ID, *TARGET_RULES))
for row in cursor.fetchall():
print(row)
PY
```
Expected:
- 最新任务状态为 `SUCCESS`
- 至少查到 1 条目标规则命中结果
- [ ] **Step 7: 用命中的 `bank_statement_id` 回查接口详情**
Run:
```bash
TOKEN=$(jq -r '.token' /tmp/bank-tag-login.json)
BANK_STATEMENT_ID=<把 Step 6 查到的 bank_statement_id 填到这里>
curl -s "http://localhost:62318/ccdi/project/bank-statement/detail/${BANK_STATEMENT_ID}" \
-H "Authorization: Bearer $TOKEN"
```
Expected:
- 返回 `code = 200`
- `data.hitTags` 中能看到至少 1 个目标规则对应的命中标签
- [ ] **Step 8: 写结论并关闭本次启动的进程**
Run:
```bash
if [ -f logs/lsfx-mock-validation.pid ]; then kill "$(cat logs/lsfx-mock-validation.pid)" || true; rm -f logs/lsfx-mock-validation.pid; fi
./bin/restart_java_backend.sh stop || true
rm -f logs/backend-validation.pid
```
Expected:
- Mock 进程已关闭
- 后端 Jar 进程已关闭
- 验证记录中明确写出“已完成进程清理”
- [ ] **Step 9: Commit**
```bash
git add docs/reports/implementation/2026-03-20-bank-tag-new-model-validation-record.md docs/tests/records/2026-03-20-bank-tag-new-model-validation-verification.md docs/tests/plans/2026-03-20-bank-tag-new-model-validation-execution-plan.md
git commit -m "补充新增模型打标完整验证记录"
```

View File

@@ -0,0 +1,244 @@
# 新增模型打标完整验证计划
## 背景
2026-03-20 已落地两条与“新增模型打标”直接相关的后端改动:
- `lsfx-mock-server` 新增按 `logId` 稳定随机命中的规则计划,用于为联调链路提供可重复、可命中的 Mock 流水样本。
- 主工程 `ccdi-project` 已接通第一期真实规则打标链路,覆盖明细型规则、对象型规则、参数映射和真实 SQL 分发。
当前需要做的不是继续扩展规则,而是对这两条改动线做一轮完整验证,确认“新加入的模型”在 Mock 样本层、真实规则层和最终接口链路层都能正确打标。
## 目标
- 同时验证 `lsfx-mock-server` 随机命中规则与主工程第一期真实规则。
- 验证深度覆盖自动化测试、数据库核验和接口端到端调用。
- 若发现打标异常,只输出验证结论与问题清单,不进入修复。
## 范围
### In Scope
- `lsfx-mock-server` 中与随机命中规则计划、样本拼装、缓存稳定性相关的验证。
- `ccdi-project` 中第一期真实规则相关的参数映射、真实 SQL、Service 分发与风险人数刷新链路验证。
- 关键数据库基线与元数据核验。
- 实际接口调用后的打标结果核验。
- 本次验证对应的实施记录与验证记录沉淀。
### Out of Scope
- 第二期仍为占位状态的规则。
- 新增规则、补丁逻辑、兜底逻辑或兼容性改造。
- 任何修复实现与代码修改方案。
## 验证对象
本次只验证已在既有实施记录中明确落地的内容,不扩展额外模型。
### 1. Mock 随机命中链路
关注以下能力是否仍成立:
- `FileService` 能为新 `logId` 写入稳定随机的规则命中计划。
- `StatementService` 能按命中计划拼装对应样本,而不是回退到全量样本。
- 同一 `logId` 重复读取时,规则命中子集和分页结果保持稳定。
- `LARGE_PURCHASE_TRANSACTION` 对应的采购基线数据已通过独立 SQL 提供。
### 2. 主工程第一期真实规则链路
关注以下规则在现有打标链路中是否仍能按真实规则命中:
- `GAMBLING_SENSITIVE_KEYWORD`
- `SPECIAL_AMOUNT_TRANSACTION`
- `SUSPICIOUS_INCOME_KEYWORD`
- `FOREX_BUY_AMT`
- `FOREX_SELL_AMT`
- `LARGE_PURCHASE_TRANSACTION`
- `STOCK_TFR_LARGE`
- `WITHDRAW_CNT`
- `LARGE_STOCK_TRADING`
重点关注点:
- 参数编码与规则编码保持全大写。
- 阈值规则仍正确透传到 Mapper。
- `WITHDRAW_CNT` 对象型规则仍保持“命中为空不回退任务状态”的链路约束。
- `LARGE_PURCHASE_TRANSACTION` 仍走采购交易表数据源,不伪造银行流水替代。
## 方案对比
### 方案一:分三层顺序验证
`Mock 自动化 -> 主工程自动化 -> 数据库核验 -> 接口端到端` 顺序执行。
优点:
- 最容易定位问题落点。
- 与当前仓库已有测试资产和实施记录天然对齐。
- 能覆盖你要求的完整验证深度。
缺点:
- 执行步骤最多。
### 方案二:纯端到端驱动
直接起服务并调用接口,看最终打标结果。
优点:
- 离业务使用最近。
缺点:
- 一旦失败,难以快速分辨是 Mock 样本、真实 SQL、参数分发还是接口编排问题。
### 方案三:自动化为主,接口抽样补充
优点:
- 执行更快。
缺点:
- 对数据库事实和最终链路覆盖不足,不满足本次“完整验证”的要求。
## 推荐方案
采用方案一。
原因是当前需求不是只看“有没有结果”,而是要同时确认:
- Mock 样本能不能正确提供命中前提;
- 主工程真实规则能不能正确识别;
- 最终接口链路有没有把命中结果正确暴露出来。
分层验证能把这三层责任拆开,失败时也能严格停在结论和问题清单,不会直接滑向修复。
## 验证方案
### 一、环境与基线确认
先确认本次验证依赖的数据与环境处于可验证状态:
- 主工程数据库连接可用。
- `sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql` 已执行或可幂等重跑。
- 采购基线记录 `LSFXMOCKPUR001` 存在,且 `actual_amount > 100000`
这一阶段的目标是确认“验证素材存在”,不直接下结论说模型已命中。
### 二、Mock 随机命中自动化验证
执行现有 `lsfx-mock-server` pytest 资产,覆盖四类能力:
- 规则命中计划生成;
- 命中计划持久化;
- 按规则子集拼装样本;
- 同一 `logId` 下缓存和分页稳定性;
- 端到端集成链路。
这一阶段通过后,才能认为 Mock 服务仍在稳定提供“可命中的输入数据”。
### 三、主工程第一期真实规则自动化验证
执行 `ccdi-project` 中与第一期真实规则直接相关的 Maven 测试,覆盖:
- 规则参数映射;
- XML 真实 SQL
- Service 分发;
- 风险人数刷新链路。
这一阶段通过后,才能认为主工程内部的规则识别与分发逻辑没有回退。
### 四、数据库关键事实核验
自动化通过后,再直接核验数据库事实,避免只依赖测试断言:
- 采购基线记录存在且满足门槛。
- 第一期开启真实规则的元数据仍与预期一致。
- 规则编码、参数编码、指标编码继续保持全大写。
- 端到端验证依赖的关键项目、流水、采购或对象数据具备最小命中条件。
这一阶段的结论是“数据前提是否成立”,不是替代接口结果。
### 五、接口端到端打标验证
启动本次验证需要的最小服务集合,按真实链路执行一次完整调用:
- 触发 Mock 取数或上传链路;
- 触发主工程打标分析链路;
- 查询最终模型或标签结果;
- 对照数据库事实与规则预期,确认新增模型是否真正出现在结果中。
这里的判定标准不是仅返回 HTTP 200而是最终打标结果中是否包含预期命中的新增模型规则。
## 通过标准
只有同时满足以下三层条件,才视为本次验证通过:
- 自动化层:相关 pytest 与 Maven 测试全部通过。
- 数据层:关键基线与元数据查询结果符合预期。
- 接口层:最终接口返回中包含预期命中的新增模型规则。
只要任一层不满足,就记为失败,并停止给出“验证通过”的结论。
## 失败判定与停点
### 自动化失败
任一既有 pytest 或 Maven 目标失败,记为:
- 代码级回归;或
- 环境级阻塞。
此时记录失败命令、失败用例和首个错误点,不继续放大为“模型已失败命中”的业务结论。
### 数据核验失败
若自动化通过,但基线数据不存在、门槛不满足或元数据不一致,记为:
- 验证数据不足;或
- 数据基线异常。
此时停止进入最终通过判定。
### 接口链路失败
若服务能启动、接口能调用,但最终结果缺失预期命中项,记为:
- 链路级打标异常。
输出接口请求、响应摘要、相关数据库证据和可疑断点位置,但不进入修复。
## 记录与产物
本次验证至少沉淀两类文档:
- `docs/reports/implementation/` 下新增本次实施记录,说明验证执行内容与调整范围。
- `docs/tests/records/` 下新增本次验证记录,说明执行命令、核验 SQL、接口结果与结论。
文档内容固定包含:
- 验证目标与范围;
- 执行命令;
- 数据库核验 SQL 与结果摘要;
- 接口端到端步骤与结果摘要;
- 最终结论;
- 若失败则输出问题清单,不包含修复动作。
## 进程管理
若本次验证启动了 Java 后端、前端或 Mock 服务进程,验证结束后必须主动关闭,并在验证记录中写明已完成清理,避免残留端口占用。
## 风险与边界
- Mock 随机命中只保证“稳定随机子集”,不保证每个 `logId` 全量命中所有规则。
- `LARGE_PURCHASE_TRANSACTION` 的命中依赖采购表基线,不应误判为银行流水样本问题。
- 对象型规则 `WITHDRAW_CNT` 的结论需要和明细型规则区分,避免用相同口径判断失败。
- 本次验证只为确认现状是否正确,不引申为修复方案或二期规则推进。
## 结论
本计划采用分层完整验证方案,对 2026-03-20 新加入的模型打标改动做统一校验。
执行时先验证 Mock 规则输入,再验证主工程真实规则识别,最后验证数据库事实和接口结果是否闭环一致。若任何一层失败,只输出证据和问题清单,不进入代码修复。

View File

@@ -0,0 +1,224 @@
# 新增模型打标完整验证记录
## 执行命令
```bash
cd lsfx-mock-server
python3 -m pytest tests/test_file_service.py -k "rule_hit_plan or persist_rule_hit_plan" -v
python3 -m pytest tests/test_statement_service.py -k "rule_plan_should_only_include or withdraw_cnt_samples" -v
python3 -m pytest tests/test_statement_service.py -k "follow_rule_hit_plan or fixed_total_count_200 or cached_result" -v
python3 -m pytest tests/integration/test_full_workflow.py -k "same_rule_subset or share_same_primary_binding" -v
python3 -m pytest tests/test_file_service.py tests/test_statement_service.py tests/test_api.py tests/integration/test_full_workflow.py -v
cd ..
mvn test -pl ccdi-project -Dtest=BankTagRuleConfigResolverTest
mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest
mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest,CcdiBankTagServiceImplTest
mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest,BankTagRuleConfigResolverTest,CcdiBankTagServiceImplTest,CcdiBankTagServiceRiskCountRefreshTest
```
## Mock 自动化结果
- 2026-03-20 15:21:54 CST 完成 Mock 聚焦回归与全量回归。
- 聚焦回归结果:
- `tests/test_file_service.py -k "rule_hit_plan or persist_rule_hit_plan"`: `2 passed, 4 deselected, 1 warning`
- `tests/test_statement_service.py -k "rule_plan_should_only_include or withdraw_cnt_samples"`: `2 passed, 11 deselected, 1 warning`
- `tests/test_statement_service.py -k "follow_rule_hit_plan or fixed_total_count_200 or cached_result"`: `3 passed, 10 deselected, 1 warning`
- `tests/integration/test_full_workflow.py -k "same_rule_subset or share_same_primary_binding"`: `2 passed, 3 deselected, 3 warnings`
- 全量回归结果:
- `python3 -m pytest tests/test_file_service.py tests/test_statement_service.py tests/test_api.py tests/integration/test_full_workflow.py -v`
- 摘要:`38 passed, 20 warnings in 4.15s`
- warning 摘要:
- `pydantic` 的 class-based config 弃用提示仍存在。
- `httpx` 的 `app` shortcut 弃用提示仍存在。
- 两类 warning 与既有 Mock 验证记录一致,本次未新增 failure 或 error。
## 主工程自动化结果
- 2026-03-20 15:22:27 CST 执行 `mvn test -pl ccdi-project -Dtest=BankTagRuleConfigResolverTest`,结果 `BUILD SUCCESS`,`Tests run: 6, Failures: 0, Errors: 0, Skipped: 0`。
- 2026-03-20 15:22:47 CST 执行 `mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest`,结果 `BUILD SUCCESS`,`Tests run: 8, Failures: 0, Errors: 0, Skipped: 0`。
- 2026-03-20 15:22:57 CST 执行 `mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest,CcdiBankTagServiceImplTest`,结果 `BUILD SUCCESS`,`Tests run: 19, Failures: 0, Errors: 0, Skipped: 0`。
- 2026-03-20 15:23:10 CST 执行 `mvn test -pl ccdi-project -Dtest=CcdiBankTagAnalysisMapperXmlTest,BankTagRuleConfigResolverTest,CcdiBankTagServiceImplTest,CcdiBankTagServiceRiskCountRefreshTest`,结果 `BUILD SUCCESS`,`Tests run: 27, Failures: 0, Errors: 0, Skipped: 0`。
- 结果归纳:
- `BankTagRuleConfigResolverTest` 证明第一期规则参数映射保持通过。
- `CcdiBankTagAnalysisMapperXmlTest` 证明真实 SQL 结构保持通过。
- `CcdiBankTagServiceImplTest` 证明规则分发和异常路径断言保持通过。
- `CcdiBankTagServiceRiskCountRefreshTest` 证明风险人数刷新链路保持通过。
- 日志说明:
- 测试日志中的 `threshold missing``refresh failed` 为异常路径断言场景产生的预期日志,不代表本轮 Maven 回归失败。
## 数据库核验
```bash
bin/mysql_utf8_exec.sh sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql
python3 - <<'PY'
from pathlib import Path
import pymysql, re
text = Path('ruoyi-admin/src/main/resources/application-dev.yml').read_text(encoding='utf-8')
match = re.search(r"url:\s*jdbc:mysql://(?P<host>[^:/?#]+):(?P<port>\d+)/(?P<db>[^?\n]+).*?\n\s*username:\s*(?P<user>[^\n]+)\n\s*password:\s*(?P<pwd>[^\n]+)", text, re.S)
conn = pymysql.connect(
host=match.group('host'),
port=int(match.group('port')),
user=match.group('user').strip(),
password=match.group('pwd').strip(),
database=match.group('db').strip(),
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
with conn, conn.cursor() as cursor:
cursor.execute("""
SELECT purchase_id, actual_amount, supplier_name
FROM ccdi_purchase_transaction
WHERE purchase_id = 'LSFXMOCKPUR001'
AND actual_amount > 100000
""")
print(cursor.fetchone())
PY
python3 - <<'PY'
from pathlib import Path
import pymysql, re
TARGET_RULES = (
'GAMBLING_SENSITIVE_KEYWORD','SPECIAL_AMOUNT_TRANSACTION','SUSPICIOUS_INCOME_KEYWORD',
'FOREX_BUY_AMT','FOREX_SELL_AMT','LARGE_PURCHASE_TRANSACTION',
'STOCK_TFR_LARGE','WITHDRAW_CNT','LARGE_STOCK_TRADING'
)
text = Path('ruoyi-admin/src/main/resources/application-dev.yml').read_text(encoding='utf-8')
match = re.search(r"url:\s*jdbc:mysql://(?P<host>[^:/?#]+):(?P<port>\d+)/(?P<db>[^?\n]+).*?\n\s*username:\s*(?P<user>[^\n]+)\n\s*password:\s*(?P<pwd>[^\n]+)", text, re.S)
conn = pymysql.connect(
host=match.group('host'),
port=int(match.group('port')),
user=match.group('user').strip(),
password=match.group('pwd').strip(),
database=match.group('db').strip(),
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
sql = f"""
SELECT model_code, rule_code, indicator_code
FROM ccdi_bank_tag_rule
WHERE rule_code IN ({','.join(['%s'] * len(TARGET_RULES))})
ORDER BY model_code, sort_order, rule_code
"""
with conn, conn.cursor() as cursor:
cursor.execute(sql, TARGET_RULES)
for row in cursor.fetchall():
print(row)
PY
```
- 采购基线脚本执行结果:
- `bin/mysql_utf8_exec.sh sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql` 执行成功,无报错、无乱码输出。
- 采购基线查询结果:
- 返回 `{'purchase_id': 'LSFXMOCKPUR001', 'actual_amount': Decimal('186000.00'), 'supplier_name': '兰溪市联调供应链有限公司'}`
- 结论:`LSFXMOCKPUR001` 存在,且 `actual_amount > 100000`,采购基线正常。
- 规则元数据查询结果:
- 共查询到 9 条目标规则,`rule_code` 均存在。
- 返回摘要:
- `STOCK_TFR_LARGE -> indicator_code=STOCK_TFR_LARGE`
- `WITHDRAW_CNT -> indicator_code=WITHDRAW_CNT`
- `LARGE_STOCK_TRADING -> indicator_code=NULL`
- `FOREX_BUY_AMT -> indicator_code=FOREX_BUY_AMT`
- `FOREX_SELL_AMT -> indicator_code=FOREX_SELL_AMT`
- 其余 4 条规则 `indicator_code=NULL`
- 异常判定:
- 根据既有实施记录,`FOREX_BUY_AMT` 预期应对齐为 `SINGLE_PURCHASE_AMOUNT`
- `FOREX_SELL_AMT` 预期应对齐为 `SINGLE_SETTLEMENT_AMOUNT`
- `LARGE_STOCK_TRADING` 预期应对齐为 `STOCK_TFR_LARGE`,当前查询为 `NULL`
- 首次执行因此在数据库层判定为“数据基线异常”。
- 修复后复验:
- 已执行 `bin/mysql_utf8_exec.sh sql/migration/2026-03-20-sync-bank-tag-phase1-rule-metadata.sql`
- 修复后查询结果:
- `FOREX_BUY_AMT -> indicator_code=SINGLE_PURCHASE_AMOUNT`
- `FOREX_SELL_AMT -> indicator_code=SINGLE_SETTLEMENT_AMOUNT`
- `LARGE_STOCK_TRADING -> indicator_code=STOCK_TFR_LARGE`
- 9 条一期真实规则 `remark` 均已同步为真实规则说明
- 结论:数据库元数据异常已修复,可继续进入接口端到端验证。
## 接口验证
```bash
curl -s http://localhost:62318/login/test \
-H 'Content-Type: application/json' \
-d '{"username":"admin","password":"admin123"}'
python3 - <<'PY'
# 读取 3 个有效身份证号并生成 /tmp/bank-tag-pull-request.json
PY
curl -s http://localhost:62318/ccdi/file-upload/pull-bank-info \
-H "Authorization: Bearer $TOKEN" \
-H 'Content-Type: application/json' \
--data-binary @/tmp/bank-tag-pull-request.json
curl -s http://localhost:62318/ccdi/project/tags/rebuild \
-H "Authorization: Bearer $TOKEN" \
-H 'Content-Type: application/json' \
-d '{"projectId":47,"modelCode":null}'
python3 - <<'PY'
# 轮询 ccdi_bank_tag_task 并查询目标规则命中结果
PY
curl -s "http://localhost:62318/ccdi/project/bank-statement/detail/66679" \
-H "Authorization: Bearer $TOKEN"
```
- 登录结果:
- 返回 `code=200`,token 非空。
- 拉取本行信息结果:
- 选择身份证号:`558455197203132040`、`523342199111246421`、`38056420050404632X`
- 接口返回 `{"msg":"拉取任务已提交","code":200,...}`
- 自动触发任务 `id=36`,`trigger_type=AUTO_PULL_BANK_INFO`,状态 `SUCCESS`
- 手动重算结果:
- 首次调用命中项目级重算锁,返回“当前项目标签正在重算中,请稍后再试”。
- 自动拉取任务完成后再次调用,返回 `{"msg":"标签重算任务已提交","code":200}`
- 最新任务 `id=37`,状态 `SUCCESS`,`hit_count=3481`、`success_rule_count=33`、`failed_rule_count=0`
- 命中结果查询:
- 已查到目标规则命中,包括:
- `WITHDRAW_CNT`
- `GAMBLING_SENSITIVE_KEYWORD`
- `LARGE_PURCHASE_TRANSACTION`
- 样例明细:
- `rule_code=GAMBLING_SENSITIVE_KEYWORD`
- `bank_statement_id=66679`
- `reason_detail=摘要/对手命中赌博敏感词,摘要“游戏充值”,对手方“欢乐游戏科技有限公司”,支出金额 6888.00 元`
- 详情接口回查:
- `GET /ccdi/project/bank-statement/detail/66679` 返回 `code=200`
- `data.hitTags` 中包含 `GAMBLING_SENSITIVE_KEYWORD`
## 补充复验
- 2026-03-20 16:01 左右,基于修复后的详情查询 SQL 再次执行项目 `47` 端到端链路验证。
- 登录结果:
- `POST /login/test` 返回 `code=200`,token 非空。
- 拉取本行信息结果:
- 仍使用身份证号 `558455197203132040`、`523342199111246421`、`38056420050404632X`
- `POST /ccdi/file-upload/pull-bank-info` 返回 `{"msg":"拉取任务已提交","code":200,...}`
- 自动触发任务 `id=39`,`trigger_type=AUTO_PULL_BANK_INFO`,状态 `SUCCESS`
- `hit_count=3636`、`success_rule_count=33`、`failed_rule_count=0`
- 手动重算结果:
- `POST /ccdi/project/tags/rebuild` 直接返回 `{"msg":"标签重算任务已提交","code":200}`
- 最新任务 `id=40`,`trigger_type=MANUAL`,状态 `SUCCESS`
- `hit_count=3636`、`success_rule_count=33`、`failed_rule_count=0`
- 命中样例回查:
- 最新 `GAMBLING_SENSITIVE_KEYWORD` 命中样例为 `bank_statement_id=67279`
- `reason_detail=摘要/对手命中赌博敏感词,摘要“游戏充值”,对手方“欢乐游戏科技有限公司”,支出金额 6888.00 元`
- 详情接口回查:
- `GET /ccdi/project/bank-statement/detail/67279` 返回 `code=200`
- 返回结果包含 `originalFileName=558455197203132040_10001.csv`
- `data.hitTags` 中包含 `GAMBLING_SENSITIVE_KEYWORD`
## 结论
- 首次执行在数据库核验阶段发现第一期规则元数据异常,问题已定位并修复。
- 修复后重新验证结果如下:
- Mock 自动化回归通过。
- 主工程第一期真实规则自动化回归通过。
- 数据库采购基线与第一期规则元数据核验通过。
- 项目 `47` 的自动拉取、手动重算、规则命中查询与详情接口回查通过。
- 补充复验确认:重复上传记录场景下,流水详情接口已不再出现 `selectOne()` 结果重复异常。
- 最终结论:本次“新增模型打标完整验证”在修复元数据缺口后已通过。
## 环境清理
- 已停止本次复验启动的 Mock 服务与后端 Jar 服务。
- 端口复核结果:
- `62318` 无监听进程
- `8000` 无监听进程

View File

@@ -0,0 +1,70 @@
# LSFX Mock 随机命中规则后端验证记录
## 执行命令
```bash
cd lsfx-mock-server
python3 -m pytest tests/test_file_service.py -k "rule_hit_plan or persist_rule_hit_plan" -v
python3 -m pytest tests/test_statement_service.py -k "rule_plan_should_only_include or withdraw_cnt_samples" -v
python3 -m pytest tests/test_statement_service.py -k "follow_rule_hit_plan or fixed_total_count_200 or cached_result" -v
python3 -m pytest tests/integration/test_full_workflow.py -k "same_rule_subset or share_same_primary_binding" -v
python3 -m pytest tests/test_file_service.py tests/test_statement_service.py tests/test_api.py tests/integration/test_full_workflow.py -v
cd ..
bin/mysql_utf8_exec.sh sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql
python3 - <<'PY'
import pymysql
from pathlib import Path
import re
text = Path('ruoyi-admin/src/main/resources/application-dev.yml').read_text(encoding='utf-8')
match = re.search(r"url:\s*jdbc:mysql://(?P<host>[^:/?#]+):(?P<port>\d+)/(?P<db>[^?\n]+).*?\n\s*username:\s*(?P<user>[^\n]+)\n\s*password:\s*(?P<pwd>[^\n]+)", text, re.S)
conn = pymysql.connect(
host=match.group('host'),
port=int(match.group('port')),
user=match.group('user').strip(),
password=match.group('pwd').strip(),
database=match.group('db').strip(),
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
with conn, conn.cursor() as cursor:
cursor.execute("""
SELECT COUNT(1) AS cnt
FROM ccdi_purchase_transaction
WHERE purchase_id = 'LSFXMOCKPUR001'
AND actual_amount > 100000
""")
print(cursor.fetchone()['cnt'])
PY
```
## 执行时间与结果
- 2026-03-20 14:50 CST 完成目标回归:
`python3 -m pytest tests/test_file_service.py tests/test_statement_service.py tests/test_api.py tests/integration/test_full_workflow.py -v`
- 回归结果:`38 passed, 20 warnings in 4.20s`
- warnings 为现有 `pydantic``httpx` 弃用提示,本次改动未新增失败或 error。
## SQL 基线脚本执行结果
- 执行命令:`bin/mysql_utf8_exec.sh sql/migration/2026-03-20-lsfx-mock-random-hit-rule-purchase-baseline.sql`
- 执行结果:脚本执行成功,无报错输出。
- 脚本内容采用“先删后插”的幂等方式,避免重复执行造成脏数据。
## 采购基线查询结果
- 执行前查询:
`SELECT COUNT(1) FROM ccdi_purchase_transaction WHERE purchase_id = 'LSFXMOCKPUR001'`
返回 `0`
- 执行后查询:
`SELECT purchase_id, actual_amount, supplier_name FROM ccdi_purchase_transaction WHERE purchase_id = 'LSFXMOCKPUR001'`
返回:
- `purchase_id = LSFXMOCKPUR001`
- `actual_amount = 186000.00`
- `supplier_name = 兰溪市联调供应链有限公司`
- 最终门槛校验:
`SELECT COUNT(1) ... WHERE purchase_id = 'LSFXMOCKPUR001' AND actual_amount > 100000`
返回 `1`
## 是否发现回归
- 未发现本次改动引入的功能回归。
- `FileService` 的规则命中计划生成、`statement_rule_samples.py` 的按规则子集装配、`StatementService` 的缓存稳定性、端到端接口链路均已通过验证。
- 本次验证未启动额外前后端常驻进程,因此无需执行进程清理。

View File

@@ -9,6 +9,28 @@ from datetime import datetime, timedelta
import random
import uuid
LARGE_TRANSACTION_RULE_CODES = [
"HOUSE_OR_CAR_EXPENSE",
"TAX_EXPENSE",
"SINGLE_LARGE_INCOME",
"CUMULATIVE_INCOME",
"ANNUAL_TURNOVER",
"LARGE_CASH_DEPOSIT",
"FREQUENT_CASH_DEPOSIT",
"LARGE_TRANSFER",
]
PHASE1_RULE_CODES = [
"GAMBLING_SENSITIVE_KEYWORD",
"SPECIAL_AMOUNT_TRANSACTION",
"SUSPICIOUS_INCOME_KEYWORD",
"FOREX_BUY_AMT",
"FOREX_SELL_AMT",
"STOCK_TFR_LARGE",
"LARGE_STOCK_TRADING",
"WITHDRAW_CNT",
]
@dataclass
class FileRecord:
@@ -64,12 +86,16 @@ class FileRecord:
staff_name: str = ""
staff_id_card: str = ""
family_id_cards: List[str] = field(default_factory=list)
large_transaction_hit_rules: List[str] = field(default_factory=list)
phase1_hit_rules: List[str] = field(default_factory=list)
class FileService:
"""文件上传和解析服务"""
INNER_FLOW_TOTAL_RECORDS = 200
LOG_ID_MIN = settings.INITIAL_LOG_ID
LOG_ID_MAX = 99999
def __init__(self, staff_identity_repository=None):
self.file_records: Dict[int, FileRecord] = {} # logId -> FileRecord
@@ -80,6 +106,18 @@ class FileService:
"""按 logId 获取已存在的文件记录。"""
return self.file_records.get(log_id)
def _generate_log_id(self) -> int:
"""生成当前进程内未占用的随机 logId。"""
available_capacity = self.LOG_ID_MAX - self.LOG_ID_MIN + 1
if len(self.file_records) >= available_capacity:
raise RuntimeError("可用 logId 已耗尽")
while True:
candidate = random.randint(self.LOG_ID_MIN, self.LOG_ID_MAX)
if candidate not in self.file_records:
self.log_counter = candidate
return candidate
def _infer_bank_name(self, filename: str) -> tuple:
"""根据文件名推断银行名称和模板名称"""
if "支付宝" in filename or "alipay" in filename.lower():
@@ -110,6 +148,27 @@ class FileService:
"enterpriseNameList": [primary_enterprise_name],
}
def _pick_rule_subset(
self,
rng: random.Random,
rule_codes: List[str],
min_count: int,
max_count: int,
) -> List[str]:
"""按固定随机源选择稳定规则子集,并保留规则池原始顺序。"""
selected_codes = set(rng.sample(rule_codes, rng.randint(min_count, max_count)))
return [rule_code for rule_code in rule_codes if rule_code in selected_codes]
def _build_rule_hit_plan(self, log_id: int) -> dict:
"""基于 logId 生成稳定的规则命中计划。"""
rng = random.Random(f"rule-plan:{log_id}")
return {
"large_transaction_hit_rules": self._pick_rule_subset(
rng, LARGE_TRANSACTION_RULE_CODES, 2, 4
),
"phase1_hit_rules": self._pick_rule_subset(rng, PHASE1_RULE_CODES, 2, 4),
}
def _create_file_record(
self,
*,
@@ -130,6 +189,8 @@ class FileService:
staff_name: str = "",
staff_id_card: str = "",
family_id_cards: List[str] = None,
large_transaction_hit_rules: List[str] = None,
phase1_hit_rules: List[str] = None,
parsing: bool = True,
status: int = -5,
) -> FileRecord:
@@ -160,6 +221,8 @@ class FileService:
staff_name=staff_name,
staff_id_card=staff_id_card,
family_id_cards=list(family_id_cards or []),
large_transaction_hit_rules=list(large_transaction_hit_rules or []),
phase1_hit_rules=list(phase1_hit_rules or []),
parsing=parsing,
status=status,
)
@@ -181,12 +244,12 @@ class FileService:
Returns:
上传响应字典
"""
# 生成唯一logId
self.log_counter += 1
log_id = self.log_counter
# 生成唯一 logId
log_id = self._generate_log_id()
# 推断银行信息
bank_name, template_name = self._infer_bank_name(file.filename)
rule_hit_plan = self._build_rule_hit_plan(log_id)
# 生成合理的交易日期范围
end_date = datetime.now()
@@ -217,6 +280,8 @@ class FileService:
staff_name=identity_scope["staff_name"],
staff_id_card=identity_scope["staff_id_card"],
family_id_cards=identity_scope["family_id_cards"],
large_transaction_hit_rules=rule_hit_plan["large_transaction_hit_rules"],
phase1_hit_rules=rule_hit_plan["phase1_hit_rules"],
)
# 存储记录
@@ -518,9 +583,9 @@ class FileService:
data_start_date_id = request.dataStartDateId
data_end_date_id = request.dataEndDateId
# 使用递增 logId确保与上传链路一致
self.log_counter += 1
log_id = self.log_counter
# 使用随机 logId确保与上传链路一致且不覆盖现有记录
log_id = self._generate_log_id()
rule_hit_plan = self._build_rule_hit_plan(log_id)
primary_enterprise_name, primary_account_no = self._generate_primary_binding()
identity_scope = self._select_staff_identity_scope()
@@ -542,6 +607,8 @@ class FileService:
staff_name=identity_scope["staff_name"],
staff_id_card=identity_scope["staff_id_card"],
family_id_cards=identity_scope["family_id_cards"],
large_transaction_hit_rules=rule_hit_plan["large_transaction_hit_rules"],
phase1_hit_rules=rule_hit_plan["phase1_hit_rules"],
parsing=False,
)

View File

@@ -141,6 +141,468 @@ def _build_statement(
}
def _build_sample_context(
log_id: int,
primary_enterprise_name: Optional[str] = None,
primary_account_no: Optional[str] = None,
staff_id_card: Optional[str] = None,
family_id_cards: Optional[List[str]] = None,
) -> Dict[str, str]:
identity_scope = resolve_identity_scope(log_id)
staff_identity = identity_scope["staff"]
family_identity = identity_scope["family"]
selected_staff_id_card = staff_id_card or staff_identity["id_card"]
selected_family_id_cards = list(family_id_cards or [family_identity["id_card"]])
return {
"le_name": primary_enterprise_name or "模型测试主体",
"account_no": primary_account_no or "6222024999999999",
"staff_id_card": selected_staff_id_card,
"family_id_card": (
selected_family_id_cards[0] if selected_family_id_cards else selected_staff_id_card
),
}
def build_house_or_car_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=9, hours=1),
cret_no=context["staff_id_card"],
customer_name="杭州贝壳房地产经纪有限公司",
user_memo="购买房产首付款",
cash_type="对公转账",
dr_amount=680000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024555500001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=8, hours=2),
cret_no=context["family_id_card"],
customer_name="兰溪星耀汽车销售服务有限公司",
user_memo="购车首付款",
cash_type="对公转账",
dr_amount=380000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024555500002",
),
]
def build_tax_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=7, hours=1),
cret_no=context["staff_id_card"],
customer_name="国家金库兰溪市中心支库",
user_memo="个人所得税税款",
cash_type="税务缴款",
dr_amount=126000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024555500003",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=6, hours=3),
cret_no=context["family_id_card"],
customer_name="兰溪市税务局",
user_memo="房产税务缴税",
cash_type="税务缴款",
dr_amount=88000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024555500004",
),
]
def build_single_large_income_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=5, hours=2),
cret_no=context["staff_id_card"],
customer_name="浙江远望贸易有限公司",
user_memo="经营往来收入",
cash_type="对公转账",
cr_amount=18800000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024666600001",
)
]
def build_cumulative_income_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=5, hours=2),
cret_no=context["staff_id_card"],
customer_name="浙江远望贸易有限公司",
user_memo="经营往来收入",
cash_type="对公转账",
cr_amount=18800000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024666600001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=5, hours=1),
cret_no=context["staff_id_card"],
customer_name="浙江远望贸易有限公司",
user_memo="项目回款收入",
cash_type="对公转账",
cr_amount=20800000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024666600001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=4, hours=4),
cret_no=context["staff_id_card"],
customer_name="浙江远望贸易有限公司",
user_memo="业务合作收入",
cash_type="对公转账",
cr_amount=20700000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024666600001",
),
]
def build_annual_turnover_supporting_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=15, hours=2),
cret_no=context["staff_id_card"],
customer_name="浙江金穗供应链有限公司",
user_memo="年度经营回款",
cash_type="对公转账",
cr_amount=17200000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024666600002",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=11, hours=3),
cret_no=context["staff_id_card"],
customer_name="浙江金穗供应链有限公司",
user_memo="年度项目回款",
cash_type="对公转账",
cr_amount=17600000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024666600002",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=9, hours=4),
cret_no=context["staff_id_card"],
customer_name="浙江金穗供应链有限公司",
user_memo="年度合作收入",
cash_type="对公转账",
cr_amount=17800000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024666600002",
),
]
def build_large_cash_deposit_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 9, 0, 0),
cret_no=context["staff_id_card"],
customer_name="",
user_memo="现金存款",
cash_type="现金存款",
cr_amount=3000000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
)
]
def build_frequent_cash_deposit_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
deposit_specs = [
(datetime(2026, 3, 10, 9, 0, 0), "现金存款", 3000000.0),
(datetime(2026, 3, 10, 9, 30, 0), "ATM现金存款", 3100000.0),
(datetime(2026, 3, 10, 10, 0, 0), "自助存款现金存入", 3200000.0),
(datetime(2026, 3, 10, 10, 30, 0), "CRS存款", 3300000.0),
(datetime(2026, 3, 10, 11, 0, 0), "本行ATM存款", 3400000.0),
(datetime(2026, 3, 10, 11, 30, 0), "柜面现金存款", 3500000.0),
]
return [
_build_statement(
group_id,
log_id,
trx_datetime=trx_datetime,
cret_no=context["staff_id_card"],
customer_name="",
user_memo=user_memo,
cash_type="现金存款",
cr_amount=cr_amount,
le_name=context["le_name"],
account_mask_no=context["account_no"],
)
for trx_datetime, user_memo, cr_amount in deposit_specs
]
def build_large_transfer_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=3, hours=1),
cret_no=context["staff_id_card"],
customer_name="异地转账平台",
user_memo="手机银行转账",
cash_type="转账支出",
dr_amount=12000000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024777700001",
)
]
def build_gambling_sensitive_keyword_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=4, hours=2),
cret_no=context["staff_id_card"],
customer_name="欢乐游戏科技有限公司",
user_memo="游戏充值",
cash_type="快捷支付",
dr_amount=6888.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024888800001",
)
]
def build_special_amount_transaction_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=4, hours=1),
cret_no=context["staff_id_card"],
customer_name="兰溪特别金额结算中心",
user_memo="特殊金额转账",
cash_type="转账支出",
dr_amount=88888.88,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024888800002",
)
]
def build_suspicious_income_keyword_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=3, hours=5),
cret_no=context["staff_id_card"],
customer_name="灰度信息咨询有限公司",
user_memo="咨询返现收入",
cash_type="对公转账",
cr_amount=166666.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024888800003",
)
]
def build_forex_buy_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=2, hours=6),
cret_no=context["staff_id_card"],
customer_name="中国银行外汇业务中心",
user_memo="个人购汇",
cash_type="购汇支出",
dr_amount=126000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024999900001",
)
]
def build_forex_sell_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=2, hours=4),
cret_no=context["staff_id_card"],
customer_name="中国银行外汇业务中心",
user_memo="个人结汇",
cash_type="结汇收入",
cr_amount=132000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024999900002",
)
]
def build_stock_transfer_large_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=2, hours=2),
cret_no=context["staff_id_card"],
customer_name="国信证券资金账户",
user_memo="证券大额转托管转出",
cash_type="转账支出",
dr_amount=560000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024999900003",
)
]
def build_large_stock_trading_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
return [
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=1, hours=3),
cret_no=context["staff_id_card"],
customer_name="华泰证券资金账户",
user_memo="证券大额交易买入",
cash_type="证券交易",
dr_amount=880000.0,
le_name=context["le_name"],
account_mask_no=context["account_no"],
customer_account_mask_no="6222024999900004",
)
]
def build_withdraw_cnt_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
context = _build_sample_context(log_id, **kwargs)
withdraw_specs = [
(datetime(2026, 3, 12, 9, 0, 0), "微信提现", 8000.0),
(datetime(2026, 3, 12, 10, 0, 0), "支付宝提现", 9000.0),
(datetime(2026, 3, 12, 11, 0, 0), "微信提现", 8500.0),
(datetime(2026, 3, 12, 12, 0, 0), "支付宝提现", 9200.0),
]
return [
_build_statement(
group_id,
log_id,
trx_datetime=trx_datetime,
cret_no=context["staff_id_card"],
customer_name="财付通结算账户" if "微信" in user_memo else "支付宝结算账户",
user_memo=user_memo,
cash_type="提现支出",
dr_amount=dr_amount,
le_name=context["le_name"],
account_mask_no=context["account_no"],
)
for trx_datetime, user_memo, dr_amount in withdraw_specs
]
LARGE_TRANSACTION_BUILDERS = {
"HOUSE_OR_CAR_EXPENSE": build_house_or_car_samples,
"TAX_EXPENSE": build_tax_samples,
"SINGLE_LARGE_INCOME": build_single_large_income_samples,
"CUMULATIVE_INCOME": build_cumulative_income_samples,
"ANNUAL_TURNOVER": build_annual_turnover_supporting_samples,
"LARGE_CASH_DEPOSIT": build_large_cash_deposit_samples,
"FREQUENT_CASH_DEPOSIT": build_frequent_cash_deposit_samples,
"LARGE_TRANSFER": build_large_transfer_samples,
}
PHASE1_RULE_BUILDERS = {
"GAMBLING_SENSITIVE_KEYWORD": build_gambling_sensitive_keyword_samples,
"SPECIAL_AMOUNT_TRANSACTION": build_special_amount_transaction_samples,
"SUSPICIOUS_INCOME_KEYWORD": build_suspicious_income_keyword_samples,
"FOREX_BUY_AMT": build_forex_buy_samples,
"FOREX_SELL_AMT": build_forex_sell_samples,
"STOCK_TFR_LARGE": build_stock_transfer_large_samples,
"LARGE_STOCK_TRADING": build_large_stock_trading_samples,
"WITHDRAW_CNT": build_withdraw_cnt_samples,
}
def build_seed_statements_for_rule_plan(
group_id: int,
log_id: int,
rule_plan: Dict,
**kwargs,
) -> List[Dict]:
statements: List[Dict] = []
for rule_code in rule_plan.get("large_transaction_hit_rules", []):
builder = LARGE_TRANSACTION_BUILDERS.get(rule_code)
if builder is not None:
statements.extend(builder(group_id, log_id, **kwargs))
for rule_code in rule_plan.get("phase1_hit_rules", []):
builder = PHASE1_RULE_BUILDERS.get(rule_code)
if builder is not None:
statements.extend(builder(group_id, log_id, **kwargs))
return statements
def build_large_transaction_seed_statements(
group_id: int,
log_id: int,
@@ -149,220 +611,15 @@ def build_large_transaction_seed_statements(
staff_id_card: Optional[str] = None,
family_id_cards: Optional[List[str]] = None,
) -> List[Dict]:
le_name = primary_enterprise_name or "模型测试主体"
account_no = primary_account_no or "6222024999999999"
identity_scope = resolve_identity_scope(log_id)
staff_identity = identity_scope["staff"]
family_identity = identity_scope["family"]
selected_staff_id_card = staff_id_card or staff_identity["id_card"]
selected_family_id_cards = list(family_id_cards or [family_identity["id_card"]])
primary_family_id_card = selected_family_id_cards[0] if selected_family_id_cards else selected_staff_id_card
statements: List[Dict] = []
statements.extend([
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=9, hours=1),
cret_no=selected_staff_id_card,
customer_name="杭州贝壳房地产经纪有限公司",
user_memo="购买房产首付款",
cash_type="对公转账",
dr_amount=680000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024555500001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=8, hours=2),
cret_no=primary_family_id_card,
customer_name="兰溪星耀汽车销售服务有限公司",
user_memo="购车首付款",
cash_type="对公转账",
dr_amount=380000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024555500002",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=7, hours=1),
cret_no=selected_staff_id_card,
customer_name="国家金库兰溪市中心支库",
user_memo="个人所得税税款",
cash_type="税务缴款",
dr_amount=126000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024555500003",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=6, hours=3),
cret_no=primary_family_id_card,
customer_name="兰溪市税务局",
user_memo="房产税务缴税",
cash_type="税务缴款",
dr_amount=88000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024555500004",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=5, hours=2),
cret_no=selected_staff_id_card,
customer_name="浙江远望贸易有限公司",
user_memo="经营往来收入",
cash_type="对公转账",
cr_amount=18800000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024666600001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=5, hours=1),
cret_no=selected_staff_id_card,
customer_name="浙江远望贸易有限公司",
user_memo="项目回款收入",
cash_type="对公转账",
cr_amount=20800000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024666600001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=4, hours=4),
cret_no=selected_staff_id_card,
customer_name="浙江远望贸易有限公司",
user_memo="业务合作收入",
cash_type="对公转账",
cr_amount=20700000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024666600001",
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 9, 0, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="现金存款",
cash_type="现金存款",
cr_amount=3000000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 9, 30, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="ATM现金存款",
cash_type="现金存款",
cr_amount=3100000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 10, 0, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="自助存款现金存入",
cash_type="现金存款",
cr_amount=3200000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 10, 30, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="CRS存款",
cash_type="现金存款",
cr_amount=3300000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 11, 0, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="本行ATM存款",
cash_type="现金存款",
cr_amount=3400000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 11, 30, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="柜面现金存款",
cash_type="现金存款",
cr_amount=3500000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=3, hours=1),
cret_no=selected_staff_id_card,
customer_name="异地转账平台",
user_memo="手机银行转账",
cash_type="转账支出",
dr_amount=12000000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024777700001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=3, hours=2),
cret_no=selected_staff_id_card,
customer_name="跨行转账中心",
user_memo="对外转账",
cash_type="转账支出",
dr_amount=10000000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024777700002",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=2, hours=5),
cret_no=selected_staff_id_card,
customer_name="跨境转账服务平台",
user_memo="网银转账",
cash_type="转账支出",
dr_amount=9000000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024777700003",
),
])
return statements
return build_seed_statements_for_rule_plan(
group_id=group_id,
log_id=log_id,
rule_plan={
"large_transaction_hit_rules": list(LARGE_TRANSACTION_BUILDERS.keys()),
"phase1_hit_rules": [],
},
primary_enterprise_name=primary_enterprise_name,
primary_account_no=primary_account_no,
staff_id_card=staff_id_card,
family_id_cards=family_id_cards,
)

View File

@@ -5,7 +5,7 @@ import uuid
from datetime import datetime, timedelta
from services.statement_rule_samples import (
build_large_transaction_seed_statements,
build_seed_statements_for_rule_plan,
resolve_identity_cards,
)
@@ -144,14 +144,21 @@ class StatementService:
"""生成指定数量的流水记录。"""
primary_enterprise_name, primary_account_no = self._resolve_primary_binding(log_id)
record = self.file_service.get_file_record(log_id) if self.file_service is not None else None
rule_plan = {
"large_transaction_hit_rules": (
list(record.large_transaction_hit_rules) if record is not None else []
),
"phase1_hit_rules": list(record.phase1_hit_rules) if record is not None else [],
}
if record is not None and record.staff_id_card:
allowed_identity_cards = tuple([record.staff_id_card, *record.family_id_cards])
else:
allowed_identity_cards = resolve_identity_cards(log_id)
rng = random.Random(f"statement:{log_id}")
seeded_statements = build_large_transaction_seed_statements(
seeded_statements = build_seed_statements_for_rule_plan(
group_id=group_id,
log_id=log_id,
rule_plan=rule_plan,
primary_enterprise_name=primary_enterprise_name,
primary_account_no=primary_account_no,
staff_id_card=record.staff_id_card if record is not None else None,

View File

@@ -170,3 +170,30 @@ def test_upload_status_and_bank_statement_share_same_primary_binding(client, mon
assert statements
assert all(item["leName"] == status_log["enterpriseNameList"][0] for item in statements)
assert all(item["accountMaskNo"] == status_log["accountNoList"][0] for item in statements)
def test_inner_flow_bank_statement_should_keep_same_rule_subset(client):
fetch_response = client.post(
"/watson/api/project/getJZFileOrZjrcuFile",
data={
"groupId": 1001,
"customerNo": "customer_subset",
"dataChannelCode": "test_code",
"requestDateId": 20240101,
"dataStartDateId": 20240101,
"dataEndDateId": 20240131,
"uploadUserId": 902001,
},
)
log_id = fetch_response.json()["data"][0]
page1 = client.post(
"/watson/api/project/getBSByLogId",
data={"groupId": 1001, "logId": log_id, "pageNow": 1, "pageSize": 10},
).json()
page2 = client.post(
"/watson/api/project/getBSByLogId",
data={"groupId": 1001, "logId": log_id, "pageNow": 1, "pageSize": 10},
).json()
assert page1["data"]["bankStatementList"] == page2["data"]["bankStatementList"]

View File

@@ -8,7 +8,7 @@ import io
from fastapi import BackgroundTasks
from fastapi.datastructures import UploadFile
from services.file_service import FileService
from services.file_service import FileRecord, FileService
class FakeStaffIdentityRepository:
@@ -139,7 +139,7 @@ def test_fetch_inner_flow_persists_primary_binding_record(monkeypatch):
response = service.fetch_inner_flow(request)
log_id = response["data"][0]
assert log_id == service.log_counter
assert 10000 <= log_id <= 99999
assert log_id in service.file_records
record = service.file_records[log_id]
@@ -154,3 +154,67 @@ def test_fetch_inner_flow_persists_primary_binding_record(monkeypatch):
assert record.enterprise_name_list == ["行内主体"]
assert record.account_no_list == ["6210987654321098"]
assert record.total_records == 200
def test_generate_log_id_should_retry_when_random_value_conflicts(monkeypatch):
"""随机 logId 命中已存在记录时必须重试并返回未占用值。"""
service = FileService(staff_identity_repository=FakeStaffIdentityRepository())
service.file_records[34567] = FileRecord(
log_id=34567,
group_id=1001,
file_name="existing.csv",
)
candidate_values = iter([34567, 45678])
monkeypatch.setattr(
"services.file_service.random.randint",
lambda start, end: next(candidate_values),
)
assert service._generate_log_id() == 45678
def test_build_rule_hit_plan_should_be_deterministic_for_same_log_id():
service = FileService(staff_identity_repository=FakeStaffIdentityRepository())
plan1 = service._build_rule_hit_plan(10001)
plan2 = service._build_rule_hit_plan(10001)
assert plan1 == plan2
assert 2 <= len(plan1["large_transaction_hit_rules"]) <= 4
assert 2 <= len(plan1["phase1_hit_rules"]) <= 4
def test_fetch_inner_flow_should_persist_rule_hit_plan(monkeypatch):
service = FileService(staff_identity_repository=FakeStaffIdentityRepository())
monkeypatch.setattr(
service,
"_build_rule_hit_plan",
lambda log_id: {
"large_transaction_hit_rules": ["HOUSE_OR_CAR_EXPENSE", "TAX_EXPENSE"],
"phase1_hit_rules": ["GAMBLING_SENSITIVE_KEYWORD", "FOREX_BUY_AMT"],
},
)
response = service.fetch_inner_flow(
{
"groupId": 1001,
"customerNo": "test_customer_001",
"dataChannelCode": "test_code",
"requestDateId": 20240101,
"dataStartDateId": 20240101,
"dataEndDateId": 20240131,
"uploadUserId": 902001,
}
)
log_id = response["data"][0]
record = service.file_records[log_id]
assert record.large_transaction_hit_rules == [
"HOUSE_OR_CAR_EXPENSE",
"TAX_EXPENSE",
]
assert record.phase1_hit_rules == [
"GAMBLING_SENSITIVE_KEYWORD",
"FOREX_BUY_AMT",
]

View File

@@ -9,6 +9,7 @@ from services.statement_service import StatementService
from services.statement_rule_samples import (
DEFAULT_LARGE_TRANSACTION_THRESHOLDS,
build_large_transaction_seed_statements,
build_seed_statements_for_rule_plan,
)
@@ -21,16 +22,71 @@ class FakeStaffIdentityRepository:
}
def test_generate_statements_should_include_seeded_samples_before_noise():
"""生成流水必须先混入固定命中样本,而不是纯随机噪声"""
service = StatementService()
def test_generate_statements_should_include_seeded_samples_before_noise_when_rule_plan_exists():
"""存在规则命中计划时,生成流水必须先混入被选中的命中样本"""
file_service = FileService(staff_identity_repository=FakeStaffIdentityRepository())
service = StatementService(file_service=file_service)
statements = service._generate_statements(group_id=1000, log_id=20001, count=30)
response = file_service.fetch_inner_flow(
{
"groupId": 1001,
"customerNo": "customer_seeded_samples",
"dataChannelCode": "test_code",
"requestDateId": 20240101,
"dataStartDateId": 20240101,
"dataEndDateId": 20240131,
"uploadUserId": 902001,
}
)
log_id = response["data"][0]
record = file_service.file_records[log_id]
record.large_transaction_hit_rules = ["HOUSE_OR_CAR_EXPENSE"]
record.phase1_hit_rules = []
statements = service._generate_statements(group_id=1000, log_id=log_id, count=30)
assert len(statements) >= 30
assert any(item["userMemo"] == "购买房产首付款" for item in statements)
def test_build_seed_statements_for_rule_plan_should_only_include_requested_phase1_rules():
plan = {
"large_transaction_hit_rules": [],
"phase1_hit_rules": ["GAMBLING_SENSITIVE_KEYWORD", "FOREX_BUY_AMT"],
}
statements = build_seed_statements_for_rule_plan(
group_id=1000,
log_id=20001,
rule_plan=plan,
)
assert any("游戏" in item["userMemo"] for item in statements)
assert any("购汇" in item["userMemo"] for item in statements)
assert not any("证券" in item["userMemo"] for item in statements)
def test_build_seed_statements_for_rule_plan_should_generate_withdraw_cnt_samples():
plan = {
"large_transaction_hit_rules": [],
"phase1_hit_rules": ["WITHDRAW_CNT"],
}
statements = build_seed_statements_for_rule_plan(
group_id=1000,
log_id=20001,
rule_plan=plan,
)
assert len(
[
item
for item in statements
if "微信提现" in item["userMemo"] or "支付宝提现" in item["userMemo"]
]
) >= 4
def test_large_transaction_seed_should_cover_all_eight_rules():
"""大额交易样本生成器必须覆盖 8 条已实现规则的关键口径。"""
statements = build_large_transaction_seed_statements(group_id=1000, log_id=20001)
@@ -98,6 +154,33 @@ def test_generate_statements_should_fill_noise_up_to_requested_count():
assert len(statements) == 80
def test_generate_statements_should_follow_rule_hit_plan_from_file_record():
file_service = FileService(staff_identity_repository=FakeStaffIdentityRepository())
statement_service = StatementService(file_service=file_service)
response = file_service.fetch_inner_flow(
{
"groupId": 1001,
"customerNo": "customer_rule_plan",
"dataChannelCode": "test_code",
"requestDateId": 20240101,
"dataStartDateId": 20240101,
"dataEndDateId": 20240131,
"uploadUserId": 902001,
}
)
log_id = response["data"][0]
record = file_service.file_records[log_id]
record.large_transaction_hit_rules = ["HOUSE_OR_CAR_EXPENSE"]
record.phase1_hit_rules = ["GAMBLING_SENSITIVE_KEYWORD"]
statements = statement_service._generate_statements(group_id=1001, log_id=log_id, count=200)
assert any("房产首付款" in item["userMemo"] for item in statements)
assert any("游戏" in item["userMemo"] for item in statements)
assert not any("购汇" in item["userMemo"] for item in statements)
def test_generate_statements_should_stay_within_single_employee_scope_per_log_id():
"""同一 logId 的流水只能落在 FileRecord 绑定的员工及亲属身份证内。"""
file_service = FileService(staff_identity_repository=FakeStaffIdentityRepository())
@@ -228,6 +311,14 @@ def test_get_bank_statement_contains_large_transaction_hit_samples(monkeypatch):
)
log_id = response["data"][0]
record = file_service.file_records[log_id]
record.large_transaction_hit_rules = [
"HOUSE_OR_CAR_EXPENSE",
"TAX_EXPENSE",
"CUMULATIVE_INCOME",
"FREQUENT_CASH_DEPOSIT",
"LARGE_TRANSFER",
]
record.phase1_hit_rules = []
staff_id_card = record.staff_id_card
family_id_card = record.family_id_cards[0]

View File

@@ -0,0 +1,78 @@
-- Seed script: insert one mock purchase transaction (LSFXMOCKPUR001) used to
-- exercise the real LARGE_PURCHASE_TRANSACTION rule in LSFX mock integration.
-- The script is idempotent: it deletes any previous copy before inserting.
DELETE FROM ccdi_purchase_transaction
WHERE purchase_id = 'LSFXMOCKPUR001';
-- Insert the mock record, binding it to one real staff member so the rule
-- engine can resolve applicant identity. All date columns use CURRENT_DATE
-- so the record always falls inside a "recent" evaluation window.
INSERT INTO ccdi_purchase_transaction (
    purchase_id,
    purchase_category,
    project_name,
    subject_name,
    subject_desc,
    purchase_qty,
    budget_amount,
    bid_amount,
    actual_amount,
    contract_amount,
    settlement_amount,
    purchase_method,
    supplier_name,
    contact_person,
    contact_phone,
    supplier_uscc,
    supplier_bank_account,
    apply_date,
    plan_approve_date,
    announce_date,
    bid_open_date,
    contract_sign_date,
    expected_delivery_date,
    actual_delivery_date,
    acceptance_date,
    settlement_date,
    applicant_id,
    applicant_name,
    apply_department,
    purchase_leader_id,
    purchase_leader_name,
    purchase_department,
    created_by,
    updated_by
)
SELECT
    'LSFXMOCKPUR001',
    '设备采购',
    'LSFX Mock 联调',
    '反洗钱终端设备采购',
    '用于命中 LARGE_PURCHASE_TRANSACTION 真实规则',
    1,
    -- Amounts deliberately exceed the 100,000 CNY rule threshold.
    188000.00,
    186000.00,
    186000.00,
    186000.00,
    186000.00,
    '竞争性谈判',
    '兰溪市联调供应链有限公司',
    '联调联系人',
    '13800000000',
    '91330781MOCKPUR001',
    '6222000000001234',
    CURRENT_DATE,
    CURRENT_DATE,
    CURRENT_DATE,
    CURRENT_DATE,
    CURRENT_DATE,
    CURRENT_DATE,
    CURRENT_DATE,
    CURRENT_DATE,
    CURRENT_DATE,
    CAST(s.staff_id AS CHAR),
    s.name,
    '纪检初核部',
    NULL,
    NULL,
    NULL,
    'admin',
    'admin'
FROM ccdi_base_staff s
-- Pick only staff rows with a usable id and name.
WHERE COALESCE(TRIM(CAST(s.staff_id AS CHAR)), '') <> ''
  AND COALESCE(TRIM(s.name), '') <> ''
-- ORDER BY makes the LIMIT 1 pick deterministic; without it the engine may
-- return a different staff row on each run, producing unstable mock data.
ORDER BY s.staff_id
LIMIT 1;

View File

@@ -0,0 +1,40 @@
-- Seed/upgrade script: upsert the phase-2 "real rule" metadata rows into
-- ccdi_bank_tag_rule. Wrapped in a transaction so the batch applies atomically.
START TRANSACTION;
-- NOTE(review): the ON DUPLICATE KEY UPDATE clause presumably relies on a
-- unique key covering the rule identity (rule_code is not updated below) —
-- confirm the table's unique index before reusing this pattern.
INSERT INTO ccdi_bank_tag_rule (
    model_code,
    model_name,
    rule_code,
    rule_name,
    indicator_code,
    result_type,
    risk_level,
    business_caliber,
    enabled,
    sort_order,
    create_by,
    remark
) VALUES
-- One row per real rule: model, rule, optional indicator, result granularity
-- (STATEMENT = per-transaction hit, OBJECT = per-subject hit), risk level,
-- business caliber text, enabled flag and sort order within the model.
('SUSPICIOUS_GAMBLING', '疑似赌博', 'GAMBLING_SENSITIVE_KEYWORD', '疑似敏感交易', NULL, 'STATEMENT', 'HIGH', '备注或交易摘要、对手有“游戏、抖币、体彩、福彩”等字眼。', 1, 20, 'system', '真实规则:识别摘要或对手方命中赌博敏感词的支出流水'),
('SUSPICIOUS_RELATION', '可疑关系', 'SPECIAL_AMOUNT_TRANSACTION', '特殊金额交易', NULL, 'STATEMENT', NULL, '除与配偶、子女外发生特殊金额交易如1314元、520元等具有特殊含义的金额。', 1, 10, 'system', '真实规则:识别与非配偶子女发生的特殊金额交易'),
('SUSPICIOUS_PART_TIME', '可疑兼职', 'SUSPICIOUS_INCOME_KEYWORD', '疑似兼职', NULL, 'STATEMENT', 'HIGH', '转入资金摘要有“工资”、“分红”、“红利”、“利息(非银行结息)”等收入', 1, 30, 'system', '真实规则:识别非本行工资代发的收入关键词转入流水'),
('SUSPICIOUS_FOREIGN_EXCHANGE', '可疑外汇交易', 'FOREX_BUY_AMT', '可疑外汇交易', 'SINGLE_PURCHASE_AMOUNT', 'STATEMENT', NULL, '单笔购汇金额超限', 1, 10, 'system', '真实规则:识别单笔购汇金额超过阈值的流水'),
('SUSPICIOUS_FOREIGN_EXCHANGE', '可疑外汇交易', 'FOREX_SELL_AMT', '可疑外汇交易', 'SINGLE_SETTLEMENT_AMOUNT', 'STATEMENT', NULL, '单笔结汇金额超限', 1, 20, 'system', '真实规则:识别单笔结汇金额超过阈值的流水'),
('SUSPICIOUS_PURCHASE', '可疑采购', 'LARGE_PURCHASE_TRANSACTION', '可疑采购', NULL, 'STATEMENT', NULL, '单笔采购金额超过10万元。', 1, 10, 'system', '真实规则识别单笔采购金额超过10万元的采购事项'),
('ABNORMAL_BEHAVIOR', '异常行为', 'STOCK_TFR_LARGE', '可疑银证大额转账', 'STOCK_TFR_LARGE', 'STATEMENT', NULL, '家庭老人/非关系人银证大额转账', 1, 10, 'system', '真实规则:识别银证转账金额超过阈值的流水'),
('ABNORMAL_BEHAVIOR', '异常行为', 'WITHDRAW_CNT', '微信支付宝频繁提现', 'WITHDRAW_CNT', 'OBJECT', NULL, '微信、支付宝单日提现次数超过设置次数', 1, 20, 'system', '真实规则:识别微信支付宝单日提现次数超过阈值的对象'),
('ABNORMAL_BEHAVIOR', '异常行为', 'LARGE_STOCK_TRADING', '大额炒股', 'STOCK_TFR_LARGE', 'STATEMENT', 'HIGH', '单次三方资管交易金额超过100万元。', 1, 60, 'system', '真实规则:识别单笔三方资管交易金额超过阈值的流水')
-- On conflict, refresh every descriptive column (rule_code itself is the
-- identity and is intentionally left untouched) and stamp the audit fields.
ON DUPLICATE KEY UPDATE
    model_code = VALUES(model_code),
    model_name = VALUES(model_name),
    rule_name = VALUES(rule_name),
    indicator_code = VALUES(indicator_code),
    result_type = VALUES(result_type),
    risk_level = VALUES(risk_level),
    business_caliber = VALUES(business_caliber),
    enabled = VALUES(enabled),
    sort_order = VALUES(sort_order),
    update_by = 'system',
    update_time = NOW(),
    remark = VALUES(remark);
COMMIT;