拆分Mock规则样本构造器

This commit is contained in:
wkc
2026-03-20 14:45:49 +08:00
parent 1fd7ae7026
commit 5d03811d49
2 changed files with 513 additions and 217 deletions

View File

@@ -141,6 +141,468 @@ def _build_statement(
}
def _build_sample_context(
    log_id: int,
    primary_enterprise_name: Optional[str] = None,
    primary_account_no: Optional[str] = None,
    staff_id_card: Optional[str] = None,
    family_id_cards: Optional[List[str]] = None,
) -> Dict[str, str]:
    """Resolve the identity fields shared by the rule sample builders.

    Explicitly supplied (truthy) arguments take precedence; anything omitted
    or falsy falls back to the identities returned by
    ``resolve_identity_scope(log_id)`` or to the built-in placeholder
    enterprise name / account number.

    Args:
        log_id: Passed to ``resolve_identity_scope`` to look up the default
            staff and family identities.
        primary_enterprise_name: Optional override for ``le_name``.
        primary_account_no: Optional override for ``account_no``.
        staff_id_card: Optional override for the staff id card.
        family_id_cards: Optional list of family id cards; only the first
            entry is used.

    Returns:
        A dict with the keys ``le_name``, ``account_no``, ``staff_id_card``
        and ``family_id_card``.
    """
    scope = resolve_identity_scope(log_id)
    staff_defaults = scope["staff"]
    family_defaults = scope["family"]

    # The default id cards are only read when no override was supplied.
    if staff_id_card:
        staff_card = staff_id_card
    else:
        staff_card = staff_defaults["id_card"]

    if family_id_cards:
        family_cards = list(family_id_cards)
    else:
        family_cards = [family_defaults["id_card"]]

    # Defensive fallback to the staff card should the family list be empty.
    if family_cards:
        family_card = family_cards[0]
    else:
        family_card = staff_card

    context: Dict[str, str] = {}
    context["le_name"] = primary_enterprise_name or "模型测试主体"
    context["account_no"] = primary_account_no or "6222024999999999"
    context["staff_id_card"] = staff_card
    context["family_id_card"] = family_card
    return context
def build_house_or_car_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the two samples registered under ``"HOUSE_OR_CAR_EXPENSE"``.

    One statement carries the staff id card with a house down-payment memo,
    the other carries the family id card with a car down-payment memo. Both
    set ``dr_amount``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A list with two statement dicts, house purchase first.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    # (offset before REFERENCE_NOW, id card, counterparty, memo, amount,
    #  counterparty account)
    purchase_specs = [
        (
            timedelta(days=9, hours=1),
            ctx["staff_id_card"],
            "杭州贝壳房地产经纪有限公司",
            "购买房产首付款",
            680000.0,
            "6222024555500001",
        ),
        (
            timedelta(days=8, hours=2),
            ctx["family_id_card"],
            "兰溪星耀汽车销售服务有限公司",
            "购车首付款",
            380000.0,
            "6222024555500002",
        ),
    ]
    return [
        _build_statement(
            group_id,
            log_id,
            trx_datetime=REFERENCE_NOW - offset,
            cret_no=id_card,
            customer_name=counterparty,
            user_memo=memo,
            cash_type="对公转账",
            dr_amount=amount,
            le_name=ctx["le_name"],
            account_mask_no=ctx["account_no"],
            customer_account_mask_no=counterparty_account,
        )
        for offset, id_card, counterparty, memo, amount, counterparty_account
        in purchase_specs
    ]
def build_tax_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the two samples registered under ``"TAX_EXPENSE"``.

    The first statement uses the staff id card, the second the family id
    card; both use the cash type ``"税务缴款"`` and set ``dr_amount``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A list with two statement dicts.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    # (offset before REFERENCE_NOW, id card, counterparty, memo, amount,
    #  counterparty account)
    tax_specs = [
        (
            timedelta(days=7, hours=1),
            ctx["staff_id_card"],
            "国家金库兰溪市中心支库",
            "个人所得税税款",
            126000.0,
            "6222024555500003",
        ),
        (
            timedelta(days=6, hours=3),
            ctx["family_id_card"],
            "兰溪市税务局",
            "房产税务缴税",
            88000.0,
            "6222024555500004",
        ),
    ]
    samples: List[Dict] = []
    for offset, id_card, counterparty, memo, amount, counterparty_account in tax_specs:
        samples.append(
            _build_statement(
                group_id,
                log_id,
                trx_datetime=REFERENCE_NOW - offset,
                cret_no=id_card,
                customer_name=counterparty,
                user_memo=memo,
                cash_type="税务缴款",
                dr_amount=amount,
                le_name=ctx["le_name"],
                account_mask_no=ctx["account_no"],
                customer_account_mask_no=counterparty_account,
            )
        )
    return samples
def build_single_large_income_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"SINGLE_LARGE_INCOME"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement with ``cr_amount`` of
        18,800,000 on the staff id card.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=5, hours=2)
    income_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="浙江远望贸易有限公司",
        user_memo="经营往来收入",
        cash_type="对公转账",
        cr_amount=18800000.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024666600001",
    )
    return [income_statement]
def build_cumulative_income_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the three samples registered under ``"CUMULATIVE_INCOME"``.

    All three statements share the same counterparty and counterparty
    account, use the staff id card, and set ``cr_amount``.

    NOTE(review): the first entry carries the same field values as the
    statement produced by ``build_single_large_income_samples``; a plan
    that requests both rules will therefore contain it twice. Confirm that
    this is intended.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A list with three statement dicts.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    # (offset before REFERENCE_NOW, memo, credited amount)
    income_specs = [
        (timedelta(days=5, hours=2), "经营往来收入", 18800000.0),
        (timedelta(days=5, hours=1), "项目回款收入", 20800000.0),
        (timedelta(days=4, hours=4), "业务合作收入", 20700000.0),
    ]
    return [
        _build_statement(
            group_id,
            log_id,
            trx_datetime=REFERENCE_NOW - offset,
            cret_no=ctx["staff_id_card"],
            customer_name="浙江远望贸易有限公司",
            user_memo=memo,
            cash_type="对公转账",
            cr_amount=amount,
            le_name=ctx["le_name"],
            account_mask_no=ctx["account_no"],
            customer_account_mask_no="6222024666600001",
        )
        for offset, memo, amount in income_specs
    ]
def build_annual_turnover_supporting_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the three samples registered under ``"ANNUAL_TURNOVER"``.

    All three statements share one counterparty and counterparty account,
    use the staff id card, and set ``cr_amount``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A list with three statement dicts.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    # (offset before REFERENCE_NOW, memo, credited amount)
    turnover_specs = [
        (timedelta(days=15, hours=2), "年度经营回款", 17200000.0),
        (timedelta(days=11, hours=3), "年度项目回款", 17600000.0),
        (timedelta(days=9, hours=4), "年度合作收入", 17800000.0),
    ]
    samples: List[Dict] = []
    for offset, memo, amount in turnover_specs:
        samples.append(
            _build_statement(
                group_id,
                log_id,
                trx_datetime=REFERENCE_NOW - offset,
                cret_no=ctx["staff_id_card"],
                customer_name="浙江金穗供应链有限公司",
                user_memo=memo,
                cash_type="对公转账",
                cr_amount=amount,
                le_name=ctx["le_name"],
                account_mask_no=ctx["account_no"],
                customer_account_mask_no="6222024666600002",
            )
        )
    return samples
def build_large_cash_deposit_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"LARGE_CASH_DEPOSIT"``.

    The statement uses a fixed timestamp (2026-03-10 09:00:00), an empty
    counterparty name, and no counterparty account argument.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement with ``cr_amount`` of
        3,000,000.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    deposited_at = datetime(2026, 3, 10, 9, 0, 0)
    deposit_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=deposited_at,
        cret_no=ctx["staff_id_card"],
        customer_name="",
        user_memo="现金存款",
        cash_type="现金存款",
        cr_amount=3000000.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
    )
    return [deposit_statement]
def build_frequent_cash_deposit_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the six samples registered under ``"FREQUENT_CASH_DEPOSIT"``.

    The deposits fall on 2026-03-10 at 30-minute intervals from 09:00 to
    11:30, each with a different memo and ``cr_amount``. All use the staff
    id card, an empty counterparty name and the cash type ``"现金存款"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A list with six statement dicts in chronological order.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    first_deposit_at = datetime(2026, 3, 10, 9, 0, 0)
    # (memo, credited amount); entry i happens i * 30 minutes after the first.
    memo_and_amount = [
        ("现金存款", 3000000.0),
        ("ATM现金存款", 3100000.0),
        ("自助存款现金存入", 3200000.0),
        ("CRS存款", 3300000.0),
        ("本行ATM存款", 3400000.0),
        ("柜面现金存款", 3500000.0),
    ]
    samples: List[Dict] = []
    for slot, (memo, amount) in enumerate(memo_and_amount):
        samples.append(
            _build_statement(
                group_id,
                log_id,
                trx_datetime=first_deposit_at + timedelta(minutes=30 * slot),
                cret_no=ctx["staff_id_card"],
                customer_name="",
                user_memo=memo,
                cash_type="现金存款",
                cr_amount=amount,
                le_name=ctx["le_name"],
                account_mask_no=ctx["account_no"],
            )
        )
    return samples
def build_large_transfer_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"LARGE_TRANSFER"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement with ``dr_amount`` of
        12,000,000 on the staff id card.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=3, hours=1)
    transfer_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="异地转账平台",
        user_memo="手机银行转账",
        cash_type="转账支出",
        dr_amount=12000000.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024777700001",
    )
    return [transfer_statement]
def build_gambling_sensitive_keyword_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"GAMBLING_SENSITIVE_KEYWORD"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement whose memo is ``"游戏充值"``
        (game top-up) with ``dr_amount`` of 6,888.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=4, hours=2)
    top_up_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="欢乐游戏科技有限公司",
        user_memo="游戏充值",
        cash_type="快捷支付",
        dr_amount=6888.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024888800001",
    )
    return [top_up_statement]
def build_special_amount_transaction_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"SPECIAL_AMOUNT_TRANSACTION"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement with ``dr_amount`` of
        88,888.88 on the staff id card.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=4, hours=1)
    special_amount_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="兰溪特别金额结算中心",
        user_memo="特殊金额转账",
        cash_type="转账支出",
        dr_amount=88888.88,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024888800002",
    )
    return [special_amount_statement]
def build_suspicious_income_keyword_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"SUSPICIOUS_INCOME_KEYWORD"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement whose memo is
        ``"咨询返现收入"`` with ``cr_amount`` of 166,666.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=3, hours=5)
    rebate_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="灰度信息咨询有限公司",
        user_memo="咨询返现收入",
        cash_type="对公转账",
        cr_amount=166666.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024888800003",
    )
    return [rebate_statement]
def build_forex_buy_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"FOREX_BUY_AMT"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement with cash type
        ``"购汇支出"`` and ``dr_amount`` of 126,000.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=2, hours=6)
    forex_buy_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="中国银行外汇业务中心",
        user_memo="个人购汇",
        cash_type="购汇支出",
        dr_amount=126000.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024999900001",
    )
    return [forex_buy_statement]
def build_forex_sell_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"FOREX_SELL_AMT"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement with cash type
        ``"结汇收入"`` and ``cr_amount`` of 132,000.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=2, hours=4)
    forex_sell_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="中国银行外汇业务中心",
        user_memo="个人结汇",
        cash_type="结汇收入",
        cr_amount=132000.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024999900002",
    )
    return [forex_sell_statement]
def build_stock_transfer_large_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"STOCK_TFR_LARGE"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement whose memo is
        ``"证券大额转托管转出"`` with ``dr_amount`` of 560,000.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=2, hours=2)
    custody_transfer_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="国信证券资金账户",
        user_memo="证券大额转托管转出",
        cash_type="转账支出",
        dr_amount=560000.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024999900003",
    )
    return [custody_transfer_statement]
def build_large_stock_trading_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the single sample registered under ``"LARGE_STOCK_TRADING"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A one-element list holding a statement with cash type
        ``"证券交易"`` and ``dr_amount`` of 880,000.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    occurred_at = REFERENCE_NOW - timedelta(days=1, hours=3)
    stock_trade_statement = _build_statement(
        group_id,
        log_id,
        trx_datetime=occurred_at,
        cret_no=ctx["staff_id_card"],
        customer_name="华泰证券资金账户",
        user_memo="证券大额交易买入",
        cash_type="证券交易",
        dr_amount=880000.0,
        le_name=ctx["le_name"],
        account_mask_no=ctx["account_no"],
        customer_account_mask_no="6222024999900004",
    )
    return [stock_trade_statement]
def build_withdraw_cnt_samples(group_id: int, log_id: int, **kwargs) -> List[Dict]:
    """Build the four samples registered under ``"WITHDRAW_CNT"``.

    The withdrawals fall on 2026-03-12 at hourly intervals from 09:00 to
    12:00 and alternate between WeChat and Alipay memos. The counterparty
    name is chosen from the memo: memos containing ``"微信"`` get
    ``"财付通结算账户"``, all others ``"支付宝结算账户"``.

    Args:
        group_id: Forwarded to ``_build_statement``.
        log_id: Forwarded to ``_build_statement`` and used to resolve the
            sample context.
        **kwargs: Optional identity overrides accepted by
            ``_build_sample_context``.

    Returns:
        A list with four statement dicts in chronological order.
    """
    ctx = _build_sample_context(log_id, **kwargs)
    # (timestamp, memo, debited amount)
    withdrawals = [
        (datetime(2026, 3, 12, 9, 0, 0), "微信提现", 8000.0),
        (datetime(2026, 3, 12, 10, 0, 0), "支付宝提现", 9000.0),
        (datetime(2026, 3, 12, 11, 0, 0), "微信提现", 8500.0),
        (datetime(2026, 3, 12, 12, 0, 0), "支付宝提现", 9200.0),
    ]
    samples: List[Dict] = []
    for withdrawn_at, memo, amount in withdrawals:
        if "微信" in memo:
            settlement_account = "财付通结算账户"
        else:
            settlement_account = "支付宝结算账户"
        samples.append(
            _build_statement(
                group_id,
                log_id,
                trx_datetime=withdrawn_at,
                cret_no=ctx["staff_id_card"],
                customer_name=settlement_account,
                user_memo=memo,
                cash_type="提现支出",
                dr_amount=amount,
                le_name=ctx["le_name"],
                account_mask_no=ctx["account_no"],
            )
        )
    return samples
# Sample builders keyed by large-transaction rule code. Looked up by
# build_seed_statements_for_rule_plan for each code listed under
# "large_transaction_hit_rules".
# Insertion order is significant: build_large_transaction_seed_statements
# passes list(LARGE_TRANSACTION_BUILDERS.keys()) as the hit-rule list, so
# statements are generated in the order the entries appear here.
LARGE_TRANSACTION_BUILDERS = {
"HOUSE_OR_CAR_EXPENSE": build_house_or_car_samples,
"TAX_EXPENSE": build_tax_samples,
"SINGLE_LARGE_INCOME": build_single_large_income_samples,
"CUMULATIVE_INCOME": build_cumulative_income_samples,
"ANNUAL_TURNOVER": build_annual_turnover_supporting_samples,
"LARGE_CASH_DEPOSIT": build_large_cash_deposit_samples,
"FREQUENT_CASH_DEPOSIT": build_frequent_cash_deposit_samples,
"LARGE_TRANSFER": build_large_transfer_samples,
}
# Sample builders keyed by phase-1 rule code. Looked up by
# build_seed_statements_for_rule_plan for each code listed under
# "phase1_hit_rules"; every builder takes (group_id, log_id, **kwargs) and
# returns a list of statement dicts.
PHASE1_RULE_BUILDERS = {
"GAMBLING_SENSITIVE_KEYWORD": build_gambling_sensitive_keyword_samples,
"SPECIAL_AMOUNT_TRANSACTION": build_special_amount_transaction_samples,
"SUSPICIOUS_INCOME_KEYWORD": build_suspicious_income_keyword_samples,
"FOREX_BUY_AMT": build_forex_buy_samples,
"FOREX_SELL_AMT": build_forex_sell_samples,
"STOCK_TFR_LARGE": build_stock_transfer_large_samples,
"LARGE_STOCK_TRADING": build_large_stock_trading_samples,
"WITHDRAW_CNT": build_withdraw_cnt_samples,
}
def build_seed_statements_for_rule_plan(
    group_id: int,
    log_id: int,
    rule_plan: Dict,
    **kwargs,
) -> List[Dict]:
    """Build the seed statements for every rule named in ``rule_plan``.

    Rule codes under ``"large_transaction_hit_rules"`` are resolved through
    ``LARGE_TRANSACTION_BUILDERS`` first, then codes under
    ``"phase1_hit_rules"`` through ``PHASE1_RULE_BUILDERS``. Statements are
    concatenated in that order, following the order of the codes in each
    list.

    Args:
        group_id: Forwarded to each sample builder.
        log_id: Forwarded to each sample builder.
        rule_plan: Mapping that may contain the two hit-rule lists. A key
            that is missing, or present with a ``None``/empty value, is
            treated as "no rules requested".
        **kwargs: Forwarded unchanged to each sample builder.

    Returns:
        The concatenated list of statement dicts; empty when no known rule
        code is requested.
    """
    statements: List[Dict] = []

    # ``.get(key, [])`` would still hand back None when the key is present
    # with a null value (e.g. a plan deserialized from JSON), so normalise
    # with ``or`` before iterating.
    for rule_code in rule_plan.get("large_transaction_hit_rules") or ():
        builder = LARGE_TRANSACTION_BUILDERS.get(rule_code)
        # Codes without a registered builder are skipped on purpose.
        if builder is not None:
            statements.extend(builder(group_id, log_id, **kwargs))

    for rule_code in rule_plan.get("phase1_hit_rules") or ():
        builder = PHASE1_RULE_BUILDERS.get(rule_code)
        if builder is not None:
            statements.extend(builder(group_id, log_id, **kwargs))

    return statements
def build_large_transaction_seed_statements(
group_id: int,
log_id: int,
@@ -149,220 +611,15 @@ def build_large_transaction_seed_statements(
staff_id_card: Optional[str] = None,
family_id_cards: Optional[List[str]] = None,
) -> List[Dict]:
le_name = primary_enterprise_name or "模型测试主体"
account_no = primary_account_no or "6222024999999999"
identity_scope = resolve_identity_scope(log_id)
staff_identity = identity_scope["staff"]
family_identity = identity_scope["family"]
selected_staff_id_card = staff_id_card or staff_identity["id_card"]
selected_family_id_cards = list(family_id_cards or [family_identity["id_card"]])
primary_family_id_card = selected_family_id_cards[0] if selected_family_id_cards else selected_staff_id_card
statements: List[Dict] = []
statements.extend([
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=9, hours=1),
cret_no=selected_staff_id_card,
customer_name="杭州贝壳房地产经纪有限公司",
user_memo="购买房产首付款",
cash_type="对公转账",
dr_amount=680000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024555500001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=8, hours=2),
cret_no=primary_family_id_card,
customer_name="兰溪星耀汽车销售服务有限公司",
user_memo="购车首付款",
cash_type="对公转账",
dr_amount=380000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024555500002",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=7, hours=1),
cret_no=selected_staff_id_card,
customer_name="国家金库兰溪市中心支库",
user_memo="个人所得税税款",
cash_type="税务缴款",
dr_amount=126000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024555500003",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=6, hours=3),
cret_no=primary_family_id_card,
customer_name="兰溪市税务局",
user_memo="房产税务缴税",
cash_type="税务缴款",
dr_amount=88000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024555500004",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=5, hours=2),
cret_no=selected_staff_id_card,
customer_name="浙江远望贸易有限公司",
user_memo="经营往来收入",
cash_type="对公转账",
cr_amount=18800000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024666600001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=5, hours=1),
cret_no=selected_staff_id_card,
customer_name="浙江远望贸易有限公司",
user_memo="项目回款收入",
cash_type="对公转账",
cr_amount=20800000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024666600001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=4, hours=4),
cret_no=selected_staff_id_card,
customer_name="浙江远望贸易有限公司",
user_memo="业务合作收入",
cash_type="对公转账",
cr_amount=20700000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024666600001",
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 9, 0, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="现金存款",
cash_type="现金存款",
cr_amount=3000000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 9, 30, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="ATM现金存款",
cash_type="现金存款",
cr_amount=3100000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 10, 0, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="自助存款现金存入",
cash_type="现金存款",
cr_amount=3200000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 10, 30, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="CRS存款",
cash_type="现金存款",
cr_amount=3300000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 11, 0, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="本行ATM存款",
cash_type="现金存款",
cr_amount=3400000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=datetime(2026, 3, 10, 11, 30, 0),
cret_no=selected_staff_id_card,
customer_name="",
user_memo="柜面现金存款",
cash_type="现金存款",
cr_amount=3500000.0,
le_name=le_name,
account_mask_no=account_no,
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=3, hours=1),
cret_no=selected_staff_id_card,
customer_name="异地转账平台",
user_memo="手机银行转账",
cash_type="转账支出",
dr_amount=12000000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024777700001",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=3, hours=2),
cret_no=selected_staff_id_card,
customer_name="跨行转账中心",
user_memo="对外转账",
cash_type="转账支出",
dr_amount=10000000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024777700002",
),
_build_statement(
group_id,
log_id,
trx_datetime=REFERENCE_NOW - timedelta(days=2, hours=5),
cret_no=selected_staff_id_card,
customer_name="跨境转账服务平台",
user_memo="网银转账",
cash_type="转账支出",
dr_amount=9000000.0,
le_name=le_name,
account_mask_no=account_no,
customer_account_mask_no="6222024777700003",
),
])
return statements
return build_seed_statements_for_rule_plan(
group_id=group_id,
log_id=log_id,
rule_plan={
"large_transaction_hit_rules": list(LARGE_TRANSACTION_BUILDERS.keys()),
"phase1_hit_rules": [],
},
primary_enterprise_name=primary_enterprise_name,
primary_account_no=primary_account_no,
staff_id_card=staff_id_card,
family_id_cards=family_id_cards,
)

View File

@@ -9,6 +9,7 @@ from services.statement_service import StatementService
from services.statement_rule_samples import (
DEFAULT_LARGE_TRANSACTION_THRESHOLDS,
build_large_transaction_seed_statements,
build_seed_statements_for_rule_plan,
)
@@ -31,6 +32,44 @@ def test_generate_statements_should_include_seeded_samples_before_noise():
assert any(item["userMemo"] == "购买房产首付款" for item in statements)
def test_build_seed_statements_for_rule_plan_should_only_include_requested_phase1_rules():
    """Only the phase-1 rules named in the plan contribute seed statements."""
    requested_plan = {
        "large_transaction_hit_rules": [],
        "phase1_hit_rules": ["GAMBLING_SENSITIVE_KEYWORD", "FOREX_BUY_AMT"],
    }

    seeded = build_seed_statements_for_rule_plan(
        group_id=1000,
        log_id=20001,
        rule_plan=requested_plan,
    )

    # Requested rules show up through their memos ...
    assert any("游戏" in statement["userMemo"] for statement in seeded)
    assert any("购汇" in statement["userMemo"] for statement in seeded)
    # ... while the securities rules, which were not requested, do not.
    assert not any("证券" in statement["userMemo"] for statement in seeded)
def test_build_seed_statements_for_rule_plan_should_generate_withdraw_cnt_samples():
    """Requesting WITHDRAW_CNT yields at least four WeChat/Alipay withdrawals."""
    requested_plan = {
        "large_transaction_hit_rules": [],
        "phase1_hit_rules": ["WITHDRAW_CNT"],
    }

    seeded = build_seed_statements_for_rule_plan(
        group_id=1000,
        log_id=20001,
        rule_plan=requested_plan,
    )

    withdrawal_count = 0
    for statement in seeded:
        memo = statement["userMemo"]
        if "微信提现" in memo or "支付宝提现" in memo:
            withdrawal_count += 1

    assert withdrawal_count >= 4
def test_large_transaction_seed_should_cover_all_eight_rules():
"""大额交易样本生成器必须覆盖 8 条已实现规则的关键口径。"""
statements = build_large_transaction_seed_statements(group_id=1000, log_id=20001)