Mirror of https://github.com/roostorg/coop
github.com/roostorg/coop
1import lodash from 'lodash';
2
3import { type Dependencies } from '../../iocContainer/index.js';
4import { inject } from '../../iocContainer/utils.js';
5import { WEEK_MS } from '../../utils/time.js';
6import { type RuleAlarmStatus } from '../moderationConfigService/index.js';
7import getRuleAlarmStatus from './getRuleAlarmStatus.js';
8
9const { mapValues, groupBy } = lodash;
10
/**
 * Pass rate for a single period: passes divided by runs.
 *
 * @param periodStats The period's counts, or `undefined` if there is no such
 *   period.
 * @returns `undefined` when `periodStats` is missing, `0` when the period had
 *   no runs (so we never produce a 0/0 `NaN`), and `passes / runs` otherwise.
 */
function getPeriodPassRate(
  periodStats: { passes: number; runs: number } | undefined,
): number | undefined {
  if (periodStats === undefined) {
    return undefined;
  }

  return periodStats.runs === 0 ? 0 : periodStats.passes / periodStats.runs;
}

/**
 * Builds the function that reports, for every rule with anomaly detection
 * statistics starting one week ago, the rule's alarm status plus the pass
 * rates of its two leading periods.
 *
 * The returned function takes no arguments and resolves to an object keyed by
 * rule id. Rules with no statistics in the window do not appear in the result.
 *
 * NOTE(review): `lastPeriodPassRate` / `secondToLastPeriodPassRate` are read
 * from indexes 0 and 1 of each rule's stats, so this presumably relies on
 * `getRuleAnomalyDetectionStatistics` returning the most recent period first
 * -- confirm against that dependency.
 */
const makeGetCurrentPeriodRuleAlarmStatuses = inject(
  ['getRuleAnomalyDetectionStatistics', 'getSimplifiedRuleHistory'],
  (getRuleStats, getRuleHistory) =>
    async function () {
      const now = new Date();
      const oneWeekAgo = new Date(now.valueOf() - WEEK_MS);

      // The stats and the rule history don't depend on each other, so fetch
      // them concurrently rather than one after the other.
      const [passStats, minVersionsByRule] = await Promise.all([
        getRuleStats({ startTime: oneWeekAgo }),
        getMinimumAnomalyDetectionRuleVersions(
          getRuleHistory,
          undefined,
          oneWeekAgo,
        ),
      ]);

      const statsByRule = groupBy(passStats, (it) => it.ruleId);

      return mapValues(statsByRule, (ruleStats) => {
        // groupBy never produces an empty group, so index 0 always exists.
        const { ruleId } = ruleStats[0];
        const minVersion = minVersionsByRule[ruleId];

        // getRuleAlarmStatus assumes that each rule has an underlying pass rate
        // (i.e., percentage of executions for which the rule matches), and
        // determines whether a rule is in alarm by looking for improbable
        // deviations from that pass rate. However, this didn't work well for
        // some platforms. The basic issue is that there's actually a feedback
        // loop involved that causes a single period's pass rate to sometimes
        // deviate very dramatically from the long-run pass rate, without us
        // being in a state of alarm. Specifically, what happens is: on some
        // platforms, a user is notified right away when one of their pieces of
        // content is deleted by a rule; this very often leads them to try to
        // post the same piece of content again, which usually also gets caught
        // by the rule; this duplicate posting then triggers a spike in the pass
        // rate, well above what's explicable by the random variation in pass
        // rate we'd expect. This repeat posting phenomenon doesn't seem like it
        // should be a big deal. However, because some platform rules pass so
        // infrequently -- some rules' long-run pass rates are under 1 in
        // 10,000, even counting any repeat posts -- it turned out that repeat
        // posting by users who hit a rule was triggering the majority of our
        // alarms, and often accounted for 50% or more of a rule's passes in a
        // given period. To work around this issue, but still maintain the same
        // basic anomaly detection model, we represent the number of passes
        // _not_ as the number of times that the rule passed in a period, but
        // rather as the number of distinct users that caused the rule to pass
        // in that period.
        //
        // Stats recorded against versions older than `minVersion` are dropped,
        // since the rule's pass-rate-relevant fields have changed since then.
        // If the history has no entry for this rule, `minVersion` is undefined
        // and the comparison below is false for every row, leaving no
        // applicable stats.
        const applicableStats = ruleStats
          .filter((it) => it.approxRuleVersion >= minVersion)
          .map((it) => ({ passes: it.passingUsersCount, runs: it.runsCount }));

        return {
          status: getRuleAlarmStatus(applicableStats),
          meta: {
            lastPeriodPassRate: getPeriodPassRate(applicableStats[0]),
            secondToLastPeriodPassRate: getPeriodPassRate(applicableStats[1]),
          },
        };
      });
    },
);

export default makeGetCurrentPeriodRuleAlarmStatuses;
/**
 * Signature of the function built by `makeGetCurrentPeriodRuleAlarmStatuses`.
 *
 * Resolves to an object keyed by rule id. For each rule:
 * - `status` is the rule's alarm status.
 * - `meta.lastPeriodPassRate` and `meta.secondToLastPeriodPassRate` are the
 *   pass rates of the first and second applicable periods, or `undefined`
 *   when the rule has no applicable stats for that period.
 */
export type GetCurrentPeriodRuleAlarmStatuses = () => Promise<
  Record<
    string,
    {
      status: RuleAlarmStatus;
      meta: {
        lastPeriodPassRate: number | undefined;
        secondToLastPeriodPassRate: number | undefined;
      };
    }
  >
>;
87
/**
 * Looks up, per rule, the minimum (i.e., oldest) version of the rule that is
 * still equivalent to the rule's current version _as far as anomaly detection
 * is concerned_ -- that is, the oldest version whose historical pass rate data
 * can still be compared against today's. Many edits that bump a rule's version
 * (changing its actions, its name, etc.) have no bearing on its pass rate, so
 * they are ignored here.
 *
 * @param getRuleHistory Dependency used to load the simplified rule history.
 * @param ruleIds Forwarded to `getRuleHistory` unchanged; may be omitted.
 * @param startTime Forwarded to `getRuleHistory` unchanged; may be omitted.
 * @returns An object keyed by rule id whose values are the `approxVersion` of
 *   the last history entry returned for that rule.
 */
async function getMinimumAnomalyDetectionRuleVersions(
  getRuleHistory: Dependencies['getSimplifiedRuleHistory'],
  ruleIds?: string[],
  startTime?: Date,
) {
  // Ask only for versions that differ in a field that's _actually salient to
  // anomaly detection_ (i.e., one that affects the rule's pass rate).
  const salientHistory = await getRuleHistory(
    ['conditionSet', 'itemTypeIds'],
    ruleIds,
    startTime,
  );
  const historyByRuleId = groupBy(salientHistory, (entry) => entry.id);

  // The last entry for a rule stands for the most recent change to its
  // anomaly-detection-relevant fields.
  // NOTE(review): this presumes the history comes back oldest-first -- confirm
  // against getSimplifiedRuleHistory.
  return mapValues(historyByRuleId, (entries) => {
    const latestSalientEntry = entries[entries.length - 1];
    return latestSalientEntry.approxVersion;
  });
}