Mirror of https://github.com/roostorg/coop github.com/roostorg/coop
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 557ff54b2b435e5f1e789c6a8a4e1bebf2d7deb6 115 lines 5.2 kB view raw
import lodash from 'lodash';

import { type Dependencies } from '../../iocContainer/index.js';
import { inject } from '../../iocContainer/utils.js';
import { WEEK_MS } from '../../utils/time.js';
import { type RuleAlarmStatus } from '../moderationConfigService/index.js';
import getRuleAlarmStatus from './getRuleAlarmStatus.js';

const { mapValues, groupBy } = lodash;

/**
 * Factory (wired through the IoC container) for a function that computes,
 * for every rule with anomaly-detection statistics in the past week, the
 * rule's alarm status plus the pass rates of its two leading stat periods.
 *
 * The returned function takes no arguments and resolves to an object keyed
 * by rule id.
 */
const makeGetCurrentPeriodRuleAlarmStatuses = inject(
  ['getRuleAnomalyDetectionStatistics', 'getSimplifiedRuleHistory'],
  (getRuleStats, getRuleHistory) =>
    async function () {
      const now = new Date();
      const oneWeekAgo = new Date(now.valueOf() - WEEK_MS);

      // The statistics and the rule history do not depend on each other, so
      // fetch them concurrently rather than one after the other.
      const [passStats, minVersionsByRule] = await Promise.all([
        getRuleStats({ startTime: oneWeekAgo }),
        getMinimumAnomalyDetectionRuleVersions(
          getRuleHistory,
          undefined,
          oneWeekAgo,
        ),
      ]);
      const statsByRule = groupBy(passStats, (it) => it.ruleId);

      return mapValues(statsByRule, (ruleStats) => {
        // groupBy never produces an empty group, so the first entry exists.
        const { ruleId } = ruleStats[0];

        // getRuleAlarmStatus assumes that each rule has an underlying pass rate
        // (i.e., percentage of executions for which the rule matches), and
        // determines whether a rule is in alarm by looking for improbable
        // deviations from that pass rate. However, this didn't work well for
        // some platforms. The basic issue is that there's actually a feedback
        // loop involved that causes a single period's pass rate to sometimes
        // deviate very dramatically from the long-run pass rate, without us
        // being in a state of alarm. Specifically, what happens is: on some
        // platforms, a user is notified right away when one of their pieces of
        // content is deleted by a rule; this very often leads them to try to
        // post the same piece of content again, which usually also gets caught
        // by the rule; this duplicate posting then triggers a spike in the
        // pass rate, well above what's explicable by the random variation in
        // pass rate we'd expect.
        //
        // This repeat posting phenomenon doesn't seem like it should be a big
        // deal. However, because some platform rules pass so infrequently --
        // some rules' long-run pass rates are under 1 in 10,000, even counting
        // any repeat posts -- it turned out that repeat posting by users who
        // hit a rule was triggering the majority of our alarms, and often
        // accounted for 50% or more of a rule's passes in a given period. To
        // work around this issue, but still maintain the same basic anomaly
        // detection model, we represent the number of passes _not_ as the
        // number of times that the rule passed in a period, but rather as the
        // number of distinct users that caused the rule to pass in that period.
        //
        // NOTE(review): if a rule has no entry in minVersionsByRule, the
        // comparison below is against `undefined`, which is always false, so
        // every stat for that rule is dropped. Confirm the history lookup
        // always returns an entry for each rule that has stats.
        const applicableStats = ruleStats
          .filter((it) => it.approxRuleVersion >= minVersionsByRule[ruleId])
          .map((it) => ({ passes: it.passingUsersCount, runs: it.runsCount }));

        return {
          status: getRuleAlarmStatus(applicableStats),
          meta: {
            // NOTE(review): index 0 is reported as the "last" period and
            // index 1 as the "second to last", so the stats are presumably
            // ordered most-recent-first -- confirm against the stats source.
            lastPeriodPassRate: computePassRate(applicableStats[0]),
            secondToLastPeriodPassRate: computePassRate(applicableStats[1]),
          },
        };
      });
    },
);

export default makeGetCurrentPeriodRuleAlarmStatuses;
export type GetCurrentPeriodRuleAlarmStatuses = () => Promise<{
  [ruleId: string]: {
    status: RuleAlarmStatus;
    meta: {
      lastPeriodPassRate: number | undefined;
      secondToLastPeriodPassRate: number | undefined;
    };
  };
}>;

/**
 * Pass rate (passes / runs) for a single period.
 *
 * @param periodStats The period's counts, or `undefined` when the period is
 *   missing from the applicable stats.
 * @returns `undefined` when the period is missing, `0` when the rule never ran
 *   in the period (avoids dividing by zero), otherwise `passes / runs`.
 */
function computePassRate(
  periodStats: { passes: number; runs: number } | undefined,
): number | undefined {
  if (periodStats === undefined) {
    return undefined;
  }

  return periodStats.runs === 0 ? 0 : periodStats.passes / periodStats.runs;
}

/**
 * For each of the passed in rule ids, returns the minimum (i.e., oldest)
 * version of the rule that's still identical to the current version of the rule
 * _for anomaly detection purposes_ (i.e., whose historical pass rate data is
 * still applicable). This leverages the fact that some changes that create a
 * new rule version, like changing a rule's associated actions (or name, etc),
 * don't actually influence the rule's pass rate.
 *
 * @param getRuleHistory Dependency used to load the simplified rule history.
 * @param ruleIds Optional rule ids, forwarded as-is to `getRuleHistory`.
 * @param startTime Optional start time, forwarded as-is to `getRuleHistory`.
 * @returns An object mapping each rule id found in the history to the
 *   `approxVersion` of that rule's final history entry.
 */
async function getMinimumAnomalyDetectionRuleVersions(
  getRuleHistory: Dependencies['getSimplifiedRuleHistory'],
  ruleIds?: string[],
  startTime?: Date,
) {
  // Returns the versions of the requested rules, where each 'version'
  // indicates a change that's _actually salient to anomaly detection_ (i.e.,
  // that affects the rule's pass rate).
  const ruleVersionHistoriesByRule = groupBy(
    await getRuleHistory(['conditionSet', 'itemTypeIds'], ruleIds, startTime),
    (it) => it.id,
  );

  // Find the version representing the date when this rule's
  // anomaly-detection-relevant fields were most-recently changed.
  // NOTE(review): this takes the final entry of each rule's history, so it
  // assumes the history is ordered oldest-first -- confirm against
  // getSimplifiedRuleHistory.
  return mapValues(
    ruleVersionHistoriesByRule,
    (versions) => versions[versions.length - 1].approxVersion,
  );
}