Mirror of https://github.com/roostorg/coop
github.com/roostorg/coop
1import { type Dependencies } from '../../iocContainer/index.js';
2import { inject } from '../../iocContainer/utils.js';
3import { unzip2 } from '../../utils/fp-helpers.js';
4
/**
 * Factory for the rule-execution-statistics getter. Closes over the data
 * warehouse client and the tracer, both of which are forwarded to
 * `dataWarehouse.query` on every call of the returned function.
 */
const makeGetRuleAnomalyDetectionaStatistics =
  (
    dataWarehouse: Dependencies['DataWarehouse'],
    tracer: Dependencies['Tracer'],
  ) =>
  /**
   * For each one hour period (starting from the given startTime, or going back
   * indefinitely if no startTime is given), and for each rule given in ruleIds
   * (or for all rules, if ruleIds is not given), it returns the number of times
   * that the rule ran, the number of executions for which it passed, and the
   * number _of distinct users_ for which it passed.
   *
   * This might be extended in the future to allow the caller to customize the
   * window of time over which each pass rate is calculated, but, for now, it's
   * always a one-hour window.
   *
   * NB: Does not return pass rates for time windows that are still in progress
   * by default.
   *
   * @param opts.ruleIds If given, only rows for these rule ids are returned.
   *   Must be non-empty when provided.
   * @param opts.startTime If given, only windows whose start is at or after
   *   this time are returned.
   * @param opts.includePeriodsInProgress When false (the default), windows
   *   whose exclusive end is still in the future are filtered out.
   * @returns One entry per row returned by the warehouse, ordered by window
   *   start, newest first.
   * @throws Error if `ruleIds` is provided but empty.
   */
  async (
    opts: {
      ruleIds?: string[];
      startTime?: Date;
      includePeriodsInProgress?: boolean;
    } = {},
  ) => {
    const { ruleIds, startTime, includePeriodsInProgress = false } = opts;

    // An empty list would render as `rule_id IN ()`, which is not valid SQL,
    // so reject it up front rather than silently returning everything/nothing.
    if (ruleIds && !ruleIds.length) {
      throw new Error('Must provide at least one ruleId to filter by ruleIds.');
    }

    // For rule_id filtering, it'd be amazing if we could just do `rule_id in ?`,
    // and then pass an array as the bind value, but the warehouse client
    // doesn't support arrays as bind values. So, each condition is paired with
    // either a single bind value or an array of them (possibly empty, when the
    // condition has no placeholders), and the bind values are flattened below.
    //
    // NB: we use sysdate(), not current_timestamp() because the former gives a
    // UTC time, which is what we need (current_timestamp() is server-local time).
    const [conditions, conditionBindValues] = unzip2<string, string[] | Date>([
      ...(!includePeriodsInProgress
        ? [['ts_end_exclusive <= SYSDATE()', [] as string[]] as const]
        : []),
      ...(startTime ? [['ts_start_inclusive >= ?', startTime] as const] : []),
      ...(ruleIds
        ? [
            [
              `rule_id IN (${ruleIds.map((_) => '?').join(',')})`,
              ruleIds,
            ] as const,
          ]
        : []),
    ]);

    // Bind values must line up positionally with the `?` placeholders, so the
    // order of the conditions above is also the order of the binds here.
    const bindValues = conditionBindValues.flat();
    const conditionString = conditions.join(' AND ');

    // NOTE(review): an earlier comment here said the query uses GROUP BY to sum
    // passes + runs across rule environments, but the statement below has no
    // GROUP BY; it returns the table's rows as-is. Presumably the table is
    // already aggregated per (rule, version, window) -- confirm.
    const results = await dataWarehouse.query(
      `
      SELECT
        rule_id,
        rule_version,
        num_passes,
        num_runs,
        array_size(passes_distinct_user_ids) as num_distinct_users,
        ts_start_inclusive
      FROM RULE_ANOMALY_DETECTION_SERVICE.RULE_EXECUTION_STATISTICS
      ${conditionString.length ? `WHERE ${conditionString}` : ''}
      ORDER BY ts_start_inclusive DESC;`,
      tracer,
      bindValues,
    );

    return results.map((result: any) => ({
      ruleId: result.RULE_ID,
      // name is a reminder that JS may trim the precision on the Date here,
      // but that should be ok for our purposes.
      approxRuleVersion: new Date(result.RULE_VERSION),
      // nb: the warehouse returned value for a timestamp is a JS Date, but with
      // some extra methods attached to it. These methods include toString, so
      // we cast back to a proper Date to avoid the string representation
      // changing (e.g., when serializing to JSON).
      windowStart: new Date(result.TS_START_INCLUSIVE),
      passCount: result.NUM_PASSES,
      passingUsersCount: result.NUM_DISTINCT_USERS,
      runsCount: result.NUM_RUNS,
    }));
  };
95
// Hand the factory to `inject` together with the names of the two container
// dependencies it takes, in the same order as the factory's parameters.
const getRuleAnomalyDetectionStatistics = inject(
  ['DataWarehouse', 'Tracer'],
  makeGetRuleAnomalyDetectionaStatistics,
);

export default getRuleAnomalyDetectionStatistics;
type RuleAnomalyDetectionStatisticsFactory =
  typeof makeGetRuleAnomalyDetectionaStatistics;

/**
 * Type of the function produced by the factory once its dependencies have
 * been supplied, i.e. the async statistics getter itself.
 */
export type GetRuleAnomalyDetectionStatistics =
  ReturnType<RuleAnomalyDetectionStatisticsFactory>;