Mirror of https://github.com/roostorg/coop
github.com/roostorg/coop
1import { sql, type Kysely } from 'kysely';
2import _ from 'lodash';
3import { type CamelCasedProperties, type Simplify } from 'type-fest';
4
5import { camelToSnakeCase } from '../../utils/misc.js';
6import { type CamelToSnakeCase } from '../../utils/typescript-types.js';
7import {
8 type ConditionSet,
9 type RuleStatus,
10} from '../moderationConfigService/index.js';
11import { type ReportingServicePg } from './dbTypes.js';
12
13// A rule version as stored in the db.
14type ReportingRuleVersion = {
15 id: string;
16 version: Date;
17 name: string;
18 status: Exclude<RuleStatus, typeof RuleStatus.EXPIRED>;
19 org_id: string;
20 creator_id: string;
21 condition_set: ConditionSet;
22 is_current: boolean;
23};
24
25type PublicReportingRuleVersion = Simplify<
26 CamelCasedProperties<
27 Pick<
28 ReportingRuleVersion,
29 | 'id'
30 | 'name'
31 | 'status'
32 | 'org_id'
33 | 'creator_id'
34 | 'condition_set'
35 | 'is_current'
36 >
37 > & {
38 approxVersion: Date;
39 exactVersion: string;
40 }
41>;
42
43export type VersionedField = Exclude<
44 keyof PublicReportingRuleVersion,
45 'id' | 'isCurrent' | 'approxVersion' | 'exactVersion'
46>;
47
48/**
49 * Returns a simplified version history that only reports a new version for
50 * when a field of interest changed. The versions are returned in
51 * chronological order (i.e., older versions first).
52 *
53 * Lots and lots of types of changes can create a new rule version, but each
54 * user of the rule version history is only gonna care about changes to a
55 * subset of the fields. E.g., one consumer might just wanna know when a
56 * rule's actions changed, while another might wanna know when its conditions
57 * changed. So, this function allows consumers to specify a subset of fields
58 * they're interested in, and get a condensed version history that only notes
59 * the new versions resulting from a change to one of those fields.
60 *
61 * @param getRawHistory A function that knows how to query the underlying data.
62 * This is only passed in to facilitate mocking it during testing. When the
63 * service function is registered for DI, it'll have been partially applied
64 * away (see above).
65 * @param fields - The fields to look for changes in.
66 * @param ruleIds - The rules to get the history for (defaults to all rules).
67 * @param startDate - If given, we'll only return versions created later than
68 * this date.
69 */
70export async function getSimplifiedRuleHistory<K extends VersionedField>(
71 getRawHistory: (
72 fields: readonly K[],
73 ruleIds?: readonly string[],
74 numVersions?: number,
75 ) => Promise<
76 (Partial<Pick<PublicReportingRuleVersion, K>> &
77 Pick<PublicReportingRuleVersion, 'id' | 'exactVersion'>)[]
78 >,
79 fields: readonly K[],
80 ruleIds?: readonly string[],
81 startDate?: Date,
82) {
83 // NB: if given a start date, it's tricky to filter in the db, because the
84 // version in effect at the start date could've been created an arbitrary
85 // amount of time before the start date. E.g., if the start date is
86 // 2022-06-01, we can't just ask for versions created on or after
87 // 2022-06-01, because that will exclude the version that was in effect on
88 // 2022-06-01, which could've been created any time before that. So, we
89 // want to return "all versions created on or after the start date, plus
90 // one version right before that", which is hard to experss in sql [though
91 // we could do it by issuing two queries, if not filtering in the db
92 // eventually becomes a perf issue]. For now, though, we get back all
93 // the versions and filter here, client-side.
94 const allVersions = (await getRawHistory(fields, ruleIds)).map((it) => ({
95 ...it,
96 approxVersion: new Date(it.exactVersion),
97 }));
98
99 return startDate
100 ? allVersions.filter(
101 (_, i) =>
102 // Always keep the last version (which is what we're looking at if
103 // i == allVersions.length - 1), because that's definitely still
104 // in effect at the start date. Then, if we're not looking at the
105 // last version, keep this version if the _next_ version was
106 // created after the start date (which'd mean this was the version
107 // in effect _at_ the start date). The overall result here is to
108 // keep last version created at or before the start date, plus all
109 // created after it.
110 i === allVersions.length - 1 ||
111 allVersions[i + 1].approxVersion > startDate,
112 )
113 : allVersions;
114}
115
116/**
117 * This function builds a SQL query that'll be run as part of determining the
118 * version history of a rule with respect to the given fields.
119 *
120 * It's really an internal implementation detail, but it's exported for testing,
121 * since we don't have a great way to mock kysely right now that'll let us see
122 * the query being executed and sub in a mock return value.
123 *
124 * The return value only exposes the `execute()` and `compile()` methods, which
125 * is justified by the fact that we don't want callers to be able to modify the
126 * query further (as it is an implementation detail), but is actually motivated
127 * by the fact that throwing away all the interim kysely SelectQueryBuilder type
128 * params makes it easier for us to make TS happy when we partially apply
129 * getSimplifiedRuleHistory with this.
130 */
131export function buildSimplifiedHistoryQuery<K extends VersionedField>(
132 db: Kysely<ReportingServicePg>,
133 fields: readonly K[],
134 ruleIds?: readonly string[],
135 numVersions?: number,
136) {
137 // To build our query, we convert the camelCased fields from the public
138 // interface to their snake_case names in the db. In terms of the types, we:
139 //
140 // 1. want TS to verify that `CamelToSnakeCase<K> extends keyof RuleVersion`.
141 // I.e., we want TS to check that each snake_cased-version of an incoming
142 // field actually corresponds to a column in the db. Unfortunately, TS
143 // isn't smart enough for this. But, it can check that
144 // `CamelToSnakeCase<VersionedField> extends keyof RuleVersion`, so we'll
145 // have it do that. We then know that `K extends VersionedField`.
146 //
147 // 2. We want `K` to influence the return type. E.g., if K is `'name'`, then
148 // the returned object can't have any other PublicRuleVersion fields
149 // (except `id` and `exactVersion`). However, we have to throw away some
150 // type information about exactly which fields we're selecting by "up
151 // casting" from CamelToSnakeCase<K>[] to simply (keyof RuleVersion)[],
152 // because `CamelToSnakeCase<K>` is too dynamic for kysely (which'll try to
153 // inspect these strings to verify that they're valid column names,
154 // possibly with aliases).
155 //
156 // The type annotation on `dbFields` has TS verify point (1) above, and the
157 // cast on the return type from `execute()` recovers the info about which
158 // fields might have been selected in our final results.
159 const dbFieldsAndAliases = _.uniq(fields).map((publicField) => [
160 camelToSnakeCase(publicField),
161 publicField,
162 ]);
163
164 const dbFields: readonly (keyof ReportingRuleVersion)[] =
165 dbFieldsAndAliases.map(
166 ([dbField]) => dbField,
167 ) as readonly CamelToSnakeCase<VersionedField>[];
168
169 // The idea here is to select all version rows for the relevant rules, but
170 // then add a column to each result that identifies all rows that have the
171 // same values for the consumer's `fields` of interest. This group identifier
172 // is just the first version that had that set of values for the given fields.
173 // This choice of identifier proves convenient below, since we want to merge
174 // the first version that had each set of `fields` values with version rows
175 // that immediately follow it, when the only changes in the immediately
176 // following rows are irrelevant from the POV of our consumer's `fields`.
177 const versionsWithFirstVersionGroupId = db
178 .selectFrom('reporting_rules.reporting_rule_versions')
179 .select([
180 ...dbFields,
181 'id',
182 'version',
183 sql<string>`first_value(version)
184 OVER (
185 PARTITION BY id, ${sql.join(dbFields.map((it) => sql.ref(it)))}
186 ORDER BY version asc
187 )`.as('first_version'),
188 ])
189 .$if(ruleIds != null, (qb) => qb.where('id', 'in', ruleIds!))
190 .orderBy('version', 'asc');
191
192 // When we build our condensed version history, unfortunately we can't just
193 // return the first row that had each set of `fields` values (with
194 // first_version as it's version), because a set of values could come up
195 // twice, with a change in between, and we need to reflect that. E.g.,
196 // imagine the user's interested in one field `name`; the version history
197 // could be:
198 //
199 // { name: 'xyz', version: date1, first_version: date1 }
200 // { name: 'xyz', version: date2, first_version: date1 } // something besides name changed; first_version still good.
201 // { name: 'abc', version: date3, first_version: date3 } // new name, but also a new first_version
202 // { name: 'xyz', version: date4, first_version: date1 } // whoops, can't use first version for this
203 //
204 // So, the final history needs `('xyz', date1), ('abc', date3), ('xyz', date4)`.
205 // To do that, there are a lot of edge cases, but we basically identify the
206 // moments that a version changes in a salient way (i.e., when the set of
207 // salient fields differs between adjacent raw versions), and only track the
208 // versions of those moments (setting other rows to have a null version).
209 // Then, we discard the null-versioned rows.
210 const simplifiedHistoryQuery = db
211 .selectFrom(
212 db
213 .selectFrom(versionsWithFirstVersionGroupId.as('t1'))
214 .select([
215 'id',
216 ...dbFields,
217 sql<string | null>`CASE
218 WHEN lag(first_version, 1, version)
219 OVER (PARTITION BY id ORDER BY version asc) <> first_version
220 THEN version
221 WHEN first_version = version THEN version
222 ELSE NULL
223 END`.as('version'),
224 ])
225 .as('t2'),
226 )
227 .select([
228 'id',
229 ...dbFieldsAndAliases.map(([field, alias]) => sql.ref(field).as(alias)),
230 sql<string>`version::text`.as('exactVersion'),
231 ])
232 .where('version', 'is not', null);
233
234 if (numVersions) {
235 simplifiedHistoryQuery.limit(numVersions);
236 }
237
238 return {
239 async execute() {
240 return simplifiedHistoryQuery.execute() as Promise<
241 // NB: this is partial because K could be 'name' | 'tags' when the
242 // fields array is actually just ['name'], for example.
243 (Partial<Pick<PublicReportingRuleVersion, K>> &
244 Pick<PublicReportingRuleVersion, 'id' | 'exactVersion'>)[]
245 >;
246 },
247 compile() {
248 return simplifiedHistoryQuery.compile();
249 },
250 };
251}