Mirror of https://github.com/roostorg/coop
github.com/roostorg/coop
1/**
2 * @fileoverview This service exports various functions for querying rule
3 * history/versions.
4 *
5 * Making the rule_versions view into a sequelize model wouldn't really make
6 * sense, since the view is read-only, so many of the sequelize static methods
7 * (`Model.update()` et al, plus the general idea of mutable instances to later
8 * `save()`) don't apply. So we just expose these queries.
9 */
10import { sql, type Kysely } from 'kysely';
11import _ from 'lodash';
12import { type CamelCasedProperties, type Simplify } from 'type-fest';
13
14import { inject } from '../../iocContainer/index.js';
15import { camelToSnakeCase } from '../../utils/misc.js';
16import {
17 type Bind1,
18 type CamelToSnakeCase,
19} from '../../utils/typescript-types.js';
20import {
21 type ConditionSet,
22 type RuleStatus,
23} from '../moderationConfigService/index.js';
24
25// A rule version as stored in the db.
26type RuleVersion = {
27 id: string;
28 version: Date;
29 name: string;
30 status_if_unexpired: Exclude<RuleStatus, typeof RuleStatus.EXPIRED>;
31 tags: string[];
32 max_daily_actions: number;
33 org_id: string;
34 creator_id: string;
35 expiration_time?: Date;
36 condition_set: ConditionSet;
37 action_ids: string[];
38 item_type_ids: string[];
39 is_current: boolean;
40};
41
42// A rule version as exposed from this service. We pick so that new properties
43// on RuleVersion don't automatically become part of the public interface.
44// NB: `version` explicitly excluded, in favor of the two versioning keys below.
45type PublicRuleVersion = Simplify<
46 CamelCasedProperties<
47 Pick<
48 RuleVersion,
49 | 'id'
50 | 'name'
51 | 'status_if_unexpired'
52 | 'tags'
53 | 'max_daily_actions'
54 | 'org_id'
55 | 'creator_id'
56 | 'expiration_time'
57 | 'condition_set'
58 | 'action_ids'
59 | 'item_type_ids'
60 | 'is_current'
61 >
62 > & {
63 approxVersion: Date;
64 exactVersion: string;
65 }
66>;
67
68// The db tables that this service should be able to "see"/query.
69type RuleHistoryServicePg = { rule_versions: RuleVersion };
70
71type VersionedField = Exclude<
72 keyof PublicRuleVersion,
73 'id' | 'isCurrent' | 'approxVersion' | 'exactVersion'
74>;
75
76export const makeGetSimplifiedRuleHistory = inject(
77 ['KyselyPg'],
78 function (db: Kysely<RuleHistoryServicePg>) {
79 return async function <K extends VersionedField>(
80 ...getHistoryArgs: Parameters<Bind1<typeof getSimplifiedRuleHistory<K>>>
81 ) {
82 return getSimplifiedRuleHistory<K>(
83 async (...buildQueryArgs) =>
84 buildSimplifiedHistoryQuery(db, ...buildQueryArgs).execute(),
85 ...getHistoryArgs,
86 );
87 };
88 },
89);
90
91/**
92 * Returns a simplified version history that only reports a new version for
93 * when a field of interest changed. The versions are returned in
94 * chronological order (i.e., older versions first).
95 *
96 * Lots and lots of types of changes can create a new rule version, but each
97 * user of the rule version history is only gonna care about changes to a
98 * subset of the fields. E.g., one consumer might just wanna know when a
99 * rule's actions changed, while another might wanna know when its conditions
100 * changed. So, this function allows consumers to specify a subset of fields
101 * they're interested in, and get a condensed version history that only notes
102 * the new versions resulting from a change to one of those fields.
103 *
104 * @param getRawHistory A function that knows how to query the underlying data.
105 * This is only passed in to facilitate mocking it during testing. When the
106 * service function is registered for DI, it'll have been partially applied
107 * away (see above).
108 * @param fields - The fields to look for changes in.
109 * @param ruleIds - The rules to get the history for (defaults to all rules).
110 * @param startDate - If given, we'll only return versions created later than
111 * this date.
112 */
113export async function getSimplifiedRuleHistory<K extends VersionedField>(
114 getRawHistory: (
115 fields: readonly K[],
116 ruleIds?: readonly string[],
117 numVersions?: number,
118 ) => Promise<
119 (Partial<Pick<PublicRuleVersion, K>> &
120 Pick<PublicRuleVersion, 'id' | 'exactVersion'>)[]
121 >,
122 fields: readonly K[],
123 ruleIds?: readonly string[],
124 startDate?: Date,
125) {
126 // NB: if given a start date, it's tricky to filter in the db, because the
127 // version in effect at the start date could've been created an arbitrary
128 // amount of time before the start date. E.g., if the start date is
129 // 2022-06-01, we can't just ask for versions created on or after
130 // 2022-06-01, because that will exclude the version that was in effect on
131 // 2022-06-01, which could've been created any time before that. So, we
132 // want to return "all versions created on or after the start date, plus
133 // one version right before that", which is hard to experss in sql [though
134 // we could do it by issuing two queries, if not filtering in the db
135 // eventually becomes a perf issue]. For now, though, we get back all
136 // the versions and filter here, client-side.
137 const allVersions = (await getRawHistory(fields, ruleIds)).map((it) => ({
138 ...it,
139 approxVersion: new Date(it.exactVersion),
140 }));
141
142 return startDate
143 ? allVersions.filter(
144 (_, i) =>
145 // Always keep the last version (which is what we're looking at if
146 // i == allVersions.length - 1), because that's definitely still
147 // in effect at the start date. Then, if we're not looking at the
148 // last version, keep this version if the _next_ version was
149 // created after the start date (which'd mean this was the version
150 // in effect _at_ the start date). The overall result here is to
151 // keep last version created at or before the start date, plus all
152 // created after it.
153 i === allVersions.length - 1 ||
154 allVersions[i + 1].approxVersion > startDate,
155 )
156 : allVersions;
157}
158
159/**
160 * This function builds a SQL query that'll be run as part of determining the
161 * version history of a rule with respect to the given fields.
162 *
163 * It's really an internal implementation detail, but it's exported for testing,
164 * since we don't have a great way to mock kysely right now that'll let us see
165 * the query being executed and sub in a mock return value.
166 *
167 * The return value only exposes the `execute()` and `compile()` methods, which
168 * is justified by the fact that we don't want callers to be able to modify the
169 * query further (as it is an implementation detail), but is actually motivated
170 * by the fact that throwing away all the interim kysely SelectQueryBuilder type
171 * params makes it easier for us to make TS happy when we partially apply
172 * getSimplifiedRuleHistory with this.
173 */
174export function buildSimplifiedHistoryQuery<K extends VersionedField>(
175 db: Kysely<RuleHistoryServicePg>,
176 fields: readonly K[],
177 ruleIds?: readonly string[],
178 numVersions?: number,
179) {
180 // To build our query, we convert the camelCased fields from the public
181 // interface to their snake_case names in the db. In terms of the types, we:
182 //
183 // 1. want TS to verify that `CamelToSnakeCase<K> extends keyof RuleVersion`.
184 // I.e., we want TS to check that each snake_cased-version of an incoming
185 // field actually corresponds to a column in the db. Unfortunately, TS
186 // isn't smart enough for this. But, it can check that
187 // `CamelToSnakeCase<VersionedField> extends keyof RuleVersion`, so we'll
188 // have it do that. We then know that `K extends VersionedField`.
189 //
190 // 2. We want `K` to influence the return type. E.g., if K is `'name'`, then
191 // the returned object can't have any other PublicRuleVersion fields
192 // (except `id` and `exactVersion`). However, we have to throw away some
193 // type information about exactly which fields we're selecting by "up
194 // casting" from CamelToSnakeCase<K>[] to simply (keyof RuleVersion)[],
195 // because `CamelToSnakeCase<K>` is too dynamic for kysely (which'll try to
196 // inspect these strings to verify that they're valid column names,
197 // possibly with aliases).
198 //
199 // The type annotation on `dbFields` has TS verify point (1) above, and the
200 // cast on the return type from `execute()` recovers the info about which
201 // fields might have been selected in our final results.
202 const dbFieldsAndAliases = _.uniq(fields).map((publicField) => [
203 camelToSnakeCase(publicField),
204 publicField,
205 ]);
206
207 const dbFields: readonly (keyof RuleVersion)[] = dbFieldsAndAliases.map(
208 ([dbField]) => dbField,
209 ) as readonly CamelToSnakeCase<VersionedField>[];
210
211 // The idea here is to select all version rows for the relevant rules, but
212 // then add a column to each result that identifies all rows that have the
213 // same values for the consumer's `fields` of interest. This group identifier
214 // is just the first version that had that set of values for the given fields.
215 // This choice of identifier proves convenient below, since we want to merge
216 // the first version that had each set of `fields` values with version rows
217 // that immediately follow it, when the only changes in the immediately
218 // following rows are irrelevant from the POV of our consumer's `fields`.
219 const versionsWithFirstVersionGroupId = db
220 .selectFrom('rule_versions')
221 .select([
222 ...dbFields,
223 'id',
224 'version',
225 sql<string>`first_value(version)
226 OVER (
227 PARTITION BY id, ${sql.join(dbFields.map((it) => sql.ref(it)))}
228 ORDER BY version asc
229 )`.as('first_version'),
230 ])
231 .$if(ruleIds != null, (qb) => qb.where('id', 'in', ruleIds!))
232 .orderBy('version', 'asc');
233
234 // When we build our condensed version history, unfortunately we can't just
235 // return the first row that had each set of `fields` values (with
236 // first_version as it's version), because a set of values could come up
237 // twice, with a change in between, and we need to reflect that. E.g.,
238 // imagine the user's interested in one field `name`; the version history
239 // could be:
240 //
241 // { name: 'xyz', version: date1, first_version: date1 }
242 // { name: 'xyz', version: date2, first_version: date1 } // something besides name changed; first_version still good.
243 // { name: 'abc', version: date3, first_version: date3 } // new name, but also a new first_version
244 // { name: 'xyz', version: date4, first_version: date1 } // whoops, can't use first version for this
245 //
246 // So, the final history needs `('xyz', date1), ('abc', date3), ('xyz', date4)`.
247 // To do that, there are a lot of edge cases, but we basically identify the
248 // moments that a version changes in a salient way (i.e., when the set of
249 // salient fields differs between adjacent raw versions), and only track the
250 // versions of those moments (setting other rows to have a null version).
251 // Then, we discard the null-versioned rows.
252 const simplifiedHistoryQuery = db
253 .selectFrom(
254 db
255 .selectFrom(versionsWithFirstVersionGroupId.as('t1'))
256 .select([
257 'id',
258 ...dbFields,
259 sql<string | null>`CASE
260 WHEN lag(first_version, 1, version)
261 OVER (PARTITION BY id ORDER BY version asc) <> first_version
262 THEN version
263 WHEN first_version = version THEN version
264 ELSE NULL
265 END`.as('version'),
266 ])
267 .as('t2'),
268 )
269 .select([
270 'id',
271 ...dbFieldsAndAliases.map(([field, alias]) => sql.ref(field).as(alias)),
272 sql<string>`version::text`.as('exactVersion'),
273 ])
274 .where('version', 'is not', null);
275
276 if (numVersions) {
277 simplifiedHistoryQuery.limit(numVersions);
278 }
279
280 return {
281 async execute() {
282 return simplifiedHistoryQuery.execute() as Promise<
283 // NB: this is partial because K could be 'name' | 'tags' when the
284 // fields array is actually just ['name'], for example.
285 (Partial<Pick<PublicRuleVersion, K>> &
286 Pick<PublicRuleVersion, 'id' | 'exactVersion'>)[]
287 >;
288 },
289 compile() {
290 return simplifiedHistoryQuery.compile();
291 },
292 };
293}
294
295export type GetSimplifiedRuleHistory = ReturnType<
296 typeof makeGetSimplifiedRuleHistory
297>;