Mirror of https://github.com/roostorg/coop
github.com/roostorg/coop
1import { type ReadonlyDeep } from 'type-fest';
2
3import { isConditionSet } from '../../condition_evaluator/condition.js';
4import { type LocationArea } from '../../models/types/locationArea.js';
5import { type DerivedFieldSpec } from '../../services/derivedFieldsService/index.js';
6import {
7 type ConditionResult,
8 type ConditionSetWithResult,
9 type CoopInput,
10 type LeafConditionWithResult,
11} from '../../services/moderationConfigService/index.js';
12import { isSignalId } from '../../services/signalsService/index.js';
13import { type CorrelationId } from '../../utils/correlationIds.js';
14import { jsonParse, tryJsonParse, type JsonOf } from '../../utils/encoding.js';
15import {
16 type SafeErrorKey,
17 type SerializableError,
18} from '../../utils/errors.js';
19import {
20 type NonEmptyArray,
21 type ReplaceDeep,
22} from '../../utils/typescript-types.js';
23
/**
 * The shape a `ConditionResult` takes once it has been logged. It is derived
 * from `ConditionResult` via `ReplaceDeep`, which (judging by its name and
 * arguments -- confirm against its definition) swaps every nested
 * `SerializableError` for a version of that type without the `SafeErrorKey`
 * properties.
 */
type ConditionResultAsLogged = ReplaceDeep<
  ConditionResult,
  SerializableError,
  Omit<SerializableError, SafeErrorKey>
>;
29
/**
 * A type for the subset of ConditionSetWithResult that we actually persist to
 * the data warehouse when recording a rule execution. This doesn't have signal
 * instances, etc., and it's all we can actually show to the user in the
 * insights UI.
 *
 * It is `ConditionSetWithResult` with `conditions` and `result` swapped for
 * their as-logged counterparts; `conditions` is either all leaves or all
 * nested sets, never a mix.
 */
export type ConditionSetWithResultAsLogged = Omit<
  ConditionSetWithResult,
  'conditions' | 'result'
> & {
  conditions:
    | NonEmptyArray<LeafConditionWithResultAsLogged>
    | NonEmptyArray<ConditionSetWithResultAsLogged>;
  result?: ConditionResultAsLogged;
};
44
/** Any logged condition: either a (possibly nested) condition set or a leaf. */
export type ConditionWithResultAsLogged =
  | ConditionSetWithResultAsLogged
  | LeafConditionWithResultAsLogged;
48
// NB: we make these types to ensure, at the type level, that we're generating
// correlation ids consistently for everything we log.
export type RuleExecutionSourceType =
  | 'post-content'
  | 'backtest'
  | 'retroaction'
  | 'user-rule-run'
  | 'post-items'
  | 'manual-action-run';
58
/** A `CorrelationId` parameterized by one of the `RuleExecutionSourceType`s. */
export type RuleExecutionCorrelationId = CorrelationId<RuleExecutionSourceType>;
60
// We write out this type explicitly, rather than deriving it with ReturnType,
// because it includes legacy cases that the inferred return type would not
// account for -- e.g., signal.id holding a SignalType string. Then, we annotate
// the return type of pickLeafConditionPropsTolog with this type to check that
// the implementation's current return type is compatible w/ this type, which
// should be broader. More generally, we want to force ourselves to add new
// cases here when they appear (e.g., if some new ValueComparator or
// ConditionInput case is added), without having cases automatically disappear
// here (e.g., if we remove a case from ConditionInput), because we use this
// type when _reading_ from the logged data too, and the old cases will only go
// away in the db if we explicitly migrate old rows. We could (technically
// should) take this approach even further by inlining literally every type
// here (like `LocationArea`, `DerivedFieldSpec`, etc), but this is enough for
// now.
export type LeafConditionWithResultAsLogged = {
  input: ReadonlyDeep<
    | { type: 'USER_ID' }
    | { type: 'FULL_ITEM'; contentTypeIds?: string[] }
    | { type: 'CONTENT_FIELD'; name: string; contentTypeId: string }
    | { type: 'CONTENT_COOP_INPUT'; name: CoopInput }
    | { type: 'CONTENT_DERIVED_FIELD'; spec: DerivedFieldSpec }
  >;
  /**
   * NB: to figure out which Signal a logged condition references, use
   * {@link signalIdFromLoggedCondition}. Do not try to read the fields of the
   * condition directly, as there are too many legacy formats to account for.
   */
  signal:
    | {
        // You might expect `id` to be typed as
        // `JsonOf<SignalId> | SignalType | null`, but, instead, we use
        // `JsonOf<{ type: string; id?: string }> | string | null` for the same
        // reason we type `type` as string rather than SignalType. See below.
        id?: JsonOf<{ type: string; id?: string }> | string | null;
        // Intentionally `string` rather than `SignalType`, b/c we have some old
        // logged rows with a `type` that is no longer one of our current
        // SignalTypes (e.g., for the old language detection signal).
        type?: string;
        name?: string | null | undefined;
        subcategory?: string | null | undefined;
      }
    | null
    | undefined;
  matchingValues:
    | {
        strings?: readonly string[];
        textBankIds?: readonly string[];
        locations?: readonly ReadonlyDeep<LocationArea>[];
        locationBankIds?: readonly string[];
        imageBankIds?: readonly string[];
      }
    | null
    | undefined;
  comparator?:
    | 'EQUALS'
    | 'NOT_EQUAL_TO'
    | 'LESS_THAN'
    | 'LESS_THAN_OR_EQUALS'
    | 'GREATER_THAN'
    | 'GREATER_THAN_OR_EQUALS'
    | 'IS_UNAVAILABLE'
    | 'IS_NOT_PROVIDED'
    | null;
  threshold?: string | number | null;
  result?: ConditionResultAsLogged;
};
126
/**
 * Converts a condition (with its evaluation result) into the shape we log.
 *
 * - A condition set keeps all of its own properties (they are spread through
 *   unchanged) but has every child in `conditions` converted recursively.
 * - A leaf condition is delegated to `pickLeafConditionPropsTolog`.
 *
 * The overloads let callers that know which kind of condition they hold get
 * the matching, narrower return type.
 *
 * @param condition The condition set or leaf condition to convert.
 * @returns The as-logged counterpart of `condition`.
 */
export function pickConditionPropsToLog(
  condition: ReadonlyDeep<ConditionSetWithResult>,
): ConditionSetWithResultAsLogged;
export function pickConditionPropsToLog(
  condition: ReadonlyDeep<LeafConditionWithResult>,
): LeafConditionWithResultAsLogged;
export function pickConditionPropsToLog(
  condition:
    | ReadonlyDeep<ConditionSetWithResult>
    | ReadonlyDeep<LeafConditionWithResult>,
): ConditionWithResultAsLogged;
export function pickConditionPropsToLog(
  condition:
    | ReadonlyDeep<ConditionSetWithResult>
    | ReadonlyDeep<LeafConditionWithResult>,
): unknown {
  if (!isConditionSet(condition)) {
    return pickLeafConditionPropsTolog(condition);
  }

  // `map` returns a plain array of the as-logged union. `satisfies` verifies
  // the element type, and the `as` then restores the non-empty,
  // all-leaves-or-all-sets array type that the logged set type requires.
  const loggedChildren = condition.conditions.map((child) =>
    pickConditionPropsToLog(child),
  ) satisfies ConditionWithResultAsLogged[] as
    | NonEmptyArray<LeafConditionWithResultAsLogged>
    | NonEmptyArray<ConditionSetWithResultAsLogged>;

  return { ...condition, conditions: loggedChildren };
}
154
/**
 * Builds the as-logged form of a leaf condition by copying an explicit
 * allow-list of properties; anything else on `condition`, on its `signal`, or
 * on its `matchingValues` is dropped.
 *
 * NB: the lowercase "l" in the name ("Tolog") is kept as-is because this
 * function is exported and is called by `pickConditionPropsToLog`.
 *
 * @param condition The evaluated leaf condition.
 * @returns A new object holding only `comparator`, `threshold`, `input`,
 *   `result`, a trimmed `signal` and a trimmed `matchingValues`. A nullish
 *   `signal` or `matchingValues` is passed through unchanged.
 */
export function pickLeafConditionPropsTolog(
  condition: ReadonlyDeep<LeafConditionWithResult>,
): LeafConditionWithResultAsLogged {
  const { comparator, threshold, input, result, signal, matchingValues } =
    condition;

  return {
    comparator,
    threshold,
    input,
    // Type-level only: check that we hold a ConditionResult, then view it as
    // its as-logged type. The value itself is not modified here.
    result: result satisfies
      | ReadonlyDeep<ConditionResult>
      | undefined as ConditionResultAsLogged | undefined,
    signal: signal && {
      id: signal.id,
      type: signal.type,
      name: signal.name,
      subcategory: signal.subcategory,
    },
    // Each known key is copied only when it holds a truthy value (an empty
    // array counts), so absent keys stay absent instead of being written out
    // as `undefined`.
    matchingValues: matchingValues && {
      ...(matchingValues.strings
        ? { strings: matchingValues.strings }
        : undefined),
      ...(matchingValues.textBankIds
        ? { textBankIds: matchingValues.textBankIds }
        : undefined),
      ...(matchingValues.locations
        ? { locations: matchingValues.locations }
        : undefined),
      ...(matchingValues.locationBankIds
        ? { locationBankIds: matchingValues.locationBankIds }
        : undefined),
      ...(matchingValues.imageBankIds
        ? { imageBankIds: matchingValues.imageBankIds }
        : undefined),
    },
  };
}
192
/**
 * Signal ids have been logged in conditions in very haphazard ways over time.
 * This function attempts to account for all the different iterations in order
 * to return a SignalId if at all possible.
 *
 * @param it A leaf condition as read back from the logged data.
 * @returns The signal id the condition refers to, or `undefined` when the
 *   condition has no signal or when the recovered value does not pass
 *   `isSignalId`.
 */
export function signalIdFromLoggedCondition(
  it: LeafConditionWithResultAsLogged,
) {
  const { signal } = it;
  if (!signal) {
    return undefined;
  }

  // Very old logged conditions have `signal.id` as `null` or omit it when the
  // condition was targeting a built-in signal; in that case, we build a
  // `SignalId` from the type. Slightly newer conditions contain the bare signal
  // type in `signal.id`, which isn't a valid JSON string. So, if we try to
  // parse it as JSON and get nothing back, we assume we're in that case and
  // likewise construct the id from the type. Finally, newer conditions contain
  // `JsonOf<SignalId>` (where `SignalId` is whatever shape it had when the row
  // was written -- not necessarily its current shape), so we parse that.
  const loggedId = signal.id;
  const candidateSignalId =
    loggedId && tryJsonParse(loggedId)
      ? jsonParse(loggedId satisfies string | JsonOf<JSON> as JsonOf<JSON>)
      : { type: signal.type };

  // Finally, it's possible that the stored condition references a signal that
  // no longer has a corresponding SignalType in our codebase (e.g., the deleted
  // LanguageDetection signal). In that case, we just return undefined.
  return isSignalId(candidateSignalId) ? candidateSignalId : undefined;
}