Mirror of https://github.com/roostorg/coop
github.com/roostorg/coop
1import { type ScalarType, type TaggedScalar } from '@roostorg/types';
2import _Ajv, { type JSONSchemaType } from 'ajv-draft-04';
3import _ from 'lodash';
4import { type ReadonlyDeep } from 'type-fest';
5
6import { extractContentValueOrValues } from '../../condition_evaluator/leafCondition.js';
7import { type TaggedItemData } from '../../models/rules/item-type-fields.js';
8import {
9 b64UrlDecode,
10 b64UrlEncode,
11 jsonParse,
12 jsonStringifyUnstable,
13 type B64UrlOf,
14 type JsonOf,
15} from '../../utils/encoding.js';
16import {
17 makeDerivedFieldPermanentError,
18 type CoopError,
19} from '../../utils/errors.js';
20import { everyAsync } from '../../utils/fp-helpers.js';
21import { assertUnreachable } from '../../utils/misc.js';
22import { type NonEmptyArray } from '../../utils/typescript-types.js';
23import { type ItemSubmission } from '../itemProcessingService/makeItemSubmission.js';
24import { CoopInput } from '../moderationConfigService/index.js';
25import { type TransientRunSignalWithCache } from '../orgAwareSignalExecutionService/signalExecutionService.js';
26import {
27 isSignalErrorResult,
28 SignalType,
29 type SignalId,
30 type SignalInputType,
31 type SignalOutputType,
32 type SignalsService,
33} from '../signalsService/index.js';
34
// lodash's `sum`, pulled out once for use when totalling signal costs.
const sum = _.sum;

// NOTE(review): this re-types the `ajv-draft-04` default import as the type of
// its own `.default` member -- presumably a CJS/ESM interop workaround; confirm
// before changing it.
const Ajv = _Ajv as unknown as typeof _Ajv.default;
38
// A DerivedFieldSpec is the piece of data that says how a derived field gets
// created. It doubles as the field's unique identity within a given content
// type (i.e., no two derived fields on one content type share the same spec).
//
// NB: end users see this data through the API -- both directly and via the
// stringified spec they send to request the associated derived field's
// computed value. That's why `source` (partially) duplicates the
// ConditionInput types instead of referencing them: if ConditionInput is later
// modified for internal use, the public API shouldn't inadvertently change
// with it. A type mismatch at that point will make TS alert us to the risk,
// and until then the Condition-input processing helper fns can still be used
// against these specs.
export type DerivedFieldSpec = {
  source:
    | { type: 'FULL_ITEM' }
    | { type: 'CONTENT_FIELD'; name: string; contentTypeId: string }
    | { type: 'CONTENT_COOP_INPUT'; name: CoopInput };
  derivationType: DerivedFieldType;
};
58
// The kinds of step a recipe may contain. Running a signal is the only one.
export enum DerivedFieldOperationType {
  RUN_SIGNAL = 'RUN_SIGNAL',
}

// A recipe step that runs the signal identified by `args.id`, optionally
// with a subcategory.
export type RunSignalOperation = {
  type: DerivedFieldOperationType.RUN_SIGNAL;
  args: { id: SignalId; subcategory?: string };
};

// Any step that can appear in a recipe; today that's only RunSignalOperation.
export type DeriveFieldOperation = RunSignalOperation;

// The names of the supported derivations, i.e. the keys of derivedFieldRecipes.
export type DerivedFieldType = keyof typeof derivedFieldRecipes;

// The `source` portion of a DerivedFieldSpec.
export type DerivedFieldSpecSource = DerivedFieldSpec['source'];
73
// A recipe: a non-empty, deeply-readonly list of sequential operations.
export type DerivedFieldRecipe = ReadonlyDeep<
  NonEmptyArray<DeriveFieldOperation>
>;

const videoTranscriptionRecipe: DerivedFieldRecipe = [
  {
    type: DerivedFieldOperationType.RUN_SIGNAL,
    args: { id: { type: SignalType.OPEN_AI_WHISPER_TRANSCRIPTION } },
  },
];

const englishTranslationRecipe: DerivedFieldRecipe = [
  {
    type: DerivedFieldOperationType.RUN_SIGNAL,
    args: { id: { type: SignalType.GOOGLE_CLOUD_TRANSLATE_MODEL } },
  },
];

// Every DerivedFieldType that a DerivedFieldSpec can reference is mapped here
// to a more detailed set of steps (a 'recipe') for actually computing the
// derived value. Unlike the DerivedFieldSpec, recipes are wholly internal
// implementation details and aren't exposed anywhere to end users (via REST
// or GraphQL).
//
// Recipes are data (an array of sequential operation objects) rather than
// functions so that they can be analyzed -- e.g., the total cost of deriving
// a field can be calculated by finding the 'RUN_SIGNAL' steps and summing the
// cost of each associated signal.
//
// Calling a signal is currently the only operation type, but recipes will
// have to support other kinds of steps for derived fields that use multiple
// input fields and aggregate the results, like the 'All text (including text
// extracted from images and videos)' field.
export const derivedFieldRecipes = {
  VIDEO_TRANSCRIPTION: videoTranscriptionRecipe,
  ENGLISH_TRANSLATION: englishTranslationRecipe,
};

// All derivation names, read off the keys of derivedFieldRecipes.
export const derivedFieldTypes = Object.keys(
  derivedFieldRecipes,
) as DerivedFieldType[];
111
/**
 * Computes the total cost of deriving the field described by `spec`: the sum
 * of the costs of every signal that the field's recipe runs.
 *
 * @param getSignalCost Looks up the cost of a single signal.
 * @param spec The spec whose `derivationType` selects the recipe to cost.
 * @returns The summed cost of the recipe's RUN_SIGNAL steps.
 */
export async function getFieldDerivationCost(
  getSignalCost: (it: SignalId) => Promise<number>,
  spec: DerivedFieldSpec,
) {
  const recipe = derivedFieldRecipes[spec.derivationType];

  // Filtering isn't needed yet, since every step is a RUN_SIGNAL step, but
  // keeping it means the recipes are already filtered properly if a new value
  // is ever added to the DerivedFieldOperationType enum.
  const signalSteps = recipe.filter(
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    (step) => step.type === DerivedFieldOperationType.RUN_SIGNAL,
  );

  // The per-signal cost lookups are independent, so they run concurrently.
  const stepCosts = await Promise.all(
    signalSteps.map(async (step) => getSignalCost(step.args.id)),
  );

  return sum(stepCosts);
}
128
/**
 * Returns which ScalarTypes (and possibly the full content object) may be fed
 * in as inputs to derived fields of the given derivationType. The answer is
 * the `eligibleInputs` of the signal run by the recipe's first step.
 *
 * For now, it's assumed that, if some ScalarType, x, is a valid input, then an
 * array/map ContainerType that has x as its valueScalarType is valid as well.
 *
 * @param getSignal Looks up a signal for an org (throwing variant).
 * @param derivationType The derivation whose recipe is inspected.
 * @param orgId The org on whose behalf the signal is looked up.
 */
export async function getDerivedFieldInputTypes(
  getSignal: SignalsService['getSignalOrThrow'],
  derivationType: DerivedFieldType,
  orgId: string,
): Promise<readonly SignalInputType[]> {
  const firstStep = derivedFieldRecipes[derivationType][0];

  // NB: the step's type is deliberately left unchecked, so that defining new
  // step types besides RUN_SIGNAL produces type errors here.
  const { eligibleInputs } = await getSignal({
    orgId,
    signalId: firstStep.args.id,
  });

  return eligibleInputs;
}
147
/**
 * Returns the type of value that derived fields of the given derivationType
 * produce. The answer is the `outputType` of the signal run by the recipe's
 * last step.
 *
 * NOTE(review): the previous doc here repeated the input-side assumption
 * ("if some ScalarType, x, is a valid input, then an array/map ContainerType
 * that has x as its valueScalarType is valid as well"); presumably the same
 * container assumption is intended for outputs -- confirm.
 *
 * @param getSignal Looks up a signal for an org (throwing variant).
 * @param derivationType The derivation whose recipe is inspected.
 * @param orgId The org on whose behalf the signal is looked up.
 */
export async function getDerivedFieldOutputType(
  getSignal: SignalsService['getSignalOrThrow'],
  derivationType: DerivedFieldType,
  orgId: string,
): Promise<SignalOutputType> {
  // Recipes are non-empty, so there is always a last step to read.
  const lastStep = derivedFieldRecipes[derivationType].at(-1)!;

  // NB: the step's type is deliberately left unchecked, so that defining new
  // step types besides RUN_SIGNAL produces type errors here.
  const { outputType } = await getSignal({
    orgId,
    signalId: lastStep.args.id,
  });

  return outputType;
}
166
/**
 * Reports whether the derived field is enabled for the given org. For derived
 * fields whose recipe involves running signals, that means every one of those
 * signals must be enabled.
 *
 * @param getSignalDisabled Looks up a signal's disabled status for an org.
 * @param derivationType The derivation whose recipe is inspected.
 * @param orgId The org to check.
 */
export async function getDerivedFieldIsEnabled(
  getSignalDisabled: SignalsService['getSignalDisabledForOrg'],
  derivationType: DerivedFieldType,
  orgId: string,
) {
  const signalSteps = derivedFieldRecipes[derivationType].filter(
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    (step) => step.type === DerivedFieldOperationType.RUN_SIGNAL,
  );

  return everyAsync(signalSteps, async ({ args }) => {
    const status = await getSignalDisabled({ orgId, signalId: args.id });
    // A missing status counts as "not enabled".
    return status ? !status.disabled : false;
  });
}
189
// Users may request specific derived fields from the API. For that to work, a
// DerivedFieldSpec has to be convertible to and from a string, which is what
// the consumer uses to request their field of interest. The easiest way --
// both now and if/when the DerivedFieldSpec format gets extended -- is to
// JSON.stringify() the spec and base64url encode the result, so that callers
// don't run into url encoding issues. The one potential issue is that this
// isn't very user-friendly. Then again, almost no format we could come up
// with would be simple enough for user devs to guess what value to use, and
// even hand-writing the value by following some documented syntax would be
// tricky (esp. w/o encoding edge cases), so a simple, standard encoding of
// JSON might actually be the best option.
export function serializeDerivedFieldSpec(spec: DerivedFieldSpec) {
  // TODO: this really should be a stable serialization -- i.e., one whose
  // result doesn't depend on the order in which keys were added to the `spec`
  // object. We didn't do that from the beginning, though, and normalizing the
  // key order in the stringified result now would likely break existing
  // users, who depend on the current strings. So we need a migration path for
  // those users before switching to a stable serialization for the future.
  const specJson = jsonStringifyUnstable(spec);
  return b64UrlEncode(specJson);
}
211
// JSON Schema describing a DerivedFieldSpec. It is compiled into
// validateDerivedFieldSpec below, which parseDerivedFieldSpec runs against
// decoded input before treating it as a DerivedFieldSpec.
const derivedFieldSpecSchema: JSONSchemaType<DerivedFieldSpec> = {
  type: 'object',
  required: ['source', 'derivationType'],
  additionalProperties: false,
  properties: {
    source: {
      type: 'object',
      required: ['type'],
      // One variant per member of the DerivedFieldSpec `source` union; each
      // variant rejects properties it doesn't list.
      oneOf: [
        {
          required: ['type'],
          additionalProperties: false,
          properties: {
            type: { type: 'string', const: 'FULL_ITEM' },
          },
        },
        {
          required: ['type', 'name', 'contentTypeId'],
          additionalProperties: false,
          properties: {
            type: { type: 'string', const: 'CONTENT_FIELD' },
            name: { type: 'string' },
            contentTypeId: { type: 'string' },
          },
        },
        {
          required: ['type', 'name'],
          additionalProperties: false,
          properties: {
            type: { type: 'string', const: 'CONTENT_COOP_INPUT' },
            name: { type: 'string', enum: Object.values(CoopInput) },
          },
        },
      ],
    },
    // Only derivations that have a recipe are accepted.
    derivationType: { type: 'string', enum: derivedFieldTypes },
  },
};

// Compiled once at module load and reused for every parse.
const ajv = new Ajv();
const validateDerivedFieldSpec = ajv.compile(derivedFieldSpecSchema);
253
/**
 * Inverse of serializeDerivedFieldSpec: base64url-decodes the string, parses
 * the JSON, and checks the result against the DerivedFieldSpec schema.
 *
 * @param spec The serialized (base64url-encoded JSON) spec.
 * @returns The decoded spec, once it has passed schema validation.
 * @throws Error with message `Invalid derived field spec` when the decoded
 * value fails validation; its `cause` is an AggregateError wrapping the
 * validator's reported errors.
 */
export function parseDerivedFieldSpec(
  spec: B64UrlOf<JsonOf<DerivedFieldSpec>>,
): DerivedFieldSpec {
  const candidate = jsonParse(b64UrlDecode(spec));

  if (!validateDerivedFieldSpec(candidate)) {
    throw new Error(`Invalid derived field spec`, {
      cause: new AggregateError(validateDerivedFieldSpec.errors ?? []),
    });
  }

  return candidate;
}
266
// Everything that computing a derived field can yield.
export type DerivedFieldValue<T extends ScalarType = ScalarType> =
  // A single derived scalar, or one derived scalar per input value.
  | TaggedScalar<T>
  | TaggedScalar<T>[]
  | TaggedItemData
  // getDerivedFieldValue yields undefined when the field's source has no
  // value on the submission.
  | undefined
  // Yielded when the field's value couldn't be derived for some permanent
  // reason; likely a permanent failure in one of the derivation signals used.
  | CoopError<'DerivedFieldPermanentError'>;
275
/**
 * Computes and returns the value of a derived field.
 *
 * @param runSignal A function to run a signal, which is needed b/c a derived
 * field will usually use a signal result as part of creating its final value.
 * @param contextOrgId The org id for which the derived field is being computed.
 * This needs to be passed as part of running the relevant signals (e.g., for
 * looking up the org's API keys for third-party-based signals.)
 * @param itemSubmission The content from which to derive the field's value.
 * @param derivedFieldSpec The spec defining what field to derive.
 * (See {@link DerivedFieldSpec}).
 * @returns The derived value; `undefined` when the spec's source has no value
 * on this submission; or a DerivedFieldPermanentError when any recipe step
 * fails.
 */
export async function getDerivedFieldValue(
  runSignal: TransientRunSignalWithCache,
  contextOrgId: string,
  itemSubmission: ItemSubmission,
  derivedFieldSpec: DerivedFieldSpec,
): Promise<DerivedFieldValue> {
  const signalInput = extractContentValueOrValues(
    itemSubmission,
    derivedFieldSpec.source,
  );

  // NB: undefined here means the derived field simply doesn't exist on the
  // submission, which is fairly expected. A typical case: the spec uses a
  // field from content type A as the derived field's input, but the derived
  // field is referenced in a condition in a rule that runs on content types A
  // and B. Submissions of content type B then have undefined for the field.
  if (signalInput === undefined) {
    return undefined;
  }

  // Runs one signal over one value and re-tags the signal's score as a
  // scalar. An error result is thrown, so the catch below converts it.
  const applySignal = async (
    signal: SignalId,
    signalArgs: { subcategory?: string | null },
    value: TaggedScalar<ScalarType> | TaggedItemData,
  ): Promise<TaggedScalar<ScalarType>> => {
    // NB: no matchingValues support here yet, b/c we don't need it,
    // and b/c the matchingValues aren't stored on the ConditionSignalInfo
    // type, which we're reusing for the signal's args.
    const signalResult = await runSignal({
      signal,
      value,
      orgId: contextOrgId,
      userId: itemSubmission.creator?.id,
      subcategory: signalArgs.subcategory ?? undefined,
    });

    if (isSignalErrorResult(signalResult)) {
      throw signalResult.score;
    }

    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    return {
      type: signalResult.outputType.scalarType,
      value: signalResult.score,
    } as TaggedScalar<ScalarType>;
  };

  const recipe = derivedFieldRecipes[derivedFieldSpec.derivationType];

  try {
    // The value(s) as generated so far; each step consumes the previous
    // step's output.
    let current = signalInput;

    for (const step of recipe) {
      switch (step.type) {
        case DerivedFieldOperationType.RUN_SIGNAL: {
          const { args } = step;
          // Arrays are transformed element-wise, with the signal runs for the
          // individual elements happening concurrently.
          current = Array.isArray(current)
            ? await Promise.all(
                current.map(async (v) => applySignal(args.id, args, v)),
              )
            : await applySignal(args.id, args, current);
          break;
        }

        default:
          assertUnreachable(step.type);
      }
    }

    return current;
  } catch (error) {
    return makeDerivedFieldPermanentError('Failed to derive field value.', {
      cause: error,
      shouldErrorSpan: true,
    });
  }
}