Mirror of https://github.com/roostorg/coop github.com/roostorg/coop
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 557ff54b2b435e5f1e789c6a8a4e1bebf2d7deb6 363 lines 14 kB view raw
import { type ScalarType, type TaggedScalar } from '@roostorg/types';
import _Ajv, { type JSONSchemaType } from 'ajv-draft-04';
import _ from 'lodash';
import { type ReadonlyDeep } from 'type-fest';

import { extractContentValueOrValues } from '../../condition_evaluator/leafCondition.js';
import { type TaggedItemData } from '../../models/rules/item-type-fields.js';
import {
  b64UrlDecode,
  b64UrlEncode,
  jsonParse,
  jsonStringifyUnstable,
  type B64UrlOf,
  type JsonOf,
} from '../../utils/encoding.js';
import {
  makeDerivedFieldPermanentError,
  type CoopError,
} from '../../utils/errors.js';
import { everyAsync } from '../../utils/fp-helpers.js';
import { assertUnreachable } from '../../utils/misc.js';
import { type NonEmptyArray } from '../../utils/typescript-types.js';
import { type ItemSubmission } from '../itemProcessingService/makeItemSubmission.js';
import { CoopInput } from '../moderationConfigService/index.js';
import { type TransientRunSignalWithCache } from '../orgAwareSignalExecutionService/signalExecutionService.js';
import {
  isSignalErrorResult,
  SignalType,
  type SignalId,
  type SignalInputType,
  type SignalOutputType,
  type SignalsService,
} from '../signalsService/index.js';

const Ajv = _Ajv as unknown as typeof _Ajv.default;

const { sum } = _;

// A DerivedFieldSpec is a small piece of data describing how a derived field
// is produced. It doubles as the field's unique identity within a content
// type: no two derived fields on the same content type share a spec.
//
// NB: end users see this data through the API -- both directly and via the
// stringified form of the spec that they use to request the derived field's
// computed value. That is why `source` (partially) duplicates the
// ConditionInput types instead of referencing them: if ConditionInput is later
// changed for internal reasons, we don't want the public API to change by
// accident. A type mismatch at that point makes TS flag the risk, while for
// now we can still run the Condition-input processing helpers on these specs.
export type DerivedFieldSpec = {
  source:
    | { type: 'FULL_ITEM' }
    | { type: 'CONTENT_FIELD'; name: string; contentTypeId: string }
    | { type: 'CONTENT_COOP_INPUT'; name: CoopInput };
  derivationType: DerivedFieldType;
};

export type DerivedFieldSpecSource = DerivedFieldSpec['source'];

export type DerivedFieldType = keyof typeof derivedFieldRecipes;

export type DeriveFieldOperation = RunSignalOperation;

export enum DerivedFieldOperationType {
  RUN_SIGNAL = 'RUN_SIGNAL',
}

export type RunSignalOperation = {
  type: DerivedFieldOperationType.RUN_SIGNAL;
  args: { id: SignalId; subcategory?: string };
};

// Every DerivedFieldType that a DerivedFieldSpec may name is expanded below
// into a 'recipe': the detailed, ordered steps used to actually compute the
// derived value. Recipes, unlike DerivedFieldSpecs, are purely internal and
// are never exposed to end users (neither via REST nor via GraphQL).
//
// Recipes are plain data (an array of operation objects applied in sequence)
// rather than functions, so that they can be inspected -- e.g., the total
// cost of deriving a field is computed by locating the 'RUN_SIGNAL' steps and
// adding up the cost of each referenced signal.
//
// Running a signal is the only operation type today, but recipes will need
// other kinds of steps for derived fields that combine several input fields
// and aggregate the results, like the 'All text (including text extracted
// from images and videos)' field.
export const derivedFieldRecipes = {
  VIDEO_TRANSCRIPTION: [
    {
      type: DerivedFieldOperationType.RUN_SIGNAL,
      args: { id: { type: SignalType.OPEN_AI_WHISPER_TRANSCRIPTION } },
    },
  ] satisfies DerivedFieldRecipe as DerivedFieldRecipe,
  ENGLISH_TRANSLATION: [
    {
      type: DerivedFieldOperationType.RUN_SIGNAL,
      args: { id: { type: SignalType.GOOGLE_CLOUD_TRANSLATE_MODEL } },
    },
  ] satisfies DerivedFieldRecipe as DerivedFieldRecipe,
};

export type DerivedFieldRecipe = ReadonlyDeep<
  NonEmptyArray<DeriveFieldOperation>
>;

export const derivedFieldTypes = Object.keys(
  derivedFieldRecipes,
) as DerivedFieldType[];

/**
 * Computes the total cost of deriving the field described by `spec`, as the
 * sum of the costs of every signal its recipe runs.
 *
 * @param getSignalCost Resolves the cost of a single signal.
 * @param spec The spec whose `derivationType` selects the recipe to cost.
 * @returns The summed cost of all RUN_SIGNAL steps in the recipe.
 */
export async function getFieldDerivationCost(
  getSignalCost: (it: SignalId) => Promise<number>,
  spec: DerivedFieldSpec,
) {
  // The filter is redundant while RUN_SIGNAL is the only operation type; it
  // is kept so that recipes are still filtered correctly if a new value is
  // ever added to the DerivedFieldOperationType enum.
  const signalSteps = derivedFieldRecipes[spec.derivationType].filter(
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    (step) => step.type === DerivedFieldOperationType.RUN_SIGNAL,
  );

  const stepCosts = await Promise.all(
    signalSteps.map(async (step) => getSignalCost(step.args.id)),
  );

  return sum(stepCosts);
}

/**
 * Returns what ScalarTypes (and possibly the full content object) are eligible
 * to be fed in as inputs to derived fields of the given derivationType. The
 * answer is the `eligibleInputs` of the signal run by the recipe's first step.
 *
 * For now, it's assumed that, if some ScalarType, x, is a valid input, then an
 * array/map ContainerType that has x as its valueScalarType is valid as well.
 *
 * @param getSignal Looks up a signal for an org (throwing variant).
 * @param derivationType Which recipe to inspect.
 * @param orgId The org on whose behalf the signal is looked up.
 */
export async function getDerivedFieldInputTypes(
  getSignal: SignalsService['getSignalOrThrow'],
  derivationType: DerivedFieldType,
  orgId: string,
): Promise<readonly SignalInputType[]> {
  const [firstStep] = derivedFieldRecipes[derivationType];
  // NB: the step's type is deliberately left unchecked, so that defining a new
  // step type besides RUN_SIGNAL produces type errors here.
  const { eligibleInputs } = await getSignal({
    orgId,
    signalId: firstStep.args.id,
  });
  return eligibleInputs;
}

/**
 * Returns the output type of derived fields of the given derivationType,
 * which is the `outputType` of the signal run by the recipe's last step.
 *
 * NOTE(review): the previous comment here repeated the array/map container
 * assumption documented for input types; confirm whether it is meant to apply
 * to output values too.
 *
 * @param getSignal Looks up a signal for an org (throwing variant).
 * @param derivationType Which recipe to inspect.
 * @param orgId The org on whose behalf the signal is looked up.
 */
export async function getDerivedFieldOutputType(
  getSignal: SignalsService['getSignalOrThrow'],
  derivationType: DerivedFieldType,
  orgId: string,
): Promise<SignalOutputType> {
  const lastStep = derivedFieldRecipes[derivationType].at(-1)!;
  // NB: the step's type is deliberately left unchecked, so that defining a new
  // step type besides RUN_SIGNAL produces type errors here.
  const { outputType } = await getSignal({
    orgId,
    signalId: lastStep.args.id,
  });
  return outputType;
}

/**
 * Returns whether the derived field is enabled for the given org. For derived
 * fields whose recipe involves running a signal, every such signal must be
 * enabled; a signal with no lookup result counts as not enabled.
 *
 * @param getSignalDisabled Looks up whether a signal is disabled for an org.
 * @param derivationType Which recipe to check.
 * @param orgId The org to check for.
 */
export async function getDerivedFieldIsEnabled(
  getSignalDisabled: SignalsService['getSignalDisabledForOrg'],
  derivationType: DerivedFieldType,
  orgId: string,
) {
  const signalSteps = derivedFieldRecipes[derivationType].filter(
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    (step) => step.type === DerivedFieldOperationType.RUN_SIGNAL,
  );

  return everyAsync(signalSteps, async (step) => {
    const lookup = await getSignalDisabled({ orgId, signalId: step.args.id });
    if (!lookup) {
      return false;
    }
    return !lookup.disabled;
  });
}

// Users may request specific derived fields from the API. For that to work, a
// DerivedFieldSpec must round-trip to and from a string that the consumer
// sends to name the field they want. The simplest option -- today, and if/when
// the DerivedFieldSpec format is extended -- is to JSON.stringify() the spec
// and base64url encode the result, so callers don't hit url encoding issues.
// The one drawback is that this isn't very user-friendly. Then again, hardly
// any format we could invent would be simple enough for user devs to guess the
// right value, and hand-writing a value from some documented syntax would be
// tricky too (esp. w/o encoding edge cases), so a simple, standard encoding of
// JSON may well be the best choice.
export function serializeDerivedFieldSpec(spec: DerivedFieldSpec) {
  // TODO: this should really be a stable serialization -- i.e., one whose
  // result doesn't depend on the order in which keys were added to the `spec`
  // object. That wasn't done from the start, though, and normalizing the key
  // order now would likely break existing users who depend on the current
  // strings. So we need a migration path for them before switching to a
  // stable serialization going forward.
  const unstableJson = jsonStringifyUnstable(spec);
  return b64UrlEncode(unstableJson);
}

const derivedFieldSpecSchema: JSONSchemaType<DerivedFieldSpec> = {
  type: 'object',
  properties: {
    source: {
      type: 'object',
      required: ['type'],
      oneOf: [
        {
          properties: {
            type: { type: 'string', const: 'FULL_ITEM' },
          },
          required: ['type'],
          additionalProperties: false,
        },
        {
          properties: {
            type: { type: 'string', const: 'CONTENT_FIELD' },
            name: { type: 'string' },
            contentTypeId: { type: 'string' },
          },
          required: ['type', 'name', 'contentTypeId'],
          additionalProperties: false,
        },
        {
          properties: {
            type: { type: 'string', const: 'CONTENT_COOP_INPUT' },
            name: { type: 'string', enum: Object.values(CoopInput) },
          },
          required: ['type', 'name'],
          additionalProperties: false,
        },
      ],
    },
    derivationType: { type: 'string', enum: derivedFieldTypes },
  },
  required: ['source', 'derivationType'],
  additionalProperties: false,
};

const ajv = new Ajv();
const validateDerivedFieldSpec = ajv.compile(derivedFieldSpecSchema);

/**
 * Decodes a serialized derived field spec (base64url-encoded JSON) and
 * validates it against the DerivedFieldSpec JSON schema.
 *
 * @param spec The serialized spec, as produced by serializeDerivedFieldSpec.
 * @returns The validated spec.
 * @throws Error with message `Invalid derived field spec` when the decoded
 * value fails schema validation; its `cause` is an AggregateError wrapping
 * the validator's reported errors.
 */
export function parseDerivedFieldSpec(
  spec: B64UrlOf<JsonOf<DerivedFieldSpec>>,
): DerivedFieldSpec {
  const candidate = jsonParse(b64UrlDecode(spec));

  if (!validateDerivedFieldSpec(candidate)) {
    throw new Error(`Invalid derived field spec`, {
      cause: new AggregateError(validateDerivedFieldSpec.errors ?? []),
    });
  }

  return candidate;
}

export type DerivedFieldValue<T extends ScalarType = ScalarType> =
  | TaggedScalar<T>
  | TaggedScalar<T>[]
  | TaggedItemData
  | undefined
  // returned when the field's value could not be derived for some permanent
  // reason; likely a permanent failure in one of the derivation signals used.
  | CoopError<'DerivedFieldPermanentError'>;

/**
 * Computes and returns the value of a derived field.
 *
 * @param runSignal A function to run a signal, which is needed b/c a derived
 * field will usually use a signal result as part of creating its final value.
 * @param contextOrgId The org id for which the derived field is being computed.
 * It is passed along when running the relevant signals (e.g., for looking up
 * the org's API keys for third-party-based signals.)
 * @param itemSubmission The content from which to derive the field's value.
 * @param derivedFieldSpec The spec defining what field to derive.
 * (See {@link DerivedFieldSpec}).
 * @returns The derived value; `undefined` when the spec's source yields no
 * value for this submission; or a DerivedFieldPermanentError when any recipe
 * step fails.
 */
export async function getDerivedFieldValue(
  runSignal: TransientRunSignalWithCache,
  contextOrgId: string,
  itemSubmission: ItemSubmission,
  derivedFieldSpec: DerivedFieldSpec,
): Promise<DerivedFieldValue> {
  const initialInput = extractContentValueOrValues(
    itemSubmission,
    derivedFieldSpec.source,
  );

  // NB: a submission that simply lacks the derived field is a fairly expected
  // case, and undefined is how we represent it. Typically this happens when
  // the spec takes a field from content type A as its input, but the derived
  // field is referenced by a condition in a rule that runs on content types A
  // and B: submissions of content type B then have undefined for the field.
  if (initialInput === undefined) {
    return undefined;
  }

  // Runs one signal against one value and tags the resulting score with the
  // signal's output scalar type. Throws the signal's `score` when the signal
  // reports an error result.
  const applySignal = async (
    signal: SignalId,
    signalArgs: { subcategory?: string | null },
    value: TaggedScalar<ScalarType> | TaggedItemData,
  ): Promise<TaggedScalar<ScalarType>> => {
    // NB: matchingValues aren't supported here yet, b/c we don't need them,
    // and b/c they aren't stored on the ConditionSignalInfo type, which we're
    // reusing for the signal's args.
    const outcome = await runSignal({
      signal,
      value,
      orgId: contextOrgId,
      userId: itemSubmission.creator?.id,
      subcategory: signalArgs.subcategory ?? undefined,
    });

    if (isSignalErrorResult(outcome)) {
      throw outcome.score;
    }

    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    return {
      type: outcome.outputType.scalarType,
      value: outcome.score,
    } as TaggedScalar<ScalarType>;
  };

  const recipe = derivedFieldRecipes[derivedFieldSpec.derivationType];

  // Thread the value(s) through the recipe one operation at a time; each step
  // awaits the previous step's result before running.
  const derivation = recipe.reduce(async (pending, operation) => {
    // the value(s) as generated by the steps so far.
    const current = await pending;
    switch (operation.type) {
      case DerivedFieldOperationType.RUN_SIGNAL: {
        const { args } = operation;

        if (Array.isArray(current)) {
          return Promise.all(
            current.map(async (v) => applySignal(args.id, args, v)),
          );
        }
        return applySignal(args.id, args, current);
      }

      default:
        assertUnreachable(operation.type);
    }
  }, Promise.resolve(initialInput));

  // Any failure along the chain is reported as a permanent derivation error
  // value rather than as a rejected promise.
  return derivation.catch((error) =>
    makeDerivedFieldPermanentError('Failed to derive field value.', {
      cause: error,
      shouldErrorSpan: true,
    }),
  );
}