Mirror of https://github.com/roostorg/coop github.com/roostorg/coop
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 557ff54b2b435e5f1e789c6a8a4e1bebf2d7deb6 333 lines 13 kB view raw
import { type ItemIdentifier } from '@roostorg/types';
import { type Opaque, type ReadonlyDeep, type UnwrapOpaque } from 'type-fest';
import { v4 as uuidv4 } from 'uuid';

import { ErrorType, makeBadRequestError } from '../../utils/errors.js';
import { assertUnreachable } from '../../utils/misc.js';
import { instantiateOpaqueType } from '../../utils/typescript-types.js';
import {
  type ItemType,
  type ItemTypeSchemaVariant,
  type ItemTypeSelector,
} from '../moderationConfigService/index.js';
import { type GetItemTypeEventuallyConsistent } from '../moderationConfigService/moderationConfigServiceQueries.js';
import { getFieldValueForRole } from './extractItemDataValues.js';
import {
  toNormalizedItemDataOrErrors,
  type NormalizedItemData,
  type RawItemData,
} from './toNormalizedItemDataOrErrors.js';
import { type RawItemSubmission } from './types.js';

/**
 * An "item" represents a logical _entity_ on the user's platform. In this
 * context, an entity specifically refers to something with a fixed identity but
 * mutable state. For example, a user would be an item, as it's still "the same"
 * user, even if they change their username or profile picture.
 *
 * Because an item's state can change over time -- e.g., a post's content can
 * get edited; a user can change their profile pic; etc -- we also have a
 * concept representing "an item with its current data as sent to Coop at some
 * point in time". That's what an "item submission" is, conceptually.
 *
 * However, when an item with its (current) data is submitted to Coop, it comes
 * in without any validation or normalization having been performed on it. So,
 * we use the `RawItemSubmission` to represent items in this state.
 *
 * Then, once the item has been validated and normalized, and we've assigned it
 * an id, filled in some legacy properties (like `creator`), etc., we use the
 * `ItemSubmission` type below to represent that.
 *
 * This type should only be instantiated through either the
 * {@link rawItemSubmissionToItemSubmission} or the
 * {@link submissionDataToItemSubmission} functions below, in order to ensure
 * that the ItemSubmission is instantiated correctly, with validation, a proper
 * id, etc. See those functions for details.
 */
export type ItemSubmission<Type extends ItemType = ItemType> = Opaque<
  {
    /**
     * This is a unique, opaque id we assign to every submission, to track it.
     */
    readonly submissionId: SubmissionId;

    /**
     * We store the time when the submission was received because we want some
     * way to _order_ submissions so that, when rendering an item to the
     * user (e.g., in the thread summary in MRT), we can show the submission
     * for that item that has the most up-to-date data.
     *
     * If we assume that, whenever a user submits an item to us, they're
     * submitting the most recent version of the item's data, then the
     * submission with the most recent data will also be the one we received
     * most recently, so we can show that one. We track this with
     * `submissionTime`.
     *
     * Strictly speaking, the assumption that the user will always submit
     * the latest version of an item's data is unsafe, if the user generates
     * submissions from multiple data systems that are themselves only
     * eventually consistent. However, the only workaround for that would be for
     * us to allow users to provide a field in the item data indicating its
     * version, and we don't currently have a schema field role for that;
     * moreover, even if we did, it would be optional for users to use it,
     * so we're always gonna want to use submission time as a baseline ordering
     * scheme.
     *
     * Submission time is optional because we'd have to do some semi-involved
     * data warehouse migrations to populate it on existing submissions. However, the
     * intention is to store it for all new submissions going forward (although
     * that's unfortunately difficult to enforce). Submissions for the same item
     * without a `submissionTime` have undefined ordering.
     *
     * NB: Submission time is very different from the "created at" time given in
     * the item's data. "Created at" represents when the item was created -- not
     * when it was submitted to us -- so, if the item is edited by an end user
     * on the platform side, its "created at" should not change (if the
     * user is populating that field correctly). I.e., all the submissions
     * for a given item should actually have the same created at -- which is
     * critical for keeping items properly positioned in a thread after they're
     * edited -- but different submission times.
     */
    readonly submissionTime?: Date;

    /**
     * The ItemIdentifier for the user that created this item.
     *
     * NB: this only applies to content items; user items don't list the user as
     * the item's creator, and threads don't (yet) have creators.
     *
     * NB: this could be generated from the schema and field roles on the
     * itemType, but to support older users who submitted the creator as a
     * separate field outside the item data (before schemaFieldRoles existed),
     * we maintain this field explicitly, and all downstream users should
     * use it for now.
     */
    readonly creator: Readonly<ItemIdentifier> | undefined;

    /**
     * This id is meaningless on its own! However, with `itemType.id`, it forms
     * the `ItemIdentifier` for the item that this submission is about.
     */
    readonly itemId: string;

    /**
     * The (current) data of the item, already parsed, validated and normalized.
     */
    readonly data: NormalizedItemData;

    /**
     * The itemType for the item, which lets us uniquely identify it (by
     * combining `itemType.id` with `itemId`) and lets us interpret its data,
     * through the `schema` and `schemaFieldRoles` on the item type.
     */
    readonly itemType: Type;
  },
  'ItemSubmission'
>;

/**
 * This function instantiates an ItemSubmission opaque type from stored
 * submission data.
 *
 * It accepts creatorId and creatorTypeId as separate inputs in order to
 * populate the `ItemSubmission.creator` when creating an `ItemSubmission` from
 * a legacy data warehouse row. In those rows, the creator info is only stored in
 * separate columns (i.e., it's not part of the item data). Once we add the
 * creator info to the data in these legacy submissions, we can remove creatorId
 * and creatorTypeId as explicit arguments. Until then, we make them required to
 * make sure that queries whose results are passed to this function can't forget
 * to select those columns.
 *
 * When this is called with new records as its input, where the data does hold
 * the creator (e.g., in the REPORTS warehouse table), creatorId and
 * creatorTypeId can be explicitly set null, and the function will look in the
 * data to try to fill in the creator.
 *
 * @param getItemType Async lookup used to resolve the item type from the
 *   org id plus the (id, version, schemaVariant) selector built from `it`.
 * @param it The stored submission fields, plus the creator columns (or nulls).
 * @returns The reconstituted `ItemSubmission`, reusing the stored
 *   `submissionId` and `submissionTime` rather than generating new ones.
 * @throws {Error} If `getItemType` resolves to no item type for the selector.
 */
export async function submissionDataToItemSubmission(
  getItemType: (it: {
    orgId: string;
    typeSelector: ItemTypeSelector;
  }) => Promise<ReadonlyDeep<ItemType> | undefined>,
  it: {
    orgId: string;
    submissionId: SubmissionId;
    submissionTime?: Date;
    itemId: string;
    itemTypeId: string;
    itemTypeVersion: string;
    itemTypeSchemaVariant: ItemTypeSchemaVariant;
    data: NormalizedItemData;
  } & (
    | { creatorId: string; creatorTypeId: string }
    | { creatorId: null; creatorTypeId: null }
  ),
) {
  const { orgId, itemTypeId, itemTypeVersion, itemTypeSchemaVariant } = it;
  const itemType = await getItemType({
    orgId,
    typeSelector: {
      id: itemTypeId,
      version: itemTypeVersion,
      schemaVariant: itemTypeSchemaVariant,
    },
  });

  if (!itemType) {
    throw new Error(`Item type not found for ID: ${itemTypeId}`);
  }

  return instantiateOpaqueType<ItemSubmission>({
    submissionId: it.submissionId,
    submissionTime: it.submissionTime,
    itemId: it.itemId,
    // Prefer the explicitly-passed creator columns; otherwise fall back to
    // looking the creator up in the item data. NB: this is a truthiness check,
    // so an empty-string creatorId is treated the same as null.
    creator: it.creatorId
      ? { id: it.creatorId, typeId: it.creatorTypeId }
      : getCreator(itemType, it.data),
    data: it.data,
    itemType,
  });
}

/**
 * This instantiates a new ItemSubmission from the raw, unvalidated data we
 * actually receive from users. It's meant to be used when a submission
 * first enters our system, as it assigns the submissionId and does validation.
 *
 * It's _not_ meant to be used when we're reconstituting an ItemSubmission from
 * stored data (e.g., in the data warehouse), as we don't want to assign a new
 * SubmissionId in that case. For that, see {@link submissionDataToItemSubmission}.
 *
 * @returns One of three shapes:
 *   - `{ error }` only, when no item type matches the submission's selector;
 *   - `{ itemSubmission, error }`, when validation fails: `itemSubmission` is
 *     a plain (non-opaque) object holding the raw, un-normalized data and an
 *     undefined `creator`, and `error` aggregates the validation errors;
 *   - `{ itemSubmission, error: undefined }`, when validation succeeds.
 */
export async function rawItemSubmissionToItemSubmission(
  allCurrentItemTypeVersionsForOrg: readonly ItemType[],
  orgId: string,
  getItemTypeEventuallyConsistent: GetItemTypeEventuallyConsistent,
  rawItemSubmission: RawItemSubmission,
): Promise<
  | {
      itemSubmission?: Omit<UnwrapOpaque<ItemSubmission>, 'data'> & {
        data: RawItemData;
      };
      error: AggregateError;
    }
  | {
      itemSubmission: ItemSubmission;
      error: undefined;
    }
> {
  const submissionId = makeSubmissionId();
  const submissionTime = new Date();

  // The raw submission identifies its type either via a nested `type` object
  // or via flat `typeId`/`typeVersion`/`typeSchemaVariant` fields.
  const typeSelector =
    'type' in rawItemSubmission
      ? {
          id: rawItemSubmission.type.id,
          version: rawItemSubmission.type.version,
          schemaVariant: rawItemSubmission.type.schemaVariant,
        }
      : {
          id: rawItemSubmission.typeId,
          version: rawItemSubmission.typeVersion,
          schemaVariant: rawItemSubmission.typeSchemaVariant,
        };

  // Look the item type up rather than finding it in
  // `allCurrentItemTypeVersionsForOrg`, because the item types in that list
  // refer only to the latest original versions.
  const itemType = await getItemTypeEventuallyConsistent({
    orgId,
    typeSelector,
  });

  if (itemType === undefined) {
    return {
      error: new AggregateError([
        makeBadRequestError(
          `We could not find an Item Type created by your organization with ID: ${typeSelector.id}`,
          {
            type: [ErrorType.DataInvalidForItemType],
            shouldErrorSpan: true,
          },
        ),
      ]),
    };
  }

  // Validate item data
  const normalizedDataOrValidationErrors = toNormalizedItemDataOrErrors(
    allCurrentItemTypeVersionsForOrg.map((it) => it.id),
    itemType,
    rawItemSubmission.data,
  );

  if (Array.isArray(normalizedDataOrValidationErrors)) {
    // An array result is the list of validation errors. Return them as a
    // single AggregateError, alongside a submission that still carries the
    // raw (un-normalized) data and no creator.
    return {
      itemSubmission: {
        submissionId,
        submissionTime,
        itemId: rawItemSubmission.id,
        data: rawItemSubmission.data,
        creator: undefined,
        itemType,
      },
      error: new AggregateError(normalizedDataOrValidationErrors),
    };
  }

  return {
    itemSubmission: instantiateOpaqueType<ItemSubmission>({
      submissionId,
      submissionTime,
      itemId: rawItemSubmission.id,
      creator: getCreator(itemType, normalizedDataOrValidationErrors),
      data: normalizedDataOrValidationErrors,
      itemType,
    }),
    error: undefined,
  };
}

/**
 * Derives the creator of an item from its (normalized) data.
 *
 * THREAD and USER item types yield `undefined`. For CONTENT item types, this
 * returns whatever {@link getFieldValueForRole} finds for the `creatorId`
 * role, given the item type's `schema` and `schemaFieldRoles`.
 */
function getCreator(itemType: ItemType, itemData: NormalizedItemData) {
  switch (itemType.kind) {
    case 'THREAD':
    case 'USER':
      return undefined;
    case 'CONTENT':
      return getFieldValueForRole(
        itemType.schema,
        itemType.schemaFieldRoles,
        'creatorId',
        itemData,
      );
    default:
      // Exhaustiveness check over `itemType.kind`.
      assertUnreachable(itemType);
  }
}

/**
 * The unique id we assigned to the submission.
 *
 * To ensure uniqueness and unguessability, we now generate these as UUID v4s.
 * However, we previously used a mix of uuid v1 (our code's old default), uuid
 * v4 (when backfilling some rows in the data warehouse), and (due to bugs) some strings
 * that weren't uuids at all. Therefore, code consuming SubmissionIds can't
 * assume anything about it other than that it's a unique, opaque string.
 *
 * We briefly considered generating these as uuid v1s going forward, to remove
 * the need to store `submissionTime` separately, but needing to handle both
 * the uuid v1 ids and the legacy non-uuid ids mentioned above wouldn't have been
 * worth the effort. Moreover, uuid v1s have some potential security
 * vulnerabilities, in that they have no randomness between successive id
 * generations and leak some information about the generating host. So, we chose
 * to stick with v4 uuids going forward and add `submissionTime` instead to get
 * the "submission ordering" benefits that a uuid v1 would've offered.
 */
export type SubmissionId = Opaque<string, 'SubmissionId'>;

/**
 * See {@link SubmissionId} for details.
 *
 * NB: calling code cannot assume that the SubmissionId is always a uuidv4.
 */
export function makeSubmissionId() {
  return instantiateOpaqueType<SubmissionId>(uuidv4());
}