Mirror of https://github.com/roostorg/coop
github.com/roostorg/coop
1import { type ItemIdentifier } from '@roostorg/types';
2import { type Opaque, type ReadonlyDeep, type UnwrapOpaque } from 'type-fest';
3import { v4 as uuidv4 } from 'uuid';
4
5import { ErrorType, makeBadRequestError } from '../../utils/errors.js';
6import { assertUnreachable } from '../../utils/misc.js';
7import { instantiateOpaqueType } from '../../utils/typescript-types.js';
8import {
9 type ItemType,
10 type ItemTypeSchemaVariant,
11 type ItemTypeSelector,
12} from '../moderationConfigService/index.js';
13import { type GetItemTypeEventuallyConsistent } from '../moderationConfigService/moderationConfigServiceQueries.js';
14import { getFieldValueForRole } from './extractItemDataValues.js';
15import {
16 toNormalizedItemDataOrErrors,
17 type NormalizedItemData,
18 type RawItemData,
19} from './toNormalizedItemDataOrErrors.js';
20import { type RawItemSubmission } from './types.js';
21
/**
 * The plain (un-branded) shape of an {@link ItemSubmission}. Kept private so
 * that the only publicly nameable type is the opaque one.
 */
type ItemSubmissionFields<Type extends ItemType> = {
  /**
   * A unique, opaque id that we assign to every submission so that we can
   * track it.
   */
  readonly submissionId: SubmissionId;

  /**
   * When the submission was received. We store this because we need some way
   * to _order_ submissions: when rendering an item to the user (e.g., in the
   * thread summary in MRT), we want to show the submission for that item that
   * has the most up-to-date data.
   *
   * If we assume that, whenever a user submits an item to us, they're
   * submitting the most recent version of the item's data, then the submission
   * holding the most recent data is also the one we received most recently,
   * so that's the one we can show. `submissionTime` is how we track that.
   *
   * Strictly speaking, that assumption is unsafe if the user generates
   * submissions from multiple data systems that are themselves only
   * eventually consistent. The only workaround, though, would be to let users
   * provide a field in the item data indicating its version, and we don't
   * currently have a schema field role for that. Moreover, even if we did,
   * using it would be optional, so we'd always want submission time as a
   * baseline ordering scheme.
   *
   * This is optional because populating it on existing submissions would
   * require some semi-involved data warehouse migrations. The intention is
   * nonetheless to store it for all new submissions going forward (although
   * that's unfortunately difficult to enforce). Submissions for the same item
   * that lack a `submissionTime` have undefined ordering.
   *
   * NB: Submission time is very different from the "created at" time given in
   * the item's data. "Created at" records when the item was created -- not
   * when it was submitted to us -- so, if an end user edits the item on the
   * platform side, its "created at" should not change (assuming the user is
   * populating that field correctly). I.e., all the submissions for a given
   * item should share the same created at -- which is critical for keeping
   * items properly positioned in a thread after they're edited -- while
   * having different submission times.
   */
  readonly submissionTime?: Date;

  /**
   * The ItemIdentifier of the user that created this item.
   *
   * NB: this only applies to content items; user items don't list the user as
   * the item's creator, and threads don't (yet) have creators.
   *
   * NB: this could be derived from the schema and field roles on the
   * itemType. However, to support older users who submitted the creator as a
   * separate field outside the item data (before schemaFieldRoles existed),
   * we maintain this field explicitly, and all downstream users should use it
   * for now.
   */
  readonly creator: Readonly<ItemIdentifier> | undefined;

  /**
   * Meaningless on its own! Combined with `itemType.id`, however, it forms the
   * `ItemIdentifier` of the item that this submission is about.
   */
  readonly itemId: string;

  /**
   * The item's (current) data, already parsed, validated and normalized.
   */
  readonly data: NormalizedItemData;

  /**
   * The item's itemType. It lets us uniquely identify the item (by combining
   * `itemType.id` with `itemId`) and interpret its data, through the `schema`
   * and `schemaFieldRoles` on the item type.
   */
  readonly itemType: Type;
};

/**
 * An "item" is a logical _entity_ on the user's platform. Here, an entity
 * specifically means something whose identity is fixed but whose state is
 * mutable. A user is an item, for example: it's still "the same" user even if
 * they change their username or profile picture.
 *
 * Because an item's state can change over time -- e.g., a post's content can
 * get edited; a user can change their profile pic; etc -- we also have a
 * concept for "an item with its current data as sent to Coop at some point in
 * time". Conceptually, that's what an "item submission" is.
 *
 * When an item and its (current) data are submitted to Coop, though, no
 * validation or normalization has been performed yet, so we use
 * `RawItemSubmission` to represent items in that state.
 *
 * Once the item has been validated and normalized, and we've assigned it an
 * id, filled in some legacy properties (like `creator`), etc., we represent it
 * with the `ItemSubmission` type below.
 *
 * This type should only be instantiated through either
 * {@link rawItemSubmissionToItemSubmission} or
 * {@link submissionDataToItemSubmission}, so that every ItemSubmission is
 * instantiated correctly, with validation, a proper id, etc. See those
 * functions for details.
 */
export type ItemSubmission<Type extends ItemType = ItemType> = Opaque<
  ItemSubmissionFields<Type>,
  'ItemSubmission'
>;
127
/**
 * Instantiates the opaque ItemSubmission type from stored submission data.
 *
 * `creatorId` and `creatorTypeId` are accepted as separate inputs so that
 * `ItemSubmission.creator` can be populated when the ItemSubmission is built
 * from a legacy data warehouse row. In those rows, the creator info is stored
 * only in separate columns (i.e., it's not part of the item data). Once the
 * creator info has been added to the data of those legacy submissions, the two
 * explicit arguments can be removed. Until then, they're required, so that
 * queries whose results are passed to this function can't forget to select
 * those columns.
 *
 * When this is called with new records as its input, where the data does hold
 * the creator (e.g., in the REPORTS warehouse table), `creatorId` and
 * `creatorTypeId` can be explicitly set to null, and the function will look in
 * the data to try to fill in the creator.
 *
 * @param getItemType Resolves the item type for an org and type selector, or
 *   resolves to undefined when there is no such item type.
 * @param it The stored submission fields, plus the explicit creator columns
 *   (both set, or both null).
 * @returns The reconstituted ItemSubmission, keeping the stored submissionId.
 * @throws Error if `getItemType` resolves to a falsy value.
 */
export async function submissionDataToItemSubmission(
  getItemType: (it: {
    orgId: string;
    typeSelector: ItemTypeSelector;
  }) => Promise<ReadonlyDeep<ItemType> | undefined>,
  it: {
    orgId: string;
    submissionId: SubmissionId;
    submissionTime?: Date;
    itemId: string;
    itemTypeId: string;
    itemTypeVersion: string;
    itemTypeSchemaVariant: ItemTypeSchemaVariant;
    data: NormalizedItemData;
  } & (
    | { creatorId: string; creatorTypeId: string }
    | { creatorId: null; creatorTypeId: null }
  ),
) {
  const typeSelector = {
    id: it.itemTypeId,
    version: it.itemTypeVersion,
    schemaVariant: it.itemTypeSchemaVariant,
  };
  const itemType = await getItemType({ orgId: it.orgId, typeSelector });

  if (!itemType) {
    throw new Error('Item type not found for ID: ' + it.itemTypeId);
  }

  // An explicitly supplied creator (the legacy-row case) takes precedence;
  // only when there is none do we fall back to looking in the item data.
  const explicitCreator = it.creatorId
    ? { id: it.creatorId, typeId: it.creatorTypeId }
    : undefined;

  return instantiateOpaqueType<ItemSubmission>({
    submissionId: it.submissionId,
    submissionTime: it.submissionTime,
    itemId: it.itemId,
    creator: explicitCreator ?? getCreator(itemType, it.data),
    data: it.data,
    itemType,
  });
}
190
/**
 * Builds the item type selector for a raw submission, which identifies its
 * type either through a nested `type` object or through the flat `typeId` /
 * `typeVersion` / `typeSchemaVariant` fields.
 */
function typeSelectorFromRawSubmission(raw: RawItemSubmission) {
  if ('type' in raw) {
    const { id, version, schemaVariant } = raw.type;
    return { id, version, schemaVariant };
  }

  return {
    id: raw.typeId,
    version: raw.typeVersion,
    schemaVariant: raw.typeSchemaVariant,
  };
}

/**
 * Instantiates a new ItemSubmission from the raw, unvalidated data that we
 * actually receive from users. It's meant to be used when a submission first
 * enters our system, since it assigns the submissionId and performs
 * validation.
 *
 * It's _not_ meant for reconstituting an ItemSubmission from stored data
 * (e.g., in the data warehouse), because we don't want to assign a new
 * SubmissionId in that case. For that, see
 * {@link submissionDataToItemSubmission}.
 *
 * @param allCurrentItemTypeVersionsForOrg The org's item types; their ids are
 *   handed to the item data validation.
 * @param orgId The org that the submission belongs to.
 * @param getItemTypeEventuallyConsistent Looks up the item type that the
 *   submission refers to.
 * @param rawItemSubmission The unvalidated submission.
 * @returns One of three results:
 *   - item type not found: only an `error` (no `itemSubmission`);
 *   - item data invalid: an `itemSubmission` still holding the raw data, with
 *     an undefined creator, plus an `error` aggregating the validation errors;
 *   - success: a validated `itemSubmission` and `error: undefined`.
 */
export async function rawItemSubmissionToItemSubmission(
  allCurrentItemTypeVersionsForOrg: readonly ItemType[],
  orgId: string,
  getItemTypeEventuallyConsistent: GetItemTypeEventuallyConsistent,
  rawItemSubmission: RawItemSubmission,
): Promise<
  | {
      itemSubmission?: Omit<UnwrapOpaque<ItemSubmission>, 'data'> & {
        data: RawItemData;
      };
      error: AggregateError;
    }
  | {
      itemSubmission: ItemSubmission;
      error: undefined;
    }
> {
  // The id and receipt time are assigned up front, before any async work.
  const submissionId = makeSubmissionId();
  const submissionTime = new Date();

  const typeSelector = typeSelectorFromRawSubmission(rawItemSubmission);

  // Look the item type up rather than searching for it in
  // allCurrentItemTypeVersionsForOrg, because that list only refers to the
  // latest versions of the org's item types.
  const itemType = await getItemTypeEventuallyConsistent({
    orgId,
    typeSelector,
  });

  if (itemType === undefined) {
    const itemTypeNotFound = makeBadRequestError(
      `We could not find an Item Type created by your organization with ID: ${typeSelector.id}`,
      {
        type: [ErrorType.DataInvalidForItemType],
        shouldErrorSpan: true,
      },
    );

    return { error: new AggregateError([itemTypeNotFound]) };
  }

  // Fields that are identical whether or not validation succeeds.
  const sharedFields = {
    submissionId,
    submissionTime,
    itemId: rawItemSubmission.id,
  };

  // Validate the item data. The result is either the normalized data or an
  // array of validation errors.
  const orgItemTypeIds = allCurrentItemTypeVersionsForOrg.map(
    (orgItemType) => orgItemType.id,
  );
  const validationResult = toNormalizedItemDataOrErrors(
    orgItemTypeIds,
    itemType,
    rawItemSubmission.data,
  );

  if (!Array.isArray(validationResult)) {
    return {
      itemSubmission: instantiateOpaqueType<ItemSubmission>({
        ...sharedFields,
        creator: getCreator(itemType, validationResult),
        data: validationResult,
        itemType,
      }),
      error: undefined,
    };
  }

  // Validation failed: hand back the submission with its raw data (and no
  // creator), alongside all the validation errors wrapped in one
  // AggregateError.
  return {
    itemSubmission: {
      ...sharedFields,
      data: rawItemSubmission.data,
      creator: undefined,
      itemType,
    },
    error: new AggregateError(validationResult),
  };
}
288
/**
 * Finds the creator of an item from its (normalized) data.
 *
 * Only CONTENT items can yield a creator: for those, the value of the field
 * holding the `creatorId` role is read from the data. THREAD and USER items
 * always give undefined.
 */
function getCreator(itemType: ItemType, itemData: NormalizedItemData) {
  if (itemType.kind === 'CONTENT') {
    return getFieldValueForRole(
      itemType.schema,
      itemType.schemaFieldRoles,
      'creatorId',
      itemData,
    );
  }

  if (itemType.kind === 'THREAD' || itemType.kind === 'USER') {
    return undefined;
  }

  // Exhaustiveness check: every known kind was handled above.
  assertUnreachable(itemType);
}
305
/**
 * The unique id that we assign to a submission.
 *
 * These are now generated as UUID v4s, for uniqueness and unguessability.
 * Previously, though, we used a mix of uuid v1 (our code's old default), uuid
 * v4 (when backfilling some rows in the data warehouse), and (due to bugs)
 * some strings that weren't uuids at all. Code consuming SubmissionIds
 * therefore can't assume anything about one beyond it being a unique, opaque
 * string.
 *
 * We briefly considered generating these as uuid v1s going forward, which
 * would have removed the need to store `submissionTime` separately. However,
 * needing to handle both the uuid v1 ids and the legacy non-uuid ids mentioned
 * above wouldn't have been worth the effort. Moreover, uuid v1s have some
 * potential security vulnerabilities: they have no randomness between
 * successive id generations, and they leak some information about the
 * generating host. So we chose to stick with v4 uuids going forward, and to
 * add `submissionTime` instead to get the "submission ordering" benefits that
 * a uuid v1 would've offered.
 */
export type SubmissionId = Opaque<string, 'SubmissionId'>;
325
/**
 * Generates a fresh {@link SubmissionId}; see that type for details.
 *
 * NB: although this function produces a uuid v4, calling code cannot assume
 * that every SubmissionId is a uuidv4.
 */
export function makeSubmissionId() {
  const freshUuid = uuidv4();
  return instantiateOpaqueType<SubmissionId>(freshUuid);
}