Mirror of https://github.com/roostorg/coop
github.com/roostorg/coop
1-- Create ClickHouse databases mirroring data warehouse schemas
2CREATE DATABASE IF NOT EXISTS analytics;
3CREATE DATABASE IF NOT EXISTS ACTION_STATISTICS_SERVICE;
4CREATE DATABASE IF NOT EXISTS MANUAL_REVIEW_TOOL;
5CREATE DATABASE IF NOT EXISTS NCMEC_SERVICE;
6CREATE DATABASE IF NOT EXISTS REPORTING_SERVICE;
7CREATE DATABASE IF NOT EXISTS RULE_ANOMALY_DETECTION_SERVICE;
8CREATE DATABASE IF NOT EXISTS USER_STATISTICS_SERVICE;
9
10-------------------------------------------------------------------------------
11-- ACTION_STATISTICS_SERVICE
12-------------------------------------------------------------------------------
13
14DROP TABLE IF EXISTS ACTION_STATISTICS_SERVICE.ACTIONED_SUBMISSION_COUNTS;
15CREATE TABLE ACTION_STATISTICS_SERVICE.ACTIONED_SUBMISSION_COUNTS
16(
17 ds Date,
18 org_id String,
19 num_submissions Int64,
20 submission_ids Array(String)
21)
22ENGINE = MergeTree
23PARTITION BY ds
24ORDER BY (ds, org_id);
25
26DROP TABLE IF EXISTS ACTION_STATISTICS_SERVICE.ACTIONED_SUBMISSION_COUNTS_BY_POLICY;
27CREATE TABLE ACTION_STATISTICS_SERVICE.ACTIONED_SUBMISSION_COUNTS_BY_POLICY
28(
29 ds Date,
30 org_id String,
31 num_submissions Int64,
32 submission_ids Array(String),
33 policy_id String,
34 policy_name String
35)
36ENGINE = MergeTree
37PARTITION BY ds
38ORDER BY (ds, org_id, policy_id);
39
40DROP TABLE IF EXISTS ACTION_STATISTICS_SERVICE.ACTIONED_SUBMISSION_COUNTS_BY_TAG;
41CREATE TABLE ACTION_STATISTICS_SERVICE.ACTIONED_SUBMISSION_COUNTS_BY_TAG
42(
43 ds Date,
44 org_id String,
45 num_submissions Int64,
46 submission_ids Array(String),
47 tag String
48)
49ENGINE = MergeTree
50PARTITION BY ds
51ORDER BY (ds, org_id, tag);
52
53DROP TABLE IF EXISTS ACTION_STATISTICS_SERVICE.BY_ACTION;
54CREATE TABLE ACTION_STATISTICS_SERVICE.BY_ACTION
55(
56 org_id String,
57 item_id Nullable(String),
58 item_type_id Nullable(String),
59 action_id String,
60 action_time DateTime64(3)
61)
62ENGINE = MergeTree
63PARTITION BY toDate(action_time)
64ORDER BY (org_id, action_time, action_id);
65
66DROP TABLE IF EXISTS ACTION_STATISTICS_SERVICE.BY_ITEM_TYPE;
67CREATE TABLE ACTION_STATISTICS_SERVICE.BY_ITEM_TYPE
68(
69 org_id String,
70 item_id Nullable(String),
71 item_type_id Nullable(String),
72 action_time DateTime64(3)
73)
74ENGINE = MergeTree
75PARTITION BY toDate(action_time)
76ORDER BY (org_id, action_time, ifNull(item_type_id, ''));
77
78DROP TABLE IF EXISTS ACTION_STATISTICS_SERVICE.BY_POLICY;
79CREATE TABLE ACTION_STATISTICS_SERVICE.BY_POLICY
80(
81 org_id String,
82 item_id Nullable(String),
83 item_type_id Nullable(String),
84 policy_id String,
85 policy_name Nullable(String),
86 action_time DateTime64(3)
87)
88ENGINE = MergeTree
89PARTITION BY toDate(action_time)
90ORDER BY (org_id, action_time, ifNull(policy_id, ''));
91
92DROP TABLE IF EXISTS ACTION_STATISTICS_SERVICE.BY_RULE;
93CREATE TABLE ACTION_STATISTICS_SERVICE.BY_RULE
94(
95 org_id String,
96 item_id Nullable(String),
97 item_type_id Nullable(String),
98 rule_id String,
99 action_time DateTime64(3)
100)
101ENGINE = MergeTree
102PARTITION BY toDate(action_time)
103ORDER BY (org_id, action_time, rule_id);
104
105DROP TABLE IF EXISTS ACTION_STATISTICS_SERVICE.BY_SOURCE;
106CREATE TABLE ACTION_STATISTICS_SERVICE.BY_SOURCE
107(
108 org_id String,
109 item_id Nullable(String),
110 item_type_id Nullable(String),
111 source String,
112 action_time DateTime64(3)
113)
114ENGINE = MergeTree
115PARTITION BY toDate(action_time)
116ORDER BY (org_id, action_time, source);
117
118-------------------------------------------------------------------------------
119-- MANUAL_REVIEW_TOOL
120-------------------------------------------------------------------------------
121
122DROP TABLE IF EXISTS MANUAL_REVIEW_TOOL.ROUTING_RULE_EXECUTIONS;
123CREATE TABLE MANUAL_REVIEW_TOOL.ROUTING_RULE_EXECUTIONS
124(
125 rule String NOT NULL,
126 rule_id String NOT NULL,
127 rule_version DateTime64(3) NOT NULL,
128 destination_queue_id Nullable(String),
129 org_id String NOT NULL,
130 correlation_id Nullable(String),
131 result Nullable(String),
132 passed UInt8 NOT NULL,
133 job_kind String NOT NULL,
134 ts DateTime64(3) NOT NULL,
135 ds Date NOT NULL,
136 item_data Nullable(String),
137 item_id String NOT NULL,
138 item_type_name Nullable(String),
139 item_type_id String NOT NULL,
140 item_type_kind String NOT NULL,
141 item_creator_id Nullable(String),
142 item_creator_type_id Nullable(String),
143 item_type_schema Nullable(String),
144 item_type_schema_field_roles Nullable(String),
145 item_type_version Nullable(String),
146 item_type_schema_variant Nullable(String)
147)
148ENGINE = MergeTree
149PARTITION BY ds
150ORDER BY (ds, org_id, rule_id, item_id);
151
152-------------------------------------------------------------------------------
153-- PUBLIC (core analytics tables)
154-------------------------------------------------------------------------------
155
156DROP TABLE IF EXISTS analytics.ALL_ORGS;
157CREATE TABLE analytics.ALL_ORGS
158(
159 id String NOT NULL,
160 name String NOT NULL,
161 email String NOT NULL,
162 website_url String NOT NULL,
163 date_created Date NOT NULL
164)
165ENGINE = MergeTree
166ORDER BY (id, date_created)
167COMMENT 'Contains metadata about every organization, with one row per organization';
168
169DROP TABLE IF EXISTS analytics.ACTION_EXECUTIONS;
170CREATE TABLE analytics.ACTION_EXECUTIONS
171(
172 org_id String NOT NULL,
173 action_id String NOT NULL,
174 action_name String NOT NULL,
175 rules String NOT NULL DEFAULT '[]',
176 policies String NOT NULL DEFAULT '[]',
177 rule_tags String NOT NULL DEFAULT '[]',
178 policy_ids Array(String) NOT NULL DEFAULT [],
179 policy_names Array(String) NOT NULL DEFAULT [],
180 rule_environment Nullable(String),
181 correlation_id String NOT NULL,
182 ts DateTime64(3) NOT NULL,
183 ds Date NOT NULL,
184 item_type_id Nullable(String),
185 item_submission_id Nullable(String),
186 item_creator_id Nullable(String),
187 item_creator_type_id Nullable(String),
188 item_id Nullable(String),
189 item_type_kind String NOT NULL,
190 action_source String NOT NULL,
191 actor_id Nullable(String),
192 job_id Nullable(String),
193 failed UInt8 NOT NULL DEFAULT 0
194)
195ENGINE = MergeTree
196PARTITION BY ds
197ORDER BY (ds, ts, org_id, action_id);
198
199DROP TABLE IF EXISTS analytics.CONTENT_API_REQUESTS;
200CREATE TABLE analytics.CONTENT_API_REQUESTS
201(
202 org_id String NOT NULL,
203 event String NOT NULL,
204 request_id String NOT NULL,
205 submission_id String NOT NULL,
206 failure_reason Nullable(String),
207 ts DateTime64(3) NOT NULL,
208 ds Date NOT NULL,
209 item_id String NOT NULL,
210 item_type_name String NOT NULL,
211 item_type_id String NOT NULL,
212 item_creator_id Nullable(String),
213 item_data String NOT NULL,
214 item_type_schema String NOT NULL,
215 item_type_kind String NOT NULL,
216 item_creator_type_id Nullable(String),
217 item_type_version String NOT NULL,
218 item_type_schema_variant String NOT NULL,
219 item_type_schema_field_roles String NOT NULL DEFAULT '{}'
220)
221ENGINE = MergeTree
222PARTITION BY ds
223ORDER BY (ds, org_id, event, item_type_id, item_id);
224
225DROP TABLE IF EXISTS analytics.CONTENT_DETAILS_API_REQUESTS;
226CREATE TABLE analytics.CONTENT_DETAILS_API_REQUESTS
227(
228 content_id String,
229 org_id String,
230 event Nullable(String),
231 failure_reason Nullable(String),
232 ds Date DEFAULT toDate(0),
233 ts Nullable(String)
234)
235ENGINE = MergeTree
236PARTITION BY ds
237ORDER BY (ds, org_id, content_id);
238
239DROP TABLE IF EXISTS analytics.INGESTED_JSON;
240CREATE TABLE analytics.INGESTED_JSON
241(
242 target_table String,
243 data String,
244 ds Date
245)
246ENGINE = MergeTree
247PARTITION BY ds
248ORDER BY (ds, target_table);
249
250DROP TABLE IF EXISTS analytics.ITEM_MODEL_SCORES_LOG;
251CREATE TABLE analytics.ITEM_MODEL_SCORES_LOG
252(
253 org_id String,
254 model_id Nullable(String),
255 model_version Nullable(Int64),
256 model_score Nullable(Float64),
257 event String,
258 submission_id String,
259 failure_reason Nullable(String),
260 ts DateTime64(3),
261 ds Date,
262 item_id String,
263 item_type_name String,
264 item_type_id String,
265 item_creator_id Nullable(String),
266 item_data String,
267 item_type_schema String,
268 item_type_kind String,
269 item_creator_type_id Nullable(String),
270 item_type_version String,
271 item_type_schema_variant String,
272 item_type_schema_field_roles String DEFAULT '{}'
273)
274ENGINE = MergeTree
275PARTITION BY ds
276ORDER BY (ds, org_id, event, item_type_id, item_id);
277
278DROP TABLE IF EXISTS analytics.RULE_EXECUTIONS;
279CREATE TABLE analytics.RULE_EXECUTIONS
280(
281 rule String NOT NULL,
282 rule_id String NOT NULL,
283 rule_version Nullable(DateTime64(3)),
284 org_id String NOT NULL,
285 environment String NOT NULL,
286 correlation_id Nullable(String),
287 policy_ids Array(String) DEFAULT [],
288 policy_names Array(String) DEFAULT [],
289 tags Array(String) DEFAULT [],
290 result Nullable(String),
291 passed UInt8 NOT NULL,
292 ts DateTime64(3) NOT NULL,
293 ds Date NOT NULL,
294 item_data Nullable(String),
295 item_id String NOT NULL,
296 item_submission_id Nullable(String),
297 item_type_name Nullable(String),
298 item_type_id String NOT NULL,
299 item_type_kind String NOT NULL,
300 item_creator_id Nullable(String),
301 item_creator_type_id Nullable(String),
302 item_type_schema Nullable(String),
303 item_type_schema_field_roles Nullable(String),
304 item_type_version Nullable(String),
305 item_type_schema_variant Nullable(String)
306)
307ENGINE = MergeTree
308PARTITION BY ds
309ORDER BY (ds, org_id, rule_id, item_id);
310
311DROP TABLE IF EXISTS analytics.RULE_EXECUTION_STATISTICS;
312CREATE TABLE analytics.RULE_EXECUTION_STATISTICS
313(
314 org_id String,
315 rule_id String,
316 rule_version DateTime64(3),
317 num_passes Int64,
318 num_runs Int64,
319 ts_start_inclusive DateTime64(3),
320 ts_end_exclusive DateTime64(3),
321 environment Nullable(String),
322 rule_policy_names Array(String),
323 rule_policy_ids Array(String),
324 rule_tags Array(String)
325)
326ENGINE = MergeTree
327PARTITION BY toDate(ts_start_inclusive)
328ORDER BY (org_id, rule_id, ts_start_inclusive);
329
330-------------------------------------------------------------------------------
331-- REPORTING_SERVICE
332-------------------------------------------------------------------------------
333
334DROP TABLE IF EXISTS REPORTING_SERVICE.APPEALS;
335CREATE TABLE REPORTING_SERVICE.APPEALS
336(
337 org_id String NOT NULL,
338 request_id String NOT NULL,
339 appeal_id String NOT NULL,
340 appealed_by_user_id Nullable(String),
341 appealed_by_user_item_type_id Nullable(String),
342 appealed_at DateTime64(3) NOT NULL,
343 appeal_reason Nullable(String),
344 actions_taken Array(String) NOT NULL DEFAULT [],
345 actioned_item_data String NOT NULL,
346 actioned_item_id String NOT NULL,
347 actioned_item_type_id String NOT NULL,
348 actioned_item_type_kind String NOT NULL,
349 actioned_item_type_schema String NOT NULL,
350 actioned_item_type_schema_field_roles String NOT NULL,
351 actioned_item_type_version String NOT NULL,
352 actioned_item_type_schema_variant String NOT NULL,
353 additional_items String NOT NULL DEFAULT '[]',
354 ts DateTime64(3) NOT NULL
355)
356ENGINE = MergeTree
357PARTITION BY toDate(ts)
358ORDER BY (org_id, appeal_id, ts);
359
360DROP TABLE IF EXISTS REPORTING_SERVICE.REPORTING_RULE_EXECUTIONS;
361CREATE TABLE REPORTING_SERVICE.REPORTING_RULE_EXECUTIONS
362(
363 rule_name String NOT NULL,
364 rule_id String NOT NULL,
365 rule_version DateTime64(3) NOT NULL,
366 rule_environment String NOT NULL,
367 org_id String NOT NULL,
368 correlation_id String NOT NULL,
369 result String NOT NULL,
370 passed UInt8 NOT NULL,
371 ts DateTime64(3) NOT NULL,
372 ds Date NOT NULL,
373 policy_ids Array(String) NOT NULL,
374 policy_names Array(String) NOT NULL DEFAULT [],
375 item_data String NOT NULL,
376 item_id String NOT NULL,
377 item_type_name String NOT NULL,
378 item_type_id String NOT NULL,
379 item_type_kind String NOT NULL,
380 item_creator_id Nullable(String),
381 item_creator_type_id Nullable(String),
382 item_type_schema String NOT NULL,
383 item_type_schema_field_roles String NOT NULL,
384 item_type_version String NOT NULL,
385 item_type_schema_variant String NOT NULL
386)
387ENGINE = MergeTree
388PARTITION BY ds
389ORDER BY (ds, org_id, rule_id, item_id);
390
391DROP TABLE IF EXISTS REPORTING_SERVICE.REPORTING_RULE_EXECUTION_STATISTICS;
392CREATE TABLE REPORTING_SERVICE.REPORTING_RULE_EXECUTION_STATISTICS
393(
394 org_id String,
395 rule_id String,
396 rule_version DateTime64(3),
397 rule_environment Nullable(String),
398 rule_policy_names Array(String),
399 rule_policy_ids Array(String),
400 num_passes Int64,
401 num_runs Int64,
402 ts_start_inclusive DateTime64(3),
403 ts_end_exclusive DateTime64(3)
404)
405ENGINE = MergeTree
406PARTITION BY toDate(ts_start_inclusive)
407ORDER BY (org_id, rule_id, ts_start_inclusive);
408
409DROP TABLE IF EXISTS REPORTING_SERVICE.REPORTS;
410CREATE TABLE REPORTING_SERVICE.REPORTS
411(
412 org_id String NOT NULL,
413 request_id String NOT NULL,
414 reporter_user_id Nullable(String),
415 reported_at DateTime64(3) NOT NULL,
416 policy_id Nullable(String),
417 reported_for_reason Nullable(String),
418 ts DateTime64(3) NOT NULL,
419 reporter_user_item_type_id Nullable(String),
420 reporter_kind String NOT NULL,
421 reported_item_id String NOT NULL,
422 reported_item_data String NOT NULL,
423 reported_item_type_id String NOT NULL,
424 reported_item_type_kind String NOT NULL,
425 reported_item_type_schema String NOT NULL,
426 reported_item_type_schema_field_roles String NOT NULL,
427 reported_item_type_schema_variant String NOT NULL,
428 reported_item_type_version String NOT NULL,
429 reported_item_thread Nullable(String),
430 reported_items_in_thread Nullable(String),
431 additional_items String NOT NULL DEFAULT '[]'
432)
433ENGINE = MergeTree
434PARTITION BY toDate(ts)
435ORDER BY (org_id, request_id, ts);
436
437-------------------------------------------------------------------------------
438-- RULE_ANOMALY_DETECTION_SERVICE
439-------------------------------------------------------------------------------
440
441DROP TABLE IF EXISTS RULE_ANOMALY_DETECTION_SERVICE.RULE_EXECUTION_STATISTICS;
442CREATE TABLE RULE_ANOMALY_DETECTION_SERVICE.RULE_EXECUTION_STATISTICS
443(
444 org_id String,
445 rule_id String,
446 rule_version DateTime64(3),
447 num_passes Int64,
448 passes_distinct_user_ids String,
449 num_runs Int64,
450 ts_start_inclusive DateTime64(3),
451 ts_end_exclusive DateTime64(3)
452)
453ENGINE = MergeTree
454PARTITION BY toDate(ts_start_inclusive)
455ORDER BY (org_id, rule_id, ts_start_inclusive);
456
457-------------------------------------------------------------------------------
458-- USER_STATISTICS_SERVICE
459-------------------------------------------------------------------------------
460
461DROP TABLE IF EXISTS USER_STATISTICS_SERVICE.LIFETIME_ACTION_STATS;
462CREATE TABLE USER_STATISTICS_SERVICE.LIFETIME_ACTION_STATS
463(
464 org_id String,
465 user_id String,
466 action_id String,
467 policy_id Nullable(String),
468 item_submission_ids Array(String),
469 count Int64,
470 user_type_id String,
471 actor_id Nullable(String)
472)
473ENGINE = MergeTree
474ORDER BY (org_id, user_id, action_id, ifNull(policy_id, ''));
475
476DROP TABLE IF EXISTS USER_STATISTICS_SERVICE.SUBMISSION_STATS;
477CREATE TABLE USER_STATISTICS_SERVICE.SUBMISSION_STATS
478(
479 org_id String,
480 user_id String,
481 item_type_id String,
482 num_submissions Int64,
483 ts_start_inclusive DateTime64(3),
484 ts_end_exclusive DateTime64(3),
485 user_type_id Nullable(String)
486)
487ENGINE = MergeTree
488PARTITION BY toDate(ts_start_inclusive)
489ORDER BY (org_id, ifNull(user_id, ''), item_type_id, ts_start_inclusive);
490
491DROP TABLE IF EXISTS USER_STATISTICS_SERVICE.USER_SCORES;
492CREATE TABLE USER_STATISTICS_SERVICE.USER_SCORES
493(
494 org_id String,
495 user_id String,
496 score Float64,
497 score_date DateTime64(3),
498 user_type_id String
499)
500ENGINE = MergeTree
501PARTITION BY toDate(score_date)
502ORDER BY (org_id, user_id, score_date);
503