···
| **Frontend** | React, TypeScript, Ant Design, TailwindCSS, Apollo Client |
| **Backend** | Node.js, Express, Apollo Server, TypeScript |
| **Databases** | PostgreSQL, ScyllaDB (5.2), ClickHouse, Redis |
-| **Messaging** | Kafka (optional), BullMQ |
+| **Messaging** | BullMQ (Redis) |
| **ORM** | Sequelize, Kysely |
| **Auth** | Passport.js, express-session, SAML (SSO) |
| **Observability** | OpenTelemetry |
···
Coop accepts both synchronous and asynchronous input.

* Synchronous input is handled via REST APIs and supports item submission, action execution, reporting workflows, policy retrieval, and related operations.
-* Asynchronous input is handled via Kafka-based event streaming using the ITEM_SUBMISSION_EVENT topic.
+* Asynchronous input is handled via BullMQ job queues backed by Redis.

All API requests require an organization API key passed via the `x-api-key` header.
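For example, a synchronous submission might look like the call below; the endpoint path, payload fields, and the `COOP_API_KEY` variable are illustrative placeholders, while the `x-api-key` header is the documented requirement:

```ts
// Hypothetical sketch of a synchronous item submission; only the x-api-key
// header requirement comes from the docs above, everything else is a placeholder.
const response = await fetch('https://coop.example.com/api/v1/items', {
  method: 'POST',
  headers: {
    'content-type': 'application/json',
    'x-api-key': process.env.COOP_API_KEY ?? '',
  },
  body: JSON.stringify({
    typeId: 'comment',             // placeholder item type
    data: { text: 'hello world' }, // placeholder item fields
  }),
});

if (!response.ok) {
  throw new Error(`Item submission failed with status ${response.status}`);
}
```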

···
* PostgreSQL
* Redis
-* Kafka
-  * Schema registry
-  * Zookeeper
* Clickhouse
* ScyllaDb
* Metrics
+13-6
docs/DEVELOPMENT.md
···
Copy `server/.env.example` to `server/.env`. The example file contains all available options with documentation. Key sections:

- **Database connections**: PostgreSQL, ClickHouse, ScyllaDB, Redis
-- **Kafka**: Broker and schema registry settings
- **External APIs**: OpenAI, SendGrid, Google APIs (optional)
- **Security**: Session secrets, JWT signing keys

···
PostgreSQL | 5432 | Primary DB (with pgvector)
ClickHouse | 8123, 9000 | Analytics warehouse
ScyllaDB | 9042 | Item submission history
-Redis | 6379 | Caching and job queues
-Kafka | 29092 | Event streaming
-Schema Registry | 8081 | Kafka schemas
-Zookeeper | 22181 | Kafka coordination
-Jaeger | 16686 | Tracing UI (opens automatically)
+Redis | 6379 | Caching and job queues
+Jaeger | 16686 | Tracing UI (opens automatically)
OTEL Collector | 4317 | Telemetry collection

Check service health:
···
# Terminal 3 (optional, for GraphQL schema changes)
npm run generate:watch
```

+### Background Workers
+
+Item submissions are processed asynchronously via a BullMQ worker that consumes from Redis. To process items locally, run the worker in a separate terminal:
+
+```bash
+cd server
+npm run runWorkerOrJob ItemProcessingWorker
+```
+
+Without this running, submitted items will be enqueued in Redis but not processed. Other available workers/jobs can be found in `server/iocContainer/services/workersAndJobs.ts`.
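For orientation, the moving parts look roughly like the sketch below. This is simplified: the real queue name constant, payload types, and worker logic live in `server/queues/itemSubmissionQueue.ts` and `server/workers_jobs/ItemProcessingWorker.ts`, and the literal `'item-submission'` name here is a placeholder.

```ts
import { Queue, Worker } from 'bullmq';
import IORedis from 'ioredis';

// BullMQ issues blocking Redis commands, so request retries must be disabled.
const connection = new IORedis({ maxRetriesPerRequest: null });

// API side: submissions are enqueued instead of being processed inline.
const queue = new Queue('item-submission', { connection });
await queue.add('item-submission', { /* item submission payload */ });

// Worker side: `npm run runWorkerOrJob ItemProcessingWorker` starts a worker
// that pulls jobs from the same queue and runs them through the rule engine.
const worker = new Worker(
  'item-submission',
  async (job) => {
    // process job.data here
  },
  { connection },
);
```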
### With Distributed Tracing

+1-1
package.json
···1818 "generate:watch": "graphql-codegen --watch \"server/graphql/**/**.ts\"",
1919 "prepare": "husky install",
2020 "lint": "cd client && npm run lint; cd ../server && npm run lint",
2121- "up": "docker compose up --detach postgres clickhouse hma scylla redis otel-collector kafka && open http://localhost:16686",
2121+ "up": "docker compose up --detach postgres clickhouse hma scylla redis otel-collector && open http://localhost:16686",
2222 "down": "docker compose down",
2323 "betterer": "betterer"
2424 },
-20
server/.env.example
···
HMA_SERVICE_URL=http://localhost:9876

-# Kafka authentication info.
-KAFKA_BROKER_HOST=localhost:29092
-KAFKA_BROKER_USERNAME=
-KAFKA_BROKER_PASSWORD=
-KAFKA_SCHEMA_REGISTRY_HOST=http://localhost:8081
-KAFKA_SCHEMA_REGISTRY_USERNAME=
-KAFKA_SCHEMA_REGISTRY_PASSWORD=
-KAFKAJS_NO_PARTITIONER_WARNING=1
-
-# NB: these schema ids are different on prod + staging clusters; may be
-# different in our future local dev setup. Eventually, we'll likely want
-# a more sophisticated approach than env vars for determining these values,
-# but we need to figure out our Kafka schema migration system first.
-KAFKA_TOPIC_KEY_SCHEMA_ID_DATA_WAREHOUSE_INGEST_EVENTS=
-KAFKA_TOPIC_VALUE_SCHEMA_ID_DATA_WAREHOUSE_INGEST_EVENTS=
-KAFKA_TOPIC_KEY_SCHEMA_ID_ITEM_SUBMISSION_EVENTS=
-KAFKA_TOPIC_VALUE_SCHEMA_ID_ITEM_SUBMISSION_EVENTS=
-KAFKA_TOPIC_KEY_SCHEMA_ID_ITEM_SUBMISSION_EVENTS_RETRY_0=
-KAFKA_TOPIC_VALUE_SCHEMA_ID_ITEM_SUBMISSION_EVENTS_RETRY_0=
-
# Scylla Cluster Details
SCYLLA_USERNAME=cassandra
SCYLLA_PASSWORD=cassandra
server/iocContainer/index.ts
···
/* eslint-disable max-lines */
import { createRequire } from 'module';
import Bottle from '@ethanresnick/bottlejs';
-import { SchemaType } from '@kafkajs/confluent-schema-registry';
import opentelemetry from '@opentelemetry/api';
import { makeDateString, type ItemIdentifier } from '@roostorg/types';
-import avro from 'avsc';
import { types as scyllaTypes } from 'cassandra-driver';
import IORedis, { type Cluster } from 'ioredis';
-import { logLevel } from 'kafkajs';
import * as knexPkg from 'knex';
import { type Knex } from 'knex';
import {
···
import { type JsonObject, type ReadonlyDeep } from 'type-fest';
import { v1 as uuidv1 } from 'uuid';

-import Kafka, { SchemaRegistry, type SchemaIdFor } from '../kafka/index.js';
-import {
-  makeItemQueueBulkWrite,
-  type ItemQueueBulkWrite,
-} from '../kafka/itemQueueBulkWrite.js';
-import logCreator from '../kafka/logger.js';
import makeDb from '../models/index.js';
+import {
+  makeItemSubmissionBulkWrite,
+  type ItemSubmissionBulkWrite,
+  ITEM_SUBMISSION_QUEUE_NAME,
+  ITEM_SUBMISSION_DLQ_NAME,
+} from '../queues/itemSubmissionQueue.js';
import { type PolicyActionPenalties } from '../models/OrgModel.js';
import { type HashBank, HashBankService } from '../services/hmaService/index.js';
import makeActionPublisher, {
···
import { registerWorkersAndJobs } from './services/workersAndJobs.js';
import { register, safeGetEnvVar } from './utils.js';

-type DataWarehouseOutboxKafkaMessageKey = {
-  orgId: string;
-  userId: string;
-};
-
-type DataWarehouseOutboxKafkaMessageValue = {
-  dataJSON: string;
-  table: string;
-  recordedAt: Date;
-};
-
// the otel instrumentation currently intercepts require statements. support for
// esm support is experimental so we should wait until it is stable
const require = createRequire(import.meta.url);
const { Client: ScyllaClient } = require('cassandra-driver');
export type { DataSources } from './services/gqlDataSources.js';

-// All Kafka topics and their schemas should be referenced here. Currently, we
-// have to create schemas and topics manually, and manually keep them in sync
-// across environments, which is hard to do reliably. Eventually, we'll have
-// an IaC solution that lets us keep these schemas in code somewhere and view
-// them in the repo. Until then, talk to Ethan if you need a new topic or schema.
-export type ItemSubmissionKafkaMessageKey = {
+export type ItemSubmissionMessageKey = {
  syntheticThreadId: string;
};
-export type ItemSubmissionKafkaMessageValue = {
+export type ItemSubmissionMessageValue = {
  metadata: {
    syntheticThreadId: string;
    requestId: CorrelationId<'post-items'>;
···
  };
};

-export type KafkaSchemaMap = {
-  ITEM_SUBMISSION_EVENTS: {
-    keySchema: SchemaIdFor<ItemSubmissionKafkaMessageKey>;
-    valueSchema: SchemaIdFor<ItemSubmissionKafkaMessageValue>;
-  };
-  ITEM_SUBMISSION_EVENTS_RETRY_0: {
-    keySchema: SchemaIdFor<ItemSubmissionKafkaMessageKey>;
-    valueSchema: SchemaIdFor<ItemSubmissionKafkaMessageValue>;
-  };
-  DATA_WAREHOUSE_INGEST_EVENTS: {
-    keySchema: SchemaIdFor<DataWarehouseOutboxKafkaMessageKey>;
-    valueSchema: SchemaIdFor<DataWarehouseOutboxKafkaMessageValue>;
-  };
-};
-
// Defines a global map type of all injectable dependencies, where the key is,
// conceptually, the name of the "interface"/name of the contract, and the value
// is the type that any implementation must sastify.
···
  ContentApiRequestsAdapter: IContentApiRequestsAdapter;
  OrgCreationAdapter: IOrgCreationAdapter;

-  itemSubmissionQueueBulkWrite: ItemQueueBulkWrite;
-  itemSubmissionRetryQueueBulkWrite: ItemQueueBulkWrite;
+  itemSubmissionQueueBulkWrite: ItemSubmissionBulkWrite;
+  itemSubmissionRetryQueueBulkWrite: ItemSubmissionBulkWrite;
  Knex: Knex;
  IORedis: IORedis.Redis | Cluster;
-  // We register the services as Kafka<any> so that each service that depends
-  // on Kafka can type its arg more specifically, based on the topic that
-  // it's supposed to be able to "see". E.g., a worker can type
-  // its argument as `Kafka<Pick<KafkaSchemaMap, 'ITEM_SUBMISSION_EVENTS'>>`, so that its code can only
-  // read messages from the topic with the intended schema, and the `Kafka`
-  // service will be assignable to that argument because of the `any`.
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  Kafka: Kafka<any>;

  // Loggers
  RuleExecutionLogger: RuleExecutionLogger;
···
          dnsLookup: (address, callback) => callback(null, address),
          redisOptions: {
            tls: {},
+           // Required by BullMQ: its workers use blocking Redis commands
+           // that would otherwise be misinterpreted as timed-out requests.
+           maxRetriesPerRequest: null,
            username: safeGetEnvVar('REDIS_USER'),
            password: safeGetEnvVar('REDIS_PASSWORD'),
          },
        },
      )
    : new IORedis.default({
+       // Required by BullMQ: its workers use blocking Redis commands
+       // that would otherwise be misinterpreted as timed-out requests.
        maxRetriesPerRequest: null,
        port: parseInt(process.env.REDIS_PORT ?? '6379'),
        host: safeGetEnvVar('REDIS_HOST'),
      }),
  );
571534572572- bottle.factory('Kafka', () => {
573573- // TODO: think about shutdown logic. Right now, creating this instance
574574- // doesn't open up any resources that need to be shutdown, so we're ok.
575575- // However, when a producer/consumer are created from this instance and then
576576- // they call .connect(), that opens a connection that we must terminate by
577577- // manually calling .disconnect() on shutdown. Maybe there's a more
578578- // elegant/robust way?
579579- return new Kafka(
580580- {
581581- // NB: Confluent Cloud exposes only one endpoint URL that load balances
582582- // between multiple brokers, so we don't need to worry about splitting
583583- // this to an array.
584584- brokers: [safeGetEnvVar('KAFKA_BROKER_HOST')],
585585- ...(['CI', 'development'].includes(process.env.NODE_ENV ?? 'production') ? {} : {
586586- ssl: true,
587587- sasl: {
588588- mechanism: 'plain',
589589- username: safeGetEnvVar('KAFKA_BROKER_USERNAME'),
590590- password: safeGetEnvVar('KAFKA_BROKER_PASSWORD'),
591591- },
592592- }),
593593- // Found experimentally. Confluent docs seem to recommend setting at
594594- // least some timeouts to a value above 10s, but they don't mention a
595595- // specific value to use, and the setting described in those docs may
596596- // not map 1:1 to a kafkajs setting. Nevertheless, the kafkajs default
597597- // of 1s was giving timeout errors, so we had to bump this. See
598598- // https://docs.confluent.io/cloud/current/cp-component/clients-cloud-config.html#prerequisitesq
599599- connectionTimeout: 10_000,
600600- // Default here is 30s but we set it to avoid long-running requests
601601- // wait that long.
602602- requestTimeout: 10_000,
603603- // Set clientId to help with monitoring/observability.
604604- // See https://kafka.js.org/docs/configuration#client-id
605605- clientId: getEnvVarOrWarn('OTEL_SERVICE_NAME'),
606606- logLevel: logLevel.WARN,
607607- logCreator,
608608- },
609609- {
610610- DATA_WAREHOUSE_INGEST_EVENTS: {
611611- keySchema: parseInt(
612612- safeGetEnvVar('KAFKA_TOPIC_KEY_SCHEMA_ID_DATA_WAREHOUSE_INGEST_EVENTS'),
613613- ) as SchemaIdFor<DataWarehouseOutboxKafkaMessageKey>,
614614- valueSchema: parseInt(
615615- safeGetEnvVar(
616616- 'KAFKA_TOPIC_VALUE_SCHEMA_ID_DATA_WAREHOUSE_INGEST_EVENTS',
617617- ),
618618- ) as SchemaIdFor<DataWarehouseOutboxKafkaMessageValue>,
619619- },
620620- ITEM_SUBMISSION_EVENTS: {
621621- keySchema: parseInt(
622622- safeGetEnvVar('KAFKA_TOPIC_KEY_SCHEMA_ID_ITEM_SUBMISSION_EVENTS'),
623623- ) as SchemaIdFor<ItemSubmissionKafkaMessageKey>,
624624- valueSchema: parseInt(
625625- safeGetEnvVar('KAFKA_TOPIC_VALUE_SCHEMA_ID_ITEM_SUBMISSION_EVENTS'),
626626- ) as SchemaIdFor<ItemSubmissionKafkaMessageValue>,
627627- },
628628- ITEM_SUBMISSION_EVENTS_RETRY_0: {
629629- keySchema: parseInt(
630630- safeGetEnvVar(
631631- 'KAFKA_TOPIC_KEY_SCHEMA_ID_ITEM_SUBMISSION_EVENTS_RETRY_0',
632632- ),
633633- ) as SchemaIdFor<ItemSubmissionKafkaMessageKey>,
634634- valueSchema: parseInt(
635635- safeGetEnvVar(
636636- 'KAFKA_TOPIC_VALUE_SCHEMA_ID_ITEM_SUBMISSION_EVENTS_RETRY_0',
637637- ),
638638- ) as SchemaIdFor<ItemSubmissionKafkaMessageValue>,
639639- },
640640- },
641641- new SchemaRegistry(
642642- {
643643- host: safeGetEnvVar('KAFKA_SCHEMA_REGISTRY_HOST'),
644644- auth: {
645645- username: safeGetEnvVar('KAFKA_SCHEMA_REGISTRY_USERNAME'),
646646- password: safeGetEnvVar('KAFKA_SCHEMA_REGISTRY_PASSWORD'),
647647- },
648648- },
649649- {
650650- [SchemaType.AVRO]: {
651651- logicalTypes: {
652652- // Implementation copied from avsc docs.
653653- // See https://gist.github.com/mtth/1aec40375fbcb077aee7#file-date-js
654654- 'timestamp-millis': class extends avro.types.LogicalType {
655655- override _fromValue(val: string) {
656656- return new Date(val);
657657- }
658658- override _toValue(date: Date) {
659659- return date instanceof Date ? Number(date) : undefined;
660660- }
661661- override _resolve(type: unknown) {
662662- return avro.Type.isType(
663663- type,
664664- 'long',
665665- 'string',
666666- 'logical:timestamp-millis',
667667- )
668668- ? this._fromValue
669669- : undefined;
670670- }
671671- },
672672- },
673673- },
674674- },
675675- ),
676676- );
677677- });

  bottle.factory('Sequelize', () => makeDb());
  bottle.factory('OrgModel', ({ Sequelize }) => Sequelize.Org);
···
  });

  bottle.factory('itemSubmissionQueueBulkWrite', (container) =>
-   makeItemQueueBulkWrite(container.Kafka, 'ITEM_SUBMISSION_EVENTS'),
+   makeItemSubmissionBulkWrite(container.IORedis, ITEM_SUBMISSION_QUEUE_NAME),
  );
  bottle.factory('itemSubmissionRetryQueueBulkWrite', (container) =>
-   makeItemQueueBulkWrite(container.Kafka, 'ITEM_SUBMISSION_EVENTS_RETRY_0'),
+   makeItemSubmissionBulkWrite(container.IORedis, ITEM_SUBMISSION_DLQ_NAME),
  );

  // Legacy service deprecated in favor of kysely.
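The new `server/queues/itemSubmissionQueue.ts` module that these factories depend on is not shown in this diff. A rough sketch of what a BullMQ-backed `makeItemSubmissionBulkWrite` could look like follows; the queue-name values, the job name, and the return shape are assumptions, while the exported identifiers and the `(redis, queueName)` call signature come from the hunks above.

```ts
import { Queue } from 'bullmq';
import { type Cluster } from 'ioredis';
import type IORedis from 'ioredis';

import { type ItemSubmissionMessageValue } from '../iocContainer/index.js';

// Queue name values are assumptions; only the exported identifiers appear in the diff.
export const ITEM_SUBMISSION_QUEUE_NAME = 'item-submission';
export const ITEM_SUBMISSION_DLQ_NAME = 'item-submission-dlq';

/**
 * Sketch of a factory for a service that writes a batch of item submissions
 * to a BullMQ queue, mirroring the old Kafka-based bulk-write contract.
 */
export function makeItemSubmissionBulkWrite(
  redis: IORedis.Redis | Cluster,
  queueName: string,
) {
  const queue = new Queue<ItemSubmissionMessageValue>(queueName, {
    connection: redis,
  });

  async function itemSubmissionBulkWrite(
    items: readonly ItemSubmissionMessageValue[],
  ) {
    if (items.length === 0) {
      return { error: false as const, results: [] };
    }
    try {
      // addBulk enqueues the whole batch in a single round trip to Redis.
      await queue.addBulk(
        items.map((value) => ({ name: 'item-submission', data: value })),
      );
      return { error: false as const, results: [] };
    } catch (err) {
      return { error: true as const, results: [err] };
    }
  }

  itemSubmissionBulkWrite.close = async () => queue.close();

  return itemSubmissionBulkWrite;
}

export type ItemSubmissionBulkWrite = ReturnType<typeof makeItemSubmissionBulkWrite>;
```

Using `addBulk` would preserve the old producer's batching behavior (one write per batch rather than per item), with Redis taking the place of the Kafka broker.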
-18
server/kafka/KafkajsZstdCompressionCodec.ts
···11-import { compress, decompress } from '@mongodb-js/zstd';
22-33-// The encoder class from Kafkajs doesn't have an exported type,
44-// but we can make a minimal stub in the meantime.
55-// See https://github.com/tulios/kafkajs/issues/1552
66-type Encoder = { buffer: Buffer };
77-88-export class KafkajsZstdCompressionCodec {
99- constructor(private readonly level: number) {}
1010-1111- async compress(encoder: Encoder) {
1212- return compress(encoder.buffer, this.level);
1313- }
1414-1515- async decompress(buffer: Buffer) {
1616- return decompress(buffer);
1717- }
1818-}
-99
server/kafka/SchemaAwareClient.ts
···11-import { SchemaRegistry as UntypedSchemaRegistry } from '@kafkajs/confluent-schema-registry';
22-import type { ConsumerConfig, Kafka as KafkaJS, KafkaConfig, ProducerConfig } from 'kafkajs';
33-44-import { createRequire } from 'module';
55-import SchemaAwareConsumer from './SchemaAwareConsumer.js';
66-import SchemaAwareProducer from './SchemaAwareProducer.js';
77-88-// NB: we import kafkajs using require() here instead of import because the
99-// open-telemetry instrumentations intercepts only require() calls in order
1010-// to patch modules. If kafkajs is imported using import, it won't be patched.
1111-const require = createRequire(import.meta.url);
1212-const {Kafka: KafkaClient} = require('kafkajs')
1313-1414-// Generic fake symbol for holding type-level metadata.
1515-declare const meta: unique symbol;
1616-1717-// This type holds the id of a schema in the Schema Registry, alongside
1818-// TS type-level metadata showing the expected shape of the decoded message.
1919-export type SchemaIdFor<T> = number & { readonly [meta]: T };
2020-2121-// Allows storing the Schema Registry, with some TS metadata reflecting
2222-// which schemas have been registered with the registry.
2323-export type SchemaRegistry<T extends AnyTopicSchemaMap> =
2424- UntypedSchemaRegistry & { readonly [meta]: T };
2525-2626-// Re-export the SchemaRegistry constructor w/ a type cast that lets us hold
2727-// registered schema metadata in the type param.
2828-export const SchemaRegistry = UntypedSchemaRegistry as new <
2929- T extends AnyTopicSchemaMap,
3030->(
3131- args: ConstructorParameters<typeof UntypedSchemaRegistry>[0],
3232- options?: ConstructorParameters<typeof UntypedSchemaRegistry>[1],
3333-) => SchemaRegistry<T>;
3434-3535-export type AnyTopicSchemaMap = {
3636- [topicName: string]: {
3737- keySchema: SchemaIdFor<unknown>;
3838- valueSchema: SchemaIdFor<unknown>;
3939- };
4040-};
4141-4242-// A union of the message key types for the given topics.
4343-export type KeyTypes<T extends AnyTopicSchemaMap> =
4444- T[keyof T]['keySchema'][typeof meta];
4545-4646-// A union of the message value types for the given topics.
4747-export type ValueTypes<T extends AnyTopicSchemaMap> =
4848- T[keyof T]['valueSchema'][typeof meta];
4949-5050-/**
5151- * Constructs a wrapped Kafka client instance that's aware of the Schema
5252- * Registry and our schemas in it.
5353- */
5454-export default class Kafka<TopicSchemaMap extends AnyTopicSchemaMap> {
5555- readonly #client: KafkaJS;
5656- readonly #schemaMap: TopicSchemaMap;
5757- readonly #registry: SchemaRegistry<TopicSchemaMap>;
5858-5959- constructor(
6060- config: KafkaConfig,
6161- schemaMap: TopicSchemaMap,
6262- registry: SchemaRegistry<TopicSchemaMap>,
6363- ) {
6464- this.#client = new KafkaClient(config);
6565- this.#schemaMap = schemaMap;
6666- this.#registry = registry;
6767- }
6868-6969- public producer(config?: ProducerConfig) {
7070- return new SchemaAwareProducer(
7171- this.#client,
7272- this.#registry,
7373- this.#schemaMap,
7474- // Unlike in Kafkajs, default allowAutoTopicCreation to false, since it's
7575- // not a super safe setting. We may have to revise this as we think about
7676- // the local dev story (and it may not be necessary if we have proper ACLs
7777- // in prod that bans our clients from creating topics).
7878- { allowAutoTopicCreation: false, ...config },
7979- );
8080- }
8181-8282- /**
8383- * The Topics type parameter should be filled in with the list of topic names
8484- * that the consumer might subscribe to. (It will only be allowed to subscribe
8585- * to these topics, and all of these topics must have a corresponding registered
8686- * schema.) In KafkaJS, choosing which topics to subscribe to and then actually
8787- * consuming the messages on those topics are two separate operations.
8888- * However, we have to link them in the types (i.e., the type of each decoded
8989- * message needs to depend on which topics the consumer has subscribed to), so
9090- * we use this Topics type parameter to do that.
9191- */
9292- public consumer<Topics extends keyof TopicSchemaMap>(config: ConsumerConfig) {
9393- return new SchemaAwareConsumer(
9494- this.#client,
9595- this.#registry as SchemaRegistry<Pick<TopicSchemaMap, Topics>>,
9696- config,
9797- );
9898- }
9999-}
-359
server/kafka/SchemaAwareConsumer.ts
···11-import {
22- type ConnectEvent,
33- type Consumer,
44- type ConsumerCommitOffsetsEvent,
55- type ConsumerConfig,
66- type ConsumerCrashEvent,
77- type ConsumerEndBatchProcessEvent,
88- type ConsumerEvents,
99- type ConsumerFetchEvent,
1010- type ConsumerFetchStartEvent,
1111- type ConsumerGroupJoinEvent,
1212- type ConsumerHeartbeatEvent,
1313- type ConsumerRebalancingEvent,
1414- type ConsumerReceivedUnsubcribedTopicsEvent,
1515- type ConsumerStartBatchProcessEvent,
1616- type DisconnectEvent,
1717- type InstrumentationEvent,
1818- type TopicPartition as KafakJSTopicPartition,
1919- type TopicPartitionOffset as KafakJSTopicPartitionOffset,
2020- type TopicPartitionOffsetAndMetadata as KafakJSTopicPartitionOffsetAndMetadata,
2121- type Kafka as KafkaJS,
2222- type ConsumerRunConfig as KafkaJSConsumerRunConfig,
2323- type ConsumerSubscribeTopics as KafkaJSConsumerSubscribeTopics,
2424- type EachBatchPayload as KafkaJSEachBatchPayload,
2525- type EachMessagePayload as KafkaJSEachMessagePayload,
2626- type KafkaMessage,
2727- type RemoveInstrumentationEventListener,
2828- type RequestEvent,
2929- type RequestQueueSizeEvent,
3030- type RequestTimeoutEvent,
3131- type KafkaJSError as KafkaJSErrorType,
3232- type KafkaJSProtocolError as KafkaJSProtocolErrorType
3333-} from 'kafkajs';
3434-import kafkaJs from 'kafkajs';
3535-3636-const { KafkaJSError, KafkaJSProtocolError } = kafkaJs;
3737-3838-3939-import { type Mutable } from '../utils/typescript-types.js';
4040-import {
4141- type AnyTopicSchemaMap,
4242- type KeyTypes,
4343- type SchemaRegistry,
4444- type ValueTypes,
4545-} from './SchemaAwareClient.js';
4646-4747-// Redefine a number of types to support subscribing to/processing messages
4848-// from topics that have a registered schema in a type-safe way.
4949-type ConsumerSubscribeTopics<T extends AnyTopicSchemaMap> = Pick<
5050- KafkaJSConsumerSubscribeTopics,
5151- 'fromBeginning'
5252-> & { topics: readonly (keyof T & string)[] };
5353-5454-export type DecodedMessage<EligibleTopics extends AnyTopicSchemaMap> = Omit<
5555- KafkaMessage,
5656- 'key' | 'value'
5757-> & {
5858- key: KeyTypes<EligibleTopics> | null;
5959- value: ValueTypes<EligibleTopics> | null;
6060-};
6161-6262-type EachMessagePayload<T extends AnyTopicSchemaMap> = Omit<
6363- KafkaJSEachMessagePayload,
6464- 'message' | 'topic'
6565-> & { topic: keyof T & string; message: DecodedMessage<T> };
6666-6767-type EachBatchPayload<T extends AnyTopicSchemaMap> = Omit<
6868- KafkaJSEachBatchPayload,
6969- 'batch'
7070-> & {
7171- batch: Omit<KafkaJSEachBatchPayload['batch'], 'topic'> & {
7272- topic: keyof T & string;
7373- decodedMessages: DecodedMessage<T>[];
7474- };
7575-};
7676-7777-export type ConsumerRunConfig<T extends AnyTopicSchemaMap> = Pick<
7878- KafkaJSConsumerRunConfig,
7979- | 'autoCommit'
8080- | 'autoCommitInterval'
8181- | 'autoCommitThreshold'
8282- | 'eachBatchAutoResolve'
8383- | 'partitionsConsumedConcurrently'
8484-> & {
8585- eachBatch?: (payload: EachBatchPayload<T>) => Promise<void>;
8686- eachMessage?: (payload: EachMessagePayload<T>) => Promise<void>;
8787-};
8888-8989-type TopicPartition<EligibleTopics extends AnyTopicSchemaMap> = Omit<
9090- KafakJSTopicPartition,
9191- 'topic'
9292-> & { topic: keyof EligibleTopics & string };
9393-9494-type TopicPartitionOffset<EligibleTopics extends AnyTopicSchemaMap> = Omit<
9595- KafakJSTopicPartitionOffset,
9696- 'topic'
9797-> & { topic: keyof EligibleTopics & string };
9898-9999-type TopicPartitionOffsetAndMetadata<EligibleTopics extends AnyTopicSchemaMap> =
100100- Omit<KafakJSTopicPartitionOffsetAndMetadata, 'topic'> & {
101101- topic: keyof EligibleTopics & string;
102102- };
103103-104104-/**
105105- * Returns a Kafka consumer whose received messages will be transparently
106106- * decoded using their schema in the schema registry.
107107- *
108108- * Note overridden argument types, to only allow subscribing to/processing
109109- * messages from known topics.
110110- */
111111-export default class SchemaAwareConsumer<
112112- EligibleTopicsSchemaMap extends AnyTopicSchemaMap,
113113-> {
114114- readonly #registry: SchemaRegistry<EligibleTopicsSchemaMap>;
115115- readonly #consumer: Consumer;
116116- public readonly config: ConsumerConfig;
117117-118118- constructor(
119119- client: KafkaJS,
120120- registry: SchemaRegistry<EligibleTopicsSchemaMap>,
121121- config: ConsumerConfig,
122122- ) {
123123- this.config = config;
124124- this.#registry = registry;
125125- this.#consumer = client.consumer(config);
126126- }
127127-128128- async #decodeMessage(message: KafkaMessage) {
129129- const [key, value] = await Promise.all([
130130- message.key ? this.#registry.decode(message.key) : message.key,
131131- message.value ? this.#registry.decode(message.value) : message.value,
132132- ]);
133133-134134- return {
135135- ...message,
136136- key: key as KeyTypes<EligibleTopicsSchemaMap>,
137137- value: value as ValueTypes<EligibleTopicsSchemaMap>,
138138- };
139139- }
140140-141141- async run(config?: ConsumerRunConfig<EligibleTopicsSchemaMap>) {
142142- return this.#consumer.run({
143143- // This cast helps TS understand that eachBatch and eachMessage, if
144144- // present on config, will always get overridden before being passed to
145145- // this.#consumer.run (i.e., will never be passed with the type defined in
146146- // ConsumerRunConfig<T>).
147147- ...(config as Omit<typeof config, 'eachBatch' | 'eachMessage'>),
148148- ...(config?.eachBatch
149149- ? {
150150- eachBatch: async (payload) => {
151151- // TODO: does this need plimit? It shouldn't bc the schema is
152152- // cached, but idk if the cache is smart enough to avoid a huge
153153- // spike in initial requests for the schema(s) if the batch kicks
154154- // off a lot of decodes at a time.
155155- const decodedMessages = await Promise.all(
156156- payload.batch.messages.map(async (msg) =>
157157- this.#decodeMessage(msg),
158158- ),
159159- );
160160-161161- // We have to create the new batch by putting the original batch
162162- // in the prototype chain, in order for methods on the batch
163163- // object (like `lastOffset()`) to continue to work. We can't use
164164- // something like { ...origBatch, messages: decodedMessages } as
165165- // the new batch.
166166- return config.eachBatch!({
167167- ...payload,
168168- batch: Object.create(payload.batch, {
169169- decodedMessages: {
170170- value: decodedMessages,
171171- writable: false,
172172- configurable: false,
173173- enumerable: true,
174174- },
175175- }) as typeof payload.batch & {
176176- decodedMessages: typeof decodedMessages;
177177- },
178178- });
179179- },
180180- }
181181- : {}),
182182- ...(config?.eachMessage
183183- ? {
184184- eachMessage: async (payload) => {
185185- return config.eachMessage!({
186186- ...payload,
187187- message: await this.#decodeMessage(payload.message),
188188- });
189189- },
190190- }
191191- : {}),
192192- });
193193- }
194194-195195- // Bunch of blindly delegated methods below, although with arg types redefined
196196- // for some of them to limit the set of applicable topics like above.
197197- //
198198- // These delegated methods are explicitly enumerated on purpose (rather than
199199- // just, e.g., putting the kakfajs consumer instance in the prototype chain)
200200- // to make sure that the abstraction isn't leaky; i.e., that some KafkaJS API
201201- // isn't automatically delegated to that exposes messages without calling
202202- // registry.decode() on them. This choice of explicit delegation reflects that
203203- // I'd rather have the API surface be missing some KafkaJS methods (which can
204204- // easily be added if needed) than have the abstraction inadvertently leak.
205205- async subscribe(opts: ConsumerSubscribeTopics<EligibleTopicsSchemaMap>) {
206206- return this.#consumer.subscribe(
207207- // cast bc kafkajs' typings incorrectly fail to mark the `topics` key as
208208- // readonly (which it should be, since kafkajs doesn't mutate this array)
209209- opts as Omit<typeof opts, 'topics'> & {
210210- topics: Mutable<typeof opts.topics>;
211211- },
212212- );
213213- }
214214-215215- async commitOffsets(
216216- topicPartitions: TopicPartitionOffsetAndMetadata<EligibleTopicsSchemaMap>[],
217217- ) {
218218- try {
219219- return await this.#consumer.commitOffsets(topicPartitions);
220220- } catch (e) {
221221- // We want to unwrap the underlying KafkaJSProtocolError and throw that
222222- // instead. This is because there is logic within the KafkaJS library
223223- // that handles KafkaJSProtocolErrors, and will e.g. recover and
224224- // rejoin the group on errors that are associated with rebalancing.
225225- // However, the error thrown by consumer.commitOffsets() is always
226226- // wrapped in a KafkaJSNonRetriableError because it went through the
227227- // retrier already. This prevents the KafkaJSProtocolError from being
228228- // gracefully handled by the library unless we unwrap and throw it here.
229229- //
230230- // Alternatively we could turn on autoCommit for the simpler
231231- // consumers, which currently throws protocol errors directly.
232232- if (e instanceof KafkaJSError) {
233233- throw unwrapProtocolError(e) ?? e;
234234- }
235235-236236- throw e;
237237- }
238238- }
239239-240240- async seek(
241241- topicPartitionOffset: TopicPartitionOffset<EligibleTopicsSchemaMap>,
242242- ) {
243243- return this.#consumer.seek(topicPartitionOffset);
244244- }
245245-246246- async pause(topics: TopicPartition<EligibleTopicsSchemaMap>[]) {
247247- return this.#consumer.pause(topics);
248248- }
249249-250250- async resume(topics: TopicPartition<EligibleTopicsSchemaMap>[]) {
251251- return this.#consumer.resume(topics);
252252- }
253253-254254- async stop() {
255255- return this.#consumer.stop();
256256- }
257257-258258- async connect() {
259259- return this.#consumer.connect();
260260- }
261261-262262- async disconnect() {
263263- return this.#consumer.disconnect();
264264- }
265265-266266- // Overloads copied straight from the KafkaJS typings.
267267- // This is hella ugly, but idk a better alternative.
268268- on(
269269- eventName: ConsumerEvents['HEARTBEAT'],
270270- listener: (event: ConsumerHeartbeatEvent) => void,
271271- ): RemoveInstrumentationEventListener<typeof eventName>;
272272- on(
273273- eventName: ConsumerEvents['COMMIT_OFFSETS'],
274274- listener: (event: ConsumerCommitOffsetsEvent) => void,
275275- ): RemoveInstrumentationEventListener<typeof eventName>;
276276- on(
277277- eventName: ConsumerEvents['GROUP_JOIN'],
278278- listener: (event: ConsumerGroupJoinEvent) => void,
279279- ): RemoveInstrumentationEventListener<typeof eventName>;
280280- on(
281281- eventName: ConsumerEvents['FETCH_START'],
282282- listener: (event: ConsumerFetchStartEvent) => void,
283283- ): RemoveInstrumentationEventListener<typeof eventName>;
284284- on(
285285- eventName: ConsumerEvents['FETCH'],
286286- listener: (event: ConsumerFetchEvent) => void,
287287- ): RemoveInstrumentationEventListener<typeof eventName>;
288288- on(
289289- eventName: ConsumerEvents['START_BATCH_PROCESS'],
290290- listener: (event: ConsumerStartBatchProcessEvent) => void,
291291- ): RemoveInstrumentationEventListener<typeof eventName>;
292292- on(
293293- eventName: ConsumerEvents['END_BATCH_PROCESS'],
294294- listener: (event: ConsumerEndBatchProcessEvent) => void,
295295- ): RemoveInstrumentationEventListener<typeof eventName>;
296296- on(
297297- eventName: ConsumerEvents['CONNECT'],
298298- listener: (event: ConnectEvent) => void,
299299- ): RemoveInstrumentationEventListener<typeof eventName>;
300300- on(
301301- eventName: ConsumerEvents['DISCONNECT'],
302302- listener: (event: DisconnectEvent) => void,
303303- ): RemoveInstrumentationEventListener<typeof eventName>;
304304- on(
305305- eventName: ConsumerEvents['STOP'],
306306- listener: (event: InstrumentationEvent<null>) => void,
307307- ): RemoveInstrumentationEventListener<typeof eventName>;
308308- on(
309309- eventName: ConsumerEvents['CRASH'],
310310- listener: (event: ConsumerCrashEvent) => void,
311311- ): RemoveInstrumentationEventListener<typeof eventName>;
312312- on(
313313- eventName: ConsumerEvents['REBALANCING'],
314314- listener: (event: ConsumerRebalancingEvent) => void,
315315- ): RemoveInstrumentationEventListener<typeof eventName>;
316316- on(
317317- eventName: ConsumerEvents['RECEIVED_UNSUBSCRIBED_TOPICS'],
318318- listener: (event: ConsumerReceivedUnsubcribedTopicsEvent) => void,
319319- ): RemoveInstrumentationEventListener<typeof eventName>;
320320- on(
321321- eventName: ConsumerEvents['REQUEST'],
322322- listener: (event: RequestEvent) => void,
323323- ): RemoveInstrumentationEventListener<typeof eventName>;
324324- on(
325325- eventName: ConsumerEvents['REQUEST_TIMEOUT'],
326326- listener: (event: RequestTimeoutEvent) => void,
327327- ): RemoveInstrumentationEventListener<typeof eventName>;
328328- on(
329329- eventName: ConsumerEvents['REQUEST_QUEUE_SIZE'],
330330- listener: (event: RequestQueueSizeEvent) => void,
331331- ): RemoveInstrumentationEventListener<typeof eventName>;
332332- on(
333333- eventName: ConsumerEvents[keyof ConsumerEvents],
334334- // The type parameter here has to be `any` (or some union that'd be hard to
335335- // generate), rather than unknown, for TS to allow the overloads.
336336- // eslint-disable-next-line @typescript-eslint/no-explicit-any
337337- listener: (event: InstrumentationEvent<any>) => void,
338338- ): RemoveInstrumentationEventListener<typeof eventName> {
339339- return this.#consumer.on(eventName, listener);
340340- }
341341-342342- public get events() {
343343- return this.#consumer.events;
344344- }
345345-}
346346-347347-// Helper function to unwrap the underlying KafkaJSProtocolError from a
348348-// KafkaJSError, if present.
349349-function unwrapProtocolError(e: KafkaJSErrorType): KafkaJSProtocolErrorType | undefined {
350350- if (e instanceof KafkaJSProtocolError) {
351351- return e;
352352- }
353353-354354- if (e.cause && e.cause instanceof KafkaJSError) {
355355- return unwrapProtocolError(e.cause);
356356- }
357357-358358- return undefined;
359359-}
-175
server/kafka/SchemaAwareProducer.ts
···11-import type {
22- ConnectEvent,
33- DisconnectEvent,
44- InstrumentationEvent,
55- Kafka as KafkaJS,
66- ProducerBatch as KafkaJSProducerBatch,
77- ProducerRecord as KafkaJSProducerRecord,
88- Message as KafkaJSWriteMessage,
99- Producer,
1010- ProducerConfig,
1111- ProducerEvents,
1212- RemoveInstrumentationEventListener,
1313- RequestEvent,
1414- RequestQueueSizeEvent,
1515- RequestTimeoutEvent,
1616-} from 'kafkajs';
1717-1818-import {
1919- type AnyTopicSchemaMap,
2020- type KeyTypes,
2121- type SchemaRegistry,
2222- type ValueTypes,
2323-} from './SchemaAwareClient.js';
2424-// This is imported just so that the docblock comment can link to it.
2525-// eslint-disable-next-line @typescript-eslint/no-unused-vars
2626-import SchemaAwareConsumer from './SchemaAwareConsumer.js';
2727-2828-// Represents a message to produce to a topic before it's encoded.
2929-// NB: for best accuracy, Topic should be instantiated w/ a single string
3030-// literal type (as we do in ProducerBatch) rather than a union of literals.
3131-type TopicMessage<T extends AnyTopicSchemaMap, Topic extends keyof T> = Omit<
3232- KafkaJSWriteMessage,
3333- 'key' | 'value'
3434-> & {
3535- key?: KeyTypes<Pick<T, Topic>>;
3636- value: ValueTypes<Pick<T, Topic>>;
3737-};
3838-3939-type TopicMessages<T extends AnyTopicSchemaMap, Topic extends keyof T> = {
4040- topic: Topic;
4141- messages: TopicMessage<T, Topic>[];
4242-};
4343-4444-type ProducerRecord<T extends AnyTopicSchemaMap, Topic extends keyof T> = Omit<
4545- KafkaJSProducerRecord,
4646- 'topic' | 'messages'
4747-> &
4848- TopicMessages<T, Topic>;
4949-5050-type ProducerBatch<T extends AnyTopicSchemaMap, Topics extends keyof T> = Omit<
5151- KafkaJSProducerBatch,
5252- 'topicMessages'
5353-> & { topicMessages: { [Topic in Topics]: TopicMessages<T, Topic> }[Topics][] };
5454-5555-/**
5656- * This class is analogous to the {@link SchemaAwareConsumer} class,
5757- * so see that class for details behind the implementation rationale.
5858- *
5959- * TODO: support producer transactions.
6060- */
6161-export default class SchemaAwareProducer<T extends AnyTopicSchemaMap> {
6262- readonly #schemaMap: T;
6363- readonly #registry: SchemaRegistry<T>;
6464- readonly #producer: Producer;
6565- public readonly config: ProducerConfig | undefined;
6666-6767- constructor(
6868- client: KafkaJS,
6969- registry: SchemaRegistry<T>,
7070- schemaMap: T,
7171- config?: ProducerConfig,
7272- ) {
7373- this.config = config;
7474- this.#registry = registry;
7575- this.#schemaMap = schemaMap;
7676- this.#producer = client.producer(config);
7777- }
7878-7979- async #encodeTopicMessage<Topic extends keyof T>(
8080- topic: Topic,
8181- message: TopicMessage<T, Topic>,
8282- ) {
8383- const { keySchema, valueSchema } = this.#schemaMap[topic];
8484- const [key, value] = await Promise.all([
8585- message.key != null
8686- ? this.#registry.encode(keySchema, message.key)
8787- : null,
8888- message.value != null
8989- ? this.#registry.encode(valueSchema, message.value)
9090- : null,
9191- ]);
9292-9393- return { ...message, key, value };
9494- }
9595-9696- async #encodeTopicMessages<Topic extends keyof T>(
9797- it: TopicMessages<T, Topic>,
9898- ) {
9999- return Promise.all(
100100- // We don't make the map callback async as that just wastefully allocates
101101- // (a lot) of extra promises. (We're already ensured that synchronosuly
102102- // thrown errors in `#encodeTopicMessage` will be handled correctly
103103- // because it's an async function.)
104104- // eslint-disable-next-line @typescript-eslint/promise-function-async
105105- it.messages.map((message) => this.#encodeTopicMessage(it.topic, message)),
106106- );
107107- }
108108-109109- async send<Topic extends keyof T & string>(record: ProducerRecord<T, Topic>) {
110110- return this.#producer.send({
111111- ...record,
112112- messages: await this.#encodeTopicMessages(record),
113113- });
114114- }
115115-116116- async sendBatch<Topics extends keyof T & string>(
117117- batch: ProducerBatch<T, Topics>,
118118- ) {
119119- return this.#producer.sendBatch({
120120- ...batch,
121121- topicMessages: await Promise.all(
122122- batch.topicMessages.map(async (it) => ({
123123- ...it,
124124- messages: await this.#encodeTopicMessages(it),
125125- })),
126126- ),
127127- });
128128- }
129129-130130- async connect() {
131131- return this.#producer.connect();
132132- }
133133-134134- async disconnect() {
135135- return this.#producer.disconnect();
136136- }
137137-138138- isIdempotent() {
139139- return this.#producer.isIdempotent();
140140- }
141141-142142- get events() {
143143- return this.#producer.events;
144144- }
145145-146146- on(
147147- eventName: ProducerEvents['CONNECT'],
148148- listener: (event: ConnectEvent) => void,
149149- ): RemoveInstrumentationEventListener<typeof eventName>;
150150- on(
151151- eventName: ProducerEvents['DISCONNECT'],
152152- listener: (event: DisconnectEvent) => void,
153153- ): RemoveInstrumentationEventListener<typeof eventName>;
154154- on(
155155- eventName: ProducerEvents['REQUEST'],
156156- listener: (event: RequestEvent) => void,
157157- ): RemoveInstrumentationEventListener<typeof eventName>;
158158- on(
159159- eventName: ProducerEvents['REQUEST_QUEUE_SIZE'],
160160- listener: (event: RequestQueueSizeEvent) => void,
161161- ): RemoveInstrumentationEventListener<typeof eventName>;
162162- on(
163163- eventName: ProducerEvents['REQUEST_TIMEOUT'],
164164- listener: (event: RequestTimeoutEvent) => void,
165165- ): RemoveInstrumentationEventListener<typeof eventName>;
166166- on(
167167- eventName: ProducerEvents[keyof ProducerEvents],
168168- // The type parameter here has to be `any` (or some union that'd be hard to
169169- // generate), rather than unknown, for TS to allow the overloads.
170170- // eslint-disable-next-line @typescript-eslint/no-explicit-any
171171- listener: (event: InstrumentationEvent<any>) => void,
172172- ): RemoveInstrumentationEventListener<typeof eventName> {
173173- return this.#producer.on(eventName, listener);
174174- }
175175-}
-46
server/kafka/index.ts
···11-import kafkaJs from 'kafkajs';
22-33-import { KafkajsZstdCompressionCodec } from './KafkajsZstdCompressionCodec.js';
44-import SchemaAwareKafkaClient, {
55- SchemaRegistry,
66- type SchemaIdFor,
77-} from './SchemaAwareClient.js';
88-import {
99- type ConsumerRunConfig,
1010- type DecodedMessage,
1111-} from './SchemaAwareConsumer.js';
1212-import type SchemaAwareConsumer from './SchemaAwareConsumer.js';
1313-import type SchemaAwareProducer from './SchemaAwareProducer.js';
1414-1515-// Only the wrapper client class is exported, not the consumer/producer classes.
1616-export default SchemaAwareKafkaClient;
1717-1818-const { CompressionCodecs, CompressionTypes } = kafkaJs;
1919-2020-// The line below will allow producers to generate, and consumers to read,
2121-// messages compressed w/ zstd. However, it doesn't require (or automatically
2222-// opt-in) the producers to using compression, nor does it stop the consumers
2323-// from reading uncompressed messages.
2424-//
2525-// In Kafkajs, the registered compression codecs are global, so there's no way
2626-// to (e.g.) provide different detailed compression options per client/
2727-// producer/topic/message batch. In other words, any messages that request
2828-// compression w/ zstd will get this compression level 5, which is a bit
2929-// annoying because different topics might warrant different compression levels.
3030-// See https://github.com/tulios/kafkajs/issues/1553
3131-//
3232-// Given that this setting is global, we also can't expose any
3333-// compression-related options on the classes we export from this module,
3434-// as they can't do any sort of local override.
3535-CompressionCodecs[CompressionTypes.ZSTD] = () =>
3636- new KafkajsZstdCompressionCodec(5);
3737-3838-export type {
3939- SchemaIdFor,
4040- DecodedMessage,
4141- SchemaAwareKafkaClient as Kafka,
4242- SchemaAwareProducer as KafkaProducer,
4343- SchemaAwareConsumer as KafkaConsumer,
4444- ConsumerRunConfig as KafkaConsumerRunConfig,
4545-};
4646-export { SchemaRegistry };
-117
server/kafka/itemQueueBulkWrite.ts
···11-import DataLoader from 'dataloader';
22-import { CompressionTypes } from 'kafkajs';
33-44-import {
55- type ItemSubmissionKafkaMessageValue,
66- type KafkaSchemaMap,
77-} from '../iocContainer/index.js';
88-import { type Kafka, type KafkaProducer } from '../kafka/index.js';
99-import { sleep } from '../utils/misc.js';
1010-1111-type ITEM_SUBMISSION_SCHEMAS =
1212- | 'ITEM_SUBMISSION_EVENTS'
1313- | 'ITEM_SUBMISSION_EVENTS_RETRY_0';
1414-1515-/**
1616- * Factory for a service that'll write to Kafka after batching the writes,
1717- * returns to the caller after the whole batch has been written.
1818- */
1919-function makeItemQueueBulkWrite(
2020- kafka: Kafka<Pick<KafkaSchemaMap, ITEM_SUBMISSION_SCHEMAS>>,
2121- topic: ITEM_SUBMISSION_SCHEMAS,
2222-) {
2323- const kafkaProducer = kafka.producer();
2424- let connectError: Error | undefined;
2525- const initialConnectPromise = kafkaProducer.connect().catch((err: unknown) => {
2626- // Store the error to prevent an unhandled promise rejection from crashing
2727- // the process. We re-throw it when callers attempt to write to Kafka.
2828- connectError = err instanceof Error ? err : new Error(String(err));
2929- });
3030- const batchTimeout = 500;
3131-3232- const loader: DataLoader<ItemSubmissionKafkaMessageValue, void> =
3333- new DataLoader(
3434- async (data) =>
3535- bulkWrite(kafkaProducer, data, topic).then(() =>
3636- new Array(data.length).fill(undefined),
3737- ),
3838- {
3939- cache: false,
4040- batch: true,
4141- maxBatchSize: 200,
4242- batchScheduleFn(cb) {
4343- setTimeout(cb, batchTimeout);
4444- },
4545- },
4646- );
4747-4848- async function itemQueueBulkWrite(
4949- items: readonly ItemSubmissionKafkaMessageValue[],
5050- skipBatch: boolean = false,
5151- ) {
5252- await initialConnectPromise;
5353- if (connectError) {
5454- throw connectError;
5555- }
5656- // bulkWrite and loader.loadMany have different return types, so we have to
5757- // handle their returns separately and construct a homogenous return type in
5858- // each case, in addition to the logical difference of using batching or not
5959- if (skipBatch) {
6060- try {
6161- await bulkWrite(kafkaProducer, items, topic);
6262- return { error: false, results: [] };
6363- } catch (err) {
6464- return { error: true, results: [err] };
6565- }
6666- } else {
6767- // loader.loadMany never throws, just return error objects in it's
6868- // response
6969- const response = await loader.loadMany(items);
7070- if (response.some((r) => r instanceof Error)) {
7171- return {
7272- error: true,
7373- results: response,
7474- };
7575- }
7676- return {
7777- error: false,
7878- results: [],
7979- };
8080- }
8181- }
8282-8383- itemQueueBulkWrite.close = async () => {
8484- // make sure the latest batch of writes has been flushed to kafka before we
8585- // attempt to disconnect. This should be the last batch, assuming
8686- // bulkWrite isn't called again after `close()` is called.
8787- await sleep(batchTimeout + 1000);
8888- await kafkaProducer.disconnect();
8989- };
9090-9191- return itemQueueBulkWrite;
9292-}
9393-9494-export type ItemQueueBulkWrite = ReturnType<typeof makeItemQueueBulkWrite>;
9595-9696-export { makeItemQueueBulkWrite };
9797-9898-async function bulkWrite(
9999- kafka: KafkaProducer<Pick<KafkaSchemaMap, ITEM_SUBMISSION_SCHEMAS>>,
100100- data: readonly ItemSubmissionKafkaMessageValue[],
101101- topic: ITEM_SUBMISSION_SCHEMAS,
102102-) {
103103- if (!data.length) {
104104- return;
105105- }
106106-107107- await kafka.send({
108108- topic,
109109- compression: CompressionTypes.ZSTD,
110110- messages: data.map((msg) => ({
111111- key: {
112112- syntheticThreadId: msg.metadata.syntheticThreadId,
113113- },
114114- value: msg,
115115- })),
116116- });
117117-}
-37
server/kafka/logger.ts
···11-/* eslint-disable no-console */
22-import util from 'util';
33-import { logLevel, type logCreator } from 'kafkajs';
44-55-import { assertUnreachable } from '../utils/misc.js';
66-77-// This is almost a carbon copy of the default logger in kafkajs, but it uses
88-// util.inspect instead of JSON.stringify to avoid errors when logging circular
99-// objects. See:
1010-// https://github.com/tulios/kafkajs/blob/master/src/loggers/console.js
1111-// https://github.com/tulios/kafkajs/issues/975
1212-const logCreator: logCreator =
1313- () =>
1414- ({ namespace, level, label, log }) => {
1515- const prefix = namespace ? `[${namespace}] ` : '';
1616- const message = util.inspect({
1717- level: label,
1818- ...log,
1919- message: `${prefix}${log.message}`,
2020- });
2121- switch (level) {
2222- case logLevel.INFO:
2323- return console.info(message);
2424- case logLevel.ERROR:
2525- return console.error(message);
2626- case logLevel.WARN:
2727- return console.warn(message);
2828- case logLevel.DEBUG:
2929- return console.log(message);
3030- case logLevel.NOTHING:
3131- return;
3232- default:
3333- assertUnreachable(level);
3434- }
3535- };
3636-3737-export default logCreator;
···
import {
  type Dependencies,
-  type ItemSubmissionKafkaMessageValue,
+  type ItemSubmissionMessageValue,
} from '../../iocContainer/index.js';
import { safeGetEnvVar } from '../../iocContainer/utils.js';
import {
···
      return next(new AggregateError(errors));
    }

-   // Send 5% of traffic to the async processing queue, otherwise handle in
-   // the traditional way (in this process, immediately after returning 202 to
-   // the user)
+   // Send a configurable percentage of traffic to the async processing queue
+   // (BullMQ), otherwise handle inline (in this process, immediately after
+   // returning 202 to the user). Set to 1 to route all traffic through the queue.
    const trafficPercentage = Number(
      safeGetEnvVar('ITEM_QUEUE_TRAFFIC_PERCENTAGE'),
    );
···
    // valid Date, but due to legacy data the type returned, ItemSubmission, an
    // optional `submissionTime` property. this variable is used to convince
    // typescript that the value we subsequently pass to itemSubmissionQueueBulkWrite has
-   // a valid Date in the `submissionTime` property, which is specified in the
-   // schema for the kafka topic that item submissions get written to.
+   // a valid Date in the `submissionTime` property.
    const backupSubmissiontime = new Date();
    const submissionsToProcess = itemSubmissionsOrErrors.map((it) => {
      // We checked for errors earlier so this should never happen
···
        // toItemSubmission specifies it is optional, as noted above
        it.itemSubmission.submissionTime ?? backupSubmissiontime,
      },
-   } satisfies ItemSubmissionKafkaMessageValue;
+   } satisfies ItemSubmissionMessageValue;
    });

    Meter.itemsEnqueued.add(submissionsToProcess.length);
···
    // TODO: start sending automatic close decisions when we are sending the
    // report decision callbacks
    if (newDecisionStored && automaticCloseDecision === undefined) {
-     // TODO: use proper publishing to a durable queue (kafka?) and retry
+     // TODO: use proper publishing to a durable queue and retry
      this.onRecordDecision({
        decisionComponents,
        relatedActions,
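The handler code that actually consumes `trafficPercentage` falls outside these hunks. Presumably it gates each request roughly like the sketch below; everything other than the `trafficPercentage` fraction and the idea of an enqueue-vs-inline split is illustrative, not code from this diff.

```ts
import { type ItemSubmissionMessageValue } from '../../iocContainer/index.js';

// Sketch of the routing gate: send a fraction of submissions to the BullMQ
// queue and handle the rest inline. `enqueue` / `processInline` stand in for
// the real code paths (e.g. itemSubmissionQueueBulkWrite and the legacy path).
async function routeSubmissions(
  submissions: readonly ItemSubmissionMessageValue[],
  trafficPercentage: number, // from ITEM_QUEUE_TRAFFIC_PERCENTAGE, 0..1
  enqueue: (items: readonly ItemSubmissionMessageValue[]) => Promise<unknown>,
  processInline: (items: readonly ItemSubmissionMessageValue[]) => Promise<void>,
) {
  if (Math.random() < trafficPercentage) {
    // Durable path: ItemProcessingWorker consumes these from Redis.
    await enqueue(submissions);
  } else {
    // Inline path: process in this process after the 202 has been returned.
    await processInline(submissions);
  }
}
```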
+1-1
server/storage/dataWarehouse/warehouseSchema.ts
···
/**
 * Transforms a table row type into the shape expected for bulk/eventual writes
- * (e.g. via Kafka ingestion). Keys are lowercased and nullable keys become
+ * (e.g. via bulk ingestion). Keys are lowercased and nullable keys become
 * optional. The `AcceptSlowQueries` flag controls whether JSON null values are
 * permitted (they can degrade columnar storage performance).
 */
+9-12
server/utils/CoopMeter.ts
···
  // indicates a bug in the processing code or an infrastructure/network issue
  // that is preventing progress from being made
  public readonly itemProcessingFailuresCounter: opentelemetry.Counter;
-  // Used to track the amount of time a worker spends processing one batch of
-  // item submissions (batch as seen by Kafka, not a user batch). Gives a
-  // rough idea of item processing performance.
-  public readonly itemProcessingBatchTime: opentelemetry.Histogram;
-  // Tracks the batch size for each batch of items processed by a worker.
-  // This metric can help tune # of workers, # of partitions assigned per
-  // worker, and timeouts for kafka batch writes
-  public readonly itemProcessingBatchSize: opentelemetry.Histogram;
+  // Tracks the time a worker spends processing a single job.
+  public readonly itemProcessingJobTime: opentelemetry.Histogram;
+  // Snapshot of waiting + active jobs in the queue, sampled after each
+  // job completes. Useful for detecting backpressure.
+  public readonly itemProcessingQueueDepth: opentelemetry.Histogram;
  // Counts the number of items sent to the processing queue
  // this is mostly for debugging, and should allow us to confirm
  // the percentage of traffic we are sending to the queue and
···
    this.itemsEnqueued = myMeter.createCounter(
      `${metricNamespace}.items.enqueued-to-processing-queue.counter`,
    );
-   this.itemProcessingBatchTime = myMeter.createHistogram(
-     `${metricNamespace}.items.batch-processing-time-ms.histogram`,
+   this.itemProcessingJobTime = myMeter.createHistogram(
+     `${metricNamespace}.items.job-processing-time-ms.histogram`,
    );
-   this.itemProcessingBatchSize = myMeter.createHistogram(
-     `${metricNamespace}.items.batch-size.histogram`,
+   this.itemProcessingQueueDepth = myMeter.createHistogram(
+     `${metricNamespace}.items.queue-depth.histogram`,
    );
  }
}
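Since `itemProcessingQueueDepth` is described as a snapshot of waiting plus active jobs sampled after each job completes, BullMQ's `getJobCounts` is the natural source for that number. A sketch of how the worker might record it follows; the metric name and sampling point come from this diff, while the wiring shown here is an assumption.

```ts
import { Queue } from 'bullmq';

// Sketch: record waiting + active job counts after a job completes.
// `queue` is the worker's BullMQ queue handle and the second argument stands
// in for the CoopMeter instance shown above.
async function recordQueueDepth(
  queue: Queue,
  Meter: { itemProcessingQueueDepth: { record(value: number): void } },
) {
  const counts = await queue.getJobCounts('waiting', 'active');
  Meter.itemProcessingQueueDepth.record(
    (counts.waiting ?? 0) + (counts.active ?? 0),
  );
}
```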
+137-334
server/workers_jobs/ItemProcessingWorker.ts
···
-import { type KafkaSchemaMap } from '../iocContainer/index.js';
+import { Queue, Worker as BullWorker, type Job as BullJob } from 'bullmq';
+import { type Cluster } from 'ioredis';
+import type IORedis from 'ioredis';
+
+import { type ItemSubmissionMessageValue } from '../iocContainer/index.js';
import { inject } from '../iocContainer/utils.js';
-import { type Kafka, type KafkaConsumerRunConfig } from '../kafka/index.js';
+import { ITEM_SUBMISSION_QUEUE_NAME } from '../queues/itemSubmissionQueue.js';
import {
  submissionDataToItemSubmission,
  type ItemSubmission,
···
import { withRetries } from '../utils/misc.js';
import { type Worker } from './index.js';

-const topicsToConsume = ['ITEM_SUBMISSION_EVENTS'] as const;
-
-type ConsumedTopic = (typeof topicsToConsume)[number];
-
export default inject(
  [
-   'Kafka',
+   'IORedis',
    'Tracer',
    'RuleEngine',
    'ContentApiLogger',
···
    'itemSubmissionRetryQueueBulkWrite',
  ],
  (
-   kafka: Kafka<Pick<KafkaSchemaMap, ConsumedTopic>>,
+   redis: IORedis.Redis | Cluster,
    tracer,
    ruleEngine,
    contentApiLogger,
···
    Meter,
    itemSubmissionRetryQueueBulkWrite,
  ) => {
-   let consumer: ReturnType<typeof kafka.consumer<ConsumedTopic>>;
+   let worker: BullWorker<ItemSubmissionMessageValue> | undefined;
+   let queue: Queue<ItemSubmissionMessageValue> | undefined;

    return {
      type: 'Worker' as const,
      async run(_signal) {
-       consumer = kafka.consumer<ConsumedTopic>({
-         // NB: don't rename lightly, as this has permissions
-         // associated w/ it through Kafka ACLS.
-         groupId: 'item-submission-worker',
-         maxBytesPerPartition: 1024 * 1024, // 1 mb = Default
-         sessionTimeout: 90_000,
-       });
+       queue = new Queue(ITEM_SUBMISSION_QUEUE_NAME, { connection: redis });
+       const insertWithRetries = tracer.traced(
+         {
+           resource: 'itemProcessingWorker',
+           operation: 'ItemInvestigationService.insertItem',
+         },
+         withRetries(
+           {
+             maxRetries: 1,
+             initialTimeMsBetweenRetries: 75,
+             maxTimeMsBetweenRetries: 250,
+           },
+           ItemInvestigationService.insertItem.bind(ItemInvestigationService),
+         ),
+       );

-       await consumer.connect();
-       await consumer.subscribe({ topics: topicsToConsume });
+       worker = new BullWorker<ItemSubmissionMessageValue>(
+         ITEM_SUBMISSION_QUEUE_NAME,
+         async (job: BullJob<ItemSubmissionMessageValue>) => {
+           const processJob = tracer.traced(
+             {
+               operation: 'processJob',
+               resource: 'itemsProcessingWorker',
+             },
+             async () => {
+               const jobStartTime = performance.now();
+               const { itemSubmissionWithTypeIdentifier, metadata } = job.data;
54725555- // An error thrown within eachBatch does not lead the promise returned
5656- // by `consumer.run()` to reject. Instead, that promise resolves
5757- // immediately once the consumer starts running and, if an error occurs
5858- // within `eachBatch`, kafkajs will simply retry the `eachBatch`
5959- // callback a few times (the exact number is configurable). However,
6060- // _even once that retry count limit is exhausted_, the `consumer.run()`
6161- // call still does not reject, as you might expect.
6262- //
6363- // Instead, once that retry count is exhausted, kafkajs switches from
6464- // silently + automatically retrying `eachBatch` to emitting a `crash`
6565- // event on the consumer. However, even after this `crash` event is
6666- // emitted, kafkajs does not stop the consumer or raise an exception.
6767- // Instead, kafkajs's default behavior is to simply restart the consumer
6868- // after the crash event. So the overall nodejs process will, by
6969- // default, basically never crash.
7070- //
7171- // However, for now, we _want_ Node to crash if we're getting repeated
7272- // errors (even after retrying) within `eachBatch`, so that we can take
7373- // advantage of simple, out-of-the-box monitoring to see these crashes.
7474- // Therefore, we register a crash listener that throws unconditionally
7575- // (again, this only applies once the automatic retries have failed and
7676- // the crash event is emitted). The unconditional part means that we're
7777- // ignoring `event.payload.restart`, which is the flag for whether
7878- // kafkajs should restart the consumer after the crash, and which is
7979- // always true by default. Kafkajs takes a `retryOnFailure` setting for
8080- // configuring that, but we don't even bother, because we always want to
8181- // crash nodejs/the whole process once the consumer crash event is
8282- // emitted.
8383- consumer.on('consumer.crash', (event) => {
8484- const { error } = event.payload;
8585- tracer.logActiveSpanFailedIfAny(error);
8686- throw error;
8787- });
7373+ Meter.itemProcessingAttemptsCounter.add(1, {
7474+ process: 'item-processing-worker',
7575+ });
88768989- // Error Cases
9090- //
9191- // 1. If the worker is shut down by kubernetes (eg. a new version is
9292- // deployed and being rolled out, or because k8s decides this pod needs
9393- // to be evicted/moved to another node), then shutdown() will run,
9494- // which will call `consumer.disconnect()`, which also makes the
9595- // consumer stop pulling new messages, so there's nothing else we have
9696- // to do. If a batch is interrupted by shutdown, KafkaJS will
9797-    //    automatically commit any resolved offsets so that we don’t lose
9898-    //    progress and another worker doesn’t re-process messages that this
9999-    //    worker has already seen, while the rest of the batch’s messages
100100-    //    should be picked up by another worker and processed eventually.
101101- //
102102- // 2. If Kafka is unavailable when the worker starts, then the consumer
103103- // will fail to connect or subscribe, the worker will throw an
104104- // exception (after some internal kafkajs retrying), no state will get
105105- // messed up, and k8s can restart the worker.
106106- //
107107- // 3. If Kafka becomes unavailable while the worker is running,
108108- // there are two cases:
109109- // a) KafkaJS detects that Kafka is unavailable when it tries to
110110- // fetch the next batch. By default kafkajs will try to reconnect
111111- // for a while; if that fails, I think an error is eventually
112112- // raised (either thrown or as the "CRASH" event), in which case
113113- // there shouldn’t be any buffered messages (because we only
114114- // request a new batch when the current one is completely finished
115115- // processing and the offset is committed) so when Kafka becomes
116116- // available again we should be able to start making progress with
117117- // no weird state.
118118- // b) kafkajs detects that kafka is unavailable when it tries to
119119- // commit the offsets, _after processing items and publishing
120120- // actions_. Kafkajs will already retry committing the offsets
121121- // but, if that fails, what do we do? If we shut down the worker,
122122- // the consumer will restart and reprocess the messages that
123123- // didn’t get their offsets committed. It is not a catastrophic
124124- // failure if one batch of messages is re-processed when there is
125125- // a connection issue with kafka, so this worker does not have
126126- // logic to prevent this situation. The main issue with this
127127- // failure is we may publish actions for those items more than
128128- // once, which again is not catastrophic but also not ideal. To
129129- // prevent this we can add an idempotency mechanism to the action
130130- // publisher that stores a key of either `topic:partition:offset`
131131- // or `requestId:SubmissionId` for each action with some
132132- // reasonable TTL, and also checks for that keys existence before
133133- // sending a request to a custom action callback API.
134134- //
135135- // 4. If a partition gets reassigned while a batch is in the middle of
136136-      //    processing, KafkaJS will automatically commit the resolved offsets
137137- // for the current batch, similar to case 1.
138138- //
139139- // 5. If the item processing takes a long time (which is very
140140- // possible since much of the rule engine is network I/O), we want to
141141- // make sure that Kafka doesn't think the consumer is dead and
142142- // needlessly reassign its partitions. So, we set a 5 second heartbeat
143143- // interval outside of KafkaJS’s automatic heartbeat flow.
144144- //
145145- // 6. An error is thrown while processing a message. This can happen
146146- // if any one of `itemDataToItemSubmission`, `runEnabledRules`, or
147147- // `logContentAPIRequest` throws. In all these cases we choose to
148148- // block the queue (or at least the current partition) from
149149- // progressing until the error is resolved, for the reasons explained
150150- // below:
151151- // a) `itemDataToItemSubmission` throws. This could happen if there
152152- // is an issue connecting to postgres, in which case we should retry
153153- // until it succeeds (this can be handled by simply throwing and
154154- // causing the batch to retry). If data is fundamentally malformed
155155- // and will always cause this error to throw, this is likely
156156- // due to a bug in the Kafka producer code, or somehow bad data
157157- // got through validation and is not reconstructible. In this
158158- // case we write to a separate queue to allow processing of
159159- // other messages to continue, and these bad messages can be
160160- // inspected from the dead letter queue
161161- //
162162- // b) `runEnabledRules` throws. This does not happen in the usual
163163- // lifecycle of our application, even if all signals associated with
164164- // a given rule fail. This usually happens when we push a bug or bad
165165- // code, or if some other infrastructure is down (e.g. postgres). In
166166- // this case we want to block progress until the external dependency
167167- // is back up or we deploy a fix for the bug. This ensures that all
168168- // items are processed normally when the issue is resolved.
169169- //
170170- // c) `logContentAPIRequest` throws. This will happen if a
171171- // connection to kafka is unavailable, in which case we generally
172172- // can’t make progress, or if this function throws. This is likely
173173- // to be a transient error and we can throw this error, causing the
174174- // batch to be retried. Although this is the same strategy as 6.a
175175- // and 6.b, in this case we have already processed the given item
176176- // and may have published actions related to it so we risk
177177- // publishing actions more than once (as well as doing duplicate
178178- // work more generally). This can be mitigated with the same
179179- // idempotency strategy described in 3.b, and again we don’t take
180180- // pains to prevent duplicate work in this case in the code for this
181181- // worker.
7777+ let itemSubmission;
7878+ try {
7979+ const { itemTypeIdentifier } =
8080+ itemSubmissionWithTypeIdentifier;
18281183183- const eachBatchTraced = tracer.traced(
184184- { operation: 'processBatch', resource: 'itemsProcessingWorker' },
185185- async function ({ batch, heartbeat }) {
186186- // Heartbeat every 5s while the upload is in progress/being retried
187187- // (kafkajs will dedupe these if we're calling heartbeat more often
188188- // than heartbeat interval), to avoid 30s session timeout if s3
189189- // upload has to be retried a few times or takes a long time.
190190- // Handles case (6) above.
191191- const heartbeatInverval = setInterval(() => {
192192- heartbeat().catch((reason) => {
193193- tracer.traced(
194194- {
195195- operation: 'consumerHeartbeat',
196196- resource: 'itemsProcessingWorker',
197197- },
198198- () => {
199199- tracer.logActiveSpanFailedIfAny(reason);
200200- },
201201- );
202202- });
203203- }, 5_000);
8282+ // BullMQ serializes job data as JSON, which converts Date
8383+ // objects to ISO strings. Re-hydrate here.
8484+ const submissionTime = new Date(
8585+ itemSubmissionWithTypeIdentifier.submissionTime,
8686+ );
20487205205- try {
206206- const { decodedMessages: messages, topic, partition } = batch;
207207- const insertWithRetries = tracer.traced(
208208- {
209209- resource: 'itemProcessingWorker',
210210- operation: 'ItemInvestigationService.insertItem',
211211- },
212212- withRetries(
213213- {
214214- maxRetries: 1,
215215- initialTimeMsBetweenRetries: 75,
216216- maxTimeMsBetweenRetries: 250,
217217- },
218218- ItemInvestigationService.insertItem.bind(
219219- ItemInvestigationService,
220220- ),
221221- ),
222222- );
8888+ try {
8989+ itemSubmission = (await submissionDataToItemSubmission(
9090+ async ({ typeSelector, orgId }) =>
9191+ moderationConfigService.getItemType({
9292+ orgId,
9393+ itemTypeSelector: typeSelector,
9494+ }),
9595+ {
9696+ orgId: metadata.orgId,
9797+ submissionId:
9898+ itemSubmissionWithTypeIdentifier.submissionId satisfies string as SubmissionId,
9999+ submissionTime,
100100+ itemId: itemSubmissionWithTypeIdentifier.itemId,
101101+ itemTypeId: itemTypeIdentifier.id,
102102+ itemTypeVersion: itemTypeIdentifier.version,
103103+ itemTypeSchemaVariant: itemTypeIdentifier.schemaVariant,
104104+ data: jsonParse(
105105+ itemSubmissionWithTypeIdentifier.dataJSON,
106106+ ),
107107+ creatorId: null,
108108+ creatorTypeId: null,
109109+ },
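+                      // The cast is safe: submissionTime was re-hydrated into a
+                      // Date above, and new submissions are always written with a
+                      // submissionTime (only legacy submissions lack one, and those
+                      // never reach this queue).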
110110+ )) as ItemSubmission & { submissionTime: Date };
111111+ } catch {
112112+ // If we can't reconstruct a message, it likely has
113113+ // bad data or was written in a bad format. Write to
114114+ // the DLQ for inspection and return without throwing
115115+ // so BullMQ marks this job as complete (not retried).
116116+ await itemSubmissionRetryQueueBulkWrite([job.data]);
117117+ return;
118118+ }
223119224224- Meter.itemProcessingBatchSize.record(messages.length);
225225- const batchStartTime = performance.now();
226226- await Promise.all(
227227- messages.map(async (data) => {
228228- // TODO: what to do if value is missing cuz we wrote incorrectly?
229229- // Add metric to count occurences of this, ideally we would only see this
230230- // failure on first deploy and quickly fix it.
231231- const { itemSubmissionWithTypeIdentifier, metadata } =
232232- data.value!;
233233-234234- Meter.itemProcessingAttemptsCounter.add(1, {
235235- process: 'item-processing-worker',
236236- });
237237-238238- // TODO: better way to do this?
239239- let itemSubmission;
240120 try {
241241- const { itemTypeIdentifier } =
242242- itemSubmissionWithTypeIdentifier;
121121+ await insertWithRetries({
122122+ requestId: metadata.requestId,
123123+ orgId: metadata.orgId,
124124+ itemSubmission,
125125+ });
126126+                      } catch {
127127+                        // Swallow the error for now if an item fails to make it
128128+                        // into Scylla; it shouldn't prevent rule processing.
129129+                      }
243130244244- try {
245245- // NB: could throw if item type can't be found (e.g.,
246246- // postgres briefly down)
247247- itemSubmission = (await submissionDataToItemSubmission(
248248- async ({ typeSelector, orgId }) =>
249249- moderationConfigService.getItemType({
250250- orgId,
251251- itemTypeSelector: typeSelector,
252252- }),
253253- {
254254- orgId: metadata.orgId,
255255- submissionId:
256256- itemSubmissionWithTypeIdentifier.submissionId satisfies string as SubmissionId,
257257- submissionTime:
258258- itemSubmissionWithTypeIdentifier.submissionTime,
259259- itemId: itemSubmissionWithTypeIdentifier.itemId,
260260- itemTypeId: itemTypeIdentifier.id,
261261- itemTypeVersion: itemTypeIdentifier.version,
262262- itemTypeSchemaVariant:
263263- itemTypeIdentifier.schemaVariant,
264264- data: jsonParse(
265265- itemSubmissionWithTypeIdentifier.dataJSON,
266266- ),
267267- creatorId: null,
268268- creatorTypeId: null,
269269- },
270270- // this cast is safe since new ItemSubmissions are
271271- // always written with a submissionTime, despite the
272272- // `...toItemSubmission` function annotation implying they
273273- // could have an undefined submissionTime. This is to support
274274- // legacy submissions, but none of those will end up in
275275- // Kafka
276276- )) as ItemSubmission & { submissionTime: Date };
277277- } catch {
278278-                        // If we can't reconstruct a message, it likely has
279279- // made it past validation with some bad data (shouldn't happen)
280280- // or the kafka message was written in a bad format. In this case
281281- // we hope it is not a problem with every single item, so we don't want
282282- // to block progress on the item submission queue - so we write to a
283283- // retry queue which can be inspected and optionally retried
284284- if (data.value) {
285285- await itemSubmissionRetryQueueBulkWrite([data.value]);
286286- }
287287- return;
288288- }
131131+ await ruleEngine.runEnabledRules(
132132+ itemSubmission,
133133+ metadata.requestId,
134134+ );
289135290290- try {
291291- await insertWithRetries({
292292- requestId: metadata.requestId,
293293- orgId: metadata.orgId,
294294- itemSubmission,
295295- });
296296- } catch (e: unknown) {
297297- //swallow error for now if an item fails to make it into
298298- //scylla, it is not really an issue for running most
299299- //rules and shouldn't prevent processing
300300- }
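+                      // Resolves as soon as the request is queued for the logger's
+                      // bulk write, not when it is actually written, so awaiting it
+                      // per job is cheap.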
136136+ await contentApiLogger.logContentApiRequest(
137137+ {
138138+ requestId: metadata.requestId,
139139+ orgId: metadata.orgId,
140140+ itemSubmission,
141141+ failureReason: undefined,
142142+ },
143143+ false,
144144+ );
301145302302- await ruleEngine.runEnabledRules(
303303- itemSubmission,
304304- metadata.requestId,
305305- );
146146+ Meter.itemProcessingJobTime.record(
147147+ performance.now() - jobStartTime,
148148+ );
306149307307- // This returns as soon as the item is loaded, not when the
308308- // batch is actually written, so it can be
309309- // safely/efficiently awaited on each message
310310- await contentApiLogger.logContentApiRequest(
311311- {
312312- requestId: metadata.requestId,
313313- orgId: metadata.orgId,
314314- itemSubmission,
315315- failureReason: undefined,
316316- },
317317- false,
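+                    // Record current queue depth (waiting + active) as a metric;
+                    // fire-and-forget so a Redis hiccup here can't fail a job that
+                    // was otherwise processed successfully.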
150150+ queue!.getJobCounts('waiting', 'active').then((counts) => {
151151+ Meter.itemProcessingQueueDepth.record(
152152+ counts.waiting + counts.active,
318153 );
154154+ }).catch(() => {});
155155+ } catch (e: unknown) {
156156+ tracer.logActiveSpanFailedIfAny(e);
157157+ Meter.itemProcessingFailuresCounter.add(1, {
158158+ process: 'item-processing-worker',
159159+ });
319160320320- } catch (e: unknown) {
321321- tracer.logActiveSpanFailedIfAny(e);
322322- Meter.itemProcessingFailuresCounter.add(1, {
323323- process: 'item-processing-worker',
324324- });
161161+                      // Transient errors (e.g. Postgres briefly down) are retried
162162+                      // by BullMQ per the job's attempts/backoff options. Bugs or
163163+                      // longer infrastructure outages exhaust those retries and move
164164+                      // the job to the failed state, where it is kept for inspection.
165165+ throw e;
166166+ }
167167+ },
168168+ );
325169326326- // When we reach this catch block we have hit one of the errors in
327327- // case 6 a, b, or c. These fall into two categories:
328328- //
329329- // Transient Errors: errors in connection to postgres, or
330330- // writing to ContentAPIRequests. these are cheaply retried
331331- // by throwing, which triggers another call to `eachBatch`.
332332- //
333333- // Bugs or Infrastructure outages: In these cases we want
334334- // to stop progressing through the queue until the issue is
335335- // resolved, either by deploying updated code which fixes
336336- // the issue, or when some external infrastructure (most
337337- // likely Kafka itself) is available and we can establish a
338338- // connection. We can also handle this by throwing, which
339339- // will retry continually until the process crashes.
340340- //
341341- // In both cases (if Kafka is available) KafkaJS will
342342- // automatically commit the offsets for any messages in the
343343- // batch that have already been processed, so we are not at
344344-                        // risk of re-processing them and duplicating effort.
345345- throw e;
346346- }
347347- }),
348348- );
349349- Meter.itemProcessingBatchTime.record(
350350- performance.now() - batchStartTime,
351351- );
170170+ await processJob();
171171+ },
172172+ {
173173+ connection: redis,
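+              // Process up to 30 jobs concurrently in this worker (mirrors the
+              // old partitionsConsumedConcurrently setting).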
174174+ concurrency: 30,
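+              // Keep no completed jobs in Redis, but retain the most recent 1000
+              // failed jobs so they can be inspected and manually retried.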
175175+ removeOnComplete: { count: 0 },
176176+ removeOnFail: { count: 1000 },
177177+ },
178178+ );
352179353353- // commit offset only after processing successfully. The +1 here
354354- // _is_ necessary, after extensive testing, because kafka starts
355355- // consuming at the committed offset so, if we commit the last
356356- // offset that was successfully processed, and then the worker
357357- // restarts or there's a rebalance, that message will get
358358- // processed twice. We use BigInt here since the offset is
359359- // uint64.
360360- await consumer.commitOffsets([
361361- {
362362- topic,
363363- partition,
364364- offset: (BigInt(batch.lastOffset()) + 1n).toString(),
365365- },
366366- ]);
180180+ // BullMQ Worker runs continuously; wait for it to be ready
181181+ await worker.waitUntilReady();
367182368368- // NB: no catch block means the error's rethrown, which addresses
369369- // Error cases 6 a, b, c
370370- // This will trigger eachBatch to be retried, until the process
371371- // eventually crashes. See comment on the crash listener.
372372- } finally {
373373- clearInterval(heartbeatInverval);
374374- }
375375- } satisfies KafkaConsumerRunConfig<
376376- Pick<KafkaSchemaMap, ConsumedTopic>
377377- >['eachBatch'],
378378- );
379379-380380- await consumer.run({
381381- autoCommit: false,
382382- partitionsConsumedConcurrently: 30,
383383- eachBatch: eachBatchTraced,
183183+ // Keep the run() promise pending until the worker is closed.
184184+ await new Promise<void>((resolve) => {
185185+            worker!.once('closed', () => resolve());
384186 });
385187 },
386188 async shutdown() {
387387- await consumer.disconnect();
189189+ await worker?.close();
190190+ await queue?.close();
388191 },
389192 } satisfies Worker;
390193 },