// Mirror of https://github.com/roostorg/coop
1import {
2 SpanKind,
3 SpanStatusCode,
4 trace,
5 type Span,
6 type SpanOptions,
7 type Tracer,
8} from '@opentelemetry/api';
9
10import { CoopError } from './errors.js';
11import { thrownValueToString } from './misc.js';
12
13// The built-in Otel type for attributes incorrectly uses mutable array types
14// for array-valued attributes (suggesting that the otel sdk might mutate the
15// array given as the attributes, when it won't); this makes it
16// impossible/annoying to pass readonly arrays as attributes. The built-in otel
17// attributes type also indicates that null/undefined are legal attribute
18// values, when they're in fact invalid and have undefined behavior.
// Replacement for OTel's attributes type: a string-keyed bag whose values are
// either a single string/number/boolean or a readonly array of one of those
// (null/undefined are deliberately not accepted).
type CorrectedAttributes = Record<
  string,
  | string
  | number
  | boolean
  | readonly string[]
  | readonly number[]
  | readonly boolean[]
>;
28
// OTel's SpanOptions, with its `attributes` field swapped for the corrected
// attributes type.
type CorrectedSpanOptions = {
  attributes?: CorrectedAttributes;
} & Omit<SpanOptions, 'attributes'>;
32
33/**
34 * In OpenTelemetry, a span represents a unit of work that's part of a larger
35 * trace. OpenTelemetry spans all have a name, which can be an arbitrary string,
36 * but the idea is to use a name that makes it easy to identify the "same"
37 * operation across different traces by grouping by name. This lets you easily
38 * get stats for the operation (e.g., median latency). So, "getUserById" might
39 * be a good span name, but including the id of the user in the span name would
40 * not be good (there'd be too many distinct span names for useful grouping).
41 *
42 * We use a structured (resource, operation) pair for spans. The idea is that
43 * multiple resources can support the same operations, and observability tools
44 * can let you group your spans by resource or by operation. E.g., there might
45 * be an operation called `http.request`, which is the operation name the server
 * uses to refer to its handling of an incoming HTTP request. Then, each endpoint
47 * might be a different resource. So, you could have a span for `(POST /content,
48 * http.request)` and one for `(POST /report, http.request)`.
49 *
50 * This type takes a resource and operation, which is used to generate a
51 * plain-string span name, but also capture this underlying (resource,
52 * operation) structure so we can get the full value out of observability UIs.
53 */
type StructuredSpanName = {
  /** The thing the work is done against, e.g. `POST /content`. */
  resource: string;
  /** The kind of work being done, e.g. `http.request`. */
  operation: string;
};
55
/**
 * As a convenience for callers, we exploit the fact that there are (currently)
 * no overlapping keys between StructuredSpanName and SpanOptions (and this is
 * unlikely to change) to allow callers to pass all the data in one blob.
 */
// Span options and the structured (resource, operation) name, merged into a
// single object type.
type SpanDescriptor = CorrectedSpanOptions & StructuredSpanName;
62
/**
 * This class builds on OpenTelemetry's built-in Tracer, but it exposes methods
 * that take care of a lot of fiddly error handling details automatically, so
 * that we don't have to duplicate + get those details right everywhere.
 */
export default class SafeTracer {
  constructor(private readonly tracer: Tracer) {}

  /**
   * Ends the span once the traced function has returned (or its promise has
   * resolved) and passes the result through unchanged.
   */
  #onSpanSuccess<T>(span: Span, returnValue: T) {
    // NB: we intentionally don't set SpanStatus.OK here, as we wouldn't want
    // that to override the span's status if it's been set explicitly to ERROR
    // (e.g., to indicate an error that was recovered from or swallowed, which
    // led the span-creation function to still return/resolve successfully). See
    // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#set-status
    span.end();
    return returnValue;
  }

  /**
   * Records the failure on the span (see {@link logSpanFailed}), ends the
   * span, and rethrows the original thrown/rejection value untouched.
   */
  #onSpanFailure(span: Span, thrownValue: unknown): never {
    this.logSpanFailed(span, thrownValue);
    span.end();
    throw thrownValue;
  }

  /**
   * Wraps `fn` so that the span it's given is always ended, whether `fn`
   * returns synchronously, throws synchronously, or returns a promise-like
   * value that later resolves or rejects.
   */
  #wrapSpanFn<T>(fn: (span: Span) => T) {
    return (span: Span) => {
      try {
        const res = fn(span);
        const resIsPromiseLike =
          res &&
          typeof res === 'object' &&
          'then' in res &&
          typeof res.then === 'function';

        // For promise-likes, the span must stay open until settlement, so the
        // success/failure handling is chained onto the promise instead of
        // being run right away.
        return resIsPromiseLike
          ? ((res as { then: Promise<Awaited<T>>['then'] }).then(
              (ultimateResult) => this.#onSpanSuccess(span, ultimateResult),
              (e) => this.#onSpanFailure(span, e),
            ) as T)
          : this.#onSpanSuccess(span, res);
      } catch (error) {
        // `#onSpanFailure` always throws; the explicit `return` just makes it
        // obvious that no code path falls off the end of this function.
        return this.#onSpanFailure(span, error);
      }
    };
  }

  /**
   * In OTel, spans have a name + kind. Our app forms spans using resource
   * and operation components, to make it easier to review traces. We turn
   * those components into a valid, unstructured Otel span name, while
   * preserving the components as attributes in that span. That's what this
   * function does.
   *
   * @returns The plain-string span name (`operation:resource`) and the span
   * options to hand to the underlying tracer.
   */
  #spanDescriptorToOtelData(it: SpanDescriptor) {
    // `attributes` is pulled out separately so that spreading the remaining
    // options below can't clobber the merged attributes object (which would
    // silently drop `resource.name`/`operation.name` whenever the caller
    // supplied attributes of their own).
    const { operation, resource, attributes, ...spanOpts } = it;

    // We prefix the final operation name w/ "app." to differentiate these
    // manual operations from ones added by auto instrumentation libs.
    const appOperation = `app.${operation}`;

    return {
      name: `${operation}:${resource}`,
      options: {
        // Default span kind to internal, though the opts on an
        // individual span can override this.
        kind: SpanKind.INTERNAL,
        ...spanOpts,
        // Caller-provided attributes come last, so they can still override
        // the two structured-name attributes if they use the same keys.
        attributes: {
          'resource.name': resource,
          'operation.name': appOperation,
          ...attributes,
        },
      },
    };
  }

  /**
   * Starts a span via the tracer's `startActiveSpan`, runs `fn` with it, and
   * ends the span when `fn` returns/throws (or when its promise settles).
   * Thrown/rejection values are logged on the span and then rethrown.
   *
   * @param spanDescriptor Resource, operation and span options for the span.
   * @param fn The work to trace; receives the created span.
   * @returns Whatever `fn` returns.
   */
  addActiveSpan<T>(spanDescriptor: SpanDescriptor, fn: (span: Span) => T): T {
    const { name, options } = this.#spanDescriptorToOtelData(spanDescriptor);

    return this.tracer.startActiveSpan(
      name,
      options satisfies CorrectedSpanOptions as SpanOptions,
      this.#wrapSpanFn(fn),
    );
  }

  /**
   * Like {@link addActiveSpan}, but creates the span with the tracer's
   * `startSpan` rather than `startActiveSpan`.
   *
   * @param spanDescriptor Resource, operation and span options for the span.
   * @param fn The work to trace; receives the created span.
   * @returns Whatever `fn` returns.
   */
  addSpan<T>(spanDescriptor: SpanDescriptor, fn: (span: Span) => T): T {
    const { name, options } = this.#spanDescriptorToOtelData(spanDescriptor);

    const span = this.tracer.startSpan(
      name,
      options satisfies CorrectedSpanOptions as SpanOptions,
    );
    return this.#wrapSpanFn(fn)(span);
  }

  /** Returns the currently active span, if there is one. */
  getActiveSpan(): Span | undefined {
    return trace.getActiveSpan();
  }

  /**
   * Takes a function and returns a new function that will run the original
   * function, but trace its work as the active span.
   *
   * DO NOT USE THIS IF THE ORIGINAL FUNCTION IS GENERIC (i.e., has type
   * parameters), as TS will likely lose the parametric-ness of the function's
   * signature and have to type each parameter using its constraint.
   *
   * The original function doesn't receive the span as an argument, so doesn't
   * need to know (and can't easily know) that it's being traced. If the
   * function does need the span (e.g., to set other attributes on it or log
   * failure in a custom way), use {@link addActiveSpan} instead.
   *
   * @param spanDescriptor Describes the span to create. Allows an extra field,
   *   `attributesFromArgs`, which can return attributes to add to the span
   *   dynamically, based on the arguments passed to the wrapped function.
   * @param fn The function to wrap
   * @returns A new function that will run the original function, but track its
   *   work as the active span.
   */
  traced<Args extends unknown[], Return>(
    spanDescriptor: SpanDescriptor & {
      attributesFromArgs?: (args: Args) => CorrectedAttributes;
    },
    fn: (this: void, ...args: Args) => Return,
  ): (...args: Args) => Return {
    return (...args: Args) => {
      // `attributesFromArgs` is our own extension, not a span option, so it's
      // stripped here rather than being forwarded to the underlying tracer.
      const { attributesFromArgs, ...baseDescriptor } = spanDescriptor;

      const finalDescriptor: SpanDescriptor = attributesFromArgs
        ? {
            ...baseDescriptor,
            attributes: {
              ...baseDescriptor.attributes,
              ...attributesFromArgs(args),
            },
          }
        : baseDescriptor;

      return this.addActiveSpan(finalDescriptor, () => fn(...args));
    };
  }

  /**
   * Use this function in error cases within spans. Specifically, this function
   * records the exception on the span as well as setting the span's status code
   * to ERROR. This means that we're not only recording what the error is, but
   * our observability tools will know that the span itself errored out.
   *
   * You don't need to use this if your function was wrapped in {@link addSpan}
   * or {@link addActiveSpan} _and it throws/rejects when it fails_, as those
   * functions will automatically call this function in those cases w/ the
   * thrown or rejection value.
   */
  logSpanFailed(span: Span, error: unknown) {
    if (error instanceof Error) {
      span.recordException(error);
    }
    // when we explicitly indicate the exception shouldn't mark span status as ERROR, we return early
    if (error instanceof CoopError && !error.shouldErrorSpan) {
      return;
    }

    // otherwise we set the span status to ERROR
    span.setStatus({
      code: SpanStatusCode.ERROR,
      message: thrownValueToString(error),
    });
  }

  /**
   * Calls {@link logSpanFailed} on the active span, but only if there is an
   * active span and it is currently recording; otherwise does nothing.
   */
  logActiveSpanFailedIfAny(error: unknown) {
    const span = this.getActiveSpan();
    if (span?.isRecording()) {
      this.logSpanFailed(span, error);
    }
  }
}