Skip to content

feat(node): Sanitize URLs in Span descriptions and breadcrumbs #7667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 30, 2023
47 changes: 39 additions & 8 deletions packages/node/src/integrations/http.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import { LRUMap } from 'lru_map';

import type { NodeClient } from '../client';
import type { RequestMethod, RequestMethodArgs } from './utils/http';
import { cleanSpanDescription, extractUrl, isSentryRequest, normalizeRequestArgs } from './utils/http';
import { cleanSpanDescription, extractRawUrl, extractUrl, isSentryRequest, normalizeRequestArgs } from './utils/http';

const NODE_VERSION = parseSemver(process.versions.node);

Expand Down Expand Up @@ -129,6 +129,16 @@ type OriginalRequestMethod = RequestMethod;
type WrappedRequestMethod = RequestMethod;
type WrappedRequestMethodFactory = (original: OriginalRequestMethod) => WrappedRequestMethod;

/**
* See https://develop.sentry.dev/sdk/data-handling/#structuring-data
*/
type RequestSpanData = {
url: string;
method: string;
'http.fragment'?: string;
'http.query'?: string;
};

/**
* Function which creates a function which creates wrapped versions of internal `request` and `get` calls within `http`
* and `https` modules. (NB: Not a typo - this is a creator^2!)
Expand Down Expand Up @@ -180,6 +190,8 @@ function _createWrappedRequestMethodFactory(
return function wrappedMethod(this: unknown, ...args: RequestMethodArgs): http.ClientRequest {
const requestArgs = normalizeRequestArgs(httpModule, args);
const requestOptions = requestArgs[0];
// eslint-disable-next-line deprecation/deprecation
const rawRequestUrl = extractRawUrl(requestOptions);
const requestUrl = extractUrl(requestOptions);

// we don't want to record requests to Sentry as either breadcrumbs or spans, so just use the original method
Expand All @@ -192,16 +204,30 @@ function _createWrappedRequestMethodFactory(

const scope = getCurrentHub().getScope();

if (scope && tracingOptions && shouldCreateSpan(requestUrl)) {
const requestSpanData: RequestSpanData = {
url: requestUrl,
method: requestOptions.method || 'GET',
};
if (requestOptions.hash) {
// strip leading "#"
requestSpanData['http.fragment'] = requestOptions.hash.substring(1);
}
if (requestOptions.search) {
// strip leading "?"
requestSpanData['http.query'] = requestOptions.search.substring(1);
}

if (scope && tracingOptions && shouldCreateSpan(rawRequestUrl)) {
parentSpan = scope.getSpan();

if (parentSpan) {
requestSpan = parentSpan.startChild({
description: `${requestOptions.method || 'GET'} ${requestUrl}`,
description: `${requestSpanData.method} ${requestSpanData.url}`,
op: 'http.client',
data: requestSpanData,
});

if (shouldAttachTraceData(requestUrl)) {
if (shouldAttachTraceData(rawRequestUrl)) {
const sentryTraceHeader = requestSpan.toTraceparent();
__DEBUG_BUILD__ &&
logger.log(
Expand Down Expand Up @@ -253,7 +279,7 @@ function _createWrappedRequestMethodFactory(
// eslint-disable-next-line @typescript-eslint/no-this-alias
const req = this;
if (breadcrumbsEnabled) {
addRequestBreadcrumb('response', requestUrl, req, res);
addRequestBreadcrumb('response', requestSpanData, req, res);
}
if (requestSpan) {
if (res.statusCode) {
Expand All @@ -268,7 +294,7 @@ function _createWrappedRequestMethodFactory(
const req = this;

if (breadcrumbsEnabled) {
addRequestBreadcrumb('error', requestUrl, req);
addRequestBreadcrumb('error', requestSpanData, req);
}
if (requestSpan) {
requestSpan.setHttpStatus(500);
Expand All @@ -283,7 +309,12 @@ function _createWrappedRequestMethodFactory(
/**
* Captures Breadcrumb based on provided request/response pair
*/
function addRequestBreadcrumb(event: string, url: string, req: http.ClientRequest, res?: http.IncomingMessage): void {
function addRequestBreadcrumb(
event: string,
requestSpanData: RequestSpanData,
req: http.ClientRequest,
res?: http.IncomingMessage,
): void {
if (!getCurrentHub().getIntegration(Http)) {
return;
}
Expand All @@ -294,7 +325,7 @@ function addRequestBreadcrumb(event: string, url: string, req: http.ClientReques
data: {
method: req.method,
status_code: res && res.statusCode,
url,
...requestSpanData,
},
type: 'http',
},
Expand Down
29 changes: 26 additions & 3 deletions packages/node/src/integrations/utils/http.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,25 @@ export function isSentryRequest(url: string): boolean {
return dsn ? url.includes(dsn.host) : false;
}

/**
* Assembles a URL that's passed to the users to filter on.
* It can include raw (potentially PII containing) data, which we'll allow users to access to filter
* but won't include in spans or breadcrumbs.
*
* @param requestOptions RequestOptions object containing the component parts for a URL
* @returns Fully-formed URL
*/
// TODO (v8): This function should include auth, query and fragment (it's breaking, so we need to wait for v8)
export function extractRawUrl(requestOptions: RequestOptions): string {
const protocol = requestOptions.protocol || '';
const hostname = requestOptions.hostname || requestOptions.host || '';
// Don't log standard :80 (http) and :443 (https) ports to reduce the noise
const port =
!requestOptions.port || requestOptions.port === 80 || requestOptions.port === 443 ? '' : `:${requestOptions.port}`;
const path = requestOptions.path ? requestOptions.path : '/';
return `${protocol}//${hostname}${port}${path}`;
}

/**
* Assemble a URL to be used for breadcrumbs and spans.
*
Expand All @@ -27,9 +46,11 @@ export function extractUrl(requestOptions: RequestOptions): string {
// Don't log standard :80 (http) and :443 (https) ports to reduce the noise
const port =
!requestOptions.port || requestOptions.port === 80 || requestOptions.port === 443 ? '' : `:${requestOptions.port}`;
const path = requestOptions.path ? requestOptions.path : '/';
// do not include search or hash in span descriptions, per https://develop.sentry.dev/sdk/data-handling/#structuring-data
const path = requestOptions.pathname || '/';
const authority = requestOptions.auth ? `${requestOptions.auth}@` : '';

return `${protocol}//${hostname}${port}${path}`;
return `${protocol}//${authority}${hostname}${port}${path}`;
}

/**
Expand Down Expand Up @@ -59,6 +80,7 @@ export function cleanSpanDescription(
if (requestOptions.host && !requestOptions.protocol) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-explicit-any
requestOptions.protocol = (request as any)?.agent?.protocol; // worst comes to worst, this is undefined and nothing changes
// This URL contains the filtered authority ([filtered]:[filtered]@example.com) but no fragment or query params
requestUrl = extractUrl(requestOptions);
}

Expand Down Expand Up @@ -101,7 +123,8 @@ export function urlToOptions(url: URL): RequestOptions {
options.port = Number(url.port);
}
if (url.username || url.password) {
options.auth = `${url.username}:${url.password}`;
// always filter authority, see https://develop.sentry.dev/sdk/data-handling/#structuring-data
options.auth = '[Filtered]:[Filtered]';
}
return options;
}
Expand Down
33 changes: 33 additions & 0 deletions packages/node/test/integrations/http.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,39 @@ describe('tracing', () => {
expect(loggerLogSpy).toBeCalledWith('HTTP Integration is skipped because of instrumenter configuration.');
});

it('omits query and fragment from description and adds to span data instead', () => {
nock('http://dogs.are.great').get('/spaniel?tail=wag&cute=true#learn-more').reply(200);

const transaction = createTransactionOnScope();
const spans = (transaction as unknown as Span).spanRecorder?.spans as Span[];

http.get('http://dogs.are.great/spaniel?tail=wag&cute=true#learn-more');

expect(spans.length).toEqual(2);

// our span is at index 1 because the transaction itself is at index 0
expect(spans[1].description).toEqual('GET http://dogs.are.great/spaniel');
expect(spans[1].op).toEqual('http.client');
expect(spans[1].data.method).toEqual('GET');
expect(spans[1].data.url).toEqual('http://dogs.are.great/spaniel');
expect(spans[1].data['http.query']).toEqual('tail=wag&cute=true');
expect(spans[1].data['http.fragment']).toEqual('learn-more');
});

it('filters the authority (username and password) in span description', () => {
nock('http://username:[email protected]').get('/').reply(200);

const transaction = createTransactionOnScope();
const spans = (transaction as unknown as Span).spanRecorder?.spans as Span[];

http.get('http://username:[email protected]/');

expect(spans.length).toEqual(2);

// our span is at index 1 because the transaction itself is at index 0
expect(spans[1].description).toEqual('GET http://[Filtered]:[Filtered]@dogs.are.great/');
});

describe('Tracing options', () => {
beforeEach(() => {
// hacky way of restoring monkey patched functions
Expand Down