Skip to content

Commit 981faf2

Browse files
committed
feat(backend): implement sanitization utilities for XSS prevention
chore(backend): update dependencies for security and performance refactor(backend): replace DOMPurify with sanitize-html for sanitization test(backend): add unit tests for sanitization utilities fix(backend): update OAuth state cookie format in tests
1 parent 4420743 commit 981faf2

File tree

9 files changed

+795
-697
lines changed

9 files changed

+795
-697
lines changed

package-lock.json

Lines changed: 228 additions & 542 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"devDependencies": {
2828
"markdownlint-cli2": "^0.20.0",
29-
"webpack": "^5.102.1"
29+
"webpack": "^5.104.1"
3030
},
3131
"overrides": {
3232
"glob": "^10.4.5",

services/backend/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
"drizzle-orm": "^0.45.1",
3939
"fastify": "^5.6.2",
4040
"fastify-favicon": "^5.0.0",
41-
"isomorphic-dompurify": "^2.35.0",
4241
"lucia": "^3.2.2",
4342
"nanoid": "^5.1.6",
4443
"node-cron": "^4.2.1",
@@ -60,6 +59,7 @@
6059
"@types/nodemailer": "^7.0.3",
6160
"@types/pg": "^8.16.0",
6261
"@types/pug": "^2.0.10",
62+
"@types/sanitize-html": "^2.16.0",
6363
"@types/supertest": "^6.0.3",
6464
"@typescript-eslint/eslint-plugin": "^8.49.0",
6565
"@typescript-eslint/parser": "^8.52.0",
@@ -73,6 +73,7 @@
7373
"node-loader": "^2.1.0",
7474
"nodemon": "^3.1.11",
7575
"release-it": "^19.2.3",
76+
"sanitize-html": "^2.17.0",
7677
"supertest": "^7.1.4",
7778
"ts-jest": "^29.4.6",
7879
"ts-loader": "^9.5.4",

services/backend/src/routes/mcp/installations/callback.ts

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { encrypt, decrypt } from '../../../utils/encryption';
66
import { GlobalSettingsInitService } from '../../../global-settings';
77
import { GlobalSettings } from '../../../global-settings';
88
import { nanoid } from 'nanoid';
9+
import { sanitizeText } from '../../../utils/sanitization';
910
import {
1011
OAUTH_CALLBACK_QUERY_SCHEMA,
1112
FLOW_ID_PARAM_SCHEMA,
@@ -70,15 +71,15 @@ export default async function oauthCallbackRoute(server: FastifyInstance) {
7071
</head>
7172
<body>
7273
<h1>Authorization Failed</h1>
73-
<p><strong>Error:</strong> ${escapeHtml(query.error)}</p>
74-
${query.error_description ? `<p>${escapeHtml(query.error_description)}</p>` : ''}
74+
<p><strong>Error:</strong> ${sanitizeText(query.error)}</p>
75+
${query.error_description ? `<p>${sanitizeText(query.error_description)}</p>` : ''}
7576
<p>Closing window...</p>
7677
<script>
7778
// Post error message to parent window
7879
if (window.opener) {
7980
window.opener.postMessage({
8081
type: 'oauth_error',
81-
error: '${escapeHtml(errorMsg)}'
82+
error: '${sanitizeText(errorMsg)}'
8283
}, '${frontendUrl}');
8384
}
8485
@@ -589,7 +590,7 @@ export default async function oauthCallbackRoute(server: FastifyInstance) {
589590
if (window.opener) {
590591
window.opener.postMessage({
591592
type: 'oauth_error',
592-
error: '${escapeHtml(errorMessage)}'
593+
error: '${sanitizeText(errorMessage)}'
593594
}, '${frontendUrl}');
594595
}
595596
@@ -604,18 +605,4 @@ export default async function oauthCallbackRoute(server: FastifyInstance) {
604605
}
605606
}
606607
);
607-
}
608-
609-
/**
610-
* Escape HTML to prevent XSS
611-
*/
612-
function escapeHtml(text: string): string {
613-
const map: Record<string, string> = {
614-
'&': '&amp;',
615-
'<': '&lt;',
616-
'>': '&gt;',
617-
'"': '&quot;',
618-
"'": '&#039;',
619-
};
620-
return text.replace(/[&<>"']/g, (m) => map[m]);
621-
}
608+
}

services/backend/src/services/githubReadmeService.ts

Lines changed: 16 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/* eslint-disable @typescript-eslint/no-explicit-any */
22
import { GlobalSettings } from '../global-settings';
33
import type { FastifyBaseLogger } from 'fastify';
4-
import DOMPurify from 'isomorphic-dompurify';
4+
import { sanitizeMarkdown } from '../utils/sanitization';
55

66
export interface GitHubReadmeResult {
77
content: string;
@@ -11,52 +11,6 @@ export interface GitHubReadmeResult {
1111
// Maximum README size: 2MB (prevents DoS attacks)
1212
const MAX_README_SIZE_BYTES = 2 * 1024 * 1024;
1313

14-
// DOMPurify configuration optimized for GitHub Markdown
15-
const SANITIZE_CONFIG = {
16-
// Allow common markdown/HTML tags
17-
ALLOWED_TAGS: [
18-
// Text formatting
19-
'p', 'br', 'strong', 'em', 'u', 's', 'del', 'ins',
20-
// Headings
21-
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
22-
// Lists
23-
'ul', 'ol', 'li', 'dl', 'dt', 'dd',
24-
// Links and code
25-
'a', 'code', 'pre', 'blockquote',
26-
// Images
27-
'img', 'picture', 'source',
28-
// Tables
29-
'table', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th',
30-
// Containers
31-
'div', 'span', 'section', 'article',
32-
// GitHub-specific
33-
'details', 'summary',
34-
// Other
35-
'hr', 'sup', 'sub'
36-
],
37-
38-
// Allow safe attributes
39-
ALLOWED_ATTR: [
40-
'href', 'src', 'alt', 'title', 'class', 'id',
41-
'width', 'height', 'align',
42-
'colspan', 'rowspan',
43-
'type', 'start', 'reversed',
44-
'open' // for <details> tag
45-
],
46-
47-
// Only allow safe URL protocols (prevents javascript: attacks)
48-
ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i,
49-
50-
// Keep content of removed tags (don't lose text)
51-
KEEP_CONTENT: true,
52-
53-
// Security settings
54-
ALLOW_DATA_ATTR: false, // No data-* attributes
55-
ALLOW_UNKNOWN_PROTOCOLS: false, // Block unknown protocols
56-
SANITIZE_DOM: true, // Enable DOM Clobbering protection
57-
FORCE_BODY: false, // Don't force body wrapper
58-
};
59-
6014
export class GitHubReadmeService {
6115

6216
/**
@@ -351,43 +305,39 @@ export class GitHubReadmeService {
351305
repo,
352306
original_size: decodedContent.length
353307
}, 'Starting README sanitization');
354-
355-
const sanitizedContent = DOMPurify.sanitize(decodedContent, SANITIZE_CONFIG);
356-
357-
// Calculate how much content was removed
358-
const removalBytes = decodedContent.length - sanitizedContent.length;
359-
const removalPercentage = ((removalBytes / decodedContent.length) * 100);
360-
308+
309+
const sanitizationResult = sanitizeMarkdown(decodedContent);
310+
361311
// Log warning if significant content was removed (possible malicious content)
362-
if (removalPercentage > 10) {
312+
if (sanitizationResult.removalPercentage > 10) {
363313
logger.warn({
364314
operation: 'github_readme_get_content',
365315
step: 'high_sanitization_removal',
366316
owner,
367317
repo,
368-
removal_percentage: removalPercentage.toFixed(2),
369-
removal_bytes: removalBytes,
318+
removal_percentage: sanitizationResult.removalPercentage.toFixed(2),
319+
removal_bytes: sanitizationResult.removalBytes,
370320
repository_url: repositoryUrl
371-
}, `High sanitization removal rate detected: ${removalPercentage.toFixed(2)}% of content removed`);
321+
}, `High sanitization removal rate detected: ${sanitizationResult.removalPercentage.toFixed(2)}% of content removed`);
372322
}
373-
323+
374324
const duration = Date.now() - startTime;
375-
325+
376326
logger.debug({
377327
operation: 'github_readme_get_content',
378328
step: 'complete',
379329
owner,
380330
repo,
381-
original_size: decodedContent.length,
382-
sanitized_size: sanitizedContent.length,
383-
removal_bytes: removalBytes,
384-
removal_percentage: removalPercentage.toFixed(2),
331+
original_size: sanitizationResult.originalLength,
332+
sanitized_size: sanitizationResult.sanitizedLength,
333+
removal_bytes: sanitizationResult.removalBytes,
334+
removal_percentage: sanitizationResult.removalPercentage.toFixed(2),
385335
duration_ms: duration,
386336
readme_name: data.name
387337
}, `Successfully fetched and sanitized README for ${owner}/${repo}`);
388-
338+
389339
return {
390-
content: sanitizedContent,
340+
content: sanitizationResult.content,
391341
encoding: 'utf8'
392342
};
393343

Lines changed: 9 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,23 @@
1-
import DOMPurify from 'isomorphic-dompurify';
2-
3-
/**
4-
* DOMPurify configuration for email text sanitization
5-
* - Only allows <br> tags (for newline preservation)
6-
* - Strips all other HTML and attributes
7-
* - Enables DOM sanitization for additional security
8-
*/
9-
const EMAIL_TEXT_SANITIZE_CONFIG = {
10-
ALLOWED_TAGS: ['br'],
11-
ALLOWED_ATTR: [],
12-
KEEP_CONTENT: true,
13-
ALLOW_DATA_ATTR: false,
14-
ALLOW_UNKNOWN_PROTOCOLS: false,
15-
SANITIZE_DOM: true,
16-
FORCE_BODY: false
17-
};
18-
19-
/**
20-
* Escape HTML entities to prevent XSS attacks
21-
*
22-
* This function converts potentially dangerous HTML characters into their
23-
* HTML entity equivalents, making them display as literal text rather than
24-
* being interpreted as HTML.
25-
*
26-
* @param text - Raw text that may contain HTML characters
27-
* @returns Text with HTML entities escaped
28-
*
29-
* @example
30-
* escapeHtml('<script>alert("xss")</script>')
31-
* // Returns: '&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;'
32-
*/
33-
function escapeHtml(text: string): string {
34-
return text
35-
.replace(/&/g, '&amp;') // Must be first to avoid double-escaping
36-
.replace(/</g, '&lt;')
37-
.replace(/>/g, '&gt;')
38-
.replace(/"/g, '&quot;')
39-
.replace(/'/g, '&#39;');
40-
}
1+
import { sanitizeForEmail } from './sanitization';
412

423
/**
434
* Sanitize user-provided text for safe email rendering
445
*
6+
* This is a wrapper around the centralized sanitizeForEmail() function.
7+
* Kept for backwards compatibility with existing imports.
8+
*
459
* This function implements a defense-in-depth approach to prevent XSS attacks
4610
* while preserving newlines for better email formatting:
4711
*
4812
* 1. Normalizes line endings (Windows \r\n → Unix \n)
4913
* 2. Trims leading/trailing whitespace
5014
* 3. Escapes HTML entities (prevents XSS)
5115
* 4. Converts newlines to <br> tags (preserves formatting)
52-
* 5. Sanitizes with DOMPurify (additional security layer)
16+
* 5. Sanitizes with sanitize-html (additional security layer)
5317
*
5418
* **Security Features**:
5519
* - Primary defense: HTML entity escaping prevents all XSS vectors
56-
* - Secondary defense: DOMPurify catches any escaping bugs
20+
* - Secondary defense: sanitize-html catches any escaping bugs
5721
* - Preserves user intent: Users can type HTML-like text (e.g., "The <button> component")
5822
* and it displays literally, not stripped
5923
*
@@ -65,6 +29,8 @@ function escapeHtml(text: string): string {
6529
* @param text - Raw user input from textarea (may contain newlines, HTML, etc.)
6630
* @returns Sanitized HTML string with <br> tags, safe for email rendering
6731
*
32+
* @see sanitization.ts for implementation details
33+
*
6834
* @example
6935
* sanitizeUserTextForEmail('Hello\nWorld')
7036
* // Returns: 'Hello<br>World'
@@ -78,28 +44,5 @@ function escapeHtml(text: string): string {
7844
* // Returns: 'The &lt;div&gt; tag<br>is broken'
7945
*/
8046
export function sanitizeUserTextForEmail(text: string): string {
81-
if (!text) return '';
82-
83-
// 1. Normalize line endings (Windows \r\n → Unix \n)
84-
let sanitized = text.replace(/\r\n/g, '\n');
85-
86-
// 2. Trim leading/trailing whitespace
87-
sanitized = sanitized.trim();
88-
89-
// Return empty string if only whitespace
90-
if (!sanitized) return '';
91-
92-
// 3. Escape HTML entities (CRITICAL: do this BEFORE adding <br> tags)
93-
// This ensures user-typed HTML displays as literal text
94-
sanitized = escapeHtml(sanitized);
95-
96-
// 4. Convert newlines to <br> tags for email rendering
97-
// After escaping, we can safely add our trusted <br> tags
98-
sanitized = sanitized.replace(/\n/g, '<br>');
99-
100-
// 5. Sanitize with DOMPurify as defense-in-depth
101-
// This catches any potential bugs in our escaping logic
102-
sanitized = DOMPurify.sanitize(sanitized, EMAIL_TEXT_SANITIZE_CONFIG);
103-
104-
return sanitized;
47+
return sanitizeForEmail(text);
10548
}

0 commit comments

Comments
 (0)