tools/BashTool/sedValidation.ts at main

oppi.li / claude-code
fork
source dump of claude code
fork
claude-code / tools / BashTool / sedValidation.ts
at main 684 lines 22 kB view raw
wrap content
Akshay Oppiliappan dump from zip 5w ago
63aada3f
  1import type { ToolPermissionContext } from '../../Tool.js'
  2import { splitCommand_DEPRECATED } from '../../utils/bash/commands.js'
  3import { tryParseShellCommand } from '../../utils/bash/shellQuote.js'
  4import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'
  5
  6/**
  7 * Helper: Validate flags against an allowlist
  8 * Handles both single flags and combined flags (e.g., -nE)
  9 * @param flags Array of flags to validate
 10 * @param allowedFlags Array of allowed single-character and long flags
 11 * @returns true if all flags are valid, false otherwise
 12 */
 13function validateFlagsAgainstAllowlist(
 14  flags: string[],
 15  allowedFlags: string[],
 16): boolean {
 17  for (const flag of flags) {
 18    // Handle combined flags like -nE or -Er
 19    if (flag.startsWith('-') && !flag.startsWith('--') && flag.length > 2) {
 20      // Check each character in combined flag
 21      for (let i = 1; i < flag.length; i++) {
 22        const singleFlag = '-' + flag[i]
 23        if (!allowedFlags.includes(singleFlag)) {
 24          return false
 25        }
 26      }
 27    } else {
 28      // Single flag or long flag
 29      if (!allowedFlags.includes(flag)) {
 30        return false
 31      }
 32    }
 33  }
 34  return true
 35}
 36
 37/**
 38 * Pattern 1: Check if this is a line printing command with -n flag
 39 * Allows: sed -n 'N' | sed -n 'N,M' with optional -E, -r, -z flags
 40 * Allows semicolon-separated print commands like: sed -n '1p;2p;3p'
 41 * File arguments are ALLOWED for this pattern
 42 * @internal Exported for testing
 43 */
 44export function isLinePrintingCommand(
 45  command: string,
 46  expressions: string[],
 47): boolean {
 48  const sedMatch = command.match(/^\s*sed\s+/)
 49  if (!sedMatch) return false
 50
 51  const withoutSed = command.slice(sedMatch[0].length)
 52  const parseResult = tryParseShellCommand(withoutSed)
 53  if (!parseResult.success) return false
 54  const parsed = parseResult.tokens
 55
 56  // Extract all flags
 57  const flags: string[] = []
 58  for (const arg of parsed) {
 59    if (typeof arg === 'string' && arg.startsWith('-') && arg !== '--') {
 60      flags.push(arg)
 61    }
 62  }
 63
 64  // Validate flags - only allow -n, -E, -r, -z and their long forms
 65  const allowedFlags = [
 66    '-n',
 67    '--quiet',
 68    '--silent',
 69    '-E',
 70    '--regexp-extended',
 71    '-r',
 72    '-z',
 73    '--zero-terminated',
 74    '--posix',
 75  ]
 76
 77  if (!validateFlagsAgainstAllowlist(flags, allowedFlags)) {
 78    return false
 79  }
 80
 81  // Check if -n flag is present (required for Pattern 1)
 82  let hasNFlag = false
 83  for (const flag of flags) {
 84    if (flag === '-n' || flag === '--quiet' || flag === '--silent') {
 85      hasNFlag = true
 86      break
 87    }
 88    // Check in combined flags
 89    if (flag.startsWith('-') && !flag.startsWith('--') && flag.includes('n')) {
 90      hasNFlag = true
 91      break
 92    }
 93  }
 94
 95  // Must have -n flag for Pattern 1
 96  if (!hasNFlag) {
 97    return false
 98  }
 99
100  // Must have at least one expression
101  if (expressions.length === 0) {
102    return false
103  }
104
105  // All expressions must be print commands (strict allowlist)
106  // Allow semicolon-separated commands
107  for (const expr of expressions) {
108    const commands = expr.split(';')
109    for (const cmd of commands) {
110      if (!isPrintCommand(cmd.trim())) {
111        return false
112      }
113    }
114  }
115
116  return true
117}
118
/**
 * Helper: decide whether one sed command is an allowed print command.
 * STRICT ALLOWLIST - the whole string must be exactly one of:
 * - `p`      (print all)
 * - `Np`     (print line N, N being decimal digits)
 * - `N,Mp`   (print lines N through M)
 * Everything else - including the w, W, e, E commands - is rejected, as is
 * the empty string.
 * @param cmd A single sed command with surrounding whitespace already removed
 * @internal Exported for testing
 */
export function isPrintCommand(cmd: string): boolean {
  // Optional numeric address (single line or range) followed by a lone `p`.
  // Matches: p, 1p, 123p, 1,5p, 10,200p
  const strictPrint = /^(?:\d+|\d+,\d+)?p$/
  return Boolean(cmd) && strictPrint.test(cmd)
}
134
/**
 * Pattern 2: decide whether a sed invocation is a single plain substitution.
 *
 * Accepts `sed 's/pattern/replacement/flags'` where:
 * - the delimiter is `/` (no alternative delimiters);
 * - the text after `s/` contains exactly two unescaped `/`;
 * - the trailing flags consist only of g, p, i, I, m, M and at most one
 *   digit 1-9.
 *
 * Command-line flags are limited to -E/--regexp-extended, -r and --posix.
 * When `allowFileWrites` is true, -i/--in-place and file arguments are also
 * accepted; when false (the default) any file argument causes rejection.
 *
 * @param command Full sed command string
 * @param expressions sed expressions already extracted from the command
 * @param hasFileArguments Whether the command names input files
 * @param options.allowFileWrites Permit in-place editing (defaults to false)
 * @returns true if the command matches this pattern
 */
function isSubstitutionCommand(
  command: string,
  expressions: string[],
  hasFileArguments: boolean,
  options?: { allowFileWrites?: boolean },
): boolean {
  const writesPermitted = options?.allowFileWrites ?? false

  // Read-only mode is stdout-only: naming a file disqualifies the command
  if (hasFileArguments && !writesPermitted) return false

  const prefix = /^\s*sed\s+/.exec(command)
  if (prefix === null) return false

  const parsedArgs = tryParseShellCommand(command.slice(prefix[0].length))
  if (!parsedArgs.success) return false

  // Collect every dash-prefixed string token; a bare `--` is not a flag
  const flags: string[] = []
  for (const token of parsedArgs.tokens) {
    if (typeof token !== 'string') continue
    if (token === '--' || !token.startsWith('-')) continue
    flags.push(token)
  }

  // Flags common to both modes, plus the in-place pair when writes are allowed
  const baseFlags = ['-E', '--regexp-extended', '-r', '--posix']
  const permitted = writesPermitted
    ? [...baseFlags, '-i', '--in-place']
    : baseFlags
  if (!validateFlagsAgainstAllowlist(flags, permitted)) return false

  // Exactly one expression is allowed
  if (expressions.length !== 1) return false
  const script = expressions[0]!.trim()

  // STRICT ALLOWLIST: must start with `s` - rejects standalone `e`, `w file`, ...
  if (!script.startsWith('s')) return false

  // Only `/` is accepted as the delimiter; capture everything after `s/`
  const afterHead = /^s\/(.*?)$/.exec(script)
  if (afterHead === null) return false
  const body = afterHead[1]!

  // Count unescaped `/` in the body and remember where the last one sits
  let slashCount = 0
  let finalSlashAt = -1
  for (let pos = 0; pos < body.length; pos++) {
    const ch = body[pos]
    if (ch === '\\') {
      // Step over the escaped character as well
      pos++
      continue
    }
    if (ch === '/') {
      slashCount++
      finalSlashAt = pos
    }
  }

  // One slash ends the pattern, one ends the replacement - no more, no less
  if (slashCount !== 2) return false

  // Whatever follows the last delimiter is the substitution's flag string:
  // only g, p, i, I, m, M and optionally ONE digit 1-9 are allowed
  const trailingFlags = body.slice(finalSlashAt + 1)
  return /^[gpimIM]*[1-9]?[gpimIM]*$/.test(trailingFlags)
}
239
/**
 * Decide whether a sed command is permitted.
 *
 * The command must match one of the allowlist patterns and none of its
 * expressions may trip the dangerous-operation denylist:
 * - read-only mode (default): line printing (Pattern 1) or substitution
 *   (Pattern 2) may match;
 * - `allowFileWrites` mode: only the substitution pattern is considered, with
 *   in-place editing enabled.
 * If the substitution pattern matched, no expression may contain `;`.
 *
 * @param command The sed command to check
 * @param options.allowFileWrites When true, allows -i flag and file arguments for substitution commands
 * @returns true if the command matches the allowlist and passes the denylist, false otherwise
 */
export function sedCommandIsAllowedByAllowlist(
  command: string,
  options?: { allowFileWrites?: boolean },
): boolean {
  const writesPermitted = options?.allowFileWrites ?? false

  // Pull out the sed scripts; any extraction failure means "not allowed"
  let expressions: string[]
  try {
    expressions = extractSedExpressions(command)
  } catch {
    return false
  }

  const fileArgsPresent = hasFileArgs(command)

  // Line printing never needs file writes, so it is only tried in read-only mode
  const matchesPrinting = writesPermitted
    ? false
    : isLinePrintingCommand(command, expressions)
  const matchesSubstitution = isSubstitutionCommand(
    command,
    expressions,
    fileArgsPresent,
    writesPermitted ? { allowFileWrites: true } : undefined,
  )

  if (!(matchesPrinting || matchesSubstitution)) return false

  // Semicolons separate commands: fine between print commands, never allowed
  // once the substitution pattern has matched
  if (matchesSubstitution && expressions.some(expr => expr.includes(';'))) {
    return false
  }

  // Defense-in-depth: the denylist gets the final say on every expression
  return !expressions.some(expr => containsDangerousOperations(expr))
}
302
/**
 * Report whether a sed command names file arguments (rather than reading
 * only stdin).
 *
 * Rules applied to the parsed arguments:
 * - a glob token always counts as a file argument;
 * - `-e X`, `--expression X`, `--expression=X` and `-e=X` mark the script as
 *   supplied by flag; after that, any non-flag token is a file;
 * - without such a flag, the first non-flag token is the script and a second
 *   one is a file;
 * - other dash-prefixed tokens are skipped.
 *
 * Returns false when the command does not start with `sed`, and true
 * (assume the worst) when the arguments cannot be parsed.
 * @internal Exported for testing
 */
export function hasFileArgs(command: string): boolean {
  const prefix = /^\s*sed\s+/.exec(command)
  if (prefix === null) return false

  const parsedArgs = tryParseShellCommand(command.slice(prefix[0].length))
  if (!parsedArgs.success) return true
  const tokens = parsedArgs.tokens

  try {
    let scriptViaFlag = false
    let positionalSeen = false
    let idx = 0

    while (idx < tokens.length) {
      const token = tokens[idx]
      idx++ // idx now points at the token AFTER the current one

      if (typeof token !== 'string') {
        // Glob patterns (like *.log) expand to file names
        if (
          typeof token === 'object' &&
          token !== null &&
          'op' in token &&
          token.op === 'glob'
        ) {
          return true
        }
        // Any other non-string token is ignored
        continue
      }

      // -e / --expression consume the following token as their script
      if ((token === '-e' || token === '--expression') && idx < tokens.length) {
        scriptViaFlag = true
        idx++
        continue
      }

      // --expression=value, and the non-standard -e=value (defense in depth)
      if (token.startsWith('--expression=') || token.startsWith('-e=')) {
        scriptViaFlag = true
        continue
      }

      // Remaining flags carry no file information
      if (token.startsWith('-')) continue

      // Non-flag token: it is a file if the script came via -e, or if an
      // earlier non-flag token already served as the script
      if (scriptViaFlag || positionalSeen) return true
      positionalSeen = true
    }

    return false
  } catch {
    return true // Assume dangerous if inspection fails
  }
}
380
/**
 * Extract the sed expressions (scripts) from a command, leaving out flags
 * and file names.
 *
 * Scripts are taken from `-e X`, `--expression X`, `--expression=X` and
 * `-e=X`. If none of those appears first, the first non-flag argument is the
 * script. Scanning stops at the first non-flag argument seen after a script
 * has been located, since that and everything after it are file names.
 *
 * @param command Full sed command
 * @returns Array of sed expressions to check for dangerous operations; empty
 *   when the command does not start with `sed`
 * @throws Error if the argument text contains a bundled flag sequence such as
 *   -ew, -eW, -ee, -we or -wE, or if shell parsing fails
 * @internal Exported for testing
 */
export function extractSedExpressions(command: string): string[] {
  const scripts: string[] = []

  // Everything after the leading `sed` and its trailing whitespace
  const prefix = /^\s*sed\s+/.exec(command)
  if (prefix === null) return scripts
  const argText = command.slice(prefix[0].length)

  // Refuse -e/-w bundled directly with dangerous command letters
  const bundledDanger = [/-e[wWe]/, /-w[eE]/]
  if (bundledDanger.some(pattern => pattern.test(argText))) {
    throw new Error('Dangerous flag combination detected')
  }

  // Tokenize the arguments the way a shell would
  const parsedArgs = tryParseShellCommand(argText)
  if (!parsedArgs.success) {
    // Malformed shell syntax - surfaced to the caller as an error
    throw new Error(`Malformed shell syntax: ${parsedArgs.error}`)
  }
  const tokens = parsedArgs.tokens

  try {
    // Becomes true once any -e style flag or a standalone script was seen
    let scriptLocated = false
    let idx = 0

    while (idx < tokens.length) {
      const token = tokens[idx]
      idx++ // idx now points at the token AFTER the current one

      // Non-string tokens (like control operators) carry no script
      if (typeof token !== 'string') continue

      // -e / --expression take the following token as the script
      if ((token === '-e' || token === '--expression') && idx < tokens.length) {
        scriptLocated = true
        const following = tokens[idx]
        if (typeof following === 'string') {
          scripts.push(following)
          idx++ // consumed
        }
        continue
      }

      // --expression=value
      if (token.startsWith('--expression=')) {
        scriptLocated = true
        scripts.push(token.slice('--expression='.length))
        continue
      }

      // -e=value (non-standard but defense in depth)
      if (token.startsWith('-e=')) {
        scriptLocated = true
        scripts.push(token.slice('-e='.length))
        continue
      }

      // Any other flag is irrelevant here
      if (token.startsWith('-')) continue

      // A non-flag token after a script has been located is a file name,
      // and so is everything after it
      if (scriptLocated) break

      // Otherwise this first non-flag token is the script itself
      scripts.push(token)
      scriptLocated = true
    }
  } catch (error) {
    // Any failure while walking the tokens marks the command as unsafe
    throw new Error(
      `Failed to parse sed command: ${error instanceof Error ? error.message : 'Unknown error'}`,
    )
  }

  return scripts
}
467
/**
 * Denylist check: does a sed expression contain anything dangerous or
 * anything too unusual to reason about?
 *
 * The checks are independent; the first one that fires marks the expression
 * as dangerous. The policy is conservative - when in doubt, reject.
 *
 * @param expression Single sed expression (without quotes)
 * @returns true if dangerous, false if safe (an empty/blank expression is safe)
 */
function containsDangerousOperations(expression: string): boolean {
  const script = expression.trim()
  if (script.length === 0) return false

  const hits = (pattern: RegExp): boolean => pattern.test(script)

  // Anything outside 0x01-0x7F: Unicode homoglyphs (fullwidth w, small
  // capital w), combining characters, and the null byte
  // eslint-disable-next-line no-control-regex
  if (hits(/[^\x01-\x7F]/)) return true

  // Blocks and multi-line scripts are too complex to parse
  if (['{', '}', '\n'].some(ch => script.includes(ch))) return true

  // `#` starts a comment unless it directly follows an `s`, in which case it
  // is read as a substitution delimiter (s#pattern#replacement#)
  const hashAt = script.indexOf('#')
  if (hashAt === 0 || (hashAt > 0 && script[hashAt - 1] !== 's')) return true

  const suspiciousSyntax: RegExp[] = [
    /^!/, // negation at start: !/pattern/
    /[/\d$]!/, // negation after an address: /pattern/!, 1,10!, $!
    /\d\s*~\s*\d|,\s*~\s*\d|\$\s*~\s*\d/, // GNU step address with tilde
    /^,/, // bare leading comma (shorthand address range)
    /,\s*[+-]/, // GNU offset address: ,+N or ,-N
    /s\\/, // substitution using backslash as delimiter
    /\\[|#%@]/, // backslash introducing an alternate delimiter
    /\\\/.*[wW]/, // escaped slash later followed by w/W
    /\/[^/]*\s+[wWeE]/, // slash, non-slash text, whitespace, then w/W/e/E
  ]
  if (suspiciousSyntax.some(hits)) return true

  // A slash-delimited substitution must have exactly three slashes
  // (rejects missing delimiters and extra ones like s/foo/bar//w)
  if (hits(/^s\//) && !hits(/^s\/[^/]*\/[^/]*\/[^/]*$/)) return true

  // PARANOID: an `s...` expression ending in w/W/e/E must be a well-formed
  // substitution (any delimiter) with no such letter after the last delimiter
  if (
    hits(/^s./) &&
    hits(/[wWeE]$/) &&
    !hits(/^s([^\\\n]).*?\1.*?\1[^wWeE]*$/)
  ) {
    return true
  }

  // Write commands: [address]w filename / [address]W filename.
  // Patterns are kept simple to avoid exponential backtracking (CodeQL issue)
  const writeCommands: RegExp[] = [
    /^[wW]\s*\S+/, // at start: w file
    /^\d+\s*[wW]\s*\S+/, // after line number: 1w file or 1 w file
    /^\$\s*[wW]\s*\S+/, // after $: $w file or $ w file
    /^\/[^/]*\/[IMim]*\s*[wW]\s*\S+/, // after pattern: /pattern/w file
    /^\d+,\d+\s*[wW]\s*\S+/, // after range: 1,10w file
    /^\d+,\$\s*[wW]\s*\S+/, // after range: 1,$w file
    /^\/[^/]*\/[IMim]*,\/[^/]*\/[IMim]*\s*[wW]\s*\S+/, // after pattern range: /s/,/e/w file
  ]
  if (writeCommands.some(hits)) return true

  // Execute commands: [address]e [command].
  // Patterns are kept simple to avoid exponential backtracking (CodeQL issue)
  const executeCommands: RegExp[] = [
    /^e/, // at start: e cmd
    /^\d+\s*e/, // after line number: 1e or 1 e
    /^\$\s*e/, // after $: $e or $ e
    /^\/[^/]*\/[IMim]*\s*e/, // after pattern: /pattern/e
    /^\d+,\d+\s*e/, // after range: 1,10e
    /^\d+,\$\s*e/, // after range: 1,$e
    /^\/[^/]*\/[IMim]*,\/[^/]*\/[IMim]*\s*e/, // after pattern range: /s/,/e/e
  ]
  if (executeCommands.some(hits)) return true

  // Substitution with any delimiter (anything but backslash or newline):
  // the text after the closing delimiter must not contain w/W (write) or
  // e/E (execute)
  const substitution = /s([^\\\n]).*?\1.*?\1(.*?)$/.exec(script)
  if (substitution !== null) {
    const trailing = substitution[2] || ''
    if (['w', 'W', 'e', 'E'].some(letter => trailing.includes(letter))) {
      return true
    }
  }

  // PARANOID: once a y<delim> sequence appears anywhere, reject any w/W/e/E
  // in the whole expression - y commands are rare and this is the safe call
  return hits(/y([^\\\n])/) && hits(/[wWeE]/)
}
630
/**
 * Cross-cutting validation step for sed commands.
 *
 * Splits the input into individual commands and examines those whose first
 * word is exactly `sed`. Each one is checked with
 * sedCommandIsAllowedByAllowlist; file writes are permitted only when the
 * permission context's mode is 'acceptEdits'.
 *
 * @param input - Object containing the command string
 * @param toolPermissionContext - Context containing mode and permissions
 * @returns
 * - 'ask' as soon as one sed command is not allowed
 * - 'passthrough' if there are no sed commands or all of them are allowed
 */
export function checkSedConstraints(
  input: { command: string },
  toolPermissionContext: ToolPermissionContext,
): PermissionResult {
  // Keep only the sub-commands whose base command is sed
  const sedInvocations = splitCommand_DEPRECATED(input.command)
    .map(part => part.trim())
    .filter(part => part.split(/\s+/)[0] === 'sed')

  const needsApproval = sedInvocations.some(
    sedCommand =>
      !sedCommandIsAllowedByAllowlist(sedCommand, {
        // acceptEdits mode allows in-place edits (-i) but nothing more dangerous
        allowFileWrites: toolPermissionContext.mode === 'acceptEdits',
      }),
  )

  if (needsApproval) {
    return {
      behavior: 'ask',
      message:
        'sed command requires approval (contains potentially dangerous operations)',
      decisionReason: {
        type: 'other',
        reason:
          'sed command contains operations that require explicit approval (e.g., write commands, execute commands)',
      },
    }
  }

  // No dangerous sed commands found (or no sed commands at all)
  return {
    behavior: 'passthrough',
    message: 'No dangerous sed operations detected',
  }
}
Configure Feed

Configure Feed