diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ab66b77f..3cd3410eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Fixed -- Properly map all hotkeys in UI based on the platform [#784](https://github.com/sourcebot-dev/sourcebot/pull/784) +- Properly map all hotkeys in UI based on the platform. [#784](https://github.com/sourcebot-dev/sourcebot/pull/784) +- Properly handle regex filters that include parenthesis. [#786](https://github.com/sourcebot-dev/sourcebot/pull/786) ## [4.10.16] - 2026-01-22 diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 43c08f248..c34df5168 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -3,6 +3,7 @@ // Entry point for the MCP server import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; +import { preprocessRegexp } from '@sourcebot/shared'; import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; import { listRepos, search, getFileSource } from './client.js'; @@ -25,7 +26,8 @@ server.tool( If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable. If the \`includeCodeSnippets\` property is true, code snippets containing the matches will be included in the response. Only set this to true if the request requires code snippets (e.g., show me examples where library X is used). When referencing a file in your response, **ALWAYS** include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out. - **ONLY USE** the \`filterByRepoIds\` property if the request requires searching a specific repo(s). 
Otherwise, leave it empty.`, + **ONLY USE** the \`filterByRepoIds\` property if the request requires searching a specific repo(s). Otherwise, leave it empty. + If the request is asking to search for a specific file or results for files in a specific file path, **YOU MUST** ensure that the \`filterByFile\` property is used.`, { query: z .string() @@ -41,6 +43,10 @@ server.tool( .array(z.string()) .describe(`Scope the search to the provided languages. The language MUST be formatted as a GitHub linguist language. Examples: Python, JavaScript, TypeScript, Java, C#, C++, PHP, Go, Rust, Ruby, Swift, Kotlin, Shell, C, Dart, HTML, CSS, PowerShell, SQL, R`) .optional(), + filterByFile: z + .array(z.string()) + .describe("Scope the search to results inside filepaths that match the provided regex expression. By default all files are searched, so **only use this filter if you need to filter on specific files**. **YOU MUST** ensure that this is a valid regex expression and any special characters are properly escaped. 
If the regex expresion includes a paranthesis **YOU MUST** wrap this value in quotes when passing it in.") + .optional(), caseSensitive: z .boolean() .describe(`Whether the search should be case sensitive (default: false).`) @@ -58,6 +64,7 @@ server.tool( query, filterByRepoIds: repoIds = [], filterByLanguages: languages = [], + filterByFile: filePath = [], maxTokens = env.DEFAULT_MINIMUM_TOKENS, includeCodeSnippets = false, caseSensitive = false, @@ -70,6 +77,11 @@ server.tool( query += ` ( lang:${languages.join(' or lang:')} )`; } + if (filePath.length > 0) { + const quotedFilters = filePath.map(preprocessRegexp); + query += ` ( file:${quotedFilters.join(' or file:')} )`; + } + const response = await search({ query, matches: env.DEFAULT_MATCHES, diff --git a/packages/shared/src/index.client.ts b/packages/shared/src/index.client.ts index 8e2a161dd..54c71d263 100644 --- a/packages/shared/src/index.client.ts +++ b/packages/shared/src/index.client.ts @@ -4,4 +4,7 @@ export { } from "./env.client.js"; export { SOURCEBOT_VERSION, -} from "./version.js"; \ No newline at end of file +} from "./version.js"; +export { + preprocessRegexp, +} from "./query.js"; \ No newline at end of file diff --git a/packages/shared/src/index.server.ts b/packages/shared/src/index.server.ts index 0bd73a089..3f30b93df 100644 --- a/packages/shared/src/index.server.ts +++ b/packages/shared/src/index.server.ts @@ -49,4 +49,7 @@ export { } from "./db.js"; export { SOURCEBOT_VERSION, -} from "./version.js"; \ No newline at end of file +} from "./version.js"; +export { + preprocessRegexp, +} from "./query.js"; \ No newline at end of file diff --git a/packages/shared/src/query.ts b/packages/shared/src/query.ts new file mode 100644 index 000000000..093a427d2 --- /dev/null +++ b/packages/shared/src/query.ts @@ -0,0 +1,15 @@ +/** + * Wraps a value in quotes if it contains parentheses and isn't already quoted. 
+ * + * This is needed because the query language does not allow values to include parentheses unless they're quoted. This is + * due to the ParenExpr symbol which parses on these parentheses. We instruct the agent to wrap the regexp in quotes + * but it's flaky, so we do it here as well, as a backup plan to ensure the parser doesn't fail. + */ +export const preprocessRegexp = (value: string): string => { + const hasParentheses = value.includes('(') || value.includes(')'); + const isAlreadyQuoted = value.startsWith('"') && value.endsWith('"'); + if (hasParentheses && !isAlreadyQuoted) { + return `"${value}"`; + } + return value; +}; diff --git a/packages/web/src/features/chat/tools.ts b/packages/web/src/features/chat/tools.ts index f412789d1..5f3b7ea98 100644 --- a/packages/web/src/features/chat/tools.ts +++ b/packages/web/src/features/chat/tools.ts @@ -140,7 +140,8 @@ export type ReadFilesToolUIPart = ToolUIPart<{ [toolNames.readFiles]: ReadFilesT export const createCodeSearchTool = (selectedRepos: string[]) => tool({ description: `Fetches code that matches the provided regex pattern in \`query\`. This is NOT a semantic search. - Results are returned as an array of matching files, with the file's URL, repository, and language.`, + Results are returned as an array of matching files, with the file's URL, repository, and language. + If the request is asking to search for a file, or asking to only search for results within a specific filepath, **YOU MUST** use the fileNamesFilterRegexp to properly fulfil this request.`, inputSchema: z.object({ queryRegexp: z .string() @@ -168,7 +169,7 @@ Multiple expressions can be or'd together with or, negated with -, or grouped wi .optional(), fileNamesFilterRegexp: z .array(z.string()) - .describe(`Filter results from filepaths that match the regex. When this option is not specified, all files are searched.`) + .describe(`Filter results from filepaths that match the regex. When this option is not specified, all files are searched. 
If the regex expresion includes a paranthesis **YOU MUST** wrap this value in quotes when passing it in.`) .optional(), limit: z.number().default(10).describe("Maximum number of matches to return (default: 100)"), }), diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index c64f1ed3c..714b94d78 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -1,5 +1,6 @@ import { CreateUIMessage, TextUIPart, UIMessagePart } from "ai"; import { Descendant, Editor, Point, Range, Transforms } from "slate"; +import { preprocessRegexp } from "@sourcebot/shared/client"; import { ANSWER_TAG, FILE_REFERENCE_PREFIX, FILE_REFERENCE_REGEX } from "./constants"; import { CustomEditor, @@ -358,11 +359,13 @@ export const buildSearchQuery = (options: { } if (fileNamesFilterRegexp && fileNamesFilterRegexp.length > 0) { - query += ` ( file:${fileNamesFilterRegexp.join(' or file:')} )`; + const quotedFilters = fileNamesFilterRegexp.map(preprocessRegexp); + query += ` ( file:${quotedFilters.join(' or file:')} )`; } if (repoNamesFilterRegexp && repoNamesFilterRegexp.length > 0) { - query += ` ( repo:${repoNamesFilterRegexp.join(' or repo:')} )`; + const quotedFilters = repoNamesFilterRegexp.map(preprocessRegexp); + query += ` ( repo:${quotedFilters.join(' or repo:')} )`; } return query;