|
| 1 | +package ai.koog.agents.ext.tool.search |
| 2 | + |
| 3 | +import ai.koog.agents.core.tools.Tool |
| 4 | +import ai.koog.agents.core.tools.annotations.LLMDescription |
| 5 | +import ai.koog.agents.ext.tool.file.model.FileSystemEntry |
| 6 | +import ai.koog.agents.ext.tool.file.model.buildFileSize |
| 7 | +import ai.koog.prompt.text.text |
| 8 | +import ai.koog.rag.base.files.DocumentProvider |
| 9 | +import ai.koog.rag.base.files.FileMetadata |
| 10 | +import ai.koog.rag.base.files.FileSystemProvider |
| 11 | +import ai.koog.rag.base.files.extendRangeByLines |
| 12 | +import ai.koog.rag.base.files.readText |
| 13 | +import ai.koog.rag.base.files.toPosition |
| 14 | +import kotlinx.coroutines.CancellationException |
| 15 | +import kotlinx.coroutines.flow.Flow |
| 16 | +import kotlinx.coroutines.flow.drop |
| 17 | +import kotlinx.coroutines.flow.emitAll |
| 18 | +import kotlinx.coroutines.flow.flow |
| 19 | +import kotlinx.coroutines.flow.mapNotNull |
| 20 | +import kotlinx.coroutines.flow.take |
| 21 | +import kotlinx.coroutines.flow.toList |
| 22 | +import kotlinx.io.IOException |
| 23 | +import kotlinx.serialization.KSerializer |
| 24 | +import kotlinx.serialization.SerialName |
| 25 | +import kotlinx.serialization.Serializable |
| 26 | + |
/**
 * Regular expression based content search tool.
 *
 * Use to find occurrences of a regex pattern across text files under a path.
 * The search starts at the given path (a single file or a directory that is walked recursively)
 * and reports every text file with at least one match, together with excerpt snippets
 * surrounding each match. The tool only reads from the file system.
 *
 * @param Path path type of the underlying file system provider.
 * @param fs read-only file system provider used to resolve, list, inspect and read files.
 */
public class RegexSearchTool<Path>(
    private val fs: FileSystemProvider.ReadOnly<Path>,
) : Tool<RegexSearchTool.Args, RegexSearchTool.Result>() {

    override val name: String = "__search_contents_by_regex__"
    override val description: String = text {
        +"Executes a regular expression search on folder or file contents within the specified path."
        +"The tool returns structured results with file paths, line numbers, positions, and excerpts where the text was found."
        +"The tool will solely return search results and does not modify any files."
    }

    /**
     * Parameters for a regex content search.
     *
     * @property path Absolute start directory or file path.
     * @property regex Regex pattern to match in text files.
     * @property limit Max matching files to return (default: 25). Must be positive.
     * @property skip Matching files to skip (default: 0). Must be non-negative.
     * @property caseSensitive If true, case-sensitive match; otherwise ignore case (default: false).
     */
    @Serializable
    public data class Args(
        @param:LLMDescription("Absolute starting directory or file path.")
        val path: String,
        @param:LLMDescription("Regular expression pattern.")
        val regex: String,
        @param:LLMDescription("Maximum number of matching files to return (pagination).")
        val limit: Int = 25,
        @param:LLMDescription("Number of matching files to skip (pagination).")
        val skip: Int = 0,
        @SerialName("case_sensitive")
        @param:LLMDescription("If false, performs case-insensitive matching.")
        val caseSensitive: Boolean = false,
    )

    /**
     * Search output.
     *
     * @property entries Files with at least one match; each contains excerpt snippets around matches.
     * @property original The regex used for the search.
     */
    @Serializable
    public data class Result(val entries: List<FileSystemEntry.File>, val original: String)

    override val argsSerializer: KSerializer<Args> = Args.serializer()
    override val resultSerializer: KSerializer<Result> = Result.serializer()

    /**
     * Runs the search described by [args] and collects the requested page of matching files.
     *
     * @param args search parameters.
     * @return the matching files together with the regex that was searched for.
     * @throws IllegalArgumentException if [Args.limit] is not positive or [Args.skip] is negative.
     */
    override suspend fun execute(args: Args): Result {
        // Validate pagination up front: the flow operators used below reject these values too,
        // but with messages that do not point at the offending tool argument.
        require(args.limit > 0) { "limit must be positive, but was ${args.limit}" }
        require(args.skip >= 0) { "skip must be non-negative, but was ${args.skip}" }

        val path = fs.fromAbsolutePathString(args.path)
        val matches = search(path, args.regex, args.limit, args.skip, args.caseSensitive).toList()
        return Result(matches, original = args.regex)
    }

    /**
     * Builds a cold flow of file entries for files under [path] whose content matches [pattern].
     *
     * [skip] and [limit] are applied to the matching files before entries whose metadata is
     * unavailable are filtered out, so a page may contain fewer than [limit] entries.
     *
     * @param path file or directory to start from.
     * @param pattern regex source to compile.
     * @param limit maximum number of matching files to take.
     * @param skip number of matching files to drop first.
     * @param caseSensitive when false, the pattern is compiled with [RegexOption.IGNORE_CASE].
     * @param linesAroundSnippet number of context lines added before and after each match.
     */
    private fun search(
        path: Path,
        pattern: String,
        limit: Int,
        skip: Int,
        caseSensitive: Boolean,
        linesAroundSnippet: Int = 2,
    ): Flow<FileSystemEntry.File> {
        val options: Set<RegexOption> = if (caseSensitive) emptySet() else setOf(RegexOption.IGNORE_CASE)

        return searchByRegex(start = path, regex = Regex(pattern, options))
            .drop(skip)
            .take(limit)
            .mapNotNull { match ->
                val snippets = match.ranges.map { range ->
                    val extended = extendRangeByLines(match.content, range, linesAroundSnippet, linesAroundSnippet)
                    FileSystemEntry.File.Content.Excerpt.Snippet(
                        text = extended.substring(match.content),
                        range = extended,
                    )
                }
                if (snippets.isEmpty()) return@mapNotNull null
                // The file may no longer be resolvable by the time it is reported; skip it then.
                val metadata = fs.metadata(match.file) ?: return@mapNotNull null
                val contentType = fs.getFileContentType(match.file)
                FileSystemEntry.File(
                    name = fs.name(match.file),
                    extension = fs.extension(match.file),
                    path = fs.toAbsolutePathString(match.file),
                    hidden = metadata.hidden,
                    size = buildFileSize(fs, match.file, contentType),
                    contentType = contentType,
                    content = FileSystemEntry.File.Content.Excerpt(snippets),
                )
            }
    }

    /**
     * A match of one file and the ranges within it that matched a regex.
     *
     * @property file the matched file.
     * @property content full text content of the file at the time it was read.
     * @property ranges ranges of [content] that matched; the end position is exclusive.
     */
    private data class ContentMatch<P>(
        val file: P,
        val content: String,
        val ranges: List<DocumentProvider.DocumentRange>,
    )

    /**
     * Recursively searches starting at [start] for text files whose contents match [regex].
     * Returns a flow of [ContentMatch] where each item corresponds to a file and its matched ranges.
     *
     * Directories that cannot be listed and files that cannot be read are skipped.
     * Entries that are neither files nor directories, or have no metadata, are ignored.
     */
    private fun searchByRegex(start: Path, regex: Regex): Flow<ContentMatch<Path>> = flow {
        when (fs.metadata(start)?.type) {
            FileMetadata.FileType.File -> {
                // Emit outside of any try/catch so that exceptions thrown by downstream
                // collectors are never swallowed here (flow exception transparency).
                findMatches(start, regex)?.let { emit(it) }
            }

            FileMetadata.FileType.Directory -> {
                val children = try {
                    fs.list(start)
                } catch (e: CancellationException) {
                    throw e
                } catch (_: IOException) {
                    // ignore unreadable directories
                    emptyList()
                }
                for (child in children) emitAll(searchByRegex(child, regex))
            }

            else -> { /* ignore */ }
        }
    }

    /**
     * Reads [file] and collects every range matched by [regex].
     *
     * @return the match, or `null` when the file is not a text file, contains no match,
     * or could not be read.
     */
    private suspend fun findMatches(file: Path, regex: Regex): ContentMatch<Path>? =
        try {
            if (fs.getFileContentType(file) != FileMetadata.FileContentType.Text) {
                null
            } else {
                val content = fs.readText(file)
                val ranges = regex.findAll(content).map { result ->
                    val from = result.range.first
                    val until = result.range.last + 1 // exclusive
                    DocumentProvider.DocumentRange(from.toPosition(content), until.toPosition(content))
                }.toList()
                if (ranges.isNotEmpty()) ContentMatch(file, content, ranges) else null
            }
        } catch (e: CancellationException) {
            throw e
        } catch (_: IOException) {
            // ignore unreadable files
            null
        }
}
0 commit comments