diff --git a/CodeEdit.xcodeproj/project.pbxproj b/CodeEdit.xcodeproj/project.pbxproj index abd23b0a93..b983bf9214 100644 --- a/CodeEdit.xcodeproj/project.pbxproj +++ b/CodeEdit.xcodeproj/project.pbxproj @@ -259,6 +259,19 @@ 58FD7608291EA1CB0051D6E4 /* CommandPaletteViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 58FD7605291EA1CB0051D6E4 /* CommandPaletteViewModel.swift */; }; 58FD7609291EA1CB0051D6E4 /* CommandPaletteView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 58FD7607291EA1CB0051D6E4 /* CommandPaletteView.swift */; }; 5C4BB1E128212B1E00A92FB2 /* World.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5C4BB1E028212B1E00A92FB2 /* World.swift */; }; + 611191FA2B08CC9000D4459B /* SearchIndexer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 611191F92B08CC9000D4459B /* SearchIndexer.swift */; }; + 611191FC2B08CCB800D4459B /* SearchIndexer+AsyncController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 611191FB2B08CCB800D4459B /* SearchIndexer+AsyncController.swift */; }; + 611191FE2B08CCD200D4459B /* SearchIndexer+File.swift in Sources */ = {isa = PBXBuildFile; fileRef = 611191FD2B08CCD200D4459B /* SearchIndexer+File.swift */; }; + 611192002B08CCD700D4459B /* SearchIndexer+Memory.swift in Sources */ = {isa = PBXBuildFile; fileRef = 611191FF2B08CCD700D4459B /* SearchIndexer+Memory.swift */; }; + 611192022B08CCDC00D4459B /* SearchIndexer+Search.swift in Sources */ = {isa = PBXBuildFile; fileRef = 611192012B08CCDC00D4459B /* SearchIndexer+Search.swift */; }; + 611192042B08CCED00D4459B /* SearchIndexer+ProgressivSearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = 611192032B08CCED00D4459B /* SearchIndexer+ProgressivSearch.swift */; }; + 611192062B08CCF600D4459B /* SearchIndexer+Add.swift in Sources */ = {isa = PBXBuildFile; fileRef = 611192052B08CCF600D4459B /* SearchIndexer+Add.swift */; }; + 611192082B08CCFD00D4459B /* SearchIndexer+Terms.swift in Sources */ = {isa = PBXBuildFile; fileRef = 611192072B08CCFD00D4459B /* 
SearchIndexer+Terms.swift */; }; + 6111920C2B08CD0B00D4459B /* SearchIndexer+InternalMethods.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6111920B2B08CD0B00D4459B /* SearchIndexer+InternalMethods.swift */; }; + 613DF55E2B08DD5D00E9D902 /* FileHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 613DF55D2B08DD5D00E9D902 /* FileHelper.swift */; }; + 61538B902B111FE800A88846 /* String+AppearancesOfSubstring.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61538B8F2B111FE800A88846 /* String+AppearancesOfSubstring.swift */; }; + 61538B932B11201900A88846 /* String+Character.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61538B922B11201900A88846 /* String+Character.swift */; }; + 615AA21A2B0CFD480013FCCC /* LazyStringLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 615AA2192B0CFD480013FCCC /* LazyStringLoader.swift */; }; 6C049A372A49E2DB00D42923 /* DirectoryEventStream.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6C049A362A49E2DB00D42923 /* DirectoryEventStream.swift */; }; 6C05A8AF284D0CA3007F4EAA /* WorkspaceDocument+Listeners.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6C05A8AE284D0CA3007F4EAA /* WorkspaceDocument+Listeners.swift */; }; 6C092EC62A4E803300489202 /* CodeEditTextView in Frameworks */ = {isa = PBXBuildFile; productRef = 6C092EC52A4E803300489202 /* CodeEditTextView */; }; @@ -746,6 +759,19 @@ 58FD7605291EA1CB0051D6E4 /* CommandPaletteViewModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CommandPaletteViewModel.swift; sourceTree = ""; }; 58FD7607291EA1CB0051D6E4 /* CommandPaletteView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CommandPaletteView.swift; sourceTree = ""; }; 5C4BB1E028212B1E00A92FB2 /* World.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = World.swift; sourceTree = ""; }; + 611191F92B08CC9000D4459B /* SearchIndexer.swift */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.swift; path = SearchIndexer.swift; sourceTree = ""; }; + 611191FB2B08CCB800D4459B /* SearchIndexer+AsyncController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SearchIndexer+AsyncController.swift"; sourceTree = ""; }; + 611191FD2B08CCD200D4459B /* SearchIndexer+File.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SearchIndexer+File.swift"; sourceTree = ""; }; + 611191FF2B08CCD700D4459B /* SearchIndexer+Memory.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SearchIndexer+Memory.swift"; sourceTree = ""; }; + 611192012B08CCDC00D4459B /* SearchIndexer+Search.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SearchIndexer+Search.swift"; sourceTree = ""; }; + 611192032B08CCED00D4459B /* SearchIndexer+ProgressivSearch.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SearchIndexer+ProgressivSearch.swift"; sourceTree = ""; }; + 611192052B08CCF600D4459B /* SearchIndexer+Add.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SearchIndexer+Add.swift"; sourceTree = ""; }; + 611192072B08CCFD00D4459B /* SearchIndexer+Terms.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SearchIndexer+Terms.swift"; sourceTree = ""; }; + 6111920B2B08CD0B00D4459B /* SearchIndexer+InternalMethods.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SearchIndexer+InternalMethods.swift"; sourceTree = ""; }; + 613DF55D2B08DD5D00E9D902 /* FileHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FileHelper.swift; sourceTree = ""; }; + 61538B8F2B111FE800A88846 /* String+AppearancesOfSubstring.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "String+AppearancesOfSubstring.swift"; sourceTree = ""; }; + 61538B922B11201900A88846 /* String+Character.swift 
*/ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "String+Character.swift"; sourceTree = ""; }; + 615AA2192B0CFD480013FCCC /* LazyStringLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LazyStringLoader.swift; sourceTree = ""; }; 6C049A362A49E2DB00D42923 /* DirectoryEventStream.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DirectoryEventStream.swift; sourceTree = ""; }; 6C05A8AE284D0CA3007F4EAA /* WorkspaceDocument+Listeners.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "WorkspaceDocument+Listeners.swift"; sourceTree = ""; }; 6C092ED92A53A58600489202 /* EditorLayout+StateRestoration.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "EditorLayout+StateRestoration.swift"; sourceTree = ""; }; @@ -959,8 +985,12 @@ 5831E3CF2933F4E000D5A6D2 /* Views */, 043C321527E3201F006AE443 /* WorkspaceDocument.swift */, 043BCF02281DA18A000AC47C /* WorkspaceDocument+Search.swift */, + 615AA2192B0CFD480013FCCC /* LazyStringLoader.swift */, 6C05A8AE284D0CA3007F4EAA /* WorkspaceDocument+Listeners.swift */, 6C092EDF2A53BFCF00489202 /* WorkspaceStateKey.swift */, + 61538B8F2B111FE800A88846 /* String+AppearancesOfSubstring.swift */, + 61538B922B11201900A88846 /* String+Character.swift */, + 611191F82B08CC8000D4459B /* Indexer */, ); path = Documents; sourceTree = ""; @@ -2102,6 +2132,23 @@ name = Frameworks; sourceTree = ""; }; + 611191F82B08CC8000D4459B /* Indexer */ = { + isa = PBXGroup; + children = ( + 611191F92B08CC9000D4459B /* SearchIndexer.swift */, + 611191FB2B08CCB800D4459B /* SearchIndexer+AsyncController.swift */, + 611191FF2B08CCD700D4459B /* SearchIndexer+Memory.swift */, + 611191FD2B08CCD200D4459B /* SearchIndexer+File.swift */, + 611192012B08CCDC00D4459B /* SearchIndexer+Search.swift */, + 611192032B08CCED00D4459B /* SearchIndexer+ProgressivSearch.swift */, + 611192052B08CCF600D4459B /* SearchIndexer+Add.swift 
*/, + 611192072B08CCFD00D4459B /* SearchIndexer+Terms.swift */, + 6111920B2B08CD0B00D4459B /* SearchIndexer+InternalMethods.swift */, + 613DF55D2B08DD5D00E9D902 /* FileHelper.swift */, + ); + path = Indexer; + sourceTree = ""; + }; 6C092EDC2A53A63E00489202 /* Views */ = { isa = PBXGroup; children = ( @@ -2995,6 +3042,8 @@ 587B9E8F29301D8F00AC7927 /* BitBucketUserRouter.swift in Sources */, B66A4E5129C917D5004573B4 /* AboutWindow.swift in Sources */, 58F2EB03292FB2B0004A9BDE /* Documentation.docc in Sources */, + 611192042B08CCED00D4459B /* SearchIndexer+ProgressivSearch.swift in Sources */, + 611192022B08CCDC00D4459B /* SearchIndexer+Search.swift in Sources */, 04BA7C272AE2E9F100584E1C /* GitClient+Push.swift in Sources */, 2B7A583527E4BA0100D25D4E /* AppDelegate.swift in Sources */, D7012EE827E757850001E1EF /* FindNavigatorView.swift in Sources */, @@ -3046,8 +3095,10 @@ 587B9E8C29301D8F00AC7927 /* GitHubOpenness.swift in Sources */, 5894E59729FEF7740077E59C /* CEWorkspaceFile+Recursion.swift in Sources */, 587B9E8229301D8F00AC7927 /* GitHubPreviewHeader.swift in Sources */, + 611191FC2B08CCB800D4459B /* SearchIndexer+AsyncController.swift in Sources */, 58F2EB02292FB2B0004A9BDE /* Loopable.swift in Sources */, 6C578D8929CD36E400DC73B2 /* Commands+ForEach.swift in Sources */, + 611192082B08CCFD00D4459B /* SearchIndexer+Terms.swift in Sources */, 041FC6AA2AE42C9100C1F65A /* SourceControlNavigatorBranchGroupView.swift in Sources */, 28B8F884280FFE4600596236 /* NSTableView+Background.swift in Sources */, 6CBA0D512A1BF524002C6FAA /* SegmentedControlImproved.swift in Sources */, @@ -3070,6 +3121,7 @@ 587B9E7529301D8F00AC7927 /* String+QueryParameters.swift in Sources */, 58798219292D92370085B254 /* SearchModeModel.swift in Sources */, 6C5C891B2A3F736500A94FE1 /* FocusedValues.swift in Sources */, + 611192062B08CCF600D4459B /* SearchIndexer+Add.swift in Sources */, B62AEDD72A27B3D0009A9F52 /* UtilityAreaTabViewModel.swift in Sources */, 85773E1E2A3E0A1F00C5D926 /* 
SettingsSearchResult.swift in Sources */, B66A4E4F29C917B8004573B4 /* WelcomeWindow.swift in Sources */, @@ -3097,6 +3149,8 @@ 581550CF29FBD30400684881 /* StandardTableViewCell.swift in Sources */, B62AEDB82A1FE2DC009A9F52 /* UtilityAreaOutputView.swift in Sources */, 587B9E5C29301D8F00AC7927 /* Parameters.swift in Sources */, + 61538B932B11201900A88846 /* String+Character.swift in Sources */, + 613DF55E2B08DD5D00E9D902 /* FileHelper.swift in Sources */, 58798235292E30B90085B254 /* FeedbackModel.swift in Sources */, 04C3255C2801F86900C8DA2D /* ProjectNavigatorMenu.swift in Sources */, 587B9E6429301D8F00AC7927 /* GitLabCommit.swift in Sources */, @@ -3110,6 +3164,7 @@ 04BA7C132AE2AA7300584E1C /* GitCheckoutBranchViewModel.swift in Sources */, 04540D5E27DD08C300E91B77 /* WorkspaceView.swift in Sources */, DE6F77872813625500D00A76 /* EditorTabBarDivider.swift in Sources */, + 6111920C2B08CD0B00D4459B /* SearchIndexer+InternalMethods.swift in Sources */, 6CABB1A129C5593800340467 /* OverlayView.swift in Sources */, D7211D4327E066CE008F2ED7 /* Localized+Ex.swift in Sources */, 581BFB692926431000D251EC /* WelcomeActionView.swift in Sources */, @@ -3144,6 +3199,7 @@ 285FEC7027FE4B9800E57D53 /* ProjectNavigatorTableViewCell.swift in Sources */, 6CB9144B29BEC7F100BC47F2 /* (null) in Sources */, 587B9E7429301D8F00AC7927 /* URL+URLParameters.swift in Sources */, + 61538B902B111FE800A88846 /* String+AppearancesOfSubstring.swift in Sources */, 6C5BE51C2A3D542B002DA0FC /* FeatureFlagsSettingsView.swift in Sources */, 581BFB6B2926431000D251EC /* RecentProjectItem.swift in Sources */, 587FB99029C1246400B519DD /* EditorTabView.swift in Sources */, @@ -3188,6 +3244,7 @@ B62AEDB52A1FE295009A9F52 /* UtilityAreaDebugView.swift in Sources */, 6C049A372A49E2DB00D42923 /* DirectoryEventStream.swift in Sources */, 04BA7C0E2AE2A76E00584E1C /* SourceControlNavigatorChangesCommitView.swift in Sources */, + 615AA21A2B0CFD480013FCCC /* LazyStringLoader.swift in Sources */, 
6CAAF68A29BC9C2300A1F48A /* (null) in Sources */, 6C6BD6EF29CD12E900235D17 /* ExtensionManagerWindow.swift in Sources */, 6CFF967629BEBCD900182D6F /* FileCommands.swift in Sources */, @@ -3271,6 +3328,7 @@ 587B9E6D29301D8F00AC7927 /* GitLabEventNote.swift in Sources */, 587B9E9129301D8F00AC7927 /* BitBucketOAuthRouter.swift in Sources */, B6E41C7429DD40010088F9F4 /* View+HideSidebarToggle.swift in Sources */, + 611191FA2B08CC9000D4459B /* SearchIndexer.swift in Sources */, 58822532292C280D00E83CDE /* UtilityAreaViewModel.swift in Sources */, 043BCF03281DA18A000AC47C /* WorkspaceDocument+Search.swift in Sources */, 58822527292C280D00E83CDE /* StatusBarIndentSelector.swift in Sources */, @@ -3290,7 +3348,9 @@ 6C2C155829B4F49100EA60A5 /* SplitViewItem.swift in Sources */, 6CDA84AD284C1BA000C1CC3A /* EditorTabBarContextMenu.swift in Sources */, 6C81916729B3E80700B75C92 /* ModifierKeysObserver.swift in Sources */, + 611192002B08CCD700D4459B /* SearchIndexer+Memory.swift in Sources */, 587B9E8129301D8F00AC7927 /* PublicKey.swift in Sources */, + 611191FE2B08CCD200D4459B /* SearchIndexer+File.swift in Sources */, 6CB52DC92AC8DC3E002E75B3 /* CEWorkspaceFileManager+FileManagement.swift in Sources */, 58F2EB0B292FB2B0004A9BDE /* AccountsSettings.swift in Sources */, 5882252A292C280D00E83CDE /* StatusBarToggleUtilityAreaButton.swift in Sources */, diff --git a/CodeEdit/Features/Documents/Indexer/FileHelper.swift b/CodeEdit/Features/Documents/Indexer/FileHelper.swift new file mode 100644 index 0000000000..299f8fdb54 --- /dev/null +++ b/CodeEdit/Features/Documents/Indexer/FileHelper.swift @@ -0,0 +1,22 @@ +// +// FileHelper.swift +// CodeEdit +// +// Created by Tommy Ludwig on 18.11.23. 
+//
+
+import Foundation
+
+enum FileHelper {
+    /// Returns `true` if `url` points to an existing directory.
+    static func urlIsFolder(_ url: URL) -> Bool {
+        var isDirectory: ObjCBool = false
+        return FileManager.default.fileExists(atPath: url.path, isDirectory: &isDirectory) && isDirectory.boolValue
+    }
+
+    /// Returns `true` if `url` points to an existing item that is not a directory.
+    static func urlIsFile(_ url: URL) -> Bool {
+        var isDirectory: ObjCBool = false
+        return FileManager.default.fileExists(atPath: url.path, isDirectory: &isDirectory) && !isDirectory.boolValue
+    }
+}
diff --git a/CodeEdit/Features/Documents/Indexer/SearchIndexer+Add.swift b/CodeEdit/Features/Documents/Indexer/SearchIndexer+Add.swift
new file mode 100644
index 0000000000..8735c2552a
--- /dev/null
+++ b/CodeEdit/Features/Documents/Indexer/SearchIndexer+Add.swift
@@ -0,0 +1,132 @@
+//
+//  SearchIndexer+Add.swift
+//  CodeEdit
+//
+//  Created by Tommy Ludwig on 18.11.23.
+//
+
+import Foundation
+
+extension SearchIndexer {
+    /// Adds text to the index under the given identifying URL.
+    ///
+    /// - Parameters:
+    ///   - url: The identifying URL for the text.
+    ///   - text: The text to add.
+    ///   - canReplace: If `true`, an existing document with the same URL may be replaced.
+    /// - Returns: `true` if the text was successfully added to the index, `false` otherwise.
+    public func addFileWithText(_ url: URL, text: String, canReplace: Bool = true) -> Bool {
+        guard let index = self.index,
+              let unmanagedDocument = SKDocumentCreateWithURL(url as CFURL) else {
+            return false
+        }
+        // Create rule: the new document is owned by us, so hand it to ARC instead of leaking it.
+        let document = unmanagedDocument.takeRetainedValue()
+
+        return modifiyIndexQueue.sync {
+            SKIndexAddDocumentWithText(index, document, text as CFString, canReplace)
+        }
+    }
+
+    /// Adds text to the index, identified by a URL given as a string.
+    ///
+    /// - Parameters:
+    ///   - textURL: A string representing the identifying URL of the text content.
+    ///   - text: The text content to be added to the index.
+    ///   - canReplace: If `true`, an existing document with the same URL may be replaced. Defaults to `true`.
+    /// - Returns: `true` if the text was added; `false` if `textURL` is not a valid URL or adding failed.
+    public func addFileWithText(textURL: String, text: String, canReplace: Bool = true) -> Bool {
+        guard let url = URL(string: textURL) else {
+            return false
+        }
+        return addFileWithText(url, text: text, canReplace: canReplace)
+    }
+
+    /// Adds a file as a document to the index.
+    ///
+    /// - Parameters:
+    ///   - fileURL: The file URL for the document, e.g. `file:///User/Essay.txt`.
+    ///   - mimeType: An optional MIME type. If `nil`, the type is detected from the file extension.
+    ///   - canReplace: If `true`, an existing document with the same URL may be replaced. Defaults to `true`.
+    /// - Returns: `true` if the command was successful.
+    ///
+    /// - Important: `true` is returned even if the document wasn't updated, so don't rely solely on
+    ///   the return value to determine whether the document was replaced.
+    public func addFile(fileURL: URL, mimeType: String? = nil, canReplace: Bool = true) -> Bool {
+        guard dataExtractorLoaded,
+              let index = self.index,
+              let unmanagedDocument = SKDocumentCreateWithURL(fileURL as CFURL) else {
+            return false
+        }
+        // Create rule: the new document is owned by us, so hand it to ARC instead of leaking it.
+        let document = unmanagedDocument.takeRetainedValue()
+        // Try to detect the MIME type if it wasn't specified.
+        let mime = mimeType ?? detectMimeType(fileURL)
+
+        return modifiyIndexQueue.sync {
+            SKIndexAddDocument(index, document, mime as CFString?, canReplace)
+        }
+    }
+
+    /// Recursively adds the files contained within a folder to the search index.
+    ///
+    /// - Parameters:
+    ///   - folderURL: The folder to be indexed.
+    ///   - canReplace: If `true`, existing documents within the index may be replaced. Defaults to `true`.
+    /// - Returns: The URLs of the documents added to the index. Empty if `folderURL` isn't a folder.
+    public func addFolderContent(folderURL: URL, canReplace: Bool = true) -> [URL] {
+        guard FileHelper.urlIsFolder(folderURL),
+              let enumerator = FileManager.default.enumerator(at: folderURL, includingPropertiesForKeys: nil) else {
+            return []
+        }
+
+        var addedURLs: [URL] = []
+        for case let fileURL as URL in enumerator where FileHelper.urlIsFile(fileURL) {
+            if addFile(fileURL: fileURL, canReplace: canReplace) {
+                addedURLs.append(fileURL)
+            }
+        }
+        return addedURLs
+    }
+
+    /// Removes a document from the index.
+    ///
+    /// - Parameter url: The identifying URL for the document.
+    /// - Returns: `true` if the document was successfully removed, `false` otherwise.
+    ///   **Note:** If the document didn't exist, this also returns `true`.
+    public func removeDocument(url: URL) -> Bool {
+        guard let unmanagedDocument = SKDocumentCreateWithURL(url as CFURL) else {
+            return false
+        }
+        return remove(document: unmanagedDocument.takeRetainedValue())
+    }
+
+    /// Removes an array of documents from the index.
+    ///
+    /// - Parameter urls: An array of URLs identifying the documents to be removed.
+    public func removeDocuments(urls: [URL]) {
+        for url in urls {
+            _ = removeDocument(url: url)
+        }
+    }
+
+    /// Retrieves the indexing state of a document at the specified URL.
+    ///
+    /// - Parameter url: The URL of the document.
+    /// - Returns: The indexing state of the document, or `kSKDocumentStateNotIndexed` if it is not indexed.
+    public func documentState(_ url: URL) -> SKDocumentIndexState {
+        guard let index = self.index,
+              let unmanagedDocument = SKDocumentCreateWithURL(url as CFURL) else {
+            return kSKDocumentStateNotIndexed
+        }
+        return SKIndexGetDocumentState(index, unmanagedDocument.takeRetainedValue())
+    }
+
+    /// Checks if a document at the specified URL is indexed.
+    ///
+    /// - Parameter url: The URL of the document.
+    /// - Returns: `true` if the document is indexed; otherwise, returns `false`.
+    public func documentIndexed(_ url: URL) -> Bool {
+        documentState(url) == kSKDocumentStateIndexed
+    }
+}
diff --git a/CodeEdit/Features/Documents/Indexer/SearchIndexer+AsyncController.swift b/CodeEdit/Features/Documents/Indexer/SearchIndexer+AsyncController.swift
new file mode 100644
index 0000000000..248f6c3211
--- /dev/null
+++ b/CodeEdit/Features/Documents/Indexer/SearchIndexer+AsyncController.swift
@@ -0,0 +1,178 @@
+//
+//  SearchIndexer+AsyncController.swift
+//  CodeEdit
+//
+//  Created by Tommy Ludwig on 18.11.23.
+//
+
+import Foundation
+
+extension SearchIndexer {
+    /// Manager for a `SearchIndexer` object that supports async calls to the index
+    class AsyncManager {
+        /// An instance of the SearchIndexer
+        let index: SearchIndexer
+        private let addQueue = DispatchQueue(label: "app.codeedit.CodeEdit.AddFilesToIndex", attributes: .concurrent)
+        private let searchQueue = DispatchQueue(label: "app.codeedit.CodeEdit.SearchIndex", attributes: .concurrent)
+
+        init(index: SearchIndexer) {
+            self.index = index
+        }
+
+        /// A text together with the URL that identifies it in the index
+        class TextFile {
+            let url: URL
+            let text: String
+
+            /// Create a text async task
+            ///
+            /// - Parameters:
+            ///   - url: The identifying document URL
+            ///   - text: The text to add to the index
+            init(url: URL, text: String) {
+                self.url = url
+                self.text = text
+            }
+        }
+
+        // MARK: - Search
+
+        /// Starts a progressive search and streams its result chunks.
+        ///
+        /// - Parameters:
+        ///   - query: The Search Kit query string.
+        ///   - maxResults: The maximum number of results per yielded chunk.
+        ///   - timeout: The maximum time to spend on collecting each chunk. Defaults to 1.0 seconds.
+        /// - Returns: A stream of result chunks that finishes once no more results are available.
+        func search(
+            query: String,
+            _ maxResults: Int,
+            timeout: TimeInterval = 1.0
+        ) async -> AsyncStream<SearchIndexer.ProgressivSearch.Results> {
+            let search = index.progressiveSearch(query: query)
+            let queue = searchQueue
+
+            return AsyncStream { continuation in
+                // Produce the chunks in the background so they can be consumed while the search is still running.
+                queue.async {
+                    var moreResultsAvailable = true
+                    while moreResultsAvailable {
+                        let results = search.getNextSearchResultsChunk(limit: maxResults, timeout: timeout)
+                        moreResultsAvailable = results.moreResultsAvailable
+                        if case .terminated = continuation.yield(results) {
+                            // Nobody consumes the stream anymore, so stop searching.
+                            search.cancel()
+                            break
+                        }
+                    }
+                    continuation.finish()
+                }
+            }
+        }
+
+        // MARK: - Add
+
+        /// Adds the given texts to the index concurrently.
+        ///
+        /// - Parameters:
+        ///   - files: The texts to add.
+        ///   - flushWhenComplete: If `true`, the index is flushed after all texts have been processed.
+        /// - Returns: One flag per element of `files`, in the same order, telling whether it was added.
+        func addText(
+            files: [TextFile],
+            flushWhenComplete: Bool = false
+        ) async -> [Bool] {
+            var addedFiles = [Bool](repeating: false, count: files.count)
+
+            await withTaskGroup(of: (Int, Bool).self) { taskGroup in
+                for (position, file) in files.enumerated() {
+                    taskGroup.addTask {
+                        (position, self.index.addFileWithText(file.url, text: file.text, canReplace: true))
+                    }
+                }
+
+                // Tasks finish in arbitrary order, so store every outcome at the position of its input.
+                for await (position, wasAdded) in taskGroup {
+                    addedFiles[position] = wasAdded
+                }
+            }
+
+            if flushWhenComplete {
+                index.flush()
+            }
+            return addedFiles
+        }
+
+        /// Adds the files at the given URLs to the index concurrently.
+        ///
+        /// - Parameters:
+        ///   - urls: The URLs of the files to add.
+        ///   - flushWhenComplete: If `true`, the index is flushed after all files have been processed.
+        /// - Returns: One flag per element of `urls`, in the same order, telling whether it was added.
+        func addFiles(
+            urls: [URL],
+            flushWhenComplete: Bool = false
+        ) async -> [Bool] {
+            var addedURLs = [Bool](repeating: false, count: urls.count)
+
+            await withTaskGroup(of: (Int, Bool).self) { taskGroup in
+                for (position, url) in urls.enumerated() {
+                    taskGroup.addTask {
+                        (position, self.index.addFile(fileURL: url, canReplace: true))
+                    }
+                }
+
+                // Tasks finish in arbitrary order, so store every outcome at the position of its input.
+                for await (position, wasAdded) in taskGroup {
+                    addedURLs[position] = wasAdded
+                }
+            }
+
+            // Honour `flushWhenComplete` the same way `addText(files:flushWhenComplete:)` does.
+            if flushWhenComplete {
+                index.flush()
+            }
+            return addedURLs
+        }
+
+        /// Adds every file below a folder to the index.
+        ///
+        /// The directory enumerator already descends into sub-folders, so only files are scheduled;
+        /// folders themselves are skipped instead of being enumerated a second time.
+        ///
+        /// - Parameters:
+        ///   - url: The folder whose non-hidden files should be indexed.
+        ///   - flushWhenComplete: If `true`, the index is flushed on the main queue once all files were added.
+        func addFolder(
+            url: URL,
+            flushWhenComplete: Bool = false
+        ) {
+            guard let enumerator = FileManager.default.enumerator(
+                at: url,
+                includingPropertiesForKeys: [.isRegularFileKey],
+                options: [.skipsHiddenFiles],
+                errorHandler: nil
+            ) else {
+                return
+            }
+
+            let dispatchGroup = DispatchGroup()
+
+            for case let fileURL as URL in enumerator where FileHelper.urlIsFile(fileURL) {
+                dispatchGroup.enter()
+                addQueue.async { [weak self] in
+                    // Always balance `enter()`, even if the manager is gone, so that `notify` can fire.
+                    defer { dispatchGroup.leave() }
+                    guard let self = self else { return }
+                    _ = self.index.addFile(fileURL: fileURL, canReplace: true)
+                }
+            }
+
+            dispatchGroup.notify(queue: .main) {
+                if flushWhenComplete {
+                    self.index.flush()
+                }
+            }
+        }
+    }
+}
diff --git a/CodeEdit/Features/Documents/Indexer/SearchIndexer+File.swift b/CodeEdit/Features/Documents/Indexer/SearchIndexer+File.swift
new file mode 100644
index 0000000000..deca8babe7
--- /dev/null
+++ b/CodeEdit/Features/Documents/Indexer/SearchIndexer+File.swift
@@ -0,0 +1,83 @@
+//
+//  SearchIndexer+File.swift
+//  CodeEdit
+//
+//  Created by Tommy Ludwig on 18.11.23.
+//
+
+import Foundation
+
+extension SearchIndexer {
+    /// A file based index
+    public class File: SearchIndexer {
+        /// The file url where the index is located
+        public let fileURL: URL
+
+        // NOTE(review): the `SKIndex` references below are taken unretained although they come from
+        // Create/Open calls; presumably `SearchIndexer` balances that itself when closing - confirm.
+        private init(url: URL, index: SKIndex) {
+            self.fileURL = url
+            super.init(index: index)
+        }
+
+        /// Create a new file based index
+        ///
+        /// - Parameters:
+        ///   - fileURL: The file URL to create the index at. Must not point to an existing file.
+        ///   - properties: The properties defining the capabilities of the index
+        public convenience init?(fileURL: URL, properties: CreateProperties) {
+            // Use `path`: `absoluteString` keeps the `file://` scheme and therefore never matches a file on disk.
+            guard !FileManager.default.fileExists(atPath: fileURL.path),
+                  let skIndex = SKIndexCreateWithURL(
+                    fileURL as CFURL,
+                    nil,
+                    properties.indexType,
+                    properties.properties()
+                  ) else {
+                return nil
+            }
+            self.init(url: fileURL, index: skIndex.takeUnretainedValue())
+        }
+
+        /// Load an index from a file url
+        ///
+        /// - Parameters:
+        ///   - fileURL: The file URL where the index is located at
+        ///   - writeable: Can the index be modified
+        public convenience init?(fileURL: URL, writeable: Bool) {
+            guard let skIndex = SKIndexOpenWithURL(fileURL as CFURL, nil, writeable) else {
+                return nil
+            }
+            self.init(url: fileURL, index: skIndex.takeUnretainedValue())
+        }
+
+        /// Open an index from a file url.
+        ///
+        /// - Parameters:
+        ///   - fileURL: The file url to open
+        ///   - writeable: should the index be modifiable?
+        /// - Returns: A new index object if successful, nil otherwise
+        public static func openIndex(fileURL: URL, writeable: Bool) -> SearchIndexer.File? {
+            SearchIndexer.File(fileURL: fileURL, writeable: writeable)
+        }
+
+        /// Create an indexer using a new data container for the store
+        ///
+        /// - Parameters:
+        ///   - fileURL: the file URL to store the index at. url must be a non-existent file
+        ///   - properties: the properties for index creation
+        /// - Returns: A new index object if successful, nil otherwise. Returns nil if the file already exists at url.
+        public static func create(
+            fileURL: URL,
+            properties: CreateProperties = CreateProperties()
+        ) -> SearchIndexer.File? {
+            SearchIndexer.File(fileURL: fileURL, properties: properties)
+        }
+
+        /// Flush, compact, i.e. apply all changes and write the content of the index to the file
+        public func save() {
+            flush()
+            compact()
+        }
+    }
+}
diff --git a/CodeEdit/Features/Documents/Indexer/SearchIndexer+InternalMethods.swift b/CodeEdit/Features/Documents/Indexer/SearchIndexer+InternalMethods.swift
new file mode 100644
index 0000000000..d3697d0c52
--- /dev/null
+++ b/CodeEdit/Features/Documents/Indexer/SearchIndexer+InternalMethods.swift
@@ -0,0 +1,117 @@
+//
+//  SearchIndexer+InternalMethods.swift
+//  CodeEdit
+//
+//  Created by Tommy Ludwig on 18.11.23.
+//
+
+import Foundation
+import UniformTypeIdentifiers
+
+extension SearchIndexer {
+    /// A "typealias" for a document ID, using a struct because SwiftLint doesn't allow typealiases for 3 types
+    public struct DocumentID {
+        /// The URL identifying the document
+        let url: URL
+        /// The Search Kit document object
+        let docuemnt: SKDocument
+        /// The identifier of the document within the index
+        let documentID: SKDocumentID
+    }
+
+    /// Returns the MIME type for the URL, or `nil` if it couldn't be ascertained from the extension
+    ///
+    /// - Parameter url: The URL to detect the MIME type for
+    /// - Returns: The MIME type of the URL if it could be detected, `nil` otherwise
+    func detectMimeType(_ url: URL) -> String? {
+        UTType(filenameExtension: url.pathExtension)?.preferredMIMEType
+    }
+
+    /// Removes the given document from the index.
+    ///
+    /// When the app deletes a document, use this function to update the index to reflect the change,
+    /// i.e. the index does not need to get flushed.
+    ///
+    /// - Returns: The result reported by Search Kit, or `false` if there is no index.
+    func remove(document: SKDocument) -> Bool {
+        guard let index = self.index else {
+            return false
+        }
+        return modifiyIndexQueue.sync {
+            SKIndexRemoveDocument(index, document)
+        }
+    }
+
+    /// Returns the number of terms of the specified document, or 0 if there is no index
+    private func termCount(for document: SKDocumentID) -> Int {
+        guard let index = self.index else {
+            return 0
+        }
+        return SKIndexGetDocumentTermCount(index, document)
+    }
+
+    /// Is the specified document empty (i.e. it has no terms)
+    private func isEmpty(for document: SKDocumentID) -> Bool {
+        // Without an index `termCount(for:)` is 0, so the document counts as empty.
+        termCount(for: document) == 0
+    }
+
+    /// Recurses through the children of a document and appends every indexed leaf document to `docs`
+    ///
+    /// - Parameters:
+    ///   - index: Shadowed by the indexer's own index right away; kept for signature compatibility
+    ///   - inParentDocument: The document whose children are visited, or `nil` to start at the root
+    ///   - docs: The array the leaf documents are appended to
+    private func addLeafURLs(index: SKIndex, inParentDocument: SKDocument?, docs: inout [DocumentID]) {
+        guard let index = self.index else {
+            return
+        }
+
+        var isLeaf = true
+
+        // Create/Copy rule: the iterator and every document it vends are owned by us, so hand them to ARC.
+        let iterator = SKIndexDocumentIteratorCreate(index, inParentDocument).takeRetainedValue()
+        while let child = SKIndexDocumentIteratorCopyNext(iterator) {
+            isLeaf = false
+            addLeafURLs(index: index, inParentDocument: child.takeRetainedValue(), docs: &docs)
+        }
+
+        // Only leaf documents that are known to the index are reported.
+        guard isLeaf,
+              let document = inParentDocument,
+              SKIndexGetDocumentState(index, document) != kSKDocumentStateNotIndexed,
+              let url = SKDocumentCopyURL(document) else {
+            return
+        }
+        docs.append(
+            DocumentID(
+                url: url.takeRetainedValue() as URL,
+                docuemnt: document,
+                documentID: SKIndexGetDocumentID(index, document)
+            )
+        )
+    }
+
+    /// Returns all the documents contained within the index
+    ///
+    /// - Parameter termState: The term state of documents to be returned (e.g. all, empty only, non-empty only)
+    /// - Returns: An array containing all the documents matching the term state
+    func fullDocuments(termState: TermState = .all) -> [DocumentID] {
+        guard let index = self.index else {
+            return []
+        }
+
+        var allDocs = [DocumentID]()
+
+        addLeafURLs(index: index, inParentDocument: nil, docs: &allDocs)
+
+        switch termState {
+        case .empty:
+            return allDocs.filter { isEmpty(for: $0.documentID) }
+        case .notEmpty:
+            return allDocs.filter { !isEmpty(for: $0.documentID) }
+        default:
+            return allDocs
+        }
+    }
+}
diff --git a/CodeEdit/Features/Documents/Indexer/SearchIndexer+Memory.swift b/CodeEdit/Features/Documents/Indexer/SearchIndexer+Memory.swift
new file mode 100644
index 0000000000..2d1a309ba4
--- /dev/null
+++ b/CodeEdit/Features/Documents/Indexer/SearchIndexer+Memory.swift
@@ -0,0 +1,75 @@
+//
+//  SearchIndexer+Memory.swift
+//  CodeEdit
+//
+//  Created by Tommy Ludwig on 18.11.23.
+//
+
+import Foundation
+
+extension SearchIndexer {
+    /// Memory based indexing using `NSMutableData`
+    public class Memory: SearchIndexer {
+        /// The data object backing the index
+        private var store = NSMutableData()
+
+        // NOTE(review): the `SKIndex` references below are taken unretained although they come from
+        // Create/Open calls; presumably `SearchIndexer` balances that itself when closing - confirm.
+        private init(data: NSMutableData, index: SKIndex) {
+            super.init(index: index)
+            self.store = data
+        }
+
+        /// Create a new in-memory index
+        ///
+        /// - Parameter properties: the properties to use in the index
+        public init?(properties: CreateProperties = CreateProperties()) {
+            let data = NSMutableData()
+            guard let skIndex = SKIndexCreateWithMutableData(
+                data,
+                nil,
+                properties.indexType,
+                properties.properties()
+            ) else {
+                return nil
+            }
+            super.init(index: skIndex.takeUnretainedValue())
+            self.store = data
+        }
+
+        /// Create an in-memory index from the data provided
+        ///
+        /// - Parameter data: The data to load the index data from. A copy is made, `data` itself is not modified.
+        public convenience init?(data: Data) {
+            guard let rawData = (data as NSData).mutableCopy() as? NSMutableData,
+                  let skIndex = SKIndexOpenWithMutableData(rawData, nil) else {
+                return nil
+            }
+            self.init(data: rawData, index: skIndex.takeUnretainedValue())
+        }
+
+        /// Create an indexer using a new data container for the store
+        ///
+        /// - Parameter properties: the properties for index creation
+        /// - Returns: A new index object if successful, nil otherwise
+        public static func create(properties: CreateProperties = CreateProperties()) -> SearchIndexer.Memory? {
+            SearchIndexer.Memory(properties: properties)
+        }
+
+        /// Create an indexer using the data stored in 'data'.
+        ///
+        /// **NOTE** Makes a copy of the data first - does not work on a live Data object
+        ///
+        /// - Parameter data: The data to load as an index
+        /// - Returns: A new index object if successful, nil otherwise
+        public static func loadFromData(data: Data) -> SearchIndexer.Memory? {
+            SearchIndexer.Memory(data: data)
+        }
+
+        /// Returns a copy of the index as data
+        public func getAsData() -> Data? {
+            flush()
+            return store.copy() as? Data
+        }
+    }
+}
diff --git a/CodeEdit/Features/Documents/Indexer/SearchIndexer+ProgressivSearch.swift b/CodeEdit/Features/Documents/Indexer/SearchIndexer+ProgressivSearch.swift
new file mode 100644
index 0000000000..ff2fb72613
--- /dev/null
+++ b/CodeEdit/Features/Documents/Indexer/SearchIndexer+ProgressivSearch.swift
@@ -0,0 +1,111 @@
+//
+//  SearchIndexer+ProgressivSearch.swift
+//  CodeEdit
+//
+//  Created by Tommy Ludwig on 18.11.23.
extension SearchIndexer {
    /// Object representing a single search result.
    public class SearchResult {
        /// The identifying URL of the document.
        let url: URL

        /// The search score of the document; higher means more relevant.
        let score: Float

        init(url: URL, score: Float) {
            self.url = url
            self.score = score
        }
    }

    /// Starts a progressive search.
    ///
    /// - Parameters:
    ///   - query: The Search Kit query string.
    ///   - options: The Search Kit search options. Defaults to `kSKSearchOptionDefault`.
    /// - Returns: A search object from which result chunks can be pulled.
    public func progressiveSearch(
        query: String,
        options: SKSearchOptions = SKSearchOptions(kSKSearchOptionDefault)
    ) -> ProgressivSearch {
        return ProgressivSearch(options: options, index: self, query: query)
    }

    /// A class for creating and managing a progressive search.
    /// A search starts on creation and can be cancelled at any time.
    public class ProgressivSearch {
        /// A class representing one chunk of results of a search request.
        public class Results {
            /// Creates a result chunk.
            ///
            /// - Parameters:
            ///   - moreResultsAvailable: A boolean indicating whether more search results are available
            ///   - results: The partial results for the search request
            public init(moreResultsAvailable: Bool, results: [SearchResult]) {
                self.moreResultsAvailable = moreResultsAvailable
                self.results = results
            }

            /// A boolean indicating whether more search results are available
            public let moreResultsAvailable: Bool

            /// The partial results for the search request
            public let results: [SearchResult]
        }

        private let options: SKSearchOptions
        private let search: SKSearch
        private let index: SearchIndexer
        private let query: String

        init(options: SKSearchOptions, index: SearchIndexer, query: String) {
            self.options = options
            // `SKSearchCreate` follows the Create rule, so ownership is taken here.
            self.search = SKSearchCreate(index.index, query as CFString, options).takeRetainedValue()
            self.index = index
            self.query = query
        }

        /// Retrieves the next chunk of search results in a progressive search.
        ///
        /// - Parameters:
        ///   - limit: The maximum number of results to retrieve in this call. Defaults to 10.
        ///     A non-positive limit yields an empty chunk with `moreResultsAvailable == false`.
        ///   - timeout: The duration to wait for the search before returning. Defaults to 1.0 seconds.
        ///
        /// - Returns: The results found by this call and whether more results are available.
        ///   If the index has been closed, an empty chunk with `moreResultsAvailable == false`.
        public func getNextSearchResultsChunk(
            limit: Int = 10,
            timeout: TimeInterval = 1.0
        ) -> (ProgressivSearch.Results) {
            // A non-positive limit would trap in `Array(repeating:count:)` (negative) or
            // never make progress (zero), so treat it like an exhausted search.
            guard limit > 0, let skIndex = self.index.index else {
                return Results(moreResultsAvailable: false, results: [])
            }

            var scores: [Float] = Array(repeating: 0.0, count: limit)
            var urls: [Unmanaged<CFURL>?] = Array(repeating: nil, count: limit)
            var documentIDs: [SKDocumentID] = Array(repeating: 0, count: limit)
            var foundCount = 0

            let hasMore = SKSearchFindMatches(self.search, limit, &documentIDs, &scores, timeout, &foundCount)
            SKIndexCopyDocumentURLsForDocumentIDs(skIndex, foundCount, &documentIDs, &urls)

            let partialResult: [SearchResult] = zip(urls[0..<foundCount], scores).compactMap { (cfurl, score) -> SearchResult? in
                // The URLs come from a "Copy" function, so the caller owns them;
                // taking them retained hands that reference to ARC instead of leaking it.
                guard let url = cfurl?.takeRetainedValue() as URL? else {
                    return nil
                }

                return SearchResult(url: url, score: score)
            }

            return Results(moreResultsAvailable: hasMore, results: partialResult)
        }

        /// Cancels an active search.
        public func cancel() {
            SKSearchCancel(self.search)
        }
    }
}
extension SearchIndexer {
    /// Runs a complete search for `query` and returns every result found.
    ///
    /// The search is driven through ``progressiveSearch(query:options:)``: result chunks are
    /// requested repeatedly until the search reports that no more results are available.
    ///
    /// - Parameters:
    ///   - query: A string representing the term to be searched for.
    ///   - limit: The maximum number of results requested per chunk.
    ///   - timeout: The duration each chunk request waits for the search before returning.
    ///   - options: The Search Kit search options. Defaults to `kSKSearchOptionDefault`.
    ///
    /// - Returns: All results of all chunks, each containing a match URL and its score.
    public func search(
        _ query: String,
        limit: Int = 10,
        timeout: TimeInterval = 1.0,
        options: SKSearchOptions = SKSearchOptions(kSKSearchOptionDefault)
    ) -> [SearchResult] {
        let progressive = self.progressiveSearch(query: query, options: options)
        var collected: [SearchResult] = []

        while true {
            let chunk = progressive.getNextSearchResultsChunk(limit: limit, timeout: timeout)
            collected += chunk.results
            if !chunk.moreResultsAvailable {
                break
            }
        }

        return collected
    }
}
extension SearchIndexer {
    /// A class to contain a term and the count of times it appears
    public class TermCount {
        /// A term within the document
        public let term: String

        /// The number of occurrences of `term`
        public let count: Int

        init(term: String, count: Int) {
            self.term = term
            self.count = count
        }
    }

    /// An enum to specify the state of the document
    public enum TermState: Int {
        /// All document states
        case all = 0
        /// Only documents that have no terms
        case empty = 1
        /// Only documents that have terms
        case notEmpty = 2
    }

    /// Returns all the document URLs loaded into the index matching the specified term state
    ///
    /// - Parameter termState: Only return documents matching the specified document state
    /// - Returns: An array containing all the document URLs
    public func documents(termState: TermState = .all) -> [URL] {
        return self.fullDocuments(termState: termState).map { $0.url }
    }

    /// Returns the number of terms for the specified document URL.
    ///
    /// - Returns: The term count, or 0 if the index is closed or no document could be created for `url`.
    public func termCount(for url: URL) -> Int {
        // `SKDocumentCreateWithURL` follows the Create rule: take ownership so ARC releases it.
        guard let index = self.index,
              let document = SKDocumentCreateWithURL(url as CFURL)?.takeRetainedValue() else {
            return 0
        }
        let documentID = SKIndexGetDocumentID(index, document)
        return SKIndexGetDocumentTermCount(index, documentID)
    }

    /// Is the specified document empty (ie. it has no terms)
    public func isEmpty(for url: URL) -> Bool {
        // Previously returned `termCount > 0`, i.e. the opposite of what the name and doc state.
        return self.termCount(for: url) == 0
    }

    /// Returns an array containing the terms and counts for a specified URL
    ///
    /// - Parameter url: The document URL in the index to locate
    /// - Returns: An array of the terms and corresponding counts located in the document,
    ///   excluding the index's stop words.
    ///   Returns an empty array if the document cannot be located.
    public func terms(for url: URL) -> [TermCount] {
        guard let index = self.index,
              let document = SKDocumentCreateWithURL(url as CFURL)?.takeRetainedValue() else {
            return []
        }

        let documentID = SKIndexGetDocumentID(index, document)

        // "Copy" functions return owned objects; take them retained to avoid leaking them.
        guard let termIDs = SKIndexCopyTermIDArrayForDocumentID(index, documentID)?
                .takeRetainedValue() as? [CFIndex] else {
            return []
        }

        var result = [TermCount]()
        for termID in termIDs {
            guard let termString = SKIndexCopyTermStringForTermID(index, termID)?
                    .takeRetainedValue() as String?,
                  !self.stopWords.contains(termString) else {
                continue
            }
            let count = SKIndexGetDocumentTermFrequency(index, documentID, termID) as Int
            result.append(TermCount(term: termString, count: count))
        }

        return result
    }
}
/// Indexer built on top of a Search Kit `SKIndex`.
public class SearchIndexer {
    let modifiyIndexQueue = DispatchQueue(label: "app.codeedit.CodeEdit.ModifySearchIndex")

    /// The wrapped Search Kit index; `nil` once the index has been closed.
    var index: SKIndex?

    init(index: SKIndex) {
        self.index = index
    }

    deinit {
        self.close()
    }

    /// Flushes any pending commands to the search index.
    /// Flush should always be called before performing a search.
    public func flush() {
        guard let openIndex = self.index else {
            return
        }
        SKIndexFlush(openIndex)
    }

    /// Reduces the size of the index where possible.
    ///
    /// - Warning: Do NOT call on the main thread
    public func compact() {
        guard let openIndex = self.index else {
            return
        }
        SKIndexCompact(openIndex)
    }

    /// Removes the documents that have no search terms.
    ///
    /// - Returns: The number of documents a removal was attempted for.
    public func cleanUp() -> Int {
        let emptyDocuments = self.fullDocuments(termState: .empty)
        emptyDocuments.forEach { entry in
            _ = self.remove(document: entry.docuemnt)
        }
        return emptyDocuments.count
    }

    /// Closes the index. Calling this on an already closed index does nothing.
    public func close() {
        guard let openIndex = self.index else {
            return
        }
        SKIndexClose(openIndex)
        self.index = nil
    }

    /// Loads the default Search Kit extractor plug-ins the first time it is read.
    /// Call once at application launch to tell Search Kit to use the Spotlight metadata importers.
    lazy var dataExtractorLoaded: Bool = {
        SKLoadDefaultExtractorPlugIns()
        return true
    }()

    /// Stop words of the index, read from its analysis properties.
    /// These are common words which should be ignored because they are not useful for searching.
    private(set) lazy var stopWords: Set<String> = {
        guard let openIndex = self.index,
              let analysis = SKIndexGetAnalysisProperties(openIndex).takeUnretainedValue() as? [String: Any],
              let configured = analysis[kSKStopWords as String] as? Set<String> else {
            return []
        }
        return configured
    }()

    public enum IndexType: UInt32 {
        /// Unknown index type (kSKIndexUnknown)
        case unknown = 0
        /// Inverted index, mapping terms to documents (kSKIndexInverted)
        case inverted = 1
        /// Vector index, mapping documents to terms (kSKIndexVector)
        case vector = 2
        /// Index type with all the capabilities of an inverted and a vector index (kSKIndexInvertedVector)
        case invertedVector = 3
    }

    /// The properties used when creating a Search Kit index.
    ///
    /// **Available Options:**
    /// - `indexType`: The type of the index to be created.
    ///   Options include `.unknown`, `.inverted`, `.vector` or `.invertedVector`
    /// - `proximityIndexing`: A Boolean flag indicating whether or not Search Kit should use proximity indexing.
    /// - `stopWords`: A set of stopwords — words not to index.
    /// - `minTermLength`: The minimum term length to index (defaults to 1).
    public class CreateProperties {
        /// The type of the index to be created
        private(set) var indexType: SKIndexType = kSKIndexInverted
        /// Whether the index should use proximity indexing
        private(set) var proximityIndexing: Bool = false
        /// The stop words for the index
        private(set) var stopWords: Set<String> = Set<String>()
        /// The minimum size of word to add to the index
        private(set) var minTermLength: UInt = 1

        /// Creates a properties object with the specified creation parameters.
        ///
        /// - Parameters:
        ///   - indexType: The type of index
        ///   - proximityIndexing: A Boolean flag indicating whether or not Search Kit should use proximity indexing
        ///   - stopWords: A set of stopwords — words not to index
        ///   - minTermLengh: The minimum term length to index (defaults to 1)
        public init(
            indexType: SearchIndexer.IndexType = .inverted,
            proximityIndexing: Bool = false,
            stopWords: Set<String> = [],
            minTermLengh: UInt = 1
        ) {
            self.indexType = SKIndexType(indexType.rawValue)
            self.proximityIndexing = proximityIndexing
            self.stopWords = stopWords
            self.minTermLength = minTermLengh
        }

        /// Builds the `CFDictionary` of analysis properties passed to the Search Kit index creation call.
        func properties() -> CFDictionary {
            var analysis: [CFString: Any] = [:]
            analysis[kSKProximityIndexing] = self.proximityIndexing
            analysis[kSKStopWords] = self.stopWords
            analysis[kSKMinTermLength] = self.minTermLength
            return analysis as CFDictionary
        }
    }

}
/// Reads a UTF-8 text file piece by piece instead of loading it into memory at once.
class LazyStringLoader {
    let fileURL: URL
    var fileHandle: FileHandle?
    let chunkSize: Int
    let queue = DispatchQueue(label: "com.CodeEdit.LayzLoader")

    /// Bytes of a UTF-8 sequence that was cut by the previous chunk boundary.
    private var carryOver = Data()

    /// - Parameters:
    ///   - fileURL: The file to read.
    ///   - chunkSize: The number of bytes requested from the file per call to `getNextChunk()`.
    init(fileURL: URL, chunkSize: Int = 1024) {
        self.fileURL = fileURL
        self.chunkSize = chunkSize
    }

    /// Returns the next piece of the file as a string.
    ///
    /// The file is opened on the first call; every call reads up to `chunkSize` further bytes.
    /// Bytes of a multi-byte UTF-8 character that straddle a chunk boundary are held back and
    /// prepended to the next chunk, so a returned chunk may be shorter than `chunkSize`
    /// (possibly empty).
    ///
    /// - Returns: The next chunk, or `nil` when the end of the file has been reached, the file
    ///   cannot be opened or read, `chunkSize` is not positive, or the content is not valid UTF-8.
    func getNextChunk() -> String? {
        guard chunkSize > 0 else {
            return nil
        }

        // Open lazily, but read on *every* call (previously reading only happened
        // while the handle was still nil, so only the first chunk was ever returned).
        if fileHandle == nil {
            fileHandle = try? FileHandle(forReadingFrom: fileURL)
        }
        guard let handle = fileHandle else {
            return nil
        }

        let freshBytes = (try? handle.read(upToCount: chunkSize)) ?? Data()
        let buffer = carryOver + freshBytes
        carryOver = Data()

        guard !buffer.isEmpty else {
            return nil
        }

        // End of file: nothing more will arrive to complete a sequence, decode what is left.
        if freshBytes.isEmpty {
            return String(data: buffer, encoding: .utf8)
        }

        // A UTF-8 scalar is at most 4 bytes long, so at most 3 trailing bytes can belong
        // to a sequence that continues in the next chunk.
        for heldBack in 0...min(3, buffer.count) {
            let decodable = buffer.prefix(buffer.count - heldBack)
            if let text = String(data: decodable, encoding: .utf8) {
                carryOver = Data(buffer.suffix(heldBack))
                return text
            }
        }

        return nil
    }
}
extension String {
    /// Finds the appearances of a substring within the string.
    ///
    /// Overlapping appearances are reported individually.
    ///
    /// - Parameters:
    ///   - substring: The substring to search for within the string.
    ///   - toLeft: The optional number of characters to include to the left of each found substring appearance.
    ///   - toRight: The optional number of characters to include to the right of each found substring appearance.
    ///
    /// - Returns: An array of ranges representing the appearances of the substring within the string.
    ///   The extra context is clamped to the bounds of the string.
    // NOTE(review): the statement that builds the range was not readable in the reviewed
    // source; the clamping behaviour is reconstructed from the doc comment - confirm.
    func appearancesOfSubstring(substring: String, toLeft: Int = 0, toRight: Int = 0) -> [Range<String.Index>] {
        guard !substring.isEmpty else { return [] }

        let needleLength = substring.count
        var appearances: [Range<String.Index>] = []
        var cursor = startIndex

        while cursor < endIndex {
            // `limitedBy: endIndex` also accepts a match that ends exactly at the end of the
            // string (the former `index + count < self.count` check skipped that one).
            if let matchEnd = index(cursor, offsetBy: needleLength, limitedBy: endIndex),
               self[cursor..<matchEnd] == substring {
                let lower = index(cursor, offsetBy: -Swift.max(toLeft, 0), limitedBy: startIndex) ?? startIndex
                let upper = index(matchEnd, offsetBy: Swift.max(toRight, 0), limitedBy: endIndex) ?? endIndex
                appearances.append(lower..<upper)
            }
            formIndex(after: &cursor)
        }

        return appearances
    }
}

extension String {
    /// Returns the character at the given character offset.
    ///
    /// - Parameter index: The zero-based character offset.
    /// - Returns: The character, or `nil` if `index` is negative or not less than `count`.
    func character(at index: Int) -> Character? {
        // A negative offset from `startIndex` would trap, so reject it explicitly.
        guard index >= 0,
              let position = self.index(startIndex, offsetBy: index, limitedBy: endIndex),
              position < endIndex else {
            return nil
        }

        return self[position]
    }
}
/// Creates the search state for a workspace.
///
/// Stores the workspace, creates the in-memory search index via `SearchIndexer.Memory.create()`
/// (which may yield `nil`) and then calls `addProjectToIndex()`.
///
/// - Parameter workspace: The workspace document this search state belongs to.
init(_ workspace: WorkspaceDocument) {
    self.workspace = workspace
    self.indexer = SearchIndexer.Memory.create()
    addProjectToIndex()
}
/// Adds the contents of the current workspace URL to the search index,
/// which makes the contents of the workspace searchable.
///
/// Does nothing when there is no indexer or the workspace has no file URL.
/// Reading the files and adding them to the index happens in an unstructured `Task`.
func addProjectToIndex() {
    guard let indexer, let workspaceURL = workspace.fileURL else {
        return
    }

    let workspaceFiles = getFileURLs(at: workspaceURL)

    Task {
        let manager = SearchIndexer.AsyncManager(index: indexer)
        let contents = await getFileContents(from: workspaceFiles)
        _ = await manager.addText(files: contents, flushWhenComplete: true)
    }
}
/// Collects the URLs found by enumerating the given directory.
///
/// Hidden files and the contents of packages are skipped.
///
/// - Parameter url: The URL of the directory to enumerate.
///
/// - Returns: The enumerated URLs, or an empty array if the directory cannot be enumerated.
func getFileURLs(at url: URL) -> [URL] {
    let skipped: FileManager.DirectoryEnumerationOptions = [.skipsHiddenFiles, .skipsPackageDescendants]
    guard let walker = FileManager.default.enumerator(
        at: url,
        includingPropertiesForKeys: [.isRegularFileKey],
        options: skipped
    ), let found = walker.allObjects as? [URL] else {
        return []
    }
    return found
}

/// Reads the text content of the given files.
///
/// Files whose content cannot be read as a string are left out.
///
/// - Parameter filePaths: An array of file URLs representing the paths of the files.
///
/// - Returns: An array of `TextFile` objects containing the standardized file URLs and text content.
func getFileContents(from filePaths: [URL]) async -> [SearchIndexer.AsyncManager.TextFile] {
    return filePaths.compactMap { path -> SearchIndexer.AsyncManager.TextFile? in
        guard let text = try? String(contentsOf: path) else {
            return nil
        }
        return SearchIndexer.AsyncManager.TextFile(url: path.standardizedFileURL, text: text)
    }
}
/// Builds the index query for a user query according to the selected search mode.
///
/// The query is lowercased when `ignoreCase` is set. Depending on the third entry of
/// `selectedMode`, `*` wildcards are added around, after or before the query.
///
/// - Parameter query: The original user query string.
///
/// - Returns: The query, modified according to the selected search mode.
func getSearchTerm(_ query: String) -> String {
    let term = ignoreCase ? query.lowercased() : query

    if let mode = selectedMode.third {
        switch mode {
        case .Containing:
            return "*\(term)*"
        case .StartingWith:
            return "\(term)*"
        case .EndingWith:
            return "*\(term)"
        default:
            break
        }
    }

    return term
}
/// Searches the entire workspace for the given string, using the
/// ``WorkspaceDocument/SearchState-swift.class/selectedMode`` modifiers
/// to modify the search if needed. Candidate files are first found through the search index,
/// then each candidate file is searched for the given string.
///
/// This method will update
/// ``WorkspaceDocument/SearchState-swift.class/searchResult``,
/// ``WorkspaceDocument/SearchState-swift.class/searchResultsFileCount``
/// and ``WorkspaceDocument/SearchState-swift.class/searchResultsCount`` with any matched
/// search results. See ``SearchResultModel`` and ``SearchResultMatchModel``
/// for more information on search results and matches.
///
/// - Parameter query: The search query to search for.
func search(_ query: String) async {
    let searchQuery = getSearchTerm(query)
    guard let indexer = indexer else {
        return
    }

    let asyncController = SearchIndexer.AsyncManager(index: indexer)

    // The group tracks every per-file evaluation so the final results are only
    // published once all of them have finished.
    let evaluateResultGroup = DispatchGroup()
    let evaluateSearchQueue = DispatchQueue(label: "app.codeedit.CodeEdit.EvaluateSearch")

    let searchStream = await asyncController.search(query: searchQuery, 20)
    for try await result in searchStream {
        let urls2: [(URL, Float)] = result.results.map {
            ($0.url, $0.score)
        }

        for (url, score) in urls2 {
            evaluateSearchQueue.async(group: evaluateResultGroup) {
                // Entered manually in addition to `async(group:)`: the dispatched block returns
                // as soon as the Task is created, the matching `leave()` is called by the Task.
                evaluateResultGroup.enter()
                Task {
                    var newResult = SearchResultModel(file: CEWorkspaceFile(url: url), score: score)
                    // NOTE(review): the query is always lowercased here, independent of
                    // `ignoreCase` - confirm this is intended.
                    await self.evaluateResult(query: query.lowercased(), searchResult: &newResult)

                    // Check if the new result has any line matches.
                    if !newResult.lineMatches.isEmpty {
                        // The append goes through a helper that hops to the main queue,
                        // because this code runs in a concurrent context.
                        self.appendNewResultsToTempResults(newResult: newResult)
                    }
                    evaluateResultGroup.leave()
                }
            }
        }
    }

    // Runs once every evaluation entered above has left the group.
    evaluateResultGroup.notify(queue: evaluateSearchQueue) {
        self.setSearchResults()
    }
}

/// Appends a new search result to the temporary search results array on the main queue.
///
/// - Parameters:
///   - newResult: The `SearchResultModel` to be appended to the temporary search results.
func appendNewResultsToTempResults(newResult: SearchResultModel) {
    DispatchQueue.main.async {
        self.tempSearchResults.append(newResult)
    }
}

/// Publishes the collected search results on the main queue.
///
/// Sets `searchResult` to the temporary results sorted by descending score, `searchResultsCount`
/// to the total number of line matches and `searchResultsFileCount` to the number of results,
/// then resets `tempSearchResults` to an empty array.
/// - Important: Call this function when you are ready to
/// display or use the final search results.
func setSearchResults() {
    DispatchQueue.main.async {
        self.searchResult = self.tempSearchResults.sorted { $0.score > $1.score }
        self.searchResultsCount = self.tempSearchResults.map { $0.lineMatches.count }.reduce(0, +)
        self.searchResultsFileCount = self.tempSearchResults.count
        self.tempSearchResults = []
    }
}
/// Adds the line matches for `query` to a search result.
///
/// The file of the search result is read and split into lines. For every line that
/// `lineContainsSearchTerm(line:term:)` accepts, one `SearchResultMatchModel` per occurrence of
/// the query is created, carrying the line number, the trimmed line content and the keyword range.
/// Lines are compared in lowercased form, so `query` is expected to be lowercased as well.
///
/// If the file cannot be read as UTF-8 text, `searchResult` is left untouched.
///
/// - Parameters:
///   - query: The search query string.
///   - searchResult: The search result to evaluate. Its `lineMatches` are replaced by the matches
///     found, ordered by line number.
private func evaluateResult(query: String, searchResult: inout SearchResultModel) async {
    let file = searchResult.file

    guard let data = try? Data(contentsOf: file.url),
          let string = String(data: data, encoding: .utf8) else {
        return
    }

    // `Character.isNewline` treats "\r\n" as a single separator, and keeping empty
    // subsequences keeps the line numbers aligned with the file.
    let lines = string.split(omittingEmptySubsequences: false, whereSeparator: \.isNewline)

    typealias LineMatches = (lineNumber: Int, matches: [SearchResultMatchModel])

    let matchesPerLine = await withTaskGroup(of: LineMatches.self) { group -> [LineMatches] in
        // Queue all lines first; collecting inside this loop would wait for each task
        // right after adding it and serialize the whole group.
        for (lineNumber, line) in lines.enumerated() {
            group.addTask {
                let rawNoSpaceLine = line.trimmingCharacters(in: .whitespacesAndNewlines)
                let noSpaceLine = rawNoSpaceLine.lowercased()

                guard self.lineContainsSearchTerm(line: noSpaceLine, term: query) else {
                    return (lineNumber, [])
                }

                // Every occurrence in the line becomes its own match.
                let matches = noSpaceLine.ranges(of: query).map { range in
                    SearchResultMatchModel(
                        lineNumber: lineNumber,
                        file: file,
                        lineContent: rawNoSpaceLine,
                        keywordRange: range
                    )
                }
                return (lineNumber, matches)
            }
        }

        var collected: [LineMatches] = []
        for await lineResult in group where !lineResult.matches.isEmpty {
            collected.append(lineResult)
        }
        return collected
    }

    // Tasks finish in arbitrary order; restore file order before publishing.
    searchResult.lineMatches = matchesPerLine
        .sorted { $0.lineNumber < $1.lineNumber }
        .flatMap { $0.matches }
}
false) { startsWith = true } if appearanceString.hasSuffix(searchterm) || !appearanceString.last!.isLetter || - !appearanceString.character(at: appearanceString.count-2).isLetter { + !(appearanceString.character(at: appearanceString.count-2)?.isLetter ?? false) { endsWith = true } @@ -165,36 +287,16 @@ extension WorkspaceDocument { } return false - // TODO: references and definitions } - } -} - -extension String { - func character(at index: Int) -> Character { - return self[self.index(self.startIndex, offsetBy: index)] - } - func appearancesOfSubstring(substring: String, toLeft: Int=0, toRight: Int=0) -> [Range] { - guard !substring.isEmpty && self.contains(substring) else { return [] } - var appearances: [Range] = [] - for (index, character) in self.enumerated() where character == substring.first { - let startOfFoundCharacter = self.index(self.startIndex, offsetBy: index) - guard index + substring.count < self.count else { continue } - let lengthOfFoundCharacter = self.index(self.startIndex, offsetBy: (substring.count + index)) - if self[startOfFoundCharacter..