feat: file search for Windows (#762)

This commit implements the file search extension for Windows platforms using the [Windows Search](https://learn.microsoft.com/en-us/windows/win32/search/-search-3x-wds-qryidx-overview) functionality.

Something to note:

1. Searching by file content is not natively supported. Coco would search for all the columns (attributes/fields within the index) with this option:

```rust
        SearchBy::NameAndContents => {
            // Windows File Search does not support searching by file content.
            //
            // `CONTAINS('query_string')` would search all columns for `query_string`,
            // this is the closest solution we have.
            format!("((System.FileName LIKE '%{query_string}%') OR CONTAINS('{query_string}'))")
        }
```

2. Tests have been added, but they failed in our CI for unknown reasons so I disabled them:

```rust
// Skip these tests in our CI, they fail with the following error 
// "SQL is invalid: "0x80041820""
// 
// I have no idea about the underlying root cause
#[cfg(all(test, not(ci)))]
mod test {
```

3. The Windows Search index is not real-time and can return obsolete results. Opening the returned documents could fail if the chosen file has been deleted or moved.
This commit is contained in:
SteveLauC
2025-07-16 09:11:53 +08:00
committed by GitHub
parent aaf4bf2737
commit c8a723ed9d
13 changed files with 1229 additions and 501 deletions

View File

@@ -16,6 +16,7 @@ Information about release notes of Coco Server is provided here.
- feat: file search using spotlight #705
- feat: voice input support in both search and chat modes #732
- feat: text to speech now powered by LLM #750
- feat: file search for Windows #762
### 🐛 Bug fix

62
src-tauri/Cargo.lock generated
View File

@@ -908,6 +908,7 @@ dependencies = [
"tungstenite 0.24.0",
"url",
"walkdir",
"windows 0.60.0",
"zip 4.0.0",
]
@@ -7598,17 +7599,39 @@ dependencies = [
"windows-targets 0.53.0",
]
[[package]]
name = "windows"
version = "0.60.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddf874e74c7a99773e62b1c671427abf01a425e77c3d3fb9fb1e4883ea934529"
dependencies = [
"windows-collections 0.1.1",
"windows-core 0.60.1",
"windows-future 0.1.1",
"windows-link",
"windows-numerics 0.1.1",
]
[[package]]
name = "windows"
version = "0.61.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5ee8f3d025738cb02bad7868bbb5f8a6327501e870bf51f1b455b0a2454a419"
dependencies = [
"windows-collections",
"windows-collections 0.2.0",
"windows-core 0.61.0",
"windows-future",
"windows-future 0.2.0",
"windows-link",
"windows-numerics",
"windows-numerics 0.2.0",
]
[[package]]
name = "windows-collections"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5467f79cc1ba3f52ebb2ed41dbb459b8e7db636cc3429458d9a852e15bc24dec"
dependencies = [
"windows-core 0.60.1",
]
[[package]]
@@ -7655,6 +7678,19 @@ dependencies = [
"windows-targets 0.53.0",
]
[[package]]
name = "windows-core"
version = "0.60.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca21a92a9cae9bf4ccae5cf8368dce0837100ddf6e6d57936749e85f152f6247"
dependencies = [
"windows-implement 0.59.0",
"windows-interface 0.59.1",
"windows-link",
"windows-result 0.3.2",
"windows-strings 0.3.1",
]
[[package]]
name = "windows-core"
version = "0.61.0"
@@ -7668,6 +7704,16 @@ dependencies = [
"windows-strings 0.4.0",
]
[[package]]
name = "windows-future"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a787db4595e7eb80239b74ce8babfb1363d8e343ab072f2ffe901400c03349f0"
dependencies = [
"windows-core 0.60.1",
"windows-link",
]
[[package]]
name = "windows-future"
version = "0.2.0"
@@ -7774,6 +7820,16 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38"
[[package]]
name = "windows-numerics"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "005dea54e2f6499f2cee279b8f703b3cf3b5734a2d8d21867c8f44003182eeed"
dependencies = [
"windows-core 0.60.1",
"windows-link",
]
[[package]]
name = "windows-numerics"
version = "0.2.0"

View File

@@ -129,3 +129,4 @@ tauri-plugin-updater = { git = "https://github.com/infinilabs/plugins-workspace"
[target."cfg(target_os = \"windows\")".dependencies]
enigo="0.3"
windows = { version = "0.60", features = ["Win32_Foundation", "Win32_System_Com", "Win32_System_Ole", "Win32_System_Search", "Win32_UI_Shell_PropertiesSystem", "Win32_Data"] }

View File

@@ -1,3 +1,14 @@
fn main() {
tauri_build::build()
tauri_build::build();
// If env var `GITHUB_ACTIONS` exists, we are running in CI, set up the `ci`
// attribute
if std::env::var("GITHUB_ACTIONS").is_ok() {
println!("cargo:rustc-cfg=ci");
}
// Notify `rustc` of this `cfg` attribute to suppress unknown attribute warnings.
//
// unexpected condition name: `ci`
println!("cargo::rustc-check-cfg=cfg(ci)");
}

View File

@@ -1,476 +0,0 @@
use super::super::LOCAL_QUERY_SOURCE_TYPE;
use crate::common::{
document::{DataSourceReference, Document},
error::SearchError,
search::{QueryResponse, QuerySource, SearchQuery},
traits::SearchSource,
};
use crate::extension::OnOpened;
use async_trait::async_trait;
use futures::stream::Stream;
use futures::stream::StreamExt;
use hostname;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::os::fd::OwnedFd;
use std::path::Path;
use std::sync::LazyLock;
use tauri_plugin_store::StoreExt;
use tokio::io::AsyncBufReadExt;
use tokio::io::BufReader;
use tokio::process::Child;
use tokio::process::Command;
use tokio_stream::wrappers::LinesStream;
use crate::util::file::get_file_icon;
pub(crate) const EXTENSION_ID: &str = "File Search";
/// JSON file for this extension.
pub(crate) const PLUGIN_JSON_FILE: &str = r#"
{
"id": "File Search",
"name": "File Search",
"platforms": ["macos"],
"description": "Search files on your system using macOS Spotlight",
"icon": "font_Filesearch",
"type": "extension",
"enabled": true
}
"#;
// Tauri store keys for file system configuration
const TAURI_STORE_FILE_SYSTEM_CONFIG: &str = "file_system_config";
const TAURI_STORE_KEY_SEARCH_BY: &str = "search_by";
const TAURI_STORE_KEY_SEARCH_PATHS: &str = "search_paths";
const TAURI_STORE_KEY_EXCLUDE_PATHS: &str = "exclude_paths";
const TAURI_STORE_KEY_FILE_TYPES: &str = "file_types";
static HOME_DIR: LazyLock<String> = LazyLock::new(|| {
let os_string = dirs::home_dir()
.expect("$HOME should be set")
.into_os_string();
os_string
.into_string()
.expect("User home directory should be encoded with UTF-8")
});
#[derive(Debug, Clone, Serialize, Deserialize, Copy)]
pub enum SearchBy {
Name,
NameAndContents,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileSearchConfig {
pub search_paths: Vec<String>,
pub exclude_paths: Vec<String>,
pub file_types: Vec<String>,
pub search_by: SearchBy,
}
impl Default for FileSearchConfig {
fn default() -> Self {
Self {
search_paths: vec![
format!("{}/Documents", HOME_DIR.as_str()),
format!("{}/Desktop", HOME_DIR.as_str()),
format!("{}/Downloads", HOME_DIR.as_str()),
],
exclude_paths: Vec::new(),
file_types: Vec::new(),
search_by: SearchBy::Name,
}
}
}
impl FileSearchConfig {
fn get() -> Self {
let tauri_app_handle = crate::GLOBAL_TAURI_APP_HANDLE
.get()
.expect("global tauri app handle not set");
let store = tauri_app_handle
.store(TAURI_STORE_FILE_SYSTEM_CONFIG)
.unwrap_or_else(|e| {
panic!(
"store [{}] not found/loaded, error [{}]",
TAURI_STORE_FILE_SYSTEM_CONFIG, e
)
});
// Default value, will be used when specific config entries are not set
let default_config = FileSearchConfig::default();
let search_paths = {
if let Some(search_paths) = store.get(TAURI_STORE_KEY_SEARCH_PATHS) {
match search_paths {
Value::Array(arr) => {
let mut vec = Vec::with_capacity(arr.len());
for v in arr {
match v {
Value::String(s) => vec.push(s),
other => panic!(
"Expected all elements of 'search_paths' to be strings, but found: {:?}",
other
),
}
}
vec
}
other => panic!(
"Expected 'search_paths' to be an array of strings in the file system config store, but got: {:?}",
other
),
}
} else {
store.set(
TAURI_STORE_KEY_SEARCH_PATHS,
default_config.search_paths.as_slice(),
);
default_config.search_paths
}
};
let exclude_paths = {
if let Some(exclude_paths) = store.get(TAURI_STORE_KEY_EXCLUDE_PATHS) {
match exclude_paths {
Value::Array(arr) => {
let mut vec = Vec::with_capacity(arr.len());
for v in arr {
match v {
Value::String(s) => vec.push(s),
other => panic!(
"Expected all elements of 'exclude_paths' to be strings, but found: {:?}",
other
),
}
}
vec
}
other => panic!(
"Expected 'exclude_paths' to be an array of strings in the file system config store, but got: {:?}",
other
),
}
} else {
store.set(
TAURI_STORE_KEY_EXCLUDE_PATHS,
default_config.exclude_paths.as_slice(),
);
default_config.exclude_paths
}
};
let file_types = {
if let Some(file_types) = store.get(TAURI_STORE_KEY_FILE_TYPES) {
match file_types {
Value::Array(arr) => {
let mut vec = Vec::with_capacity(arr.len());
for v in arr {
match v {
Value::String(s) => vec.push(s),
other => panic!(
"Expected all elements of 'file_types' to be strings, but found: {:?}",
other
),
}
}
vec
}
other => panic!(
"Expected 'file_types' to be an array of strings in the file system config store, but got: {:?}",
other
),
}
} else {
store.set(
TAURI_STORE_KEY_FILE_TYPES,
default_config.file_types.as_slice(),
);
default_config.file_types
}
};
let search_by = {
if let Some(search_by) = store.get(TAURI_STORE_KEY_SEARCH_BY) {
serde_json::from_value(search_by.clone()).unwrap_or_else(|e| {
panic!(
"Failed to deserialize 'search_by' from file system config store. Invalid JSON: {:?}, error: {}",
search_by, e
)
})
} else {
store.set(
TAURI_STORE_KEY_SEARCH_BY,
serde_json::to_value(default_config.search_by).unwrap(),
);
default_config.search_by
}
};
Self {
search_by,
search_paths,
exclude_paths,
file_types,
}
}
}
pub struct FileSearchExtensionSearchSource {
base_score: f64,
}
impl FileSearchExtensionSearchSource {
pub fn new(base_score: f64) -> Self {
FileSearchExtensionSearchSource { base_score }
}
/// Return an array containing the `mdfind` command and its arguments.
fn build_mdfind_query(query_string: &str, config: &FileSearchConfig) -> Vec<String> {
let mut args = vec!["mdfind".to_string()];
match config.search_by {
SearchBy::Name => {
args.push(format!("kMDItemFSName == '*{}*'", query_string));
}
SearchBy::NameAndContents => {
args.push(format!(
"kMDItemFSName == '*{}*' || kMDItemTextContent == '{}'",
query_string, query_string
));
}
}
// Add search paths using -onlyin
for path in &config.search_paths {
if Path::new(path).exists() {
args.extend_from_slice(&["-onlyin".to_string(), path.to_string()]);
}
}
args
}
/// Spawn the `mdfind` child process and return an async iterator over its output,
/// allowing us to collect the results asynchronously.
///
/// # Return value:
///
/// * impl Stream: an async iterator that will yield the matched files
/// * Child: The handle to the mdfind process, we need to kill it once we
/// collect all the results to avoid zombie processes.
fn execute_mdfind_query(
query_string: &str,
from: usize,
size: usize,
config: &FileSearchConfig,
) -> Result<(impl Stream<Item = std::io::Result<String>>, Child), String> {
let args = Self::build_mdfind_query(query_string, &config);
let (rx, tx) = std::io::pipe().unwrap();
let rx_owned = OwnedFd::from(rx);
let async_rx = tokio::net::unix::pipe::Receiver::from_owned_fd(rx_owned).unwrap();
let buffered_rx = BufReader::new(async_rx);
let lines = LinesStream::new(buffered_rx.lines());
let child = Command::new(&args[0])
.args(&args[1..])
.stdout(tx)
.stderr(std::process::Stdio::null())
.spawn()
.map_err(|e| format!("Failed to spawn mdfind: {}", e))?;
let config_clone = config.clone();
let iter = lines
.filter(move |res_path| {
std::future::ready({
match res_path {
Ok(path) => !Self::should_be_filtered_out(&config_clone, path),
Err(_) => {
// Don't filter out Err() values
true
}
}
})
})
.skip(from)
.take(size);
Ok((iter, child))
}
/// If `file_path` should be removed from the search results given the filter
/// conditions specified in `config`.
fn should_be_filtered_out(config: &FileSearchConfig, file_path: &str) -> bool {
let is_excluded = config
.exclude_paths
.iter()
.any(|exclude_path| file_path.starts_with(exclude_path));
if is_excluded {
return true;
}
let matches_file_type = if config.file_types.is_empty() {
true
} else {
let path_obj = camino::Utf8Path::new(&file_path);
if let Some(extension) = path_obj.extension() {
config
.file_types
.iter()
.any(|file_type| file_type == extension)
} else {
// `config.file_types` is not empty, then the search results
// should have extensions.
false
}
};
!matches_file_type
}
}
#[async_trait]
impl SearchSource for FileSearchExtensionSearchSource {
fn get_type(&self) -> QuerySource {
QuerySource {
r#type: LOCAL_QUERY_SOURCE_TYPE.into(),
name: hostname::get()
.unwrap_or(EXTENSION_ID.into())
.to_string_lossy()
.into(),
id: EXTENSION_ID.into(),
}
}
async fn search(&self, query: SearchQuery) -> Result<QueryResponse, SearchError> {
let Some(query_string) = query.query_strings.get("query") else {
return Ok(QueryResponse {
source: self.get_type(),
hits: Vec::new(),
total_hits: 0,
});
};
let from = usize::try_from(query.from).expect("from too big");
let size = usize::try_from(query.size).expect("size too big");
let query_string = query_string.trim();
if query_string.is_empty() {
return Ok(QueryResponse {
source: self.get_type(),
hits: Vec::new(),
total_hits: 0,
});
}
// Get configuration from tauri store
let config = FileSearchConfig::get();
// If search paths are empty, then the hit should be empty.
//
// Without this, empty search paths will result in a mdfind that has no `-onlyin`
// option, which will in turn query the whole disk volume.
if config.search_paths.is_empty() {
return Ok(QueryResponse {
source: self.get_type(),
hits: Vec::new(),
total_hits: 0,
});
}
// Execute search in a blocking task
let query_source = self.get_type();
let base_score = self.base_score;
let (mut iter, mut mdfind_child_process) =
Self::execute_mdfind_query(&query_string, from, size, &config)
.map_err(SearchError::InternalError)?;
// Convert results to documents
let mut hits: Vec<(Document, f64)> = Vec::new();
while let Some(res_file_path) = iter.next().await {
let file_path =
res_file_path.map_err(|io_err| SearchError::InternalError(io_err.to_string()))?;
let icon = get_file_icon(file_path.clone()).await;
let file_path_of_type_path = camino::Utf8Path::new(&file_path);
let r#where = file_path_of_type_path
.parent()
.unwrap_or_else(|| {
panic!(
"expect path [{}] to have a parent, but it does not",
file_path
);
})
.to_string();
let file_name = file_path_of_type_path.file_name().unwrap_or_else(|| {
panic!(
"expect path [{}] to have a file name, but it does not",
file_path
);
});
let on_opened = OnOpened::Document {
url: file_path.clone(),
};
let doc = Document {
id: file_path.clone(),
title: Some(file_name.to_string()),
source: Some(DataSourceReference {
r#type: Some(LOCAL_QUERY_SOURCE_TYPE.into()),
name: Some(EXTENSION_ID.into()),
id: Some(EXTENSION_ID.into()),
icon: Some(String::from("font_Filesearch")),
}),
category: Some(r#where),
on_opened: Some(on_opened),
url: Some(file_path),
icon: Some(icon.to_string()),
..Default::default()
};
hits.push((doc, base_score));
}
mdfind_child_process
.kill()
.await
.map_err(|e| SearchError::InternalError(format!("{:?}", e)))?;
let total_hits = hits.len();
Ok(QueryResponse {
source: query_source,
hits,
total_hits,
})
}
}
// Tauri commands for managing file system configuration
#[tauri::command]
pub async fn get_file_system_config() -> FileSearchConfig {
FileSearchConfig::get()
}
#[tauri::command]
pub async fn set_file_system_config(config: FileSearchConfig) -> Result<(), String> {
let tauri_app_handle = crate::GLOBAL_TAURI_APP_HANDLE
.get()
.expect("global tauri app handle not set");
let store = tauri_app_handle
.store(TAURI_STORE_FILE_SYSTEM_CONFIG)
.map_err(|e| e.to_string())?;
store.set(TAURI_STORE_KEY_SEARCH_PATHS, config.search_paths);
store.set(TAURI_STORE_KEY_EXCLUDE_PATHS, config.exclude_paths);
store.set(TAURI_STORE_KEY_FILE_TYPES, config.file_types);
store.set(
TAURI_STORE_KEY_SEARCH_BY,
serde_json::to_value(config.search_by).unwrap(),
);
Ok(())
}

View File

@@ -0,0 +1,213 @@
//! File Search configuration entries definition and getter/setter functions.
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value;
use std::sync::LazyLock;
use tauri_plugin_store::StoreExt;
// Tauri store keys for file system configuration
const TAURI_STORE_FILE_SYSTEM_CONFIG: &str = "file_system_config";
const TAURI_STORE_KEY_SEARCH_BY: &str = "search_by";
const TAURI_STORE_KEY_SEARCH_PATHS: &str = "search_paths";
const TAURI_STORE_KEY_EXCLUDE_PATHS: &str = "exclude_paths";
const TAURI_STORE_KEY_FILE_TYPES: &str = "file_types";
static HOME_DIR: LazyLock<String> = LazyLock::new(|| {
let os_string = dirs::home_dir()
.expect("$HOME should be set")
.into_os_string();
os_string
.into_string()
.expect("User home directory should be encoded with UTF-8")
});
#[derive(Debug, Clone, Serialize, Deserialize, Copy)]
pub enum SearchBy {
Name,
NameAndContents,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileSearchConfig {
pub search_paths: Vec<String>,
pub exclude_paths: Vec<String>,
pub file_types: Vec<String>,
pub search_by: SearchBy,
}
impl Default for FileSearchConfig {
fn default() -> Self {
Self {
search_paths: vec![
format!("{}/Documents", HOME_DIR.as_str()),
format!("{}/Desktop", HOME_DIR.as_str()),
format!("{}/Downloads", HOME_DIR.as_str()),
],
exclude_paths: Vec::new(),
file_types: Vec::new(),
search_by: SearchBy::Name,
}
}
}
impl FileSearchConfig {
pub(crate) fn get() -> Self {
let tauri_app_handle = crate::GLOBAL_TAURI_APP_HANDLE
.get()
.expect("global tauri app handle not set");
let store = tauri_app_handle
.store(TAURI_STORE_FILE_SYSTEM_CONFIG)
.unwrap_or_else(|e| {
panic!(
"store [{}] not found/loaded, error [{}]",
TAURI_STORE_FILE_SYSTEM_CONFIG, e
)
});
// Default value, will be used when specific config entries are not set
let default_config = FileSearchConfig::default();
let search_paths = {
if let Some(search_paths) = store.get(TAURI_STORE_KEY_SEARCH_PATHS) {
match search_paths {
Value::Array(arr) => {
let mut vec = Vec::with_capacity(arr.len());
for v in arr {
match v {
Value::String(s) => vec.push(s),
other => panic!(
"Expected all elements of 'search_paths' to be strings, but found: {:?}",
other
),
}
}
vec
}
other => panic!(
"Expected 'search_paths' to be an array of strings in the file system config store, but got: {:?}",
other
),
}
} else {
store.set(
TAURI_STORE_KEY_SEARCH_PATHS,
default_config.search_paths.as_slice(),
);
default_config.search_paths
}
};
let exclude_paths = {
if let Some(exclude_paths) = store.get(TAURI_STORE_KEY_EXCLUDE_PATHS) {
match exclude_paths {
Value::Array(arr) => {
let mut vec = Vec::with_capacity(arr.len());
for v in arr {
match v {
Value::String(s) => vec.push(s),
other => panic!(
"Expected all elements of 'exclude_paths' to be strings, but found: {:?}",
other
),
}
}
vec
}
other => panic!(
"Expected 'exclude_paths' to be an array of strings in the file system config store, but got: {:?}",
other
),
}
} else {
store.set(
TAURI_STORE_KEY_EXCLUDE_PATHS,
default_config.exclude_paths.as_slice(),
);
default_config.exclude_paths
}
};
let file_types = {
if let Some(file_types) = store.get(TAURI_STORE_KEY_FILE_TYPES) {
match file_types {
Value::Array(arr) => {
let mut vec = Vec::with_capacity(arr.len());
for v in arr {
match v {
Value::String(s) => vec.push(s),
other => panic!(
"Expected all elements of 'file_types' to be strings, but found: {:?}",
other
),
}
}
vec
}
other => panic!(
"Expected 'file_types' to be an array of strings in the file system config store, but got: {:?}",
other
),
}
} else {
store.set(
TAURI_STORE_KEY_FILE_TYPES,
default_config.file_types.as_slice(),
);
default_config.file_types
}
};
let search_by = {
if let Some(search_by) = store.get(TAURI_STORE_KEY_SEARCH_BY) {
serde_json::from_value(search_by.clone()).unwrap_or_else(|e| {
panic!(
"Failed to deserialize 'search_by' from file system config store. Invalid JSON: {:?}, error: {}",
search_by, e
)
})
} else {
store.set(
TAURI_STORE_KEY_SEARCH_BY,
serde_json::to_value(default_config.search_by).unwrap(),
);
default_config.search_by
}
};
Self {
search_by,
search_paths,
exclude_paths,
file_types,
}
}
}
// Tauri commands for managing file system configuration
#[tauri::command]
pub async fn get_file_system_config() -> FileSearchConfig {
FileSearchConfig::get()
}
#[tauri::command]
pub async fn set_file_system_config(config: FileSearchConfig) -> Result<(), String> {
let tauri_app_handle = crate::GLOBAL_TAURI_APP_HANDLE
.get()
.expect("global tauri app handle not set");
let store = tauri_app_handle
.store(TAURI_STORE_FILE_SYSTEM_CONFIG)
.map_err(|e| e.to_string())?;
store.set(TAURI_STORE_KEY_SEARCH_PATHS, config.search_paths);
store.set(TAURI_STORE_KEY_EXCLUDE_PATHS, config.exclude_paths);
store.set(TAURI_STORE_KEY_FILE_TYPES, config.file_types);
store.set(
TAURI_STORE_KEY_SEARCH_BY,
serde_json::to_value(config.search_by).unwrap(),
);
Ok(())
}

View File

@@ -0,0 +1,189 @@
use super::super::config::FileSearchConfig;
use super::super::config::SearchBy;
use super::super::EXTENSION_ID;
use crate::common::{
document::{DataSourceReference, Document},
};
use crate::extension::OnOpened;
use crate::extension::LOCAL_QUERY_SOURCE_TYPE;
use crate::util::file::get_file_icon;
use futures::stream::Stream;
use futures::stream::StreamExt;
use std::os::fd::OwnedFd;
use std::path::Path;
use tokio::io::AsyncBufReadExt;
use tokio::io::BufReader;
use tokio::process::Child;
use tokio::process::Command;
use tokio_stream::wrappers::LinesStream;
/// `mdfind` won't return scores, we use this score for all the documents.
const SCORE: f64 = 1.0;
pub(crate) async fn hits(
query_string: &str,
from: usize,
size: usize,
config: &FileSearchConfig,
) -> Result<Vec<(Document, f64)>, String> {
let (mut iter, mut mdfind_child_process) =
execute_mdfind_query(&query_string, from, size, &config)?;
// Convert results to documents
let mut hits: Vec<(Document, f64)> = Vec::new();
while let Some(res_file_path) = iter.next().await {
let file_path =
res_file_path.map_err(|io_err| io_err.to_string())?;
let icon = get_file_icon(file_path.clone()).await;
let file_path_of_type_path = camino::Utf8Path::new(&file_path);
let r#where = file_path_of_type_path
.parent()
.unwrap_or_else(|| {
panic!(
"expect path [{}] to have a parent, but it does not",
file_path
);
})
.to_string();
let file_name = file_path_of_type_path.file_name().unwrap_or_else(|| {
panic!(
"expect path [{}] to have a file name, but it does not",
file_path
);
});
let on_opened = OnOpened::Document {
url: file_path.clone(),
};
let doc = Document {
id: file_path.clone(),
title: Some(file_name.to_string()),
source: Some(DataSourceReference {
r#type: Some(LOCAL_QUERY_SOURCE_TYPE.into()),
name: Some(EXTENSION_ID.into()),
id: Some(EXTENSION_ID.into()),
icon: Some(String::from("font_Filesearch")),
}),
category: Some(r#where),
on_opened: Some(on_opened),
url: Some(file_path),
icon: Some(icon.to_string()),
..Default::default()
};
hits.push((doc, SCORE));
}
// Kill the mdfind process once we get the needed results to prevent zombie
// processes.
mdfind_child_process
.kill()
.await
.map_err(|e| format!("{:?}", e))?;
Ok(hits)
}
/// Return an array containing the `mdfind` command and its arguments.
fn build_mdfind_query(query_string: &str, config: &FileSearchConfig) -> Vec<String> {
let mut args = vec!["mdfind".to_string()];
match config.search_by {
SearchBy::Name => {
args.push(format!("kMDItemFSName == '*{}*'", query_string));
}
SearchBy::NameAndContents => {
args.push(format!(
"kMDItemFSName == '*{}*' || kMDItemTextContent == '{}'",
query_string, query_string
));
}
}
// Add search paths using -onlyin
for path in &config.search_paths {
if Path::new(path).exists() {
args.extend_from_slice(&["-onlyin".to_string(), path.to_string()]);
}
}
args
}
/// Spawn the `mdfind` child process and return an async iterator over its output,
/// allowing us to collect the results asynchronously.
///
/// # Return value:
///
/// * impl Stream: an async iterator that will yield the matched files
/// * Child: The handle to the mdfind process, we need to kill it once we
/// collect all the results to avoid zombie processes.
fn execute_mdfind_query(
query_string: &str,
from: usize,
size: usize,
config: &FileSearchConfig,
) -> Result<(impl Stream<Item = std::io::Result<String>>, Child), String> {
let args = build_mdfind_query(query_string, &config);
let (rx, tx) = std::io::pipe().unwrap();
let rx_owned = OwnedFd::from(rx);
let async_rx = tokio::net::unix::pipe::Receiver::from_owned_fd(rx_owned).unwrap();
let buffered_rx = BufReader::new(async_rx);
let lines = LinesStream::new(buffered_rx.lines());
let child = Command::new(&args[0])
.args(&args[1..])
.stdout(tx)
.stderr(std::process::Stdio::null())
.spawn()
.map_err(|e| format!("Failed to spawn mdfind: {}", e))?;
let config_clone = config.clone();
let iter = lines
.filter(move |res_path| {
std::future::ready({
match res_path {
Ok(path) => !should_be_filtered_out(&config_clone, path),
Err(_) => {
// Don't filter out Err() values
true
}
}
})
})
.skip(from)
.take(size);
Ok((iter, child))
}
/// If `file_path` should be removed from the search results given the filter
/// conditions specified in `config`.
fn should_be_filtered_out(config: &FileSearchConfig, file_path: &str) -> bool {
let is_excluded = config
.exclude_paths
.iter()
.any(|exclude_path| file_path.starts_with(exclude_path));
if is_excluded {
return true;
}
let matches_file_type = if config.file_types.is_empty() {
true
} else {
let path_obj = camino::Utf8Path::new(&file_path);
if let Some(extension) = path_obj.extension() {
config
.file_types
.iter()
.any(|file_type| file_type == extension)
} else {
// `config.file_types` is not empty, then the search results
// should have extensions.
false
}
};
!matches_file_type
}

View File

@@ -0,0 +1,10 @@
#[cfg(target_os = "macos")]
mod macos;
#[cfg(target_os = "windows")]
mod windows;
// `hits()` function is platform-specific, export the corresponding impl.
#[cfg(target_os = "macos")]
pub(crate) use macos::hits;
#[cfg(target_os = "windows")]
pub(crate) use windows::hits;

View File

@@ -0,0 +1,630 @@
//! # Credits
//!
//! https://github.com/IRONAGE-Park/rag-sample/blob/3f0ad8c8012026cd3a7e453d08f041609426cb91/src/native/windows.rs
//! is the starting point of this implementation.
use super::super::config::FileSearchConfig;
use super::super::config::SearchBy;
use super::super::EXTENSION_ID;
use crate::common::document::{DataSourceReference, Document};
use crate::extension::OnOpened;
use crate::extension::LOCAL_QUERY_SOURCE_TYPE;
use crate::util::file::get_file_icon;
use windows::{
core::{w, IUnknown, Interface, GUID, PWSTR},
Win32::System::{
Com::{CoCreateInstance, CLSCTX_INPROC_SERVER},
Ole::{OleInitialize, OleUninitialize},
Search::{
IAccessor, ICommand, ICommandText, IDBCreateCommand, IDBCreateSession, IDBInitialize,
IDataInitialize, IRowset, DBACCESSOR_ROWDATA, DBBINDING, DBMEMOWNER_CLIENTOWNED,
DBPARAMIO_NOTPARAM, DBPART_VALUE, DBTYPE_WSTR, DB_NULL_HCHAPTER, HACCESSOR,
MSDAINITIALIZE,
},
},
};
/// Owned version of `PWSTR` that holds the heap memory.
///
/// Use `as_pwstr()` to convert it to a raw pointer.
struct PwStrOwned(Vec<u16>);
impl PwStrOwned {
/// # SAFETY
///
/// The returned `PWSTR` is basically a raw pointer, it is only valid within the
/// lifetime of `PwStrOwned`.
unsafe fn as_pwstr(&mut self) -> PWSTR {
let raw_ptr = self.0.as_mut_ptr();
PWSTR::from_raw(raw_ptr)
}
}
/// Construct `PwStrOwned` from any `str`.
impl<S: AsRef<str> + ?Sized> From<&S> for PwStrOwned {
fn from(value: &S) -> Self {
let mut utf16_bytes = value.as_ref().encode_utf16().collect::<Vec<u16>>();
utf16_bytes.push(0); // the tailing NULL
PwStrOwned(utf16_bytes)
}
}
/// Helper function to construct the Windows Search SQL.
///
/// Paging is not natively supported by windows Search SQL, it only supports `size`
/// via the `TOP` keyword ("SELECT TOP {n} {columns}"). The SQL returned by this
/// function will have `{n}` set to `from + size`, then we will manually implement
/// paging.
fn query_sql(query_string: &str, from: usize, size: usize, config: &FileSearchConfig) -> String {
let top_n = from
.checked_add(size)
.expect("[from + size] cannot fit into an [usize]");
// System.ItemUrl is a column that contains the file path
// example: "file:C:/Users/desktop.ini"
//
// System.Search.Rank is the relevance score
let mut sql = format!(
"SELECT TOP {} System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE",
top_n
);
// Use debug print to escape the newline character, which cannot be handled by Windows Search.
let query_string_debug_print = format!("{:?}", query_string);
// Debug print will be double quoted, we need to trim them.
let query_string_debug_print_len = query_string_debug_print.len();
let query_string = &query_string_debug_print[1..(query_string_debug_print_len - 1)];
let search_by_predicate = match config.search_by {
SearchBy::Name => {
// `contains(System.FileName, '{query_string}')` would be faster
// because it uses inverted index, but that's not what we want
// due to the limitation of tokenization. For example, suppose "Coco AI.rs"
// will be tokenized to `["Coco", "AI", "rs"]`, then if users search
// via `Co`, this file won't be returned because term `Co` does not
// exist in the index.
//
// So we use wildcard instead even though it is slower.
format!("(System.FileName LIKE '%{query_string}%')")
}
SearchBy::NameAndContents => {
// Windows File Search does not support searching by file content.
//
// `CONTAINS('query_string')` would search all columns for `query_string`,
// this is the closest solution we have.
format!("((System.FileName LIKE '%{query_string}%') OR CONTAINS('{query_string}'))")
}
};
let search_paths_predicate: Option<String> = {
if config.search_paths.is_empty() {
None
} else {
let mut output = String::from("(");
for (idx, search_path) in config.search_paths.iter().enumerate() {
if idx != 0 {
output.push_str(" OR ");
}
output.push_str("SCOPE = 'file:");
output.push_str(&search_path);
output.push('\'');
}
output.push(')');
Some(output)
}
};
let exclude_paths_predicate: Option<String> = {
if config.exclude_paths.is_empty() {
None
} else {
let mut output = String::from("(");
for (idx, exclude_path) in config.exclude_paths.iter().enumerate() {
if idx != 0 {
output.push_str(" AND ");
}
output.push_str("(NOT SCOPE = 'file:");
output.push_str(&exclude_path);
output.push('\'');
output.push(')');
}
output.push(')');
Some(output)
}
};
let file_types_predicate: Option<String> = {
if config.file_types.is_empty() {
None
} else {
let mut output = String::from("(");
for (idx, file_type) in config.file_types.iter().enumerate() {
if idx != 0 {
output.push_str(" OR ");
}
// NOTE that this column contains a starting dot
output.push_str("System.FileExtension = '.");
output.push_str(&file_type);
output.push('\'');
}
output.push(')');
Some(output)
}
};
sql.push(' ');
sql.push_str(search_by_predicate.as_str());
if let Some(search_paths_predicate) = search_paths_predicate {
sql.push_str(" AND ");
sql.push_str(search_paths_predicate.as_str());
}
if let Some(exclude_paths_predicate) = exclude_paths_predicate {
sql.push_str(" AND ");
sql.push_str(exclude_paths_predicate.as_str());
}
if let Some(file_types_predicate) = file_types_predicate {
sql.push_str(" AND ");
sql.push_str(file_types_predicate.as_str());
}
sql
}
/// Default GUID for Search.CollatorDSO.1
const DBGUID_DEFAULT: GUID = GUID {
data1: 0xc8b521fb,
data2: 0x5cf3,
data3: 0x11ce,
data4: [0xad, 0xe5, 0x00, 0xaa, 0x00, 0x44, 0x77, 0x3d],
};
unsafe fn create_accessor_handle(accessor: &IAccessor, index: usize) -> Result<HACCESSOR, String> {
let bindings = DBBINDING {
iOrdinal: index,
obValue: 0,
obStatus: 0,
obLength: 0,
dwPart: DBPART_VALUE.0 as u32,
dwMemOwner: DBMEMOWNER_CLIENTOWNED.0 as u32,
eParamIO: DBPARAMIO_NOTPARAM.0 as u32,
cbMaxLen: 512,
dwFlags: 0,
wType: DBTYPE_WSTR.0 as u16,
bPrecision: 0,
bScale: 0,
..Default::default()
};
let mut status = 0;
let mut accessor_handle = HACCESSOR::default();
unsafe {
accessor
.CreateAccessor(
DBACCESSOR_ROWDATA.0 as u32,
1,
&bindings,
0,
&mut accessor_handle,
Some(&mut status),
)
.map_err(|e| e.to_string())?;
}
Ok(accessor_handle)
}
fn create_db_initialize() -> Result<IDBInitialize, String> {
unsafe {
let data_init: IDataInitialize =
CoCreateInstance(&MSDAINITIALIZE, None, CLSCTX_INPROC_SERVER)
.map_err(|e| e.to_string())?;
let mut unknown: Option<IUnknown> = None;
data_init
.GetDataSource(
None,
CLSCTX_INPROC_SERVER.0,
w!("provider=Search.CollatorDSO.1;EXTENDED PROPERTIES=\"Application=Windows\""),
&IDBInitialize::IID,
&mut unknown as *mut _ as *mut _,
)
.map_err(|e| e.to_string())?;
Ok(unknown.unwrap().cast().map_err(|e| e.to_string())?)
}
}
fn create_command(db_init: IDBInitialize) -> Result<ICommandText, String> {
unsafe {
let db_create_session: IDBCreateSession = db_init.cast().map_err(|e| e.to_string())?;
let session: IUnknown = db_create_session
.CreateSession(None, &IUnknown::IID)
.map_err(|e| e.to_string())?;
let db_create_command: IDBCreateCommand = session.cast().map_err(|e| e.to_string())?;
Ok(db_create_command
.CreateCommand(None, &ICommand::IID)
.map_err(|e| e.to_string())?
.cast()
.map_err(|e| e.to_string())?)
}
}
fn execute_windows_search_sql(sql_query: &str) -> Result<Vec<(String, String)>, String> {
unsafe {
let mut pwstr_owned_sql = PwStrOwned::from(sql_query);
// SAFETY: pwstr_owned_sql will live for the whole lifetime of this function.
let sql_query = pwstr_owned_sql.as_pwstr();
let db_init = create_db_initialize()?;
db_init.Initialize().map_err(|e| e.to_string())?;
let command = create_command(db_init)?;
// Set the command text
command
.SetCommandText(&DBGUID_DEFAULT, sql_query)
.map_err(|e| e.to_string())?;
// Execute the command
let mut rowset: Option<IRowset> = None;
command
.Execute(
None,
&IRowset::IID,
None,
None,
Some(&mut rowset as *mut _ as *mut _),
)
.map_err(|e| e.to_string())?;
let rowset = rowset.ok_or_else(|| {
format!(
"No rowset returned for query: {}",
// SAFETY: the raw pointer is not dangling
sql_query
.to_string()
.expect("the conversion should work as `sql_query` was created from a String",)
)
})?;
let accessor: IAccessor = rowset
.cast()
.map_err(|e| format!("Failed to cast to IAccessor: {}", e.to_string()))?;
let mut output = Vec::new();
let mut count = 0;
loop {
let mut rows_fetched = 0;
let mut row_handles = [std::ptr::null_mut(); 1];
let result = rowset.GetNextRows(
DB_NULL_HCHAPTER as usize,
0,
&mut rows_fetched,
&mut row_handles,
);
if result.is_err() {
break;
}
if rows_fetched == 0 {
break;
}
let mut data = Vec::new();
for i in 0..2 {
let mut item_name = [0u16; 512];
let accessor_handle = create_accessor_handle(&accessor, i + 1)?;
rowset
.GetData(
*row_handles[0],
accessor_handle,
item_name.as_mut_ptr() as *mut _,
)
.map_err(|e| {
format!(
"Failed to get data at count {}, index {}: {}",
count,
i,
e.to_string()
)
})?;
let name = String::from_utf16_lossy(&item_name);
// Remove null characters
data.push(name.trim_end_matches('\u{0000}').to_string());
accessor
.ReleaseAccessor(accessor_handle, None)
.map_err(|e| {
format!(
"Failed to release accessor at count {}, index {}: {}",
count,
i,
e.to_string()
)
})?;
}
output.push((data[0].clone(), data[1].clone()));
count += 1;
rowset
.ReleaseRows(
1,
row_handles[0],
std::ptr::null_mut(),
std::ptr::null_mut(),
std::ptr::null_mut(),
)
.map_err(|e| {
format!(
"Failed to release rows at count {}: {}",
count,
e.to_string()
)
})?;
}
Ok(output)
}
}
pub(crate) async fn hits(
query_string: &str,
from: usize,
size: usize,
config: &FileSearchConfig,
) -> Result<Vec<(Document, f64)>, String> {
let sql = query_sql(query_string, from, size, config);
unsafe { OleInitialize(None).map_err(|e| e.to_string())? };
let result = execute_windows_search_sql(&sql)?;
unsafe { OleUninitialize() };
// .take(size) is not needed as `result` will contain `from+size` files at most
let result_with_paging = result.into_iter().skip(from);
// result_with_paging won't contain more than `size` entries
let mut hits = Vec::with_capacity(size);
const ITEM_URL_PREFIX: &str = "file:";
const ITEM_URL_PREFIX_LEN: usize = ITEM_URL_PREFIX.len();
for (item_url, score_str) in result_with_paging {
// path returned from Windows Search contains a prefix, we need to trim it.
//
// "file:C:/Users/desktop.ini" => "C:/Users/desktop.ini"
let file_path = &item_url[ITEM_URL_PREFIX_LEN..];
let icon = get_file_icon(file_path.to_string()).await;
let file_path_of_type_path = camino::Utf8Path::new(&file_path);
let r#where = file_path_of_type_path
.parent()
.unwrap_or_else(|| {
panic!(
"expect path [{}] to have a parent, but it does not",
file_path
);
})
.to_string();
let file_name = file_path_of_type_path.file_name().unwrap_or_else(|| {
panic!(
"expect path [{}] to have a file name, but it does not",
file_path
);
});
let on_opened = OnOpened::Document {
url: file_path.to_string(),
};
let doc = Document {
id: file_path.to_string(),
title: Some(file_name.to_string()),
source: Some(DataSourceReference {
r#type: Some(LOCAL_QUERY_SOURCE_TYPE.into()),
name: Some(EXTENSION_ID.into()),
id: Some(EXTENSION_ID.into()),
icon: Some(String::from("font_Filesearch")),
}),
category: Some(r#where),
on_opened: Some(on_opened),
url: Some(file_path.into()),
icon: Some(icon.to_string()),
..Default::default()
};
let score: f64 = score_str.parse().expect(
"System.Search.Rank should be in range [0, 1000], which should be valid for [f64]",
);
hits.push((doc, score));
}
Ok(hits)
}
// Skip these tests in our CI, they fail with the following error
// "SQL is invalid: "0x80041820""
//
// I have no idea about the underlying root cause
#[cfg(all(test, not(ci)))]
mod test {
use super::*;
/// Helper function for ensuring `sql` is valid SQL by actually executing it.
fn ensure_it_is_valid_sql(sql: &str) {
unsafe { OleInitialize(None).unwrap() };
execute_windows_search_sql(&sql).expect("SQL is invalid");
unsafe { OleUninitialize() };
}
#[test]
fn test_query_sql_empty_config_search_by_name() {
let config = FileSearchConfig {
search_paths: Vec::new(),
exclude_paths: Vec::new(),
file_types: Vec::new(),
search_by: SearchBy::Name,
};
let sql = query_sql("coco", 0, 10, &config);
assert_eq!(
sql,
"SELECT TOP 10 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%coco%')"
);
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_empty_config_search_by_name_and_content() {
let config = FileSearchConfig {
search_paths: Vec::new(),
exclude_paths: Vec::new(),
file_types: Vec::new(),
search_by: SearchBy::NameAndContents,
};
let sql = query_sql("coco", 0, 10, &config);
assert_eq!(sql, "SELECT TOP 10 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE ((System.FileName LIKE '%coco%') OR CONTAINS('coco'))");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_with_search_paths() {
let config = FileSearchConfig {
search_paths: vec!["C:/Users/".into()],
exclude_paths: Vec::new(),
file_types: Vec::new(),
search_by: SearchBy::Name,
};
let sql = query_sql("coco", 0, 10, &config);
assert_eq!(sql, "SELECT TOP 10 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%coco%') AND (SCOPE = 'file:C:/Users/')");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_with_multiple_search_paths() {
let config = FileSearchConfig {
search_paths: vec![
"C:/Users/".into(),
"D:/Projects/".into(),
"E:/Documents/".into(),
],
exclude_paths: Vec::new(),
file_types: Vec::new(),
search_by: SearchBy::Name,
};
let sql = query_sql("test", 0, 5, &config);
assert_eq!(sql, "SELECT TOP 5 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%test%') AND (SCOPE = 'file:C:/Users/' OR SCOPE = 'file:D:/Projects/' OR SCOPE = 'file:E:/Documents/')");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_with_exclude_paths() {
let config = FileSearchConfig {
search_paths: Vec::new(),
exclude_paths: vec!["C:/Windows/".into()],
file_types: Vec::new(),
search_by: SearchBy::Name,
};
let sql = query_sql("file", 0, 20, &config);
assert_eq!(sql, "SELECT TOP 20 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%file%') AND ((NOT SCOPE = 'file:C:/Windows/'))");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_with_multiple_exclude_paths() {
let config = FileSearchConfig {
search_paths: Vec::new(),
exclude_paths: vec!["C:/Windows/".into(), "C:/System/".into(), "C:/Temp/".into()],
file_types: Vec::new(),
search_by: SearchBy::Name,
};
let sql = query_sql("data", 5, 15, &config);
assert_eq!(sql, "SELECT TOP 20 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%data%') AND ((NOT SCOPE = 'file:C:/Windows/') AND (NOT SCOPE = 'file:C:/System/') AND (NOT SCOPE = 'file:C:/Temp/'))");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_with_file_types() {
let config = FileSearchConfig {
search_paths: Vec::new(),
exclude_paths: Vec::new(),
file_types: vec!["txt".into()],
search_by: SearchBy::Name,
};
let sql = query_sql("readme", 0, 10, &config);
assert_eq!(sql, "SELECT TOP 10 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%readme%') AND (System.FileExtension = '.txt')");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_with_multiple_file_types() {
let config = FileSearchConfig {
search_paths: Vec::new(),
exclude_paths: Vec::new(),
file_types: vec!["rs".into(), "toml".into(), "md".into(), "json".into()],
search_by: SearchBy::Name,
};
let sql = query_sql("config", 0, 50, &config);
assert_eq!(sql, "SELECT TOP 50 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%config%') AND (System.FileExtension = '.rs' OR System.FileExtension = '.toml' OR System.FileExtension = '.md' OR System.FileExtension = '.json')");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_all_fields_combined() {
let config = FileSearchConfig {
search_paths: vec!["C:/Projects/".into(), "D:/Code/".into()],
exclude_paths: vec!["C:/Projects/temp/".into()],
file_types: vec!["rs".into(), "ts".into()],
search_by: SearchBy::Name,
};
let sql = query_sql("main", 10, 25, &config);
assert_eq!(sql, "SELECT TOP 35 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%main%') AND (SCOPE = 'file:C:/Projects/' OR SCOPE = 'file:D:/Code/') AND ((NOT SCOPE = 'file:C:/Projects/temp/')) AND (System.FileExtension = '.rs' OR System.FileExtension = '.ts')");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_with_special_characters() {
let config = FileSearchConfig {
search_paths: vec!["C:/Users/John Doe/".into()],
exclude_paths: Vec::new(),
file_types: vec!["c++".into()],
search_by: SearchBy::Name,
};
let sql = query_sql("hello-world", 0, 10, &config);
assert_eq!(sql, "SELECT TOP 10 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%hello-world%') AND (SCOPE = 'file:C:/Users/John Doe/') AND (System.FileExtension = '.c++')");
ensure_it_is_valid_sql(&sql);
}
#[test]
fn test_query_sql_edge_case_large_offset() {
let config = FileSearchConfig {
search_paths: Vec::new(),
exclude_paths: Vec::new(),
file_types: Vec::new(),
search_by: SearchBy::Name,
};
let sql = query_sql("test", 100, 50, &config);
assert_eq!(
sql,
"SELECT TOP 150 System.ItemUrl, System.Search.Rank FROM SystemIndex WHERE (System.FileName LIKE '%test%')"
);
ensure_it_is_valid_sql(&sql);
}
}

View File

@@ -0,0 +1,90 @@
pub(crate) mod config;
pub(crate) mod implementation;
use super::super::LOCAL_QUERY_SOURCE_TYPE;
use crate::common::{
error::SearchError,
search::{QueryResponse, QuerySource, SearchQuery},
traits::SearchSource,
};
use async_trait::async_trait;
use config::FileSearchConfig;
use hostname;
pub(crate) const EXTENSION_ID: &str = "File Search";
/// JSON file for this extension.
pub(crate) const PLUGIN_JSON_FILE: &str = r#"
{
"id": "File Search",
"name": "File Search",
"platforms": ["macos", "windows"],
"description": "Search files on your system",
"icon": "font_Filesearch",
"type": "extension"
}
"#;
pub struct FileSearchExtensionSearchSource;
#[async_trait]
impl SearchSource for FileSearchExtensionSearchSource {
fn get_type(&self) -> QuerySource {
QuerySource {
r#type: LOCAL_QUERY_SOURCE_TYPE.into(),
name: hostname::get()
.unwrap_or(EXTENSION_ID.into())
.to_string_lossy()
.into(),
id: EXTENSION_ID.into(),
}
}
async fn search(&self, query: SearchQuery) -> Result<QueryResponse, SearchError> {
let Some(query_string) = query.query_strings.get("query") else {
return Ok(QueryResponse {
source: self.get_type(),
hits: Vec::new(),
total_hits: 0,
});
};
let from = usize::try_from(query.from).expect("from too big");
let size = usize::try_from(query.size).expect("size too big");
let query_string = query_string.trim();
if query_string.is_empty() {
return Ok(QueryResponse {
source: self.get_type(),
hits: Vec::new(),
total_hits: 0,
});
}
// Get configuration from tauri store
let config = FileSearchConfig::get();
// If search paths are empty, then the hit should be empty.
//
// Without this, empty search paths will result in a mdfind that has no `-onlyin`
// option, which will in turn query the whole disk volume.
if config.search_paths.is_empty() {
return Ok(QueryResponse {
source: self.get_type(),
hits: Vec::new(),
total_hits: 0,
});
}
// Execute search in a blocking task
let query_source = self.get_type();
let hits = implementation::hits(&query_string, from, size, &config).await.map_err(SearchError::InternalError)?;
let total_hits = hits.len();
Ok(QueryResponse {
source: query_source,
hits,
total_hits,
})
}
}

View File

@@ -3,7 +3,7 @@
pub mod ai_overview;
pub mod application;
pub mod calculator;
#[cfg(target_os = "macos")]
#[cfg(any(target_os = "macos", target_os = "windows"))]
pub mod file_search;
pub mod pizza_engine_runtime;
pub mod quick_ai_access;
@@ -175,7 +175,7 @@ pub(crate) async fn list_built_in_extensions() -> Result<Vec<Extension>, String>
);
cfg_if::cfg_if! {
if #[cfg(target_os = "macos")] {
if #[cfg(any(target_os = "macos", target_os = "windows"))] {
built_in_extensions.push(
load_built_in_extension(
dir,
@@ -214,9 +214,9 @@ pub(super) async fn init_built_in_extension<R: Runtime>(
}
cfg_if::cfg_if! {
if #[cfg(target_os = "macos")] {
if #[cfg(any(target_os = "macos", target_os = "windows"))] {
if extension.id == file_search::EXTENSION_ID {
let file_system_search = file_search::FileSearchExtensionSearchSource::new(1f64);
let file_system_search = file_search::FileSearchExtensionSearchSource;
search_source_registry
.register_source(file_system_search)
.await;
@@ -303,18 +303,18 @@ pub(crate) async fn enable_built_in_extension(
}
cfg_if::cfg_if! {
if #[cfg(target_os = "macos")] {
if #[cfg(any(target_os = "macos", target_os = "windows"))] {
if bundle_id.extension_id == file_search::EXTENSION_ID {
let file_system_search = file_search::FileSearchExtensionSearchSource::new(1f64);
search_source_registry_tauri_state
.register_source(file_system_search)
.await;
alter_extension_json_file(
&BUILT_IN_EXTENSION_DIRECTORY.as_path(),
bundle_id,
update_extension,
)?;
return Ok(());
let file_system_search = file_search::FileSearchExtensionSearchSource;
search_source_registry_tauri_state
.register_source(file_system_search)
.await;
alter_extension_json_file(
&BUILT_IN_EXTENSION_DIRECTORY.as_path(),
bundle_id,
update_extension,
)?;
return Ok(());
}
}
}
@@ -393,7 +393,7 @@ pub(crate) async fn disable_built_in_extension(
}
cfg_if::cfg_if! {
if #[cfg(target_os = "macos")] {
if #[cfg(any(target_os = "macos", target_os = "windows"))] {
if bundle_id.extension_id == file_search::EXTENSION_ID {
search_source_registry_tauri_state
.remove_source(bundle_id.extension_id)
@@ -538,7 +538,7 @@ pub(crate) async fn is_built_in_extension_enabled(
}
cfg_if::cfg_if! {
if #[cfg(target_os = "macos")] {
if #[cfg(any(target_os = "macos", target_os = "windows"))] {
if bundle_id.extension_id == file_search::EXTENSION_ID
&& bundle_id.sub_extension_id.is_none()
{

View File

@@ -165,10 +165,10 @@ pub fn run() {
settings::get_allow_self_signature,
assistant::ask_ai,
crate::common::document::open,
#[cfg(target_os = "macos")]
extension::built_in::file_search::get_file_system_config,
#[cfg(target_os = "macos")]
extension::built_in::file_search::set_file_system_config,
#[cfg(any(target_os = "macos", target_os = "windows"))]
extension::built_in::file_search::config::get_file_system_config,
#[cfg(any(target_os = "macos", target_os = "windows"))]
extension::built_in::file_search::config::set_file_system_config,
server::synthesize::synthesize,
util::file::get_file_icon,
])

View File

@@ -28,6 +28,7 @@ pub(crate) enum FileType {
Sql,
Csv,
Javascript,
Lnk,
Typescript,
Python,
Java,
@@ -110,6 +111,7 @@ async fn get_file_type(path: &str) -> FileType {
"indd" => FileType::AdobeId,
"svg" => FileType::Svg,
"epub" => FileType::Epub,
"lnk" => FileType::Lnk,
_ => FileType::Unknown,
}
}
@@ -144,6 +146,7 @@ fn type_to_icon(ty: FileType) -> &'static str {
FileType::Sql => "font_file_sql",
FileType::Csv => "font_file_csv",
FileType::Javascript => "font_file_javascript",
FileType::Lnk => "font_file_lnk",
FileType::Typescript => "font_file_typescript",
FileType::Python => "font_file_python",
FileType::Java => "font_file_java",