use lazy_static::lazy_static; use regex::Regex; use serde_yaml::Value; use sha2::{Digest, Sha256}; use std::collections::HashMap; use std::path::Path; lazy_static! { static ref MATRIX_PATTERN: Regex = Regex::new(r"\$\{\{\s*matrix\.([a-zA-Z0-9_]+)\s*\}\}").unwrap(); static ref HASH_FILES_PATTERN: Regex = Regex::new(r"\$\{\{\s*hashFiles\(([^)]+)\)\s*\}\}").unwrap(); } /// Preprocesses a command string to replace GitHub-style matrix variable references /// with their values from the environment pub fn preprocess_command(command: &str, matrix_values: &HashMap) -> String { // Replace matrix references like ${{ matrix.os }} with their values let result = MATRIX_PATTERN.replace_all(command, |caps: ®ex::Captures| { let var_name = &caps[1]; // Get the value from matrix context if let Some(value) = matrix_values.get(var_name) { // Convert value to string match value { Value::String(s) => s.clone(), Value::Number(n) => n.to_string(), Value::Bool(b) => b.to_string(), _ => format!("\\${{{{ matrix.{} }}}}", var_name), // Escape $ for shell } } else { // Keep original if not found but escape $ to prevent shell errors format!("\\${{{{ matrix.{} }}}}", var_name) } }); result.into_owned() } /// Apply variable substitution to step run commands pub fn process_step_run(run: &str, matrix_combination: &Option>) -> String { if let Some(matrix) = matrix_combination { preprocess_command(run, matrix) } else { // Escape $ in GitHub expression syntax to prevent shell interpretation MATRIX_PATTERN .replace_all(run, |caps: ®ex::Captures| { let var_name = &caps[1]; format!("\\${{{{ matrix.{} }}}}", var_name) }) .to_string() } } /// Replace `${{ hashFiles(...) }}` expressions with the SHA-256 hash of matched files. /// /// Accepts one or more comma-separated, quoted glob patterns. Files are matched /// relative to `workspace`, sorted lexicographically, and hashed in order to /// produce a deterministic digest — matching GitHub Actions behavior. /// /// Returns `Err` if any matched file cannot be read. pub fn preprocess_hash_files(text: &str, workspace: &Path) -> Result { let mut error: Option = None; let result = HASH_FILES_PATTERN .replace_all(text, |caps: ®ex::Captures| { if error.is_some() { return String::new(); } let args_raw = &caps[1]; match compute_hash_files(args_raw, workspace) { Ok(hash) => hash, Err(e) => { error = Some(e); String::new() } } }) .into_owned(); match error { Some(e) => Err(e), None => Ok(result), } } /// Compute a SHA-256 hash of the contents of all files matching the given glob patterns. /// /// `args_raw` is the raw argument string inside `hashFiles(...)`, e.g. /// `'**/package-lock.json', '**/yarn.lock'`. /// /// Returns `Ok(hash)` on success or `Err(message)` if any matched file cannot be read. fn compute_hash_files(args_raw: &str, workspace: &Path) -> Result { // Parse comma-separated, quoted patterns let patterns: Vec<&str> = args_raw .split(',') .map(|s| s.trim().trim_matches('\'').trim_matches('"')) .filter(|s| !s.is_empty()) .collect(); if patterns.is_empty() { return Ok(String::new()); } // Reject patterns containing path traversal components for pattern in &patterns { if pattern.split('/').any(|seg| seg == "..") { return Err(format!( "hashFiles: pattern '{}' contains '..' path traversal", pattern )); } } // Collect all matching files let mut matched_files = Vec::new(); for pattern in &patterns { let full_pattern = workspace.join(pattern).to_string_lossy().to_string(); if let Ok(entries) = glob::glob(&full_pattern) { for entry in entries.flatten() { if entry.is_file() { matched_files.push(entry); } } } } if matched_files.is_empty() { // GHA returns the SHA-256 of empty input when no files match return Ok(format!("{:x}", Sha256::new().finalize())); } // Sort for deterministic output (GHA sorts lexicographically) matched_files.sort(); matched_files.dedup(); // Hash all file contents (stream to avoid loading large files into memory) let mut hasher = Sha256::new(); for path in &matched_files { let mut file = std::fs::File::open(path) .map_err(|e| format!("hashFiles: could not read '{}': {}", path.display(), e))?; std::io::copy(&mut file, &mut hasher) .map_err(|e| format!("hashFiles: could not read '{}': {}", path.display(), e))?; } Ok(format!("{:x}", hasher.finalize())) } /// Apply all expression substitutions: hashFiles, matrix variables. /// /// Returns `Err` if a `hashFiles()` expression fails (e.g. unreadable file). pub fn preprocess_expressions( text: &str, workspace: &Path, matrix_combination: &Option>, ) -> Result { // First resolve hashFiles let result = preprocess_hash_files(text, workspace)?; // Then resolve matrix variables Ok(if let Some(matrix) = matrix_combination { preprocess_command(&result, matrix) } else { MATRIX_PATTERN .replace_all(&result, |caps: ®ex::Captures| { let var_name = &caps[1]; format!("\\${{{{ matrix.{} }}}}", var_name) }) .to_string() }) } #[cfg(test)] mod tests { use super::*; use std::fs; use tempfile::tempdir; #[test] fn test_preprocess_simple_matrix_vars() { let mut matrix = HashMap::new(); matrix.insert("os".to_string(), Value::String("ubuntu-latest".to_string())); matrix.insert( "node".to_string(), Value::Number(serde_yaml::Number::from(14)), ); let cmd = "echo \"Running on ${{ matrix.os }} with Node ${{ matrix.node }}\""; let processed = preprocess_command(cmd, &matrix); assert_eq!(processed, "echo \"Running on ubuntu-latest with Node 14\""); } #[test] fn test_preprocess_with_missing_vars() { let mut matrix = HashMap::new(); matrix.insert("os".to_string(), Value::String("ubuntu-latest".to_string())); let cmd = "echo \"Running on ${{ matrix.os }} with Node ${{ matrix.node }}\""; let processed = preprocess_command(cmd, &matrix); // Missing vars should be escaped assert_eq!( processed, "echo \"Running on ubuntu-latest with Node \\${{ matrix.node }}\"" ); } #[test] fn test_preprocess_preserves_other_text() { let mut matrix = HashMap::new(); matrix.insert("os".to_string(), Value::String("ubuntu-latest".to_string())); let cmd = "echo \"Starting job\" && echo \"OS: ${{ matrix.os }}\" && echo \"Done!\""; let processed = preprocess_command(cmd, &matrix); assert_eq!( processed, "echo \"Starting job\" && echo \"OS: ubuntu-latest\" && echo \"Done!\"" ); } #[test] fn test_process_without_matrix() { let cmd = "echo \"Value: ${{ matrix.value }}\""; let processed = process_step_run(cmd, &None); assert_eq!(processed, "echo \"Value: \\${{ matrix.value }}\""); } #[test] fn hash_files_single_pattern() { let dir = tempdir().unwrap(); fs::write(dir.path().join("package-lock.json"), "lock-content").unwrap(); fs::write(dir.path().join("other.txt"), "other").unwrap(); let text = "${{ hashFiles('package-lock.json') }}"; let result = preprocess_hash_files(text, dir.path()).unwrap(); assert!(!result.is_empty()); assert!(!result.contains("hashFiles")); // Hash should be 64 hex chars (SHA-256) assert_eq!(result.len(), 64); } #[test] fn hash_files_multiple_patterns() { let dir = tempdir().unwrap(); fs::write(dir.path().join("a.lock"), "aaa").unwrap(); fs::write(dir.path().join("b.json"), "bbb").unwrap(); let text = "${{ hashFiles('*.lock', '*.json') }}"; let result = preprocess_hash_files(text, dir.path()).unwrap(); assert_eq!(result.len(), 64); } #[test] fn hash_files_no_matches_returns_hash_of_empty() { let dir = tempdir().unwrap(); let text = "${{ hashFiles('nonexistent-*.xyz') }}"; let result = preprocess_hash_files(text, dir.path()).unwrap(); // GHA returns SHA-256 of empty input when no files match let expected = format!("{:x}", Sha256::new().finalize()); assert_eq!(result, expected); assert_eq!(result.len(), 64); } #[test] fn hash_files_deterministic() { let dir = tempdir().unwrap(); fs::write(dir.path().join("a.txt"), "hello").unwrap(); fs::write(dir.path().join("b.txt"), "world").unwrap(); let text = "${{ hashFiles('*.txt') }}"; let r1 = preprocess_hash_files(text, dir.path()).unwrap(); let r2 = preprocess_hash_files(text, dir.path()).unwrap(); assert_eq!(r1, r2); } #[test] fn hash_files_glob_recursive() { let dir = tempdir().unwrap(); let sub = dir.path().join("sub"); fs::create_dir(&sub).unwrap(); fs::write(sub.join("deep.lock"), "deep-content").unwrap(); let text = "${{ hashFiles('**/deep.lock') }}"; let result = preprocess_hash_files(text, dir.path()).unwrap(); assert_eq!(result.len(), 64); } #[test] fn hash_files_inline_with_other_text() { let dir = tempdir().unwrap(); fs::write(dir.path().join("Cargo.lock"), "lockfile").unwrap(); let text = "cache-key-${{ hashFiles('Cargo.lock') }}-suffix"; let result = preprocess_hash_files(text, dir.path()).unwrap(); assert!(result.starts_with("cache-key-")); assert!(result.ends_with("-suffix")); assert!(!result.contains("hashFiles")); } #[test] fn preprocess_expressions_combines_hash_and_matrix() { let dir = tempdir().unwrap(); fs::write(dir.path().join("Cargo.lock"), "lockfile").unwrap(); let mut matrix = HashMap::new(); matrix.insert("os".to_string(), Value::String("ubuntu".to_string())); let text = "${{ matrix.os }}-${{ hashFiles('Cargo.lock') }}"; let result = preprocess_expressions(text, dir.path(), &Some(matrix)).unwrap(); assert!(result.starts_with("ubuntu-")); assert!(!result.contains("hashFiles")); assert!(!result.contains("matrix")); } #[test] fn hash_files_rejects_path_traversal() { let dir = tempdir().unwrap(); fs::write(dir.path().join("legit.txt"), "content").unwrap(); let text = "${{ hashFiles('../../etc/passwd') }}"; let result = preprocess_hash_files(text, dir.path()); assert!(result.is_err()); assert!(result.unwrap_err().contains("path traversal")); } #[test] fn hash_files_rejects_mid_path_traversal() { let dir = tempdir().unwrap(); let result = compute_hash_files("'subdir/../../etc/passwd'", dir.path()); assert!(result.is_err()); assert!(result.unwrap_err().contains("path traversal")); } }