//! Boyer-Moore-Horspool fast string search implementation
//! 
//! Implements the Boyer-Moore-Horspool algorithm with:
//! - 256-character skip table optimization
//! - Multi-pattern support
//! - Memory-mapped file access
//! - Case-sensitive and case-insensitive search modes

use std::cmp::min;
use memmap2::Mmap;
use anyhow::{Result, Context};

/// Boyer-Moore-Horspool search engine for multiple patterns
///
/// Holds the byte patterns together with the skip tables precomputed for
/// them by `new`, plus the case-sensitivity mode used for comparisons.
pub struct BoyerMooreHorspool {
    /// Byte patterns to search for, in the order they were passed to `new`.
    patterns: Vec<Vec<u8>>,
    /// One 256-entry skip table per pattern; `new` builds entry `i` from
    /// `patterns[i]` via `build_skip_table`.
    skip_tables: Vec<[usize; 256]>,
    /// When `false`, bytes are compared after ASCII lowercasing.
    case_sensitive: bool,
}

impl BoyerMooreHorspool {
    /// Create a new Boyer-Moore-Horspool search engine.
    ///
    /// A skip table is precomputed for every pattern, so `skip_tables[i]`
    /// always belongs to `patterns[i]`.
    ///
    /// * `patterns` - byte patterns that must all be present for a search to succeed.
    /// * `case_sensitive` - when `false`, bytes are compared after ASCII lowercasing.
    pub fn new(patterns: Vec<Vec<u8>>, case_sensitive: bool) -> Self {
        let skip_tables = patterns
            .iter()
            .map(|pattern| build_skip_table(pattern, case_sensitive))
            .collect();

        BoyerMooreHorspool {
            patterns,
            skip_tables,
            case_sensitive,
        }
    }

    /// Search for all patterns in memory-mapped file (AND logic - all patterns must be found)
    pub fn search_memory_mapped(&self, mmap: &Mmap) -> Result<SearchResult> {
        // `Mmap` dereferences to `[u8]`, so a plain reborrow yields the mapped
        // bytes without reconstructing the slice from a raw pointer.
        self.search_memory(&mmap[..])
    }

    /// Search for all patterns in memory (AND logic - all patterns must be found)
    ///
    /// Patterns are tried in order and the search stops at the first pattern
    /// that is missing, so `pattern_positions` only holds the patterns found
    /// before that point. With the current implementation this never returns
    /// `Err`.
    pub fn search_memory(&self, data: &[u8]) -> Result<SearchResult> {
        let total_size = data.len();
        let mut matches_found = true;
        let mut first_match_positions = Vec::with_capacity(self.patterns.len());

        // Each pattern is paired with the skip table that was built for it;
        // using another pattern's table yields wrong shifts and missed matches.
        for (pattern_idx, (pattern, skip_table)) in
            self.patterns.iter().zip(&self.skip_tables).enumerate()
        {
            match self.find_first_occurrence(data, pattern, skip_table) {
                Some(pos) => {
                    first_match_positions.push((pattern_idx, pos));
                    debug!("Pattern {} found at position {}", pattern_idx, pos);
                }
                None => {
                    matches_found = false;
                    debug!("Pattern {} not found", pattern_idx);
                    break;
                }
            }
        }

        Ok(SearchResult {
            all_patterns_found: matches_found,
            pattern_positions: first_match_positions,
            total_size,
        })
    }

    /// Search for all patterns in file (AND logic - all patterns must be found)
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be opened or memory-mapped.
    pub fn search_file(&self, file_path: &std::path::Path) -> Result<SearchResult> {
        let file = std::fs::File::open(file_path)
            .with_context(|| format!("Failed to open file: {:?}", file_path))?;

        // SAFETY: the mapping is only read, and only for the duration of this
        // call. This still relies on the file not being truncated or rewritten
        // by another process while mapped, which cannot be enforced here.
        let mmap = unsafe { Mmap::map(&file) }
            .with_context(|| format!("Failed to memory-map file: {:?}", file_path))?;

        self.search_memory_mapped(&mmap)
    }

    /// Find first occurrence of pattern in data using Boyer-Moore-Horspool
    ///
    /// `skip_table` must be the table built for `pattern` with this engine's
    /// case-sensitivity setting. An empty pattern matches at offset 0.
    fn find_first_occurrence(
        &self,
        data: &[u8],
        pattern: &[u8],
        skip_table: &[usize; 256],
    ) -> Option<usize> {
        if pattern.is_empty() {
            return Some(0);
        }

        let pattern_len = pattern.len();
        if data.len() < pattern_len {
            return None;
        }

        // Last offset at which a full window still fits inside `data`.
        let last_start = data.len() - pattern_len;
        let mut i = 0;

        while i <= last_start {
            let window = &data[i..i + pattern_len];

            // Compare from end of pattern to beginning
            let is_match = window
                .iter()
                .rev()
                .zip(pattern.iter().rev())
                .all(|(&a, &b)| self.compare_bytes(a, b));
            if is_match {
                return Some(i);
            }

            // The shift is keyed on the byte aligned with the pattern's last
            // position, normalised the same way the table keys were.
            let last_byte = window[pattern_len - 1];
            let key = if self.case_sensitive {
                last_byte
            } else {
                last_byte.to_ascii_lowercase()
            };

            // A zero entry would stall the loop; advancing by one byte is
            // always safe, whereas a larger jump could step over a match.
            i += skip_table[key as usize].max(1);
        }

        None
    }

    /// Compare two bytes (case-insensitive if needed)
    fn compare_bytes(&self, a: u8, b: u8) -> bool {
        if self.case_sensitive {
            a == b
        } else {
            a.eq_ignore_ascii_case(&b)
        }
    }
}

/// Build skip table for Boyer-Moore-Horspool algorithm
///
/// For every byte value the table holds how far the search window may be
/// advanced when that byte is aligned with the last position of the pattern.
/// The distance is measured from the rightmost occurrence of the byte in the
/// pattern *excluding its final position* to the end of the pattern. Bytes
/// that do not occur there get the full pattern length.
///
/// * `pattern` - the bytes being searched for.
/// * `case_sensitive` - when `false`, pattern bytes are ASCII-lowercased
///   before being used as table keys, so lookups must lowercase as well.
///
/// For an empty pattern every entry is 0.
fn build_skip_table(pattern: &[u8], case_sensitive: bool) -> [usize; 256] {
    let pattern_len = pattern.len();

    // Default: the byte is not in the pattern, skip the whole pattern length.
    let mut skip_table = [pattern_len; 256];

    // The final byte is deliberately left out: including it (or using
    // `pattern_len - i`) makes every shift one position too large, which
    // steps over real matches.
    if let Some((_, head)) = pattern.split_last() {
        for (i, &raw) in head.iter().enumerate() {
            let byte = if case_sensitive {
                raw
            } else {
                raw.to_ascii_lowercase()
            };
            // Later occurrences overwrite earlier ones, leaving the
            // rightmost (smallest, hence safe) distance in the table.
            skip_table[byte as usize] = pattern_len - 1 - i;
        }
    }

    skip_table
}

/// Outcome of searching one buffer for a set of patterns
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// `true` when every pattern was located (AND logic)
    pub all_patterns_found: bool,
    /// `(pattern_index, position)` pairs giving the first occurrence of each
    /// pattern that was found
    pub pattern_positions: Vec<(usize, usize)>,
    /// Length of the data that was searched
    pub total_size: usize,
}

impl SearchResult {
    /// Report whether the search counts as a successful verification.
    ///
    /// That requires every pattern to have been found and at least one
    /// position to have been recorded.
    pub fn is_successful(&self) -> bool {
        if !self.all_patterns_found {
            return false;
        }
        !self.pattern_positions.is_empty()
    }

    /// Number of patterns for which a position was recorded
    pub fn patterns_found(&self) -> usize {
        let Self {
            pattern_positions, ..
        } = self;
        pattern_positions.len()
    }
}

/// Utility functions for Boyer-Moore-Horspool search
pub mod utils {
    use super::*;
    use memmap2::MmapOptions;
    use std::fs::File;
    use std::path::Path;
    
    /// Verify patterns exist in file using Boyer-Moore-Horspool
    pub fn verify_patterns_in_file(
        file_path: &Path,
        patterns: &[Vec<u8>],
        case_sensitive: bool,
        binary_mode: bool,
    ) -> Result<VerificationResult> {
        let file = File::open(file_path)
            .with_context(|| format!("Failed to open file: {:?}", file_path))?;
        
        let metadata = file.metadata()
            .with_context(|| format!("Failed to get file metadata: {:?}", file_path))?;
        
        if binary_mode {
            // Use memory-mapped file for binary search
            let mmap = unsafe {
                MmapOptions::new()
                    .map(&file)
                    .with_context(|| format!("Failed to memory-map file: {:?}", file_path))?
            };
            
            verify_patterns_memory_mapped(&mmap, patterns, case_sensitive, file_path)
        } else {
            // For text mode, read file content
            let content = std::fs::read(file_path)
                .with_context(|| format!("Failed to read file: {:?}", file_path))?;
            
            verify_patterns_memory(&content, patterns, case_sensitive, file_path)
        }
    }
    
    /// Verify patterns exist in memory-mapped file
    pub fn verify_patterns_memory_mapped(
        mmap: &Mmap,
        patterns: &[Vec<u8>],
        case_sensitive: bool,
        file_path: &Path,
    ) -> Result<VerificationResult> {
        let searcher = BoyerMooreHorspool::new(patterns.to_vec(), case_sensitive);
        let result = searcher.search_memory_mapped(mmap)?;
        
        Ok(VerificationResult {
            file_path: file_path.to_path_buf(),
            all_patterns_found: result.all_patterns_found,
            pattern_positions: result.pattern_positions,
            file_size: result.total_size as u64,
            verification_successful: result.is_successful(),
        })
    }
    
    /// Verify patterns exist in memory
    pub fn verify_patterns_memory(
        data: &[u8],
        patterns: &[Vec<u8>],
        case_sensitive: bool,
        file_path: &Path,
    ) -> Result<VerificationResult> {
        let searcher = BoyerMooreHorspool::new(patterns.to_vec(), case_sensitive);
        let result = searcher.search_memory(data)?;
        
        Ok(VerificationResult {
            file_path: file_path.to_path_buf(),
            all_patterns_found: result.all_patterns_found,
            pattern_positions: result.pattern_positions,
            file_size: result.total_size as u64,
            verification_successful: result.is_successful(),
        })
    }
}

/// Record describing how a pattern verification turned out
#[derive(Debug, Clone)]
pub struct VerificationResult {
    /// Path associated with this verification
    pub file_path: std::path::PathBuf,
    /// `true` when every pattern was located
    pub all_patterns_found: bool,
    /// `(pattern_index, position)` pairs for the patterns that were found
    pub pattern_positions: Vec<(usize, usize)>,
    /// Size value stored for the verified data
    pub file_size: u64,
    /// Overall verdict of the verification
    pub verification_successful: bool,
}

impl VerificationResult {
    /// Build a result with both flags set and no recorded positions.
    pub fn success(file_path: std::path::PathBuf, file_size: u64) -> Self {
        Self::without_positions(file_path, file_size, true)
    }

    /// Build a result with both flags cleared and no recorded positions.
    pub fn failure(file_path: std::path::PathBuf, file_size: u64) -> Self {
        Self::without_positions(file_path, file_size, false)
    }

    /// Shared constructor: both boolean fields take the value of `outcome`.
    fn without_positions(file_path: std::path::PathBuf, file_size: u64, outcome: bool) -> Self {
        Self {
            file_path,
            all_patterns_found: outcome,
            pattern_positions: Vec::new(),
            file_size,
            verification_successful: outcome,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Text searched by every test in this module.
    const HAYSTACK: &[u8] = b"Hello World, this is a test string";

    /// Build an engine for `patterns` and run it over `HAYSTACK`.
    fn run_search(patterns: Vec<Vec<u8>>, case_sensitive: bool) -> SearchResult {
        BoyerMooreHorspool::new(patterns, case_sensitive)
            .search_memory(HAYSTACK)
            .unwrap()
    }

    /// Two patterns that both occur must both be reported.
    #[test]
    fn test_boyer_moore_horspool_basic() {
        let outcome = run_search(vec![b"World".to_vec(), b"test".to_vec()], true);

        assert!(outcome.all_patterns_found);
        assert_eq!(outcome.pattern_positions.len(), 2);
    }

    /// One missing pattern is enough to fail the AND search.
    #[test]
    fn test_boyer_moore_horspool_not_all_found() {
        let outcome = run_search(vec![b"World".to_vec(), b"nonexistent".to_vec()], true);

        assert!(!outcome.all_patterns_found);
    }

    /// A lowercase pattern matches mixed-case text in case-insensitive mode.
    #[test]
    fn test_case_insensitive() {
        let outcome = run_search(vec![b"world".to_vec()], false);

        assert!(outcome.all_patterns_found);
    }
}
