//! Search module - core search functionality
use anyhow::{Result, Context};
use std::path::{Path, PathBuf};
use std::collections::HashMap;
use std::sync::Arc;
use rayon::prelude::*;
use log::{info, warn, debug};

use crate::patterns::Pattern;
use crate::filters::MetadataFilter;

/// Search results structure
///
/// One candidate file produced by an index search, together with the metadata
/// recorded for that file in the index.
#[derive(Debug, Clone)]
pub struct SearchMatch {
    /// Identifier of the file; also the key used to look the file up in the
    /// per-index file-metadata map.
    pub file_id: u32,
    /// Path of the file, copied from its `FileMetadata` entry.
    pub file_path: PathBuf,
    /// Location of the match inside the file. The index search always leaves
    /// this as `None`; it is meant to be populated by verification.
    /// NOTE(review): unit is presumably bytes - confirm once verification exists.
    pub offset: Option<u64>,
    /// Key/value metadata copied from the `FileMetadata` entry. Metadata
    /// filters are evaluated against this map.
    pub metadata: HashMap<String, String>,
    /// Initialised to `false` by the index search.
    /// NOTE(review): presumably set to `true` by a verifier - nothing in this
    /// module does so yet.
    pub verified: bool,
    /// Initialised to `1.0` by the index search.
    /// NOTE(review): the scale/meaning of this score is not defined in this module.
    pub match_quality: f64,
}

/// Search statistics
///
/// Plain counters describing a search run. `Default` yields all zeroes.
/// NOTE(review): nothing visible in this module updates these fields yet, so
/// the per-field descriptions below are inferred from the names - confirm.
#[derive(Debug, Clone, Default)]
pub struct SearchStats {
    /// Presumably the number of candidates returned by the index search.
    pub total_candidates: usize,
    /// Presumably the number of candidates confirmed by verification.
    pub verified_matches: usize,
    /// Presumably the number of candidates removed by metadata filters.
    pub filtered_out: usize,
    /// Presumably the index-search duration in milliseconds (per the `_ms` suffix).
    pub search_time_ms: u64,
    /// Presumably the verification duration in milliseconds (per the `_ms` suffix).
    pub verification_time_ms: u64,
}

/// Main search engine
///
/// Holds only its configuration; every search opens and memory-maps the
/// requested index files itself.
pub struct SearchEngine {
    /// Settings supplied to `SearchEngine::new` and read by every search and
    /// verification call.
    config: SearchEngineConfig,
}

/// Settings that control how `SearchEngine` searches and verifies.
#[derive(Debug, Clone)]
pub struct SearchEngineConfig {
    /// Maximum number of matches kept per index file; extra matches are
    /// truncated with a warning. `0` disables the limit.
    pub candidate_limit: usize,
    /// When `false`, `SearchEngine::verify_matches` returns its input unchanged.
    pub verify_matches: bool,
    /// When verification is enabled, selects YARA verification instead of bgverify.
    pub use_yara: bool,
    /// NOTE(review): presumably the path to the YARA rules used for
    /// verification - not read anywhere in this module yet.
    pub yara_rules: Option<PathBuf>,
    /// When `true`, index files are searched on a dedicated rayon thread pool;
    /// otherwise they are searched one after another on the calling thread.
    pub parallel_search: bool,
    /// Number of threads requested for the rayon pool (only used when
    /// `parallel_search` is `true`).
    pub num_threads: usize,
}

impl Default for SearchEngineConfig {
    fn default() -> Self {
        Self {
            candidate_limit: 15000,
            verify_matches: false,
            use_yara: false,
            yara_rules: None,
            parallel_search: true,
            num_threads: 12,
        }
    }
}

impl SearchEngine {
    /// Creates a search engine that uses `config` for every subsequent call.
    pub fn new(config: SearchEngineConfig) -> Self {
        Self { config }
    }

    /// Searches every index in `index_files` and concatenates the matches.
    ///
    /// When `parallel_search` is enabled the indexes are searched on a
    /// dedicated rayon pool sized by `num_threads`; otherwise they are searched
    /// sequentially on the calling thread.
    ///
    /// An index that cannot be searched (unreadable, malformed header, ...) is
    /// logged at `warn` level and contributes no matches; it does not abort the
    /// overall search.
    ///
    /// # Errors
    /// Returns an error only if the rayon thread pool cannot be created.
    pub fn search_indexes(
        &self,
        index_files: &[PathBuf],
        patterns: &[Pattern],
        filters: &[MetadataFilter],
    ) -> Result<Vec<SearchMatch>> {
        info!("Starting search across {} index file(s)", index_files.len());

        let start_time = std::time::Instant::now();

        let results = if self.config.parallel_search {
            let pool = rayon::ThreadPoolBuilder::new()
                .num_threads(self.config.num_threads)
                .build()
                .context("Failed to create thread pool")?;

            pool.install(|| {
                index_files
                    .par_iter()
                    .flat_map(|index_file| {
                        self.search_index_or_skip(index_file, patterns, filters)
                    })
                    .collect::<Vec<_>>()
            })
        } else {
            index_files
                .iter()
                .flat_map(|index_file| self.search_index_or_skip(index_file, patterns, filters))
                .collect::<Vec<_>>()
        };

        let search_time = start_time.elapsed().as_millis() as u64;
        debug!("Search completed in {}ms, found {} candidate(s)", search_time, results.len());

        Ok(results)
    }

    /// Searches one index and downgrades any failure to a warning.
    ///
    /// Shared by the parallel and sequential paths of `search_indexes` so that
    /// both handle a broken index identically: log it and return no matches.
    fn search_index_or_skip(
        &self,
        index_file: &Path,
        patterns: &[Pattern],
        filters: &[MetadataFilter],
    ) -> Vec<SearchMatch> {
        self.search_single_index(index_file, patterns, filters)
            .unwrap_or_else(|e| {
                warn!("Failed to search index {:?}: {:?}", index_file, e);
                Vec::new()
            })
    }

    /// Searches a single index file.
    ///
    /// Steps: memory-map the file, parse its header, derive N-gram search terms
    /// from `patterns`, look the terms up in the index, attach file metadata to
    /// the resulting candidates, apply `filters`, and finally truncate to
    /// `candidate_limit` (a limit of `0` means "unlimited").
    ///
    /// # Errors
    /// Fails if the file cannot be opened or mapped, if the header is invalid,
    /// or if any of the later stages reports an error.
    fn search_single_index(
        &self,
        index_file: &Path,
        patterns: &[Pattern],
        filters: &[MetadataFilter],
    ) -> Result<Vec<SearchMatch>> {
        debug!("Searching index: {:?}", index_file);

        // Open index file
        let file = std::fs::File::open(index_file)
            .with_context(|| format!("Failed to open index file: {:?}", index_file))?;

        // Memory-map the file for efficient access.
        // SAFETY: the mapping is read-only and is dropped before this function
        // returns. Soundness additionally relies on no other process truncating
        // or rewriting the index file while it is mapped, which cannot be
        // enforced from here.
        let mmap = unsafe { memmap2::MmapOptions::new().map(&file) }
            .with_context(|| format!("Failed to memory-map index file: {:?}", index_file))?;

        // Parse index header
        let header = parse_biggrep_header(&mmap)?;
        debug!("Index header: {:?}", header);

        // Generate search terms (N-grams) from patterns
        let search_terms = self.extract_search_terms(patterns)?;
        debug!("Generated {} search term(s)", search_terms.len());

        // Search using index structure
        let candidates = self.search_by_terms(&mmap, &header, &search_terms)?;

        // Load file metadata
        let file_metadata = self.load_file_metadata(&mmap, &header)?;

        // Convert candidates to matches with metadata
        let mut matches = self.candidates_to_matches(candidates, &file_metadata)?;

        // Apply filters
        if !filters.is_empty() {
            matches = self.apply_metadata_filters(matches, filters)?;
        }

        // Apply candidate limit (0 disables it). The limit is per index file.
        let limit = self.config.candidate_limit;
        if limit > 0 && matches.len() > limit {
            warn!("Too many matches ({}), limiting to {} candidates", 
                  matches.len(), limit);
            matches.truncate(limit);
        }

        debug!("Found {} match(es) in index {:?}", matches.len(), index_file);
        Ok(matches)
    }

    /// Collects the distinct N-grams of all `patterns`.
    ///
    /// Each N-gram appears once in the result, in first-seen order. A hash set
    /// tracks what has already been emitted so de-duplication stays linear in
    /// the number of N-grams instead of rescanning the output vector each time.
    fn extract_search_terms(&self, patterns: &[Pattern]) -> Result<Vec<Vec<u8>>> {
        let mut terms: Vec<Vec<u8>> = Vec::new();
        let mut seen: std::collections::HashSet<Vec<u8>> = std::collections::HashSet::new();

        for pattern in patterns {
            for ngram in pattern.get_ngrams() {
                let bytes: &[u8] = &ngram[..];
                // Only allocate for N-grams that have not been seen before.
                if !seen.contains(bytes) {
                    seen.insert(bytes.to_vec());
                    terms.push(bytes.to_vec());
                }
            }
        }

        Ok(terms)
    }

    /// Looks up `terms` in the mapped index and returns candidate file IDs.
    ///
    /// Not implemented yet: always logs a warning and returns an empty list.
    // The parameters are reserved for the pending implementation.
    #[allow(unused_variables)]
    fn search_by_terms(
        &self,
        mmap: &[u8],
        header: &BiggrepHeader,
        terms: &[Vec<u8>],
    ) -> Result<Vec<u32>> {
        // This is a simplified implementation
        // In the real implementation, this would:
        // 1. Use hints to locate relevant N-gram entries
        // 2. Decode compressed posting lists
        // 3. Intersect file IDs across multiple N-grams
        // 4. Return candidate file IDs

        // For now, return empty result
        // TODO: Implement actual BigGrep search algorithm
        warn!("Index search not yet implemented - returning empty results");
        Ok(Vec::new())
    }

    /// Loads the file-ID to metadata mapping from the mapped index.
    ///
    /// Not implemented yet: always logs a warning and returns an empty map.
    // The parameters are reserved for the pending implementation.
    #[allow(unused_variables)]
    fn load_file_metadata(
        &self,
        mmap: &[u8],
        header: &BiggrepHeader,
    ) -> Result<HashMap<u32, FileMetadata>> {
        // Simplified implementation
        // In real implementation, would parse the fileid_map section

        // TODO: Implement actual fileid_map parsing
        // This would involve:
        // 1. Locating the fileid_map section using header.fileid_map_offset
        // 2. Decompressing if needed (zlib compression support)
        // 3. Parsing file ID to path mappings
        // 4. Extracting metadata fields

        warn!("File metadata loading not yet implemented");
        Ok(HashMap::new())
    }

    /// Converts candidate file IDs into unverified search matches.
    ///
    /// Candidates without an entry in `metadata_map` are silently dropped.
    /// Every produced match has `offset: None`, `verified: false` and
    /// `match_quality: 1.0`.
    fn candidates_to_matches(
        &self,
        candidates: Vec<u32>,
        metadata_map: &HashMap<u32, FileMetadata>,
    ) -> Result<Vec<SearchMatch>> {
        let matches = candidates
            .into_iter()
            .filter_map(|file_id| {
                metadata_map.get(&file_id).map(|file_metadata| SearchMatch {
                    file_id,
                    file_path: file_metadata.path.clone(),
                    offset: None, // Would be populated by verification
                    metadata: file_metadata.metadata.clone(),
                    verified: false,
                    match_quality: 1.0,
                })
            })
            .collect();

        Ok(matches)
    }

    /// Keeps only the matches that pass every filter in `filters`.
    ///
    /// A filter whose evaluation returns an error (for example because the
    /// metadata it needs is missing) counts as "did not pass". With no filters
    /// the input is returned unchanged.
    fn apply_metadata_filters(
        &self,
        matches: Vec<SearchMatch>,
        filters: &[MetadataFilter],
    ) -> Result<Vec<SearchMatch>> {
        if filters.is_empty() {
            return Ok(matches);
        }

        // Record the input size now: `matches` is consumed by `into_iter` below.
        let total = matches.len();
        debug!("Applying {} filter(s) to {} match(es)", filters.len(), total);

        let filtered_matches: Vec<SearchMatch> = matches
            .into_iter()
            .filter(|search_match| {
                filters.iter().all(|filter| {
                    // Filter fails on missing metadata
                    filter.evaluate(&search_match.metadata).unwrap_or(false)
                })
            })
            .collect();

        debug!("Filtered {} match(es) down to {} result(s)", 
               total, filtered_matches.len());

        Ok(filtered_matches)
    }

    /// Verifies matches using bgverify or YARA.
    ///
    /// Returns `matches` untouched when verification is disabled in the
    /// configuration; otherwise dispatches to YARA if `use_yara` is set and to
    /// bgverify if not.
    pub fn verify_matches(
        &self,
        matches: Vec<SearchMatch>,
        patterns: &[Pattern],
    ) -> Result<Vec<SearchMatch>> {
        if !self.config.verify_matches {
            return Ok(matches);
        }

        info!("Verifying {} match(es)", matches.len());

        if self.config.use_yara {
            self.verify_with_yara(matches, patterns)
        } else {
            self.verify_with_bgverify(matches, patterns)
        }
    }

    /// Verifies matches using YARA.
    ///
    /// Not implemented yet: logs a warning and returns `matches` unchanged.
    // `patterns` is reserved for the pending implementation.
    #[allow(unused_variables)]
    fn verify_with_yara(
        &self,
        matches: Vec<SearchMatch>,
        patterns: &[Pattern],
    ) -> Result<Vec<SearchMatch>> {
        // TODO: Implement YARA verification
        // This would use the yara-x crate to scan files

        warn!("YARA verification not yet implemented");
        Ok(matches)
    }

    /// Verifies matches using bgverify.
    ///
    /// Not implemented yet: logs a warning and returns `matches` unchanged.
    // `patterns` is reserved for the pending implementation.
    #[allow(unused_variables)]
    fn verify_with_bgverify(
        &self,
        matches: Vec<SearchMatch>,
        patterns: &[Pattern],
    ) -> Result<Vec<SearchMatch>> {
        // TODO: Implement bgverify integration
        // This would invoke the bgverify binary

        warn!("bgverify integration not yet implemented");
        Ok(matches)
    }

    /// Returns search statistics.
    ///
    /// Statistics tracking is not implemented yet, so this always returns an
    /// all-zero `SearchStats` (identical to `SearchStats::default()`).
    pub fn get_stats(&self) -> &SearchStats {
        // TODO: Implement search statistics tracking
        // A reference to a temporary cannot be returned, so the placeholder
        // lives in a static until real statistics are stored on the engine.
        static EMPTY_STATS: SearchStats = SearchStats {
            total_candidates: 0,
            verified_matches: 0,
            filtered_out: 0,
            search_time_ms: 0,
            verification_time_ms: 0,
        };
        &EMPTY_STATS
    }
}

/// BigGrep index header structure
///
/// Decoded form of the fixed-size header at the start of an index file, as
/// produced by `parse_biggrep_header`.
#[derive(Debug, Clone)]
pub struct BiggrepHeader {
    /// Format version, read as a little-endian `u32` from header bytes 4..8.
    pub version: u32,
    /// N-gram size, read from header byte 8.
    pub ngram_size: usize,
    /// N-gram count, read as a little-endian `u64` from header bytes 12..20.
    pub total_ngrams: u64,
    /// Offset of the file-ID map, read as a little-endian `u64` from header
    /// bytes 20..28. NOTE(review): presumably a byte offset from the start of
    /// the file - confirm when the map parser is implemented.
    pub fileid_map_offset: u64,
    /// `true` when header byte 30 equals `1`.
    pub compressed: bool,
}

/// File metadata structure
///
/// Per-file record looked up by file ID when candidates are turned into
/// `SearchMatch` values.
#[derive(Debug, Clone)]
pub struct FileMetadata {
    /// Path of the file; copied into `SearchMatch::file_path`.
    pub path: PathBuf,
    /// Key/value metadata; copied into `SearchMatch::metadata`.
    pub metadata: HashMap<String, String>,
}

/// Decodes the fixed-size BigGrep header found at the start of `mmap`.
///
/// Layout read by this function (multi-byte integers are little-endian):
/// bytes 0..4 magic `BGI1`, 4..8 version, byte 8 N-gram size,
/// 12..20 total N-grams, 20..28 file-ID map offset, byte 30 compression flag.
///
/// # Errors
/// Fails when `mmap` is shorter than 32 bytes or does not start with the magic.
fn parse_biggrep_header(mmap: &[u8]) -> Result<BiggrepHeader> {
    const HEADER_LEN: usize = 32;
    const MAGIC: &[u8] = b"BGI1";

    let available = mmap.len();
    if available < HEADER_LEN {
        anyhow::bail!("Index file too small ({} bytes)", available);
    }

    // Check magic number (simplified)
    if !mmap.starts_with(MAGIC) {
        anyhow::bail!("Invalid BigGrep index magic number");
    }

    // The length check above guarantees every slice and index below is in bounds.
    Ok(BiggrepHeader {
        version: u32::from_le_bytes(mmap[4..8].try_into()?),
        ngram_size: usize::from(mmap[8]),
        total_ngrams: u64::from_le_bytes(mmap[12..20].try_into()?),
        fileid_map_offset: u64::from_le_bytes(mmap[20..28].try_into()?),
        compressed: mmap[30] == 1,
    })
}

/// Utility functions for search operations
///
/// Unit struct used purely as a namespace: all of its functions are
/// associated functions that take no `self`.
pub struct SearchUtils;

impl SearchUtils {
    /// Estimates the memory needed for a search, in the same unit as
    /// `avg_index_size`.
    ///
    /// The estimate is `num_indexes * avg_index_size` plus a rough 64 units per
    /// buffered candidate. The arithmetic saturates at `u64::MAX`, so extreme
    /// inputs yield "as large as representable" instead of panicking (debug
    /// builds) or silently wrapping to a small number (release builds).
    pub fn estimate_memory_usage(
        num_indexes: usize,
        avg_index_size: u64,
        candidate_buffer_size: usize,
    ) -> u64 {
        let index_memory = (num_indexes as u64).saturating_mul(avg_index_size);
        // Rough estimate per candidate
        let buffer_memory = (candidate_buffer_size as u64).saturating_mul(64);
        index_memory.saturating_add(buffer_memory)
    }

    /// Calculates a search priority from the pattern's length and type.
    ///
    /// The result is `length_factor * type_factor`, where the length factor is
    /// the pattern length divided by 1024 and capped at `1.0`, and the type
    /// factor is `0.8` for ASCII, `0.9` for Unicode and `1.0` for binary
    /// patterns. An empty pattern therefore scores `0.0`.
    pub fn calculate_search_priority(pattern: &Pattern) -> f64 {
        // Longer patterns are generally more specific
        let length_factor = (pattern.bytes.len() as f64 / 1024.0).min(1.0);

        // ASCII patterns might be more common
        let type_factor = match pattern.pattern_type {
            crate::patterns::PatternType::Ascii => 0.8,
            crate::patterns::PatternType::Binary => 1.0,
            crate::patterns::PatternType::Unicode => 0.9,
        };

        length_factor * type_factor
    }

    /// Returns `true` when `candidate_count` strictly exceeds `throttle_limit`.
    pub fn should_throttle(candidate_count: usize, throttle_limit: usize) -> bool {
        candidate_count > throttle_limit
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal, valid 32-byte header: version 3, N-gram size 3,
    /// 1000 N-grams, file-ID map at offset 512, not compressed.
    fn sample_header_bytes() -> Vec<u8> {
        let mut header_bytes = vec![0u8; 32];
        header_bytes[0..4].copy_from_slice(b"BGI1");
        header_bytes[4..8].copy_from_slice(&3u32.to_le_bytes()); // version
        header_bytes[8] = 3; // ngram_size
        header_bytes[12..20].copy_from_slice(&1000u64.to_le_bytes()); // total_ngrams
        header_bytes[20..28].copy_from_slice(&512u64.to_le_bytes()); // fileid_map_offset
        header_bytes[30] = 0; // not compressed
        header_bytes
    }

    #[test]
    fn test_search_engine_creation() {
        let config = SearchEngineConfig::default();
        let engine = SearchEngine::new(config);
        assert_eq!(engine.config.candidate_limit, 15000);
    }

    #[test]
    fn test_pattern_priority_calculation() {
        use crate::patterns::Pattern;

        let ascii_pattern = Pattern::from_ascii("hello").unwrap();
        let priority = SearchUtils::calculate_search_priority(&ascii_pattern);
        assert!(priority > 0.0);
    }

    #[test]
    fn test_throttle_decision() {
        assert!(!SearchUtils::should_throttle(1000, 10000));
        assert!(SearchUtils::should_throttle(15000, 10000));
        // The limit itself is still allowed; only exceeding it throttles.
        assert!(!SearchUtils::should_throttle(10000, 10000));
    }

    #[test]
    fn test_header_parsing() {
        let header = parse_biggrep_header(&sample_header_bytes()).unwrap();
        assert_eq!(header.version, 3);
        assert_eq!(header.ngram_size, 3);
        assert_eq!(header.total_ngrams, 1000);
        assert_eq!(header.fileid_map_offset, 512);
        assert!(!header.compressed);
    }

    #[test]
    fn test_header_parsing_compressed_flag() {
        let mut header_bytes = sample_header_bytes();
        header_bytes[30] = 1; // compressed
        let header = parse_biggrep_header(&header_bytes).unwrap();
        assert!(header.compressed);
    }

    #[test]
    fn test_header_rejects_short_input() {
        // One byte short of the 32-byte minimum.
        let header_bytes = sample_header_bytes();
        assert!(parse_biggrep_header(&header_bytes[..31]).is_err());
        assert!(parse_biggrep_header(&[]).is_err());
    }

    #[test]
    fn test_header_rejects_bad_magic() {
        let mut header_bytes = sample_header_bytes();
        header_bytes[0..4].copy_from_slice(b"XXXX");
        assert!(parse_biggrep_header(&header_bytes).is_err());
    }
}
