//! Search module for querying N-gram indexes
//! 
//! Provides search functionality including prefix queries, range enumeration,
//! and regex post-filtering over candidate matches.

use crate::error::{BigGrepError, BigGrepResult};
use crate::index::{IndexReader, EFTrie, IndexEntry};
use crate::ngram::{Token, NgramCount};
use regex::Regex;
use std::collections::{HashSet, BTreeSet};
use std::sync::Arc;

/// One N-gram returned by a query, with its count, score and optional match locations.
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Token texts that make up the N-gram.
    pub ngram: Vec<String>,
    /// Count associated with the N-gram.
    pub count: u64,
    /// Ranking score; [`SearchResult::new`] initialises it to `count` as a float.
    pub score: f64,
    /// Match locations attached to this result; empty after [`SearchResult::new`].
    pub matches: Vec<SearchMatch>,
}

impl SearchResult {
    /// Builds a result whose score equals `count` and whose match list is empty.
    pub fn new(ngram: Vec<String>, count: u64) -> Self {
        let score = count as f64;
        SearchResult {
            ngram,
            count,
            score,
            matches: Vec::new(),
        }
    }

    /// Consumes the result and returns it with `matches` replacing the previous match list.
    pub fn with_matches(self, matches: Vec<SearchMatch>) -> Self {
        SearchResult { matches, ..self }
    }
}

/// Location of a single match occurrence.
#[derive(Debug, Clone)]
pub struct SearchMatch {
    /// Path of the file the match belongs to.
    pub file_path: String,
    /// Byte offset of the match.
    pub byte_offset: u64,
    /// Line number, when known.
    pub line_number: Option<u64>,
    /// Column number, when known.
    pub column_number: Option<u64>,
    /// Surrounding text, when available.
    pub context: Option<String>,
}

impl SearchMatch {
    /// Creates a match that carries only a file path and byte offset.
    ///
    /// Line, column and context all start out as `None`.
    pub fn new(file_path: String, byte_offset: u64) -> Self {
        SearchMatch {
            context: None,
            column_number: None,
            line_number: None,
            byte_offset,
            file_path,
        }
    }
}

/// Options that tune how queries are evaluated.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Case-sensitivity flag. Not read by `SearchEngine` in this file.
    pub case_sensitive: bool,
    /// Whole-word flag. Not read by `SearchEngine` in this file.
    pub whole_word: bool,
    /// When set, an N-gram is kept only if its tokens joined by single spaces match.
    pub regex_filter: Option<Regex>,
    /// Optional set of file paths. Not read by `SearchEngine` in this file.
    pub file_scope: Option<HashSet<String>>,
    /// N-grams whose count is below this value are skipped.
    pub min_count: u64,
    /// Upper bound on the number of results a query collects.
    pub max_results: usize,
    /// Parallel-search flag. Not read by `SearchEngine` in this file.
    pub parallel_search: bool,
}

impl Default for SearchOptions {
    /// Defaults: both text flags off, no regex or file scope, `min_count` of 1,
    /// at most 1000 results, and the parallel flag on.
    fn default() -> Self {
        SearchOptions {
            // Boolean toggles.
            case_sensitive: false,
            whole_word: false,
            parallel_search: true,
            // Optional restrictions stay off until the caller supplies them.
            regex_filter: None,
            file_scope: None,
            // Thresholds.
            min_count: 1,
            max_results: 1000,
        }
    }
}

/// An N-gram paired with its count, considered as a potential search hit.
#[derive(Debug, Clone)]
pub struct Candidate {
    /// Token texts that make up the N-gram.
    pub ngram: Vec<String>,
    /// Count associated with the N-gram.
    pub count: u64,
}

impl Candidate {
    /// Bundles `ngram` and `count` into a candidate.
    pub fn new(ngram: Vec<String>, count: u64) -> Self {
        Candidate { count, ngram }
    }
}

/// Search engine for querying N-gram indexes
pub struct SearchEngine {
    index: Arc<EFTrie>,
    options: SearchOptions,
}

impl SearchEngine {
    pub fn new(index: Arc<EFTrie>, options: SearchOptions) -> Self {
        Self { index, options }
    }
    
    /// Search for N-grams with given prefix
    pub fn search_prefix(&self, prefix: &[String]) -> BigGrepResult<Vec<SearchResult>> {
        let candidates = self.index.search_prefix(prefix);
        let mut results = Vec::new();
        
        for ngram_count in candidates {
            if ngram_count.count >= self.options.min_count {
                let mut ngram_strs = Vec::new();
                for token in &ngram_count.tokens {
                    ngram_strs.push(token.text.clone());
                }
                
                // Apply filters
                if !self.passes_filters(&ngram_strs) {
                    continue;
                }
                
                let result = SearchResult::new(ngram_strs, ngram_count.count);
                results.push(result);
                
                if results.len() >= self.options.max_results {
                    break;
                }
            }
        }
        
        Ok(results)
    }
    
    /// Search for N-grams matching regex pattern
    pub fn search_regex(&self, pattern: &str) -> BigGrepResult<Vec<SearchResult>> {
        let regex = Regex::new(pattern)
            .map_err(|e| BigGrepError::Search(e.to_string()))?;
        
        let mut results = Vec::new();
        
        // Collect all N-grams from the index
        let all_ngrams = self.collect_all_ngrams();
        
        for ngram_count in all_ngrams {
            if ngram_count.count >= self.options.min_count {
                let ngram_strs: Vec<String> = ngram_count.tokens.iter()
                    .map(|t| t.text.clone())
                    .collect();
                
                let ngram_str = ngram_strs.join(" ");
                
                if regex.is_match(&ngram_str) {
                    if self.passes_filters(&ngram_strs) {
                        results.push(SearchResult::new(ngram_strs, ngram_count.count));
                    }
                }
                
                if results.len() >= self.options.max_results {
                    break;
                }
            }
        }
        
        Ok(results)
    }
    
    /// Perform range search between two N-gram prefixes
    pub fn search_range(&self, start_prefix: &[String], end_prefix: &[String]) -> BigGrepResult<Vec<SearchResult>> {
        let start = self.prefix_to_key(start_prefix);
        let end = self.prefix_to_key(end_prefix);
        
        if start >= end {
            return Err(BigGrepError::Search("Invalid range: start >= end".to_string()));
        }
        
        let mut results = Vec::new();
        let all_ngrams = self.collect_all_ngrams();
        
        for ngram_count in all_ngrams {
            let ngram_strs: Vec<String> = ngram_count.tokens.iter()
                .map(|t| t.text.clone())
                .collect();
            let key = self.prefix_to_key(&ngram_strs);
            
            if key >= start && key <= end && ngram_count.count >= self.options.min_count {
                if self.passes_filters(&ngram_strs) {
                    results.push(SearchResult::new(ngram_strs, ngram_count.count));
                }
            }
            
            if results.len() >= self.options.max_results {
                break;
            }
        }
        
        Ok(results)
    }
    
    /// Search with complex filter chain
    pub fn search_with_filters(&self, filters: &[SearchFilter]) -> BigGrepResult<Vec<SearchResult>> {
        let mut results = Vec::new();
        let all_ngrams = self.collect_all_ngrams();
        
        for ngram_count in all_ngrams {
            if ngram_count.count >= self.options.min_count {
                let ngram_strs: Vec<String> = ngram_count.tokens.iter()
                    .map(|t| t.text.clone())
                    .collect();
                
                // Apply all filters
                let mut passes = true;
                for filter in filters {
                    if !filter.matches(&ngram_strs) {
                        passes = false;
                        break;
                    }
                }
                
                if passes {
                    results.push(SearchResult::new(ngram_strs, ngram_count.count));
                }
                
                if results.len() >= self.options.max_results {
                    break;
                }
            }
        }
        
        Ok(results)
    }
    
    /// Get top N results by count
    pub fn top_n(&self, n: usize) -> BigGrepResult<Vec<SearchResult>> {
        let mut results = Vec::new();
        let all_ngrams = self.collect_all_ngrams();
        
        // Convert to results and sort by count
        for ngram_count in all_ngrams {
            if ngram_count.count >= self.options.min_count {
                let ngram_strs: Vec<String> = ngram_count.tokens.iter()
                    .map(|t| t.text.clone())
                    .collect();
                
                if self.passes_filters(&ngram_strs) {
                    let mut result = SearchResult::new(ngram_strs, ngram_count.count);
                    result.score = ngram_count.count as f64;
                    results.push(result);
                }
            }
        }
        
        // Sort by count (descending)
        results.sort_by(|a, b| b.count.cmp(&a.count));
        
        Ok(results.into_iter().take(n).collect())
    }
    
    /// Check if N-gram passes current filters
    fn passes_filters(&self, ngram: &[String]) -> bool {
        // Check regex filter
        if let Some(regex) = &self.options.regex_filter {
            let ngram_str = ngram.join(" ");
            if !regex.is_match(&ngram_str) {
                return false;
            }
        }
        
        // Additional filters can be added here
        
        true
    }
    
    /// Collect all N-grams from the index
    fn collect_all_ngrams(&self) -> Vec<NgramCount> {
        // This is a simplified version - in practice, you'd want to traverse the trie
        // For now, we'll return an empty vector as a placeholder
        Vec::new()
    }
    
    /// Convert prefix to search key for range queries
    fn prefix_to_key(&self, prefix: &[String]) -> String {
        prefix.join(" ")
    }
}

/// Search filter trait for custom filtering logic
///
/// An implementor decides, from the N-gram's token texts alone, whether the
/// N-gram should be kept.
pub trait SearchFilter {
    /// Returns `true` when `ngram` (the N-gram's token texts) is accepted.
    fn matches(&self, ngram: &[String]) -> bool;
}

/// Filter that accepts N-grams whose space-joined text matches a regex.
#[derive(Debug)]
pub struct RegexFilter {
    regex: Regex,
}

impl RegexFilter {
    /// Compiles `pattern` into a filter.
    ///
    /// # Errors
    ///
    /// Returns `BigGrepError::Search` carrying the regex error text when
    /// `pattern` fails to compile.
    pub fn new(pattern: &str) -> BigGrepResult<Self> {
        match Regex::new(pattern) {
            Ok(regex) => Ok(RegexFilter { regex }),
            Err(e) => Err(BigGrepError::Search(e.to_string())),
        }
    }
}

impl SearchFilter for RegexFilter {
    /// Joins the tokens with single spaces and tests the result against the regex.
    fn matches(&self, ngram: &[String]) -> bool {
        self.regex.is_match(&ngram.join(" "))
    }
}

/// Filter configured with a minimum and an optional maximum count.
#[derive(Debug)]
pub struct CountFilter {
    min_count: u64,
    max_count: Option<u64>,
}

impl CountFilter {
    /// Creates a filter with the given minimum and no maximum.
    pub fn new(min_count: u64) -> Self {
        CountFilter {
            max_count: None,
            min_count,
        }
    }

    /// Returns the filter with its maximum set to `max_count`.
    pub fn with_max_count(self, max_count: u64) -> Self {
        CountFilter {
            max_count: Some(max_count),
            ..self
        }
    }
}

impl SearchFilter for CountFilter {
    /// Always returns `true`.
    ///
    /// The trait method receives only token texts, not counts, so the stored
    /// bounds are not consulted here; a real check would have to look up the
    /// actual count of the N-gram.
    fn matches(&self, _ngram: &[String]) -> bool {
        true
    }
}

/// Filter on the combined length of an N-gram's tokens.
///
/// The length is the sum of each token's `len()`, i.e. UTF-8 bytes, with no
/// separators counted.
#[derive(Debug)]
pub struct LengthFilter {
    min_length: usize,
    max_length: Option<usize>,
}

impl LengthFilter {
    /// Creates a filter with the given minimum length and no maximum.
    pub fn new(min_length: usize) -> Self {
        LengthFilter {
            max_length: None,
            min_length,
        }
    }

    /// Returns the filter with its maximum length set to `max_length`.
    pub fn with_max_length(self, max_length: usize) -> Self {
        LengthFilter {
            max_length: Some(max_length),
            ..self
        }
    }
}

impl SearchFilter for LengthFilter {
    /// Accepts the N-gram when its summed token length is at least the
    /// minimum and, if a maximum is set, no greater than that maximum.
    fn matches(&self, ngram: &[String]) -> bool {
        let combined: usize = ngram.iter().map(String::len).sum();

        let long_enough = combined >= self.min_length;
        let short_enough = self.max_length.map_or(true, |limit| combined <= limit);

        long_enough && short_enough
    }
}

/// Filter chain for combining multiple filters.
///
/// An N-gram passes the chain only when every contained filter accepts it;
/// an empty chain accepts everything.
pub struct FilterChain {
    filters: Vec<Box<dyn SearchFilter>>,
}

// `Debug` is written by hand because the boxed `dyn SearchFilter` values carry
// no `Debug` bound, so `#[derive(Debug)]` cannot be used on this struct. Only
// the number of filters is reported.
impl std::fmt::Debug for FilterChain {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FilterChain")
            .field("filter_count", &self.filters.len())
            .finish()
    }
}

impl FilterChain {
    /// Creates an empty chain.
    pub fn new() -> Self {
        Self {
            filters: Vec::new(),
        }
    }

    /// Appends `filter` to the chain and returns the chain for further chaining.
    pub fn add_filter<F: SearchFilter + 'static>(mut self, filter: F) -> Self {
        self.filters.push(Box::new(filter));
        self
    }

    /// Returns `true` when every filter in the chain accepts `ngram`.
    pub fn matches(&self, ngram: &[String]) -> bool {
        self.filters.iter().all(|f| f.matches(ngram))
    }
}

impl Default for FilterChain {
    /// Same as [`FilterChain::new`]: an empty chain.
    fn default() -> Self {
        Self::new()
    }
}

/// Range query builder for complex search patterns.
///
/// Combines a start/end prefix pair with a [`FilterChain`] that is applied to
/// the results of the range lookup.
#[derive(Debug)]
pub struct RangeQuery {
    start_prefix: Vec<String>,
    end_prefix: Vec<String>,
    filters: FilterChain,
}

impl RangeQuery {
    /// Creates a range query with an empty filter chain.
    pub fn new(start_prefix: Vec<String>, end_prefix: Vec<String>) -> Self {
        Self {
            start_prefix,
            end_prefix,
            filters: FilterChain::new(),
        }
    }

    /// Adds `filter` to the query's filter chain.
    pub fn with_filter<F: SearchFilter + 'static>(mut self, filter: F) -> Self {
        self.filters = self.filters.add_filter(filter);
        self
    }

    /// Runs the range lookup on `engine`, then keeps the results accepted by
    /// every filter in the chain.
    ///
    /// The chain is applied after the engine has produced its results, so the
    /// engine's result limit is enforced before filtering and fewer results
    /// than that limit may come back.
    ///
    /// # Errors
    ///
    /// Propagates any error from `SearchEngine::search_range`, which rejects
    /// a range whose start key is not smaller than its end key.
    pub fn execute(&self, engine: &SearchEngine) -> BigGrepResult<Vec<SearchResult>> {
        let results = engine.search_range(&self.start_prefix, &self.end_prefix)?;
        // Apply filters
        let filtered_results: Vec<SearchResult> = results
            .into_iter()
            .filter(|r| self.filters.matches(&r.ngram))
            .collect();
        Ok(filtered_results)
    }
}

/// Builder that pairs a prefix lookup with a [`FilterChain`].
#[derive(Debug)]
pub struct PrefixQuery {
    prefix: Vec<String>,
    filters: FilterChain,
}

impl PrefixQuery {
    /// Creates a prefix query with an empty filter chain.
    pub fn new(prefix: Vec<String>) -> Self {
        PrefixQuery {
            filters: FilterChain::new(),
            prefix,
        }
    }

    /// Adds `filter` to the query's filter chain.
    pub fn with_filter<F: SearchFilter + 'static>(self, filter: F) -> Self {
        PrefixQuery {
            prefix: self.prefix,
            filters: self.filters.add_filter(filter),
        }
    }

    /// Runs the prefix lookup on `engine` and drops every result that the
    /// filter chain rejects. Errors from the lookup are propagated.
    pub fn execute(&self, engine: &SearchEngine) -> BigGrepResult<Vec<SearchResult>> {
        let mut hits = engine.search_prefix(&self.prefix)?;
        // Keep only the hits accepted by every filter, preserving their order.
        hits.retain(|hit| self.filters.matches(&hit.ngram));
        Ok(hits)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexBuilder, EFTrie};
    use crate::ngram::{VocabularyMapper, Token};

    /// A prefix lookup for "hello" should return the single inserted bigram.
    #[test]
    fn test_search_prefix() {
        let mut vocab = VocabularyMapper::new();
        vocab.add_tokens(&["hello".to_string(), "world".to_string()]);

        let mut trie = EFTrie::new(2, vocab);
        trie.insert(&[
            Token::new("hello".to_string(), 0, 5),
            Token::new("world".to_string(), 0, 5),
        ], 1).unwrap();

        let index = Arc::new(trie);
        let engine = SearchEngine::new(index, SearchOptions::default());

        let results = engine.search_prefix(&["hello".to_string()]).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].ngram, vec!["hello", "world"]);
    }

    /// The regex is matched against the tokens joined by single spaces.
    #[test]
    fn test_regex_filter() {
        let filter = RegexFilter::new("^hello.*$").unwrap();
        let ngram = vec!["hello".to_string(), "world".to_string()];
        assert!(filter.matches(&ngram));

        let ngram2 = vec!["goodbye".to_string(), "world".to_string()];
        assert!(!filter.matches(&ngram2));
    }

    /// `CountFilter::matches` is a placeholder that accepts every N-gram.
    #[test]
    fn test_count_filter() {
        let filter = CountFilter::new(5).with_max_count(10);
        assert!(filter.matches(&["any".to_string(), "ngram".to_string()])); // Simplified test
    }

    /// Lengths are summed over the tokens only; separators are not counted.
    #[test]
    fn test_length_filter() {
        let filter = LengthFilter::new(5).with_max_length(15);
        // 2 + 5 = 7, inside [5, 15]
        assert!(filter.matches(&["hi".to_string(), "world".to_string()]));
        // 9 + 4 + 6 = 19, above the maximum of 15
        assert!(!filter.matches(&["extremely".to_string(), "long".to_string(), "phrase".to_string()]));
        // 1 + 1 = 2, below the minimum of 5
        assert!(!filter.matches(&["a".to_string(), "b".to_string()]));
    }
}
