//! BigGrep Search Orchestrator
//!
//! This tool orchestrates searches across BigGrep indexes: it converts search terms,
//! discovers indexes, coordinates parallel search execution, filters results,
//! and invokes verification when requested.

use anyhow::{Result, Context};
use clap::{Parser, Subcommand};
use log::{info, warn, error, debug};
use std::path::{Path, PathBuf};
use std::fs;
use std::io::{self, Write};
use std::sync::Arc;
use std::collections::HashMap;
use rayon::prelude::*;
use regex::Regex;
use glob::glob;
use serde::{Deserialize, Serialize};

mod search;
mod config;
mod filters;
mod patterns;

use config::Config;
use filters::{MetadataFilter, FilterError};
use patterns::{Pattern, PatternType};

/// Command-line arguments for bgsearch
// NOTE: `///` comments on the fields below are turned into `--help` text by
// clap's derive macro, so they are part of the program's output.
#[derive(Parser, Debug)]
#[command(name = "rs-bgsearch")]
#[command(about = "BigGrep search orchestrator - orchestrates searches and invokes verification")]
#[command(version = "0.1.0")]
struct Args {
    /// Search for ASCII string term
    #[arg(short = 'a', long = "ascii")]
    ascii_terms: Vec<String>,

    /// Search for binary hexadecimal string term
    #[arg(short = 'b', long = "binary")]
    binary_terms: Vec<String>,

    /// Search for Unicode string term
    #[arg(short = 'u', long = "unicode")]
    unicode_terms: Vec<String>,

    /// Direct pattern specification (alternative to -a/-b/-u)
    #[arg(short = 'p', long = "patterns")]
    patterns: Vec<String>,

    /// Directory to search for .bgi index files
    #[arg(short = 'd', long = "directory")]
    directories: Vec<PathBuf>,

    /// Recurse into subdirectories
    #[arg(short = 'r', long = "recursive")]
    recursive: bool,

    /// Do not show metadata in results
    #[arg(short = 'M', long = "no-metadata")]
    no_metadata: bool,

    /// Invoke verification on candidates (use bgverify)
    #[arg(short = 'v', long = "verify")]
    verify: bool,

    /// Use YARA rules file for verification
    #[arg(short = 'y', long = "yara")]
    yara_rules: Option<PathBuf>,

    /// Halt verification if candidates exceed NUM (0 disables, default 15000)
    #[arg(short = 'l', long = "limit")]
    candidate_limit: Option<usize>,

    /// Metadata filter criteria (format: "field operator value")
    #[arg(short = 'f', long = "filter")]
    filters: Vec<String>,

    /// Number of simultaneous .bgi files to search (default 12)
    #[arg(short = 'n', long = "numprocs")]
    numprocs: Option<usize>,

    /// Display text file as MOTD
    #[arg(long = "banner")]
    banner: Option<PathBuf>,

    /// Set order of index searches: "alpha" or "shuffle" (default alpha)
    // `IndexOrder` does not derive `clap::ValueEnum`; values are parsed through
    // its `FromStr` implementation, so no `value_enum` marker is used here.
    #[arg(long = "index-order")]
    index_order: Option<IndexOrder>,

    /// Buffer threshold for throttling (default 10000)
    #[arg(short = 't', long = "throttle")]
    throttle: Option<usize>,

    /// Verbose output (INFO level logging)
    #[arg(short = 'V', long = "verbose")]
    verbose: bool,

    /// Diagnostic output (DEBUG level logging)
    #[arg(short = 'D', long = "debug")]
    debug: bool,

    /// Log to syslog
    #[arg(long = "syslog")]
    syslog: bool,

    /// Display per-directory timing metrics
    #[arg(long = "metrics")]
    metrics: bool,

    /// Configuration file path
    #[arg(long = "config")]
    config_file: Option<PathBuf>,

    /// Output format (json, csv, text)
    #[arg(short = 'o', long = "output-format")]
    output_format: Option<OutputFormat>,

    // clap registers `-h`/`--help` on its own. Declaring another argument with
    // the same id and flags collides with the built-in one (clap asserts on the
    // duplicate in debug builds), so this field is no longer exposed as an
    // argument. It is kept, and filled with `false`, so code that reads
    // `args.help` continues to compile.
    #[arg(skip)]
    help: bool,
}

/// Order in which discovered index files are searched.
///
/// Selected on the command line with `--index-order`; `discover_indexes`
/// sorts the index list unless `Shuffle` is requested.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IndexOrder {
    /// Sort index paths before searching.
    Alpha,
    /// Randomly permute index paths before searching.
    Shuffle,
}

/// Format used by `output_results` when printing search results.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum OutputFormat {
    /// One result per line (the fallback when no format is given).
    Text,
    /// Pretty-printed JSON.
    Json,
    /// CSV with a header row.
    Csv,
}

impl std::str::FromStr for IndexOrder {
    type Err = String;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.to_lowercase().as_str() {
            "alpha" => Ok(IndexOrder::Alpha),
            "shuffle" => Ok(IndexOrder::Shuffle),
            _ => Err(format!("Invalid index order: {}", s)),
        }
    }
}

/// Parses an output format name, ignoring letter case.
///
/// Accepts `"text"`, `"json"` and `"csv"`; anything else yields an error
/// message that echoes the rejected input unchanged.
impl std::str::FromStr for OutputFormat {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Lower-case name -> variant lookup table.
        const KNOWN_FORMATS: [(&str, OutputFormat); 3] = [
            ("text", OutputFormat::Text),
            ("json", OutputFormat::Json),
            ("csv", OutputFormat::Csv),
        ];

        let wanted = s.to_lowercase();
        KNOWN_FORMATS
            .iter()
            .find(|(name, _)| *name == wanted)
            .map(|&(_, format)| format)
            .ok_or_else(|| format!("Invalid output format: {}", s))
    }
}

/// A single search term collected from the command line.
#[derive(Debug, Clone)]
struct SearchTerm {
    /// Parsed pattern; `collect_search_terms` also converts it to N-grams.
    pattern: Pattern,
    /// The term exactly as the user supplied it.
    raw_string: String,
}

#[derive(Debug, Clone)]
struct SearchResult {
    file_path: PathBuf,
    offset: Option<u64>,
    metadata: HashMap<String, String>,
    match_data: Option<String>,
    verified: bool,
    yara_matches: Option<Vec<String>>,
}

/// Main entry point
fn main() -> Result<()> {
    let args = Args::parse();

    // Initialize logging
    let log_level = if args.debug {
        log::Level::Debug
    } else if args.verbose {
        log::Level::Info
    } else {
        log::Level::Warn
    };
    
    env_logger::Builder::from_env(
        env_logger::Env::default().default_filter_or(log::Level::to_string(&log_level))
    ).init();

    // Display banner if specified
    if let Some(banner_path) = &args.banner {
        if let Ok(content) = fs::read_to_string(banner_path) {
            eprintln!("{}", content);
        }
    }

    // Load configuration
    let config = load_config(&args)?;

    // Collect search terms
    let terms = collect_search_terms(&args)?;
    if terms.is_empty() {
        error!("No search terms provided");
        eprintln!("Error: No search terms provided. Use -a, -b, -u, or -p flags.");
        std::process::exit(1);
    }

    info!("Starting search with {} term(s)", terms.len());

    // Discover index files
    let index_files = discover_indexes(&args.directories, args.recursive, args.index_order)?;
    if index_files.is_empty() {
        error!("No index files (.bgi) found");
        eprintln!("Error: No index files (.bgi) found in specified directories");
        std::process::exit(1);
    }

    info!("Discovered {} index file(s)", index_files.len());

    // Parse metadata filters
    let metadata_filters = parse_metadata_filters(&args.filters)?;

    // Set up search parameters
    let numprocs = args.numprocs.unwrap_or(12);
    let throttle_threshold = args.throttle.unwrap_or(10000);
    let candidate_limit = args.candidate_limit.unwrap_or(15000);
    let show_metadata = !args.no_metadata;

    // Execute search across indexes
    let results = search_indexes(
        &index_files,
        &terms,
        &metadata_filters,
        numprocs,
        throttle_threshold,
        show_metadata,
    )?;

    // Apply verification if requested
    let final_results = if args.verify || args.yara_rules.is_some() {
        verify_results(results, &terms, &args.yara_rules, candidate_limit)?
    } else {
        results
    };

    // Output results
    output_results(&final_results, args.output_format)?;

    if args.metrics {
        print_metrics(&index_files);
    }

    info!("Search completed. Found {} result(s)", final_results.len());
    Ok(())
}

/// Load configuration from file and merge with command-line arguments
fn load_config(args: &Args) -> Result<Config> {
    let mut config = Config::default();

    // Load from configuration file if specified
    if let Some(config_path) = &args.config_file {
        if config_path.exists() {
            info!("Loading configuration from {:?}", config_path);
            let config_text = fs::read_to_string(config_path)
                .with_context(|| format!("Failed to read config file: {:?}", config_path))?;
            let file_config: Config = toml::from_str(&config_text)
                .with_context(|| format!("Failed to parse config file: {:?}", config_path))?;
            config.merge(file_config);
        }
    }

    // Apply command-line overrides
    config.apply_args(args);

    Ok(config)
}

/// Build the list of search terms from the `-a`, `-b`, `-u` and `-p` options.
///
/// Terms are gathered in that order (ASCII, binary/hex, Unicode, auto-detected
/// patterns). Once every term has parsed, each pattern is converted to 3-grams.
///
/// # Errors
///
/// Returns the first parse failure, annotated with the offending term, or the
/// first N-gram conversion failure.
fn collect_search_terms(args: &Args) -> Result<Vec<SearchTerm>> {
    let expected = args.ascii_terms.len()
        + args.binary_terms.len()
        + args.unicode_terms.len()
        + args.patterns.len();
    let mut collected: Vec<SearchTerm> = Vec::with_capacity(expected);

    // -a / --ascii
    for raw in &args.ascii_terms {
        collected.push(SearchTerm {
            pattern: Pattern::from_ascii(raw)
                .with_context(|| format!("Invalid ASCII pattern: {}", raw))?,
            raw_string: raw.clone(),
        });
    }

    // -b / --binary
    for raw in &args.binary_terms {
        collected.push(SearchTerm {
            pattern: Pattern::from_hex(raw)
                .with_context(|| format!("Invalid binary/hex pattern: {}", raw))?,
            raw_string: raw.clone(),
        });
    }

    // -u / --unicode
    for raw in &args.unicode_terms {
        collected.push(SearchTerm {
            pattern: Pattern::from_unicode(raw)
                .with_context(|| format!("Invalid Unicode pattern: {}", raw))?,
            raw_string: raw.clone(),
        });
    }

    // -p / --patterns: the pattern type is auto-detected
    for raw in &args.patterns {
        collected.push(SearchTerm {
            pattern: Pattern::auto_detect(raw)
                .with_context(|| format!("Invalid pattern: {}", raw))?,
            raw_string: raw.clone(),
        });
    }

    // N-gram conversion runs only after all terms parsed successfully.
    for entry in collected.iter_mut() {
        entry.pattern.convert_to_ngrams(3)?; // 3-grams by default
    }

    Ok(collected)
}

/// Discover BigGrep index files (.bgi) in specified directories
fn discover_indexes(
    directories: &[PathBuf],
    recursive: bool,
    order: Option<IndexOrder>,
) -> Result<Vec<PathBuf>> {
    let mut index_files = Vec::new();

    for dir in directories {
        if !dir.exists() {
            warn!("Directory does not exist: {:?}", dir);
            continue;
        }

        if !dir.is_dir() {
            warn!("Path is not a directory: {:?}", dir);
            continue;
        }

        let pattern = if recursive {
            format!("{}/**/*.bgi", dir.display())
        } else {
            format!("{}/*.bgi", dir.display())
        };

        for entry in glob(&pattern)
            .with_context(|| format!("Failed to search for index files in: {}", dir.display()))? {
            match entry {
                Ok(path) => index_files.push(path),
                Err(e) => warn!("Error accessing index file: {:?}", e),
            }
        }
    }

    // Apply ordering
    if let Some(IndexOrder::Shuffle) = order {
        // Shuffle with fixed seed for reproducibility
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};
        
        let mut hasher = DefaultHasher::new();
        format!("{:?}", std::time::Instant::now()).hash(&mut hasher);
        let seed = hasher.finish();
        
        // Simple Fisher-Yates shuffle with fixed seed
        let mut rng = SimpleRng::new(seed);
        for i in (1..index_files.len()).rev() {
            let j = rng.gen_range(0..=i);
            index_files.swap(i, j);
        }
    } else {
        index_files.sort();
    }

    Ok(index_files)
}

/// Turn each `--filter` expression into a `MetadataFilter`.
///
/// # Errors
///
/// Stops at the first expression that fails to parse and reports it together
/// with the offending expression text.
fn parse_metadata_filters(filters: &[String]) -> Result<Vec<MetadataFilter>> {
    filters
        .iter()
        .map(|expression| {
            MetadataFilter::parse(expression)
                .with_context(|| format!("Invalid filter expression: {}", expression))
        })
        .collect()
}

/// Execute search across multiple index files in parallel.
///
/// Index files are searched on a dedicated thread pool of `numprocs` threads.
/// An index that fails to search is logged and contributes no results rather
/// than aborting the whole search. The combined results are then passed
/// through the metadata `filters`.
///
/// Filters are evaluated against each result's metadata, so metadata is loaded
/// whenever filters are present and only stripped afterwards when
/// `show_metadata` is `false`.
///
/// `throttle_threshold` is accepted for interface compatibility but is not
/// applied by this function.
///
/// # Errors
///
/// Fails when the thread pool cannot be created or filtering fails.
fn search_indexes(
    index_files: &[PathBuf],
    terms: &[SearchTerm],
    filters: &[MetadataFilter],
    numprocs: usize,
    throttle_threshold: usize,
    show_metadata: bool,
) -> Result<Vec<SearchResult>> {
    info!("Searching across {} index file(s) with {} process(es)", index_files.len(), numprocs);

    // Throttling is not implemented; consume the parameter explicitly.
    let _ = throttle_threshold;

    // Set up thread pool
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(numprocs)
        .build()
        .context("Failed to create thread pool")?;

    // Metadata is required for filtering even if it will not be displayed.
    let need_metadata = show_metadata || !filters.is_empty();

    // Search across indexes. `install` returns the closure's value directly
    // (a `Vec`), so there is nothing to propagate with `?` here.
    let all_results = pool.install(|| {
        index_files
            .par_iter()
            .flat_map(|index_file| {
                search_single_index(index_file, terms, filters, need_metadata)
                    .unwrap_or_else(|e| {
                        warn!("Failed to search index {:?}: {:?}", index_file, e);
                        Vec::new()
                    })
            })
            .collect::<Vec<_>>()
    });

    // Apply metadata filters
    let mut filtered_results = apply_filters(all_results, filters)?;

    // Honor --no-metadata once filtering no longer needs the metadata.
    if !show_metadata {
        for result in &mut filtered_results {
            result.metadata.clear();
        }
    }

    Ok(filtered_results)
}

/// Search one `.bgi` index file and return its candidate results.
///
/// The index is memory-mapped, its header parsed, and the N-grams of all
/// `terms` are looked up to obtain candidate file ids. Every candidate id that
/// has an entry in the index's file metadata becomes a `SearchResult`; ids
/// without an entry are dropped. Metadata is copied into the result only when
/// `show_metadata` is set. `filters` is not consulted here.
///
/// # Errors
///
/// Fails when the file cannot be opened or mapped, or when header parsing,
/// the N-gram lookup or metadata loading fails.
fn search_single_index(
    index_file: &Path,
    terms: &[SearchTerm],
    filters: &[MetadataFilter],
    show_metadata: bool,
) -> Result<Vec<SearchResult>> {
    debug!("Searching index: {:?}", index_file);

    let handle = fs::File::open(index_file)
        .with_context(|| format!("Failed to open index file: {:?}", index_file))?;

    // NOTE(review): mapping is only sound while the file is not truncated or
    // modified by another process; nothing here enforces that.
    let mapped = unsafe { memmap2::MmapOptions::new().map(&handle) }
        .with_context(|| format!("Failed to memory-map index file: {:?}", index_file))?;

    let header = parse_index_header(&mapped)?;

    // Gather the N-grams of every term into one list.
    let ngrams = terms
        .iter()
        .flat_map(|term| term.pattern.get_ngrams())
        .collect::<Vec<_>>();

    let candidates = search_index_by_ngrams(&mapped, &header, &ngrams)?;
    let file_metadata = load_file_metadata(index_file, &mapped, &header)?;

    // Keep only candidates that have a metadata entry, preserving their order.
    let results: Vec<SearchResult> = candidates
        .into_iter()
        .filter_map(|file_id| file_metadata.get(&file_id))
        .map(|entry| SearchResult {
            file_path: entry.path.clone(),
            offset: None, // Would be populated by verification
            metadata: if show_metadata {
                entry.metadata.clone()
            } else {
                HashMap::new()
            },
            match_data: None,
            verified: false,
            yara_matches: None,
        })
        .collect();

    Ok(results)
}

/// Decode the header at the start of a mapped index file.
///
/// This is a simplified layout, not the full BGI format. Fields read, all
/// little-endian:
/// - bytes 0..4: magic, must be `BGI1`
/// - bytes 4..8: version (`u32`)
/// - byte 8: N-gram size
/// - bytes 12..20: number of N-grams (`u64`)
/// - bytes 20..28: offset of the file-id map (`u64`)
///
/// # Errors
///
/// Fails when the input is shorter than 32 bytes or the magic does not match.
fn parse_index_header(mmap: &[u8]) -> Result<IndexHeader> {
    const MIN_HEADER_LEN: usize = 32;

    if mmap.len() < MIN_HEADER_LEN {
        anyhow::bail!("Index file too small");
    }

    // The length check above guarantees at least four bytes are present.
    if !mmap.starts_with(b"BGI1") {
        anyhow::bail!("Invalid index file magic number");
    }

    // Little-endian field readers over fixed offsets.
    let read_u32 = |start: usize| -> Result<u32> {
        let raw: [u8; 4] = mmap[start..start + 4].try_into()?;
        Ok(u32::from_le_bytes(raw))
    };
    let read_u64 = |start: usize| -> Result<u64> {
        let raw: [u8; 8] = mmap[start..start + 8].try_into()?;
        Ok(u64::from_le_bytes(raw))
    };

    Ok(IndexHeader {
        version: read_u32(4)?,
        ngram_size: usize::from(mmap[8]),
        num_ngrams: read_u64(12)?,
        fileid_map_offset: read_u64(20)?,
    })
}

/// Look up candidate file ids for the given N-grams.
///
/// Placeholder: the lookup is not implemented yet, so none of the arguments
/// are inspected and the candidate list is always empty.
fn search_index_by_ngrams(
    mmap: &[u8],
    header: &IndexHeader,
    ngrams: &[Vec<u8>],
) -> Result<Vec<u32>> {
    // TODO: Implement actual N-gram search with posting list intersection
    let no_candidates: Vec<u32> = Vec::new();
    Ok(no_candidates)
}

/// Load the file-id to metadata map of an index.
///
/// Placeholder: parsing of the file-id map section is not implemented yet, so
/// none of the arguments are inspected and the returned map is always empty.
fn load_file_metadata(
    index_file: &Path,
    mmap: &[u8],
    header: &IndexHeader,
) -> Result<HashMap<u32, FileMetadata>> {
    // TODO: Implement actual fileid_map parsing
    // This would involve reading the compressed metadata section
    Ok(HashMap::new())
}

/// Apply metadata filters to search results
fn apply_filters(
    results: Vec<SearchResult>,
    filters: &[MetadataFilter],
) -> Result<Vec<SearchResult>> {
    if filters.is_empty() {
        return Ok(results);
    }

    let filtered_results = results
        .into_iter()
        .filter(|result| {
            filters.iter().all(|filter| {
                match filter.evaluate(&result.metadata) {
                    Ok(true) => true,
                    Ok(false) => false,
                    Err(FilterError::MissingKey(_)) => {
                        // Missing metadata is handled gracefully
                        false
                    }
                    Err(e) => {
                        warn!("Filter error: {:?}", e);
                        false
                    }
                }
            })
        })
        .collect();

    Ok(filtered_results)
}

/// Run verification over the candidate results.
///
/// When `candidate_limit` is non-zero and the number of candidates exceeds it,
/// verification is skipped and the candidates are returned unchanged.
/// Otherwise the candidates go through YARA when `yara_rules` is set, and
/// through bgverify when it is not.
///
/// # Errors
///
/// Propagates any error from the selected verifier.
fn verify_results(
    results: Vec<SearchResult>,
    terms: &[SearchTerm],
    yara_rules: &Option<PathBuf>,
    candidate_limit: usize,
) -> Result<Vec<SearchResult>> {
    let candidate_count = results.len();
    let limit_enabled = candidate_limit > 0;

    if limit_enabled && candidate_count > candidate_limit {
        warn!("Too many candidates ({}), skipping verification (limit: {})", 
              candidate_count, candidate_limit);
        return Ok(results);
    }

    info!("Verifying {} result(s)", candidate_count);

    match yara_rules {
        Some(yara_path) => verify_with_yara(results, yara_path),
        None => verify_with_bgverify(results, terms),
    }
}

/// Verify results using YARA rules
fn verify_with_yara(
    results: Vec<SearchResult>,
    yara_rules: &PathBuf,
) -> Result<Vec<SearchResult>> {
    info!("Using YARA rules from: {:?}", yara_rules);

    // TODO: Implement YARA verification
    // This would use yara-x crate to scan files
    
    Ok(results)
}

/// Verify results using bgverify
///
/// Placeholder: bgverify integration is not implemented yet. The function
/// logs that bgverify was selected and returns `results` unchanged; `terms`
/// is not used.
fn verify_with_bgverify(
    results: Vec<SearchResult>,
    terms: &[SearchTerm],
) -> Result<Vec<SearchResult>> {
    info!("Using bgverify for verification");

    // TODO: Implement bgverify integration
    // This would invoke the bgverify binary with candidate files
    
    Ok(results)
}

/// Output search results in specified format
fn output_results(
    results: &[SearchResult],
    output_format: Option<OutputFormat>,
) -> Result<()> {
    let format = output_format.unwrap_or(OutputFormat::Text);

    match format {
        OutputFormat::Json => output_json(results),
        OutputFormat::Csv => output_csv(results),
        OutputFormat::Text => output_text(results),
    }
}

/// Print the results to stdout as pretty-printed JSON.
///
/// # Errors
///
/// Fails when the results cannot be serialized.
fn output_json(results: &[SearchResult]) -> Result<()> {
    serde_json::to_string_pretty(results)
        .context("Failed to serialize results to JSON")
        .map(|rendered| println!("{}", rendered))
}

/// Output results in CSV format.
///
/// Writes a header row followed by one row per result to stdout. Columns:
/// `file_path`, `offset` (empty when unknown), `verified` (`true`/`false`) and
/// `yara_matches` (rule names joined with `;`, empty when there are none).
///
/// # Errors
///
/// Fails when a record cannot be written or the writer cannot be flushed.
fn output_csv(results: &[SearchResult]) -> Result<()> {
    let mut wtr = csv::Writer::from_writer(io::stdout());

    // Write header
    wtr.write_record(&["file_path", "offset", "verified", "yara_matches"])?;

    for result in results {
        let yara_matches = result.yara_matches
            .as_ref()
            .map(|m| m.join(";"))
            .unwrap_or_default();

        // Every field is converted to an owned `String` so the record is a
        // homogeneous array; `to_string_lossy` alone yields a `Cow<str>`,
        // which cannot share an array with `String` values.
        let record: [String; 4] = [
            result.file_path.to_string_lossy().into_owned(),
            result.offset.map(|o| o.to_string()).unwrap_or_default(),
            result.verified.to_string(),
            yara_matches,
        ];
        wtr.write_record(&record)?;
    }

    wtr.flush()?;
    Ok(())
}

/// Output results in text format.
///
/// Each result is written to stdout on its own line as
/// `path[:offset][ [verified]][ [YARA: rule, rule]]`.
///
/// Stdout is locked once for the whole listing and written with `write!`, so
/// a write failure (for example a closed pipe) is returned as an error
/// instead of panicking as `print!`/`println!` would.
///
/// # Errors
///
/// Fails when writing to or flushing stdout fails.
fn output_text(results: &[SearchResult]) -> Result<()> {
    let stdout = io::stdout();
    let mut out = stdout.lock();

    for result in results {
        write!(out, "{}", result.file_path.display())?;

        if let Some(offset) = result.offset {
            write!(out, ":{}", offset)?;
        }

        if result.verified {
            write!(out, " [verified]")?;
        }

        if let Some(yara_matches) = &result.yara_matches {
            write!(out, " [YARA: {}]", yara_matches.join(", "))?;
        }

        writeln!(out)?;
    }

    out.flush()?;
    Ok(())
}

/// Print per-directory metrics to stderr.
///
/// Reports, for every directory that contains at least one of the given index
/// files, how many index files it holds. No timing information is collected
/// here, so only the counts are printed. Directories are listed in sorted
/// order so the report is identical from run to run.
fn print_metrics(index_files: &[PathBuf]) {
    eprintln!("\n=== Search Metrics ===");

    // An ordered map keyed by borrowed paths: stable output order and no
    // per-file path allocation.
    let mut dir_metrics: std::collections::BTreeMap<&Path, usize> =
        std::collections::BTreeMap::new();

    // Paths without a parent component are not attributed to any directory.
    for parent in index_files.iter().filter_map(|index_file| index_file.parent()) {
        *dir_metrics.entry(parent).or_insert(0) += 1;
    }

    for (dir, count) in &dir_metrics {
        eprintln!("Directory: {} - {} index file(s)", dir.display(), count);
    }

    eprintln!("======================\n");
}

// Helper structs and types

/// Header fields decoded from the start of an index file by
/// `parse_index_header`.
#[derive(Debug, Clone)]
struct IndexHeader {
    /// Format version (little-endian `u32` at bytes 4..8).
    version: u32,
    /// N-gram size (single byte at offset 8).
    ngram_size: usize,
    /// Number of N-grams (little-endian `u64` at bytes 12..20).
    num_ngrams: u64,
    /// Offset of the file-id map (little-endian `u64` at bytes 20..28).
    fileid_map_offset: u64,
}

/// Per-file entry of an index's file-id map, as returned by
/// `load_file_metadata`.
#[derive(Debug, Clone)]
struct FileMetadata {
    /// Path of the indexed file; copied into `SearchResult::file_path`.
    path: PathBuf,
    /// Metadata key/value pairs; copied into results when metadata is shown.
    metadata: HashMap<String, String>,
}

/// Simple RNG for reproducible shuffling.
///
/// A small linear congruential generator: the same seed always produces the
/// same sequence. Not suitable for anything security-sensitive.
struct SimpleRng {
    /// Current generator state; advanced on every draw.
    state: u64,
}

impl SimpleRng {
    /// Creates a generator whose sequence is fully determined by `seed`.
    fn new(seed: u64) -> Self {
        Self { state: seed }
    }

    /// Returns a pseudo-random value inside `range`.
    ///
    /// Any `usize` range is accepted (`a..b`, `a..=b`, `..b`, ...). For a
    /// half-open `a..b` range the result is `a + r % (b - a)`, where `r` is
    /// the upper 32 bits of the freshly advanced state.
    ///
    /// # Panics
    ///
    /// Panics when the range is empty, since no value can be drawn from it.
    fn gen_range(&mut self, range: impl std::ops::RangeBounds<usize>) -> usize {
        use std::ops::Bound;

        let low = match range.start_bound() {
            Bound::Included(&start) => start,
            Bound::Excluded(&start) => start
                .checked_add(1)
                .expect("gen_range called with an empty range"),
            Bound::Unbounded => 0,
        };
        // Work with an inclusive upper bound so `..=usize::MAX` cannot overflow.
        let high = match range.end_bound() {
            Bound::Included(&end) => end,
            Bound::Excluded(&end) => end
                .checked_sub(1)
                .expect("gen_range called with an empty range"),
            Bound::Unbounded => usize::MAX,
        };
        assert!(low <= high, "gen_range called with an empty range");

        self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
        // The upper half of the state is used as the random draw.
        let r = (self.state >> 32) as u32;

        match (high - low).checked_add(1) {
            Some(span) => low + (r as usize) % span,
            // The range covers every `usize` value, so any draw is in range.
            None => r as usize,
        }
    }
}
