//! BigGrep Core Library
//! 
//! Core shared functionality for BigGrep including N-gram processing,
//! Elias-Fano compressed indexes, verification algorithms, and parallel processing.
//! 
//! This library implements the core capabilities:
//! - N-gram tokenization and counting (3-gram and 4-gram support)
//! - Elias-Fano trie index construction and search
//! - Boyer-Moore-Horspool pattern verification
//! - File processing with memory-mapped I/O
//! - Parallel search and processing utilities

pub mod ngram;
pub mod index;
pub mod search;
pub mod verify;
pub mod metadata;
pub mod parallel;
pub mod io;
pub mod error;

// Re-export commonly used types
pub use ngram::{NgramProcessor, NgramCount, Token};
pub use index::{EFTrie, IndexBuilder, IndexReader, IndexEntry};
pub use search::{SearchEngine, SearchResult, SearchOptions, Candidate, boyer_moore::{BoyerMooreHorspool, SearchResult as BMHSearchResult, VerificationResult as BMHVerificationResult, utils}};
pub use verify::{VerificationEngine, VerificationResult};
pub use metadata::{FileMetadata, MetadataStore};
pub use parallel::{ThreadPool, ParallelProcessor};
pub use io::{FileProcessor, MemoryMappedFile};
pub use error::{BigGrepError, BigGrepResult};

// Re-export dependency crates
pub use rayon;
pub use memmap2;
pub use regex;

/// Library version
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

/// Configuration options for BigGrep operations.
///
/// The `Default` implementation in this file selects 3-grams, enables memory
/// mapping, parallel search and verification checks, takes the thread count
/// from Rayon, and uses 1 MiB chunks.
#[derive(Debug, Clone)]
pub struct BigGrepConfig {
    /// N-gram length used for tokenization. Defaults to `3`; the crate-level
    /// docs mention support for 3-grams and 4-grams.
    pub ngram_order: usize,
    /// Whether memory-mapped I/O should be used. Defaults to `true`.
    /// NOTE(review): which components honor this flag is not visible here.
    pub use_memory_mapping: bool,
    /// Whether searches should run in parallel. Defaults to `true`.
    pub parallel_search: bool,
    /// Whether verification checks are enabled. Defaults to `true`.
    /// NOTE(review): presumably controls pattern verification of candidates;
    /// confirm against the `verify` module.
    pub verification_checks: bool,
    /// Number of worker threads. Defaults to `rayon::current_num_threads()`.
    pub num_threads: usize,
    /// Chunk size used when processing data. Defaults to `1024 * 1024`
    /// (presumably bytes, i.e. 1 MiB; confirm against the consumers).
    pub chunk_size: usize,
}

impl Default for BigGrepConfig {
    fn default() -> Self {
        Self {
            ngram_order: 3, // Default to 3-grams
            use_memory_mapping: true,
            parallel_search: true,
            verification_checks: true,
            num_threads: rayon::current_num_threads(),
            chunk_size: 1024 * 1024, // 1MB chunks
        }
    }
}

/// Heuristically decides whether the file at `path` contains binary data.
///
/// Up to the first 1024 bytes of the file are sampled and classified:
///
/// - if more than 30% of the sampled bytes are NUL, the file is binary;
/// - otherwise, if the sample is valid UTF-8 (a multi-byte sequence cut off
///   at the end of a full sample is tolerated), the file is text;
/// - otherwise, the file is binary when more than 30% of the sampled bytes
///   have the high bit set (`>= 0x80`).
///
/// # Returns
///
/// `true` when the sample looks binary. `false` for text, for empty files,
/// and whenever the file's metadata or contents cannot be read (missing file,
/// permission error, directory, ...). I/O errors are deliberately not
/// surfaced: this is a best-effort check.
pub fn is_binary_file(path: &std::path::Path) -> bool {
    // Number of leading bytes inspected by the heuristic.
    const SAMPLE_SIZE: usize = 1024;

    // Empty files, and paths whose metadata cannot be read, are not binary.
    match std::fs::metadata(path) {
        Ok(metadata) if metadata.len() > 0 => {}
        _ => return false,
    }

    let mut file = match std::fs::File::open(path) {
        Ok(file) => file,
        Err(_) => return false,
    };

    // A single `read` call may legally return fewer bytes than are available,
    // so keep reading until the buffer is full or EOF is reached.
    let mut buffer = [0u8; SAMPLE_SIZE];
    let mut filled = 0;
    while filled < buffer.len() {
        match std::io::Read::read(&mut file, &mut buffer[filled..]) {
            Ok(0) => break,
            Ok(read) => filled += read,
            // Interrupted reads are transient; retry instead of giving up.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(_) => return false,
        }
    }

    // A completely filled buffer means the file may continue past the sample,
    // so the last character may have been split by the sample boundary.
    sample_looks_binary(&buffer[..filled], filled == buffer.len())
}

/// Classifies a byte sample as binary (`true`) or text (`false`).
///
/// `may_be_truncated` states that `sample` may end in the middle of a
/// multi-byte UTF-8 sequence because more data follows it.
fn sample_looks_binary(sample: &[u8], may_be_truncated: bool) -> bool {
    if sample.is_empty() {
        return false;
    }

    // Heuristic threshold: 30% of the sampled bytes.
    let threshold = sample.len() as f64 * 0.3;

    let null_count = sample.iter().filter(|&&b| b == 0).count();
    if null_count as f64 > threshold {
        return true;
    }

    // Non-ASCII UTF-8 text (Cyrillic, CJK, ...) consists mostly of high-bit
    // bytes; it must not be mistaken for binary data.
    let is_utf8_text = match std::str::from_utf8(sample) {
        Ok(_) => true,
        // `error_len() == None` means the input ended unexpectedly, i.e. the
        // only problem is an incomplete trailing sequence.
        Err(err) => may_be_truncated && err.error_len().is_none(),
    };
    if is_utf8_text {
        return false;
    }

    let high_bit_count = sample.iter().filter(|&&b| b >= 0x80).count();
    high_bit_count as f64 > threshold
}
