//! BigGrep Index File Management (rs-bgextractfile)
//!
//! Manages the fileid_map section of BigGrep indexes, enabling removal,
//! replacement, and addition of file entries without full re-indexing.
//! Handles compressed index files. Updates are written to the index file in
//! place (the fileid_map section first, then the header), so they are not atomic.

use clap::{Parser, Subcommand};
use std::path::{Path, PathBuf};
use anyhow::{Result, Context};
use log::{info, warn, error, debug};
use biggrep_core::*;
use std::fs;
use std::io::{self, Read, Write, Seek, SeekFrom, BufReader, BufRead};
use byteorder::{ReadBytesExt, WriteBytesExt, BigEndian};
use std::collections::HashMap;
use flate2::Compression;
use flate2::read::ZlibDecoder;
use flate2::write::ZlibEncoder;

/// Index management operations
///
/// Each variant carries the paths it operates on; the operation is applied
/// to the entries of an index's fileid_map by `manage_index`.
#[derive(Debug, Clone)]
pub enum IndexOperation {
    /// Remove files from index (entries whose path equals any listed path)
    Remove(Vec<String>),
    /// Replace files in index: old paths and new paths, paired by position
    Replace(Vec<String>, Vec<String>),
    /// Add files to index
    Add(Vec<String>),
}

/// Index header structure matching BigGrep format
///
/// The fields appear here in the same order in which `read_header` parses
/// them and `update_header` serializes them (integers are big-endian).
#[derive(Debug, Clone)]
struct BgiHeader {
    /// File signature; `validate` requires it to be `BIGGREP1`.
    magic: [u8; 8],
    /// Format version; `validate` accepts only version 1.
    version: u32,
    /// Flag bits; carried through unchanged, not interpreted in this file.
    flags: u32,
    /// N-gram order as stored in the header.
    ngram_order: u32,
    /// Number of n-grams as stored in the header.
    num_ngrams: u64,
    /// Number of files as stored in the header.
    num_files: u32,
    /// Offset of the index section (presumably in bytes from file start).
    index_offset: u64,
    /// Offset of the hints section (presumably in bytes from file start).
    hints_offset: u64,
    /// Size of the hints section.
    hints_size: u32,
    /// Byte offset from the start of the file at which the fileid_map begins.
    fileid_map_offset: u64,
    /// Number of bytes the stored fileid_map occupies.
    fileid_map_size: u32,
    /// Minor format number; values of 2 and above mark a compressed fileid_map.
    fmt_minor: u32,
}

impl BgiHeader {
    /// Builds a header with the `BIGGREP1` signature, version 1, n-gram
    /// order 3, minor format 1 and every count, size and offset zeroed.
    pub fn new() -> Self {
        Self {
            magic: *b"BIGGREP1",
            version: 1,
            fmt_minor: 1,
            ngram_order: 3,
            flags: 0,
            num_ngrams: 0,
            num_files: 0,
            index_offset: 0,
            hints_offset: 0,
            hints_size: 0,
            fileid_map_offset: 0,
            fileid_map_size: 0,
        }
    }

    /// Checks the signature and the version.
    ///
    /// # Errors
    /// Fails when the magic bytes are not `BIGGREP1` or the version is not 1.
    pub fn validate(&self) -> Result<()> {
        anyhow::ensure!(
            self.magic == *b"BIGGREP1",
            "Invalid magic number in index file"
        );
        anyhow::ensure!(
            self.version == 1,
            "Unsupported index version: {}",
            self.version
        );
        Ok(())
    }

    /// Reports whether the fileid_map is stored compressed (`fmt_minor >= 2`).
    pub fn is_compressed(&self) -> bool {
        self.fmt_minor > 1
    }
}

/// File ID map entry
///
/// One line of the fileid_map: a numeric ID, a path and, optionally, a
/// trailing metadata string, separated by tab characters.
#[derive(Debug, Clone)]
struct FileEntry {
    /// Numeric file ID (first tab-separated field).
    id: u32,
    /// File path (second tab-separated field).
    path: String,
    /// Everything after the second tab, when present.
    metadata: Option<String>,
}

impl FileEntry {
    /// Parses a single fileid_map line of the form `id\tpath[\tmetadata]`.
    ///
    /// Surrounding whitespace is trimmed before parsing. Only the first two
    /// tabs act as separators, so metadata may itself contain tabs.
    ///
    /// # Errors
    /// Fails when the line contains no tab or the ID is not a valid `u32`.
    pub fn from_line(line: &str) -> Result<Self> {
        let (id_field, remainder) = line
            .trim()
            .split_once('\t')
            .ok_or_else(|| anyhow::anyhow!("Invalid fileid_map entry format"))?;

        let id: u32 = match id_field.parse() {
            Ok(value) => value,
            Err(_) => return Err(anyhow::anyhow!("Invalid file ID: {}", id_field)),
        };

        // Split path and metadata if present
        let (path, metadata) = match remainder.split_once('\t') {
            Some((path_part, meta_part)) => (path_part.to_string(), Some(meta_part.to_string())),
            None => (remainder.to_string(), None),
        };

        Ok(Self { id, path, metadata })
    }

    /// Serializes the entry back into its tab-separated line form, without a
    /// trailing newline.
    pub fn to_line(&self) -> String {
        match &self.metadata {
            Some(metadata) => format!("{}\t{}\t{}", self.id, self.path, metadata),
            None => format!("{}\t{}", self.id, self.path),
        }
    }
}

// Top-level command line: either a subcommand (`command`) or the flag-driven
// mode (`-i` plus one of remove/add/replace, or `-f` with an operations file).
// The `///` comments below are clap help text, so they are user-visible.
#[derive(Parser)]
#[command(name = "rs-bgextractfile")]
#[command(about = "BigGrep index file management tool - manages fileid_map entries")]
#[command(version = "0.1.0")]
struct Cli {
    /// Index file path
    #[arg(short = 'i', long)]
    index: Option<PathBuf>,
    
    /// Remove files from index (comma-separated list)
    #[arg(short = 'r', long, value_delimiter = ',')]
    remove: Vec<String>,
    
    /// Add files to index (comma-separated list)
    #[arg(short = 'a', long, value_delimiter = ',')]
    add: Vec<String>,
    
    /// Replace files with new path (old_path:new_path format, comma-separated)
    // The short flag is spelled out: a bare `short` would derive `-r` from the
    // field name and collide with `--remove`'s `-r`, which clap rejects.
    #[arg(short = 'R', long, value_delimiter = ',')]
    replace: Vec<String>,
    
    /// File containing list of operations (one per line)
    #[arg(short = 'f', long)]
    file: Option<PathBuf>,
    
    /// Verbose output
    #[arg(short, long)]
    verbose: bool,
    
    #[command(subcommand)]
    command: Option<Commands>,
}

// Subcommand form of the tool. Each variant names the index file as its first
// positional argument. Remove/Add/Replace are turned into an `IndexOperation`
// and run through `manage_index`; List and Validate only read the index.
// The `///` comments are clap help text and are therefore left untouched.
#[derive(Subcommand)]
enum Commands {
    /// Remove files from index
    Remove {
        /// Index file
        index: PathBuf,
        /// Files to remove
        files: Vec<String>,
    },
    
    /// Add files to index
    Add {
        /// Index file
        index: PathBuf,
        /// Files to add
        files: Vec<String>,
    },
    
    /// Replace files in index
    Replace {
        /// Index file
        index: PathBuf,
        /// Replacement operations (old:new format)
        // Each item is split at its first ':' by `parse_replace_operations`.
        operations: Vec<String>,
    },
    
    /// List files in index
    List {
        /// Index file
        index: PathBuf,
        /// Search pattern
        // Matched as a plain substring of the entry path, not as a regex.
        #[arg(short, long)]
        pattern: Option<String>,
    },
    
    /// Validate index integrity
    Validate {
        /// Index file
        index: PathBuf,
    },
}

/// Program entry point: parses the command line, sets up logging and then
/// dispatches to the subcommand handler or, when no subcommand was given,
/// to the flag-driven operation handler.
fn main() -> Result<()> {
    let cli = Cli::parse();

    // Logging must be configured before the first log line is emitted.
    init_logging(cli.verbose);

    info!("BigGrep Index Manager v{} starting", biggrep_core::VERSION);

    if let Some(command) = cli.command.as_ref() {
        return execute_command(command, &cli);
    }
    execute_operation(&cli)
}

/// Runs a parsed subcommand.
///
/// List and Validate are handled directly; the mutating subcommands are first
/// translated into an `IndexOperation` and then applied through
/// `manage_index`. The verbosity flag is taken from the top-level `cli`.
///
/// # Errors
/// Propagates parse errors for replace specifications and any error from the
/// called handler.
fn execute_command(command: &Commands, cli: &Cli) -> Result<()> {
    let (index, operation) = match command {
        // Read-only subcommands return straight away.
        Commands::List { index, pattern } => {
            return list_index_files(index, pattern.as_deref(), cli.verbose);
        }
        Commands::Validate { index } => {
            return validate_index(index, cli.verbose);
        }
        Commands::Remove { index, files } => (index, IndexOperation::Remove(files.to_vec())),
        Commands::Add { index, files } => (index, IndexOperation::Add(files.to_vec())),
        Commands::Replace { index, operations } => {
            let (old_paths, new_paths) = parse_replace_operations(operations)?;
            (index, IndexOperation::Replace(old_paths, new_paths))
        }
    };

    manage_index(index, &operation, cli.verbose)
}

fn execute_operation(cli: &Cli) -> Result<()> {
    // Check for file-based operations
    if let Some(ref file_path) = cli.file {
        return execute_file_operations(file_path, cli);
    }
    
    // Determine primary index file
    let index = if let Some(ref idx) = cli.index {
        idx.clone()
    } else {
        anyhow::bail!("No index file specified. Use -i option or provide subcommand.");
    };
    
    // Determine operation from command-line arguments
    let operation = if !cli.remove.is_empty() {
        IndexOperation::Remove(cli.remove.clone())
    } else if !cli.add.is_empty() {
        IndexOperation::Add(cli.add.clone())
    } else if !cli.replace.is_empty() {
        let (old_paths, new_paths) = parse_replace_operations(&cli.replace)?;
        IndexOperation::Replace(old_paths, new_paths)
    } else {
        anyhow::bail!("No operation specified. Use -r (remove), -a (add), or -r with replacement strings.");
    };
    
    manage_index(&index, &operation, cli.verbose)
}

fn execute_file_operations(file_path: &PathBuf, cli: &Cli) -> Result<()> {
    let file = fs::File::open(file_path)
        .with_context(|| format!("Failed to open operations file: {:?}", file_path))?;
    
    let reader = BufReader::new(file);
    
    let mut operations = Vec::new();
    
    for line in reader.lines() {
        let line = line?.trim().to_string();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        
        let parts: Vec<&str> = line.splitn(2, ':').collect();
        if parts.len() == 1 {
            operations.push(IndexOperation::Add(vec![parts[0].to_string()]));
        } else {
            operations.push(IndexOperation::Replace(
                vec![parts[0].to_string()],
                vec![parts[1].to_string()]
            ));
        }
    }
    
    let index = cli.index
        .as_ref()
        .context("Index file required when using -f option")?;
    
    for op in operations {
        manage_index(index, &op, cli.verbose)?;
    }
    
    Ok(())
}

/// Splits `old_path:new_path` specifications into two parallel lists.
///
/// Each item is split at its first `:`; the part before goes into the first
/// returned vector and the part after into the second, at the same position.
///
/// # Errors
/// Fails on the first item that contains no `:`.
fn parse_replace_operations(replace_list: &[String]) -> Result<(Vec<String>, Vec<String>)> {
    let pairs = replace_list
        .iter()
        .map(|replace_op| {
            replace_op
                .split_once(':')
                .map(|(old_path, new_path)| (old_path.to_string(), new_path.to_string()))
                .ok_or_else(|| {
                    anyhow::anyhow!(
                        "Invalid replace operation format: {}. Use old_path:new_path format",
                        replace_op
                    )
                })
        })
        .collect::<Result<Vec<_>>>()?;

    Ok(pairs.into_iter().unzip())
}

/// Applies one `IndexOperation` to the fileid_map of the index at `index_path`.
///
/// The index is opened read/write, its header is read and validated, the
/// fileid_map is loaded, the operation is applied in memory, and the map and
/// then the header are written back in place.
///
/// * `Remove` drops every entry whose path equals one of the given paths.
/// * `Add` appends entries for paths not yet present (duplicates are skipped
///   with a warning) and then renumbers all entries 1..=n in ID order.
/// * `Replace` rewrites the path of the first entry matching each old path;
///   old paths that are not found only produce a warning.
///
/// # Errors
/// Fails when the file is missing or cannot be opened, the header is invalid,
/// the map cannot be read or written, or a count/size no longer fits the
/// header's 32-bit fields.
fn manage_index(index_path: &Path, operation: &IndexOperation, verbose: bool) -> Result<()> {
    info!("Managing index: {:?}", index_path);

    if !index_path.exists() {
        return Err(anyhow::anyhow!("Index file does not exist: {:?}", index_path));
    }

    // Open file for read/write
    let mut file = fs::OpenOptions::new()
        .read(true)
        .write(true)
        .open(index_path)
        .with_context(|| format!("Failed to open index file: {:?}", index_path))?;

    // Read and validate header
    let header = read_header(&mut file)?;
    header.validate()?;

    if verbose {
        info!("Index header: {:?}", header);
    }

    // Read fileid_map
    let file_entries = read_fileid_map(&mut file, &header)?;

    if verbose {
        info!("Loaded {} file entries from index", file_entries.len());
    }

    // Apply operation
    let (modified_entries, operation_summary) = match operation {
        IndexOperation::Remove(files_to_remove) => {
            let mut kept_entries = file_entries;
            let count_before = kept_entries.len();

            kept_entries.retain(|entry| {
                let should_remove = files_to_remove.iter().any(|file| entry.path == *file);
                if should_remove && verbose {
                    info!("Removing: {}", entry.path);
                }
                !should_remove
            });

            let removed_count = count_before - kept_entries.len();
            (kept_entries, format!("Removed {} files", removed_count))
        }
        IndexOperation::Add(files_to_add) => {
            let mut new_entries = file_entries;
            let mut known_paths: std::collections::HashSet<String> =
                new_entries.iter().map(|e| e.path.clone()).collect();
            // Track the highest ID once instead of rescanning for every added file.
            let mut highest_id = new_entries.iter().map(|e| e.id).max().unwrap_or(0);
            let mut added_count = 0;

            for file_path in files_to_add {
                // `insert` returns false when the path was already known.
                if !known_paths.insert(file_path.clone()) {
                    warn!("File already exists in index: {}", file_path);
                    continue;
                }

                highest_id = highest_id
                    .checked_add(1)
                    .context("File ID space exhausted: cannot allocate a new file ID")?;
                new_entries.push(FileEntry {
                    id: highest_id,
                    path: file_path.clone(),
                    metadata: None,
                });
                added_count += 1;

                if verbose {
                    info!("Adding: {} (ID: {})", file_path, highest_id);
                }
            }

            // Renumber entries to ensure sequential IDs
            // NOTE(review): this also rewrites the IDs of pre-existing entries
            // whenever they were not already exactly 1..=n. Whether other
            // sections of the index refer to these IDs is not visible here -
            // confirm that renumbering is safe.
            new_entries.sort_by_key(|entry| entry.id);
            for (entry, sequential_id) in new_entries.iter_mut().zip(1u32..) {
                entry.id = sequential_id;
            }

            (new_entries, format!("Added {} files", added_count))
        }
        IndexOperation::Replace(old_paths, new_paths) => {
            let mut replaced_count = 0;
            let mut modified_entries = file_entries;

            for (old_path, new_path) in old_paths.iter().zip(new_paths.iter()) {
                let found = modified_entries
                    .iter_mut()
                    .find(|entry| entry.path == *old_path);

                if let Some(entry) = found {
                    entry.path = new_path.clone();
                    replaced_count += 1;

                    if verbose {
                        info!("Replacing: {} -> {}", old_path, new_path);
                    }
                } else {
                    warn!("File not found in index: {}", old_path);
                }
            }

            (modified_entries, format!("Replaced {} files", replaced_count))
        }
    };

    // Write modified fileid_map back to index
    write_fileid_map(&mut file, &header, &modified_entries)?;

    // The rewritten map usually has a different length. write_fileid_map
    // truncates the file at the map offset before writing, so the map now
    // runs from that offset to the end of the file; record that size in the
    // header, otherwise the next read would use the stale byte count.
    let file_len = file.metadata()?.len();
    let new_map_size = file_len
        .checked_sub(header.fileid_map_offset)
        .and_then(|size| u32::try_from(size).ok())
        .context("Rewritten fileid_map does not fit the header's 32-bit size field")?;
    let mut updated_header = header.clone();
    updated_header.fileid_map_size = new_map_size;

    // Update header with new file count (checked, not silently truncated)
    let new_file_count = u32::try_from(modified_entries.len())
        .context("Too many file entries for the header's 32-bit file count")?;
    update_header(&mut file, &updated_header, new_file_count)?;

    info!("Index operation completed: {}", operation_summary);

    Ok(())
}

fn read_header(file: &mut fs::File) -> Result<BgiHeader> {
    let mut header = BgiHeader::new();
    
    file.seek(SeekFrom::Start(0))?;
    let mut buffer = [0u8; 64]; // Assume 64-byte header
    
    file.read_exact(&mut buffer)?;
    
    let mut cursor = io::Cursor::new(&buffer);
    
    // Read magic
    cursor.read_exact(&mut header.magic)?;
    
    // Read other fields
    header.version = cursor.read_u32::<BigEndian>()?;
    header.flags = cursor.read_u32::<BigEndian>()?;
    header.ngram_order = cursor.read_u32::<BigEndian>()?;
    header.num_ngrams = cursor.read_u64::<BigEndian>()?;
    header.num_files = cursor.read_u32::<BigEndian>()?;
    header.index_offset = cursor.read_u64::<BigEndian>()?;
    header.hints_offset = cursor.read_u64::<BigEndian>()?;
    header.hints_size = cursor.read_u32::<BigEndian>()?;
    header.fileid_map_offset = cursor.read_u64::<BigEndian>()?;
    header.fileid_map_size = cursor.read_u32::<BigEndian>()?;
    header.fmt_minor = cursor.read_u32::<BigEndian>()?;
    
    Ok(header)
}

fn read_fileid_map(file: &mut fs::File, header: &BgiHeader) -> Result<Vec<FileEntry>> {
    if header.fileid_map_offset == 0 || header.fileid_map_size == 0 {
        return Ok(Vec::new());
    }
    
    // Seek to fileid_map section
    file.seek(SeekFrom::Start(header.fileid_map_offset))?;
    
    // Read the compressed or uncompressed data
    let mut map_data = vec![0u8; header.fileid_map_size as usize];
    file.read_exact(&mut map_data)?;
    
    // Decompress if necessary
    let decompressed_data = if header.is_compressed() {
        let mut decoder = ZlibDecoder::new(&map_data[..]);
        let mut decompressed = Vec::new();
        decoder.read_to_end(&mut decompressed)?;
        decompressed
    } else {
        map_data
    };
    
    // Parse lines
    let content = String::from_utf8(decompressed)?;
    let mut entries = Vec::new();
    
    for line in content.lines() {
        if !line.trim().is_empty() {
            let entry = FileEntry::from_line(line)?;
            entries.push(entry);
        }
    }
    
    Ok(entries)
}

fn write_fileid_map(file: &mut fs::File, header: &BgiHeader, entries: &[FileEntry]) -> Result<()> {
    // Convert entries to lines
    let content = entries.iter()
        .map(|entry| entry.to_line())
        .collect::<Vec<_>>()
        .join("\n");
    
    // Add trailing newline
    let mut data = content.into_bytes();
    data.push(b'\n');
    
    // Compress if necessary
    let output_data = if header.is_compressed() {
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&data)?;
        encoder.finish()?
    } else {
        data
    };
    
    // Seek to fileid_map section and truncate
    file.seek(SeekFrom::Start(header.fileid_map_offset))?;
    file.set_len(header.fileid_map_offset)?;
    
    // Write new data
    file.write_all(&output_data)?;
    
    Ok(())
}

fn update_header(file: &mut fs::File, old_header: &BgiHeader, new_file_count: u32) -> Result<()> {
    let mut new_header = old_header.clone();
    new_header.num_files = new_file_count;
    
    file.seek(SeekFrom::Start(0))?;
    
    // Write header fields
    file.write_all(&new_header.magic)?;
    file.write_u32::<BigEndian>(new_header.version)?;
    file.write_u32::<BigEndian>(new_header.flags)?;
    file.write_u32::<BigEndian>(new_header.ngram_order)?;
    file.write_u64::<BigEndian>(new_header.num_ngrams)?;
    file.write_u32::<BigEndian>(new_header.num_files)?;
    file.write_u64::<BigEndian>(new_header.index_offset)?;
    file.write_u64::<BigEndian>(new_header.hints_offset)?;
    file.write_u32::<BigEndian>(new_header.hints_size)?;
    file.write_u64::<BigEndian>(new_header.fileid_map_offset)?;
    file.write_u32::<BigEndian>(new_header.fileid_map_size)?;
    file.write_u32::<BigEndian>(new_header.fmt_minor)?;
    
    Ok(())
}

/// Prints the fileid_map entries of an index to standard output.
///
/// When `pattern` is given, only entries whose path contains it as a plain
/// substring are listed. Each entry is printed as `id: path`; in verbose mode
/// an entry's metadata, when present, follows on an indented line.
///
/// # Errors
/// Fails when the index is missing, cannot be opened, has an invalid header,
/// or its fileid_map cannot be read.
fn list_index_files(index_path: &Path, pattern: Option<&str>, verbose: bool) -> Result<()> {
    info!("Listing files in index: {:?}", index_path);

    if !index_path.exists() {
        anyhow::bail!("Index file does not exist: {:?}", index_path);
    }

    let mut file = fs::File::open(index_path)
        .with_context(|| format!("Failed to open index file: {:?}", index_path))?;

    let header = read_header(&mut file)?;
    header.validate()?;

    let entries = read_fileid_map(&mut file, &header)?;

    // Without a pattern every entry passes the filter.
    let shown: Vec<&FileEntry> = entries
        .iter()
        .filter(|entry| match pattern {
            Some(needle) => entry.path.contains(needle),
            None => true,
        })
        .collect();

    println!("Index: {:?}", index_path);
    println!("Total files: {}", entries.len());
    if let Some(needle) = pattern {
        println!("Filtered by pattern '{}': {}", needle, shown.len());
    }
    println!();

    for entry in shown.iter() {
        println!("{:>6}: {}", entry.id, entry.path);
        if !verbose {
            continue;
        }
        if let Some(metadata) = entry.metadata.as_ref() {
            println!("        {}", metadata);
        }
    }

    Ok(())
}

/// Validates an index's header and fileid_map and prints a short report.
///
/// Checks performed on the fileid_map:
/// * no file ID occurs more than once,
/// * IDs are exactly 1, 2, 3, ... in stored order,
/// * the number of entries equals the header's `num_files`.
///
/// In verbose mode header details and extra warnings (including a count of
/// entries with empty paths) are logged; empty paths do not affect the status.
///
/// # Errors
/// Fails when the index cannot be opened or read, when the header is
/// invalid, or when any of the three checks above fails.
fn validate_index(index_path: &Path, verbose: bool) -> Result<()> {
    info!("Validating index: {:?}", index_path);

    if !index_path.exists() {
        return Err(anyhow::anyhow!("Index file does not exist: {:?}", index_path));
    }

    let mut file = fs::File::open(index_path)
        .with_context(|| format!("Failed to open index file: {:?}", index_path))?;

    let header = read_header(&mut file)?;
    header.validate()?;

    if verbose {
        info!("Header validation passed");
        info!("Index details:");
        info!("  N-gram order: {}", header.ngram_order);
        info!("  Number of files: {}", header.num_files);
        info!("  Number of N-grams: {}", header.num_ngrams);
        info!("  Compressed: {}", header.is_compressed());
    }

    // Validate fileid_map
    let entries = read_fileid_map(&mut file, &header)?;

    // Check for duplicate IDs: count every entry whose ID was seen before.
    let mut seen_ids = std::collections::HashSet::with_capacity(entries.len());
    let duplicates = entries
        .iter()
        .filter(|entry| !seen_ids.insert(entry.id))
        .count();

    // Check for sequential IDs (1-based, in stored order)
    let is_sequential = entries
        .iter()
        .zip(1u32..)
        .all(|(entry, expected_id)| entry.id == expected_id);

    // Compare in u64 so a huge entry count cannot wrap into a false match.
    let count_matches = entries.len() as u64 == u64::from(header.num_files);

    println!("Index validation results:");
    println!("  Total entries: {}", entries.len());
    println!("  Duplicate IDs: {}", duplicates);
    println!("  Sequential IDs: {}", if is_sequential { "Yes" } else { "No" });
    // Without this line a count mismatch fails validation with no visible cause.
    println!(
        "  Header file count: {} ({})",
        header.num_files,
        if count_matches { "matches" } else { "MISMATCH" }
    );

    if verbose {
        if !is_sequential {
            warn!("File IDs are not sequential");
        }
        if duplicates > 0 {
            warn!("Found {} duplicate file IDs", duplicates);
        }
        if !count_matches {
            warn!(
                "Header records {} files but fileid_map has {} entries",
                header.num_files,
                entries.len()
            );
        }

        // Check for empty or invalid paths
        let invalid_paths = entries
            .iter()
            .filter(|entry| entry.path.trim().is_empty())
            .count();

        if invalid_paths > 0 {
            warn!("Found {} entries with empty paths", invalid_paths);
        }
    }

    if duplicates == 0 && is_sequential && count_matches {
        println!("  Status: PASSED");
        Ok(())
    } else {
        println!("  Status: FAILED");
        Err(anyhow::anyhow!("Index validation failed"))
    }
}

/// Installs the global `env_logger` logger.
///
/// The filter is read from the `BIGGREP_LOG` environment variable; when that
/// is unset the level falls back to `DEBUG` in verbose mode and `INFO`
/// otherwise. Timestamps are omitted from log lines.
fn init_logging(verbose: bool) {
    use env_logger::{Builder, Env};

    let fallback_level = match verbose {
        true => "DEBUG",
        false => "INFO",
    };

    let mut builder = Builder::from_env(Env::default().filter_or("BIGGREP_LOG", fallback_level));
    builder.format_timestamp(None);
    builder.init();
}

// `Default` for `Cli`: nothing selected - no index, no operations file, no
// subcommand, empty remove/add/replace lists, and verbose output off.
impl Default for Cli {
    fn default() -> Self {
        let (remove, add, replace) = (Vec::new(), Vec::new(), Vec::new());
        Self {
            index: None,
            file: None,
            command: None,
            verbose: false,
            remove,
            add,
            replace,
        }
    }
}