// rs-bgindex: BigGrep Index Builder
// High-performance N-gram indexing tool with 3-gram/4-gram mixing,
// producer-consumer threading, and PFOR/VarByte compression

use anyhow::{anyhow, Context, Result};
use byteorder::{BigEndian, ByteOrder, LittleEndian, ReadBytesExt, WriteBytesExt};
use clap::{Parser, ValueEnum};
use crossbeam_channel::{bounded, unbounded, Receiver, Sender};
use log::{debug, error, info, warn};
use memmap2::{Mmap, MmapOptions};
use rayon::prelude::*;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{self, BufRead, BufReader, Read, Write};
use std::os::unix::io::AsRawFd;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::{Duration, Instant};

// Compression constants
//
// The two `*_FLAG` values tag each posting list written by `write_index`:
// the flag is shifted left by 24 bits and OR-ed into the VarByte-encoded
// size prefix that precedes the list's payload.

// Tag for a posting list stored as plain VarByte-coded deltas.
const VARBYTE_FLAG: u8 = 0x01;
// Tag for a posting list produced by `encode_pfor`.
const PFOR_FLAG: u8 = 0x02;
// NOTE(review): not referenced anywhere in the visible source.
const BITS_PER_BLOCK: usize = 32;

/// Tournament tree for N-way merging: tracks which of `size` leaves currently
/// holds the smallest value and re-decides the winner in `O(log size)` after a
/// single leaf changes.
///
/// Internal nodes store the index of the *winning* (smallest) leaf of their
/// subtree; ties go to the lower leaf index so merges stay stable.
///
/// Leaves that were never assigned hold `T::default()`. For types whose default
/// is also their minimum (e.g. unsigned integers) the caller must overwrite
/// unused leaves with a "larger than everything" sentinel before calling
/// [`LoserTree::init`], otherwise the padding leaves win every match.
struct LoserTree<T: Clone + Default + Ord> {
    /// Number of leaf slots: the requested size rounded up to a power of two
    /// (never less than 1).
    size: usize,
    /// `tree[0]` is the overall winner; `tree[n]` for `1 <= n < size` is the
    /// winning leaf index of the subtree rooted at internal node `n`.
    tree: Vec<usize>,
    /// Leaf `i` is stored at `leaves[size + i]`; the lower half is unused.
    leaves: Vec<T>,
}

impl<T: Clone + Default + Ord> LoserTree<T> {
    /// Creates a tree with room for at least `size` leaves, all set to
    /// `T::default()`. Call [`LoserTree::init`] once the leaves are populated.
    fn new(size: usize) -> Self {
        // `next_power_of_two` returns the value itself for powers of two and 1 for 0.
        let tree_size = size.next_power_of_two();
        Self {
            size: tree_size,
            tree: vec![0; tree_size],
            leaves: vec![T::default(); tree_size * 2],
        }
    }

    /// Plays every match bottom-up so that `get_winner` reflects the current leaves.
    fn init(&mut self) {
        // Children always have larger node numbers than their parent, so a
        // descending sweep sees finished sub-results.
        for node in (1..self.size).rev() {
            self.tree[node] = self.play(node);
        }
        self.tree[0] = self.root_winner();
    }

    /// Replays the matches on the path from leaf `idx` up to the root.
    ///
    /// # Panics
    /// Panics if `idx` is not a valid leaf index (`idx >= size`).
    fn adjust(&mut self, idx: usize) {
        let mut node = (idx + self.size) >> 1;
        while node > 0 {
            self.tree[node] = self.play(node);
            node >>= 1;
        }
        self.tree[0] = self.root_winner();
    }

    /// Index of the leaf currently holding the smallest value.
    fn get_winner(&self) -> usize {
        self.tree[0]
    }

    /// Replaces the current winner's value with `new_leaf` and re-decides the winner.
    fn next(&mut self, new_leaf: T) {
        let winner = self.get_winner();
        self.leaves[self.size + winner] = new_leaf;
        self.adjust(winner);
    }

    /// Leaf index represented by tree position `child`: positions `>= size`
    /// are leaves themselves, smaller ones are internal nodes holding a winner.
    fn contender(&self, child: usize) -> usize {
        if child >= self.size {
            child - self.size
        } else {
            self.tree[child]
        }
    }

    /// Decides the match at internal node `node`; the left contender wins ties.
    fn play(&self, node: usize) -> usize {
        let left = self.contender(node << 1);
        let right = self.contender((node << 1) | 1);
        if self.leaves[self.size + right] < self.leaves[self.size + left] {
            right
        } else {
            left
        }
    }

    /// Overall winner: node 1 when internal nodes exist, otherwise the only leaf.
    fn root_winner(&self) -> usize {
        if self.size > 1 {
            self.tree[1]
        } else {
            0
        }
    }
}

// N-gram types
// NOTE(review): neither alias is referenced in the visible source; n-grams are
// carried around as plain `u64` values (see `ShingleResult::ngrams`).
type Trigram = u32;
type Quadgram = u64;

/// Which n-gram length to extract from a file; selects the extraction branch
/// in `extract_ngrams`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum NgramSize {
    /// 3-byte n-grams.
    Trigram,
    /// 4-byte n-grams.
    Quadgram,
}

// File processing task for producer-consumer
/// One unit of work for a shingling worker: built by `process_file_list` and
/// consumed by `shingle_worker`.
#[derive(Clone)]
struct ShingleTask {
    /// Identifier recorded in posting lists for this file.
    file_id: u32,
    /// Path of the file to open and memory-map.
    file_path: PathBuf,
    /// N-gram length to extract from the file.
    ngram_size: NgramSize,
}

/// Output of `extract_ngrams` for a single file.
#[derive(Clone, Debug)]
struct ShingleResult {
    /// Identifier of the file the n-grams came from.
    file_id: u32,
    /// Sorted, de-duplicated n-grams found in the file.
    ngrams: Vec<u64>, // Store as u64 but only use lower 3 bytes for trigrams
    /// Number of distinct n-grams, i.e. the length of `ngrams`.
    unique_count: usize,
}

// Compression task
/// One posting list awaiting compression by `compression_worker`.
#[derive(Clone)]
struct CompressTask {
    /// The n-gram this posting list belongs to.
    ngram: u64,
    /// IDs of the files containing `ngram`. The worker delta-encodes this
    /// list, which requires it to be sorted in ascending order.
    file_ids: Vec<u32>,
}

/// A compressed posting list ready to be written by `write_index`; the variant
/// decides which flag is stored in the entry's size prefix.
#[derive(Clone)]
enum CompressedEntry {
    /// Posting list stored as VarByte-coded deltas.
    VarByte {
        /// The n-gram this posting list belongs to.
        ngram: u64,
        /// Encoded payload bytes.
        data: Vec<u8>,
    },
    /// Posting list produced by `encode_pfor`.
    Pfor {
        /// The n-gram this posting list belongs to.
        ngram: u64,
        /// Encoded payload bytes.
        data: Vec<u8>,
        /// Number of exceptions reported by `encode_pfor`; not written to the
        /// index by `write_index`.
        exception_count: usize,
    },
}

// Index header
/// Metadata stored at the start of an index file.
///
/// `write_index` serializes the fields in declaration order, big-endian, with
/// `compressed` written as a single 0/1 byte. The in-memory layout of this
/// struct is *not* the on-disk layout.
///
/// The type is `Copy` so a caller-supplied header can be duplicated and
/// completed (counts, offsets) without mutating the original.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct IndexHeader {
    /// File-format magic number.
    magic: u64,
    /// File-format version.
    version: u32,
    /// N-gram length the index was built with (3 or 4).
    ngram_size: u8,
    /// Number of posting lists in the index; filled in by the writer.
    num_ngrams: u64,
    /// Number of files covered by the index.
    num_files: u32,
    /// PFOR block size used during compression.
    blocksize: u32,
    /// Maximum PFOR exceptions per block used during compression.
    exceptions: u32,
    /// Minimum posting-list length for which PFOR was considered.
    minimum: u32,
    /// Hint granularity selector; chooses the prefix mask in `write_index`.
    hint_type: u8,
    /// Whether posting lists are compressed.
    compressed: bool,
    /// Byte offset of the file-ID map section; filled in by the writer.
    fileid_map_offset: u64,
    /// Byte offset of the hints section; filled in by the writer.
    hints_offset: u64,
    /// Byte offset of the first posting list; filled in by the writer.
    index_offset: u64,
    /// Size in bytes of the posting-list section; filled in by the writer.
    index_size: u64,
}

// Hint entry for fast seeking
/// Maps an n-gram prefix to the position of the first posting list carrying
/// that prefix; recorded by `write_index`.
#[derive(Debug)]
struct HintEntry {
    /// The n-gram masked with the hint mask chosen from `IndexHeader::hint_type`.
    prefix: u64,
    /// Byte offset in the index file of the posting list that starts this prefix.
    offset: u64,
}

// CLI arguments
//
// Parsed once in `main` via `Args::parse()` and then passed by reference to
// the file-list reader and the worker spawners.
//
// Review comments in this struct deliberately use `//` rather than `///`:
// clap's derive turns doc comments into help text, so adding `///` lines
// would change the program's output.
#[derive(Parser, Debug)]
#[command(name = "rs-bgindex")]
#[command(about = "BigGrep N-gram Index Builder")]
struct Args {
    /// N-gram size (3 or 4)
    #[arg(short = 'n', long, default_value_t = 3)]
    ngram: usize,

    /// Hint type (0-2)
    #[arg(short = 'H', long, default_value_t = 0)]
    hint_type: u8,

    /// PFOR blocksize
    #[arg(short = 'b', long, default_value_t = 32)]
    blocksize: u32,

    /// PFOR max exceptions per block
    #[arg(short = 'e', long, default_value_t = 2)]
    exceptions: u32,

    // NOTE(review): besides gating PFOR in `compression_worker`, this value is
    // also used by `process_file_list` as a minimum file size in bytes.
    /// PFOR minimum entries to consider PFOR
    #[arg(short = 'm', long, default_value_t = 4)]
    minimum: u32,

    /// Maximum unique N-grams per file
    #[arg(short = 'M', long)]
    max_unique_ngrams: Option<u32>,

    // NOTE(review): `write_index` names its output after the header magic and
    // does not consult this prefix.
    /// Index file prefix (directory and/or partial filename)
    #[arg(short = 'p', long, default_value = "index")]
    prefix: String,

    /// Write overflow filenames to FILE
    #[arg(short = 'O', long)]
    overflow: Option<PathBuf>,

    /// Number of shingling threads
    #[arg(short = 'S', long, default_value_t = 4)]
    sthreads: usize,

    /// Number of compression threads
    #[arg(short = 'C', long, default_value_t = 5)]
    cthreads: usize,

    /// Show additional info
    #[arg(short = 'v', long)]
    verbose: bool,

    /// Use lockfree queues
    #[arg(short = 'L', long)]
    lockfree: bool,

    /// Log file
    #[arg(short = 'l', long)]
    log: Option<PathBuf>,

    /// Show diagnostic information
    #[arg(short = 'd', long)]
    debug: bool,

    // NOTE(review): clap 4 generates its own `-h/--help` flag unless
    // `disable_help_flag` is set; declaring another argument named `help`
    // presumably trips clap's duplicate-argument debug assertion — confirm
    // against the clap version in use.
    /// Show help and exit
    #[arg(short = 'h', long)]
    help: bool,

    // NOTE(review): this is a plain boolean; nothing in the visible source
    // reads it or prints a version.
    /// Show version and exit
    #[arg(short = 'V', long)]
    version: bool,
}

// Memory-mapped file reader with safety checks
/// Read-only memory map of one input file, tagged with the file's ID.
struct MmapReader {
    /// The mapped file contents.
    mmap: Mmap,
    /// Identifier of the mapped file, used in error messages and results.
    file_id: u32,
}

impl MmapReader {
    /// Maps `file` read-only.
    ///
    /// # Errors
    /// Returns an error (with the file ID as context) if the mapping fails.
    fn new(file: &File, file_id: u32) -> Result<Self> {
        // SAFETY: the map is read-only and never exposed mutably. Soundness
        // still relies on the file not being truncated or rewritten by another
        // process while it is mapped; this tool assumes its input files are
        // stable for the duration of the indexing run.
        let mapped = unsafe { MmapOptions::new().map(file) };
        let mmap =
            mapped.with_context(|| format!("Failed to memory-map file ID {}", file_id))?;

        Ok(MmapReader { mmap, file_id })
    }

    /// Returns the `len` bytes starting at `offset`, or an empty slice when the
    /// requested range does not lie entirely inside the mapping (including
    /// when `offset + len` would overflow).
    fn read_bytes(&self, offset: usize, len: usize) -> &[u8] {
        offset
            .checked_add(len)
            .and_then(|end| self.mmap.get(offset..end))
            .unwrap_or(&[])
    }
}

// Extract N-grams from memory-mapped file
/// Collects the distinct n-grams of a mapped file.
///
/// Every byte offset starts an n-gram (sliding window), so a file of `len`
/// bytes yields `len - n + 1` candidates before de-duplication; files shorter
/// than `n` bytes yield none. Bytes are combined little-endian: the first byte
/// of the window becomes the least-significant byte of the value.
///
/// # Errors
/// Returns an error if `max_unique` is set and the file contains more distinct
/// n-grams than that limit.
fn extract_ngrams(
    reader: &MmapReader,
    ngram_size: NgramSize,
    max_unique: Option<u32>,
) -> Result<ShingleResult> {
    let data: &[u8] = &reader.mmap;

    let mut ngrams: Vec<u64> = match ngram_size {
        NgramSize::Trigram => data
            .windows(3)
            .map(|w| u64::from(w[0]) | (u64::from(w[1]) << 8) | (u64::from(w[2]) << 16))
            .collect(),
        NgramSize::Quadgram => data
            .windows(4)
            .map(|w| u64::from(LittleEndian::read_u32(w)))
            .collect(),
    };

    // Sort and deduplicate
    ngrams.sort_unstable();
    ngrams.dedup();

    // Check overflow
    if let Some(max_unique) = max_unique {
        if ngrams.len() > max_unique as usize {
            return Err(anyhow!(
                "File {} exceeds max unique N-grams: {} > {}",
                reader.file_id,
                ngrams.len(),
                max_unique
            ));
        }
    }

    // Read the length before `ngrams` is moved into the result.
    let unique_count = ngrams.len();
    Ok(ShingleResult {
        file_id: reader.file_id,
        ngrams,
        unique_count,
    })
}

// VarByte encoding
/// Encodes `value` as a little-endian base-128 varint: seven payload bits per
/// byte, least-significant group first, with the high bit set on every byte
/// except the last.
fn encode_varbyte(mut value: u32) -> Vec<u8> {
    let mut bytes = Vec::new();
    loop {
        let low_bits = (value & 0x7F) as u8;
        value >>= 7;
        if value == 0 {
            // Final group: continuation bit stays clear.
            bytes.push(low_bits);
            return bytes;
        }
        bytes.push(low_bits | 0x80);
    }
}

// VarByte decoding
fn decode_varbyte<R: Read>(reader: &mut R) -> Result<u32> {
    let mut value = 0u32;
    let mut shift = 0u32;
    loop {
        let byte = reader.read_u8()?;
        value |= ((byte & 0x7F) as u32) << shift;
        if byte & 0x80 == 0 {
            break;
        }
        shift += 7;
    }
    Ok(value)
}

// Delta encoding for sorted file IDs
/// Converts an ascending list of file IDs into gaps: the first output is the
/// first ID itself (gap from 0), each later output is the difference to the
/// preceding ID. The input must be sorted ascending, since the subtraction is
/// unsigned.
fn delta_encode(file_ids: &[u32]) -> Vec<u32> {
    file_ids
        .iter()
        .scan(0u32, |previous, &current| {
            let gap = current - *previous;
            *previous = current;
            Some(gap)
        })
        .collect()
}

// PFOR encoding
/// Encodes delta values block by block using patched frame-of-reference
/// (PFOR) packing, falling back to VarByte where packing does not apply.
///
/// The input is split into blocks of `blocksize` values. Every block starts
/// with one tag byte:
///
/// * `0` — VarByte block: each value of the block follows as a VarByte integer.
///   Used for a trailing partial block and for blocks that cannot be packed
///   within the limits below.
/// * `1..=16` — PFOR block with that bit width `b`, laid out as
///   `[b][exception count: u8]`, then per exception `[position: u16 BE][value: u32 BE]`,
///   then `ceil(blocksize * b / 8)` bytes of packed slots. Slot `i` occupies
///   bits `i*b .. (i+1)*b` of the packed area, least-significant bit first;
///   the slot of an exception is left zero.
///
/// For each full block the width is chosen to minimise the encoded size among
/// widths that need at most `max_exceptions` exceptions (capped at 255, the
/// largest count the header byte can hold). Exceptions are only used when
/// positions fit in 16 bits.
///
/// Returns the encoded bytes and the total number of exceptions written across
/// all blocks. Empty input yields an empty buffer; a `blocksize` of zero
/// encodes the whole input as one VarByte block.
fn encode_pfor(delta_ids: &[u32], blocksize: u32, max_exceptions: u32) -> (Vec<u8>, usize) {
    // Widest slot the packed format supports.
    const MAX_BIT_WIDTH: usize = 16;
    // Encoded size of one exception: u16 position + u32 value.
    const EXCEPTION_BYTES: usize = 6;
    // Tag byte marking a VarByte-coded block.
    const VARBYTE_BLOCK_TAG: u8 = 0;

    // Appends `value` as a VarByte integer without allocating a temporary buffer.
    fn push_varbyte(out: &mut Vec<u8>, mut value: u32) {
        while value >= 0x80 {
            out.push((value & 0x7F) as u8 | 0x80);
            value >>= 7;
        }
        out.push(value as u8);
    }

    // Appends a tagged VarByte block holding `values`.
    fn push_varbyte_block(out: &mut Vec<u8>, values: &[u32]) {
        out.push(VARBYTE_BLOCK_TAG);
        for &value in values {
            push_varbyte(out, value);
        }
    }

    // Picks the cheapest `(bit width, exception count)` for `block`, or `None`
    // when no width up to MAX_BIT_WIDTH stays within `max_exceptions`.
    fn choose_width(block: &[u32], max_exceptions: usize) -> Option<(usize, usize)> {
        // needed[w] = number of values whose binary representation is w bits long.
        let mut needed = [0usize; 33];
        for &value in block {
            needed[(32 - value.leading_zeros()) as usize] += 1;
        }

        let mut exceptions = block.len() - needed[0];
        let mut best: Option<(usize, usize, usize)> = None; // (cost, width, exceptions)
        for width in 1..=MAX_BIT_WIDTH {
            exceptions -= needed[width];
            if exceptions > max_exceptions {
                continue;
            }
            let cost = (block.len() * width + 7) / 8 + exceptions * EXCEPTION_BYTES;
            // Strict comparison keeps the narrowest width on ties.
            if best.map_or(true, |(best_cost, _, _)| cost < best_cost) {
                best = Some((cost, width, exceptions));
            }
        }
        best.map(|(_, width, exceptions)| (width, exceptions))
    }

    let mut output = Vec::new();
    let mut exception_count = 0usize;

    if delta_ids.is_empty() {
        return (output, exception_count);
    }

    let blocksize = blocksize as usize;
    if blocksize == 0 {
        // `chunks(0)` would panic; treat the input as a single fallback block.
        push_varbyte_block(&mut output, delta_ids);
        return (output, exception_count);
    }

    // The header stores the exception count in one byte and positions in 16 bits.
    let positions_fit = blocksize <= usize::from(u16::MAX) + 1;
    let exception_limit = if positions_fit {
        max_exceptions.min(u32::from(u8::MAX)) as usize
    } else {
        0
    };

    for block in delta_ids.chunks(blocksize) {
        let plan = if block.len() == blocksize {
            choose_width(block, exception_limit)
        } else {
            None // trailing partial block
        };

        let (width, block_exceptions) = match plan {
            Some(plan) => plan,
            None => {
                push_varbyte_block(&mut output, block);
                continue;
            }
        };

        let threshold = 1u32 << width;

        // Write header
        output.push(width as u8);
        output.push(block_exceptions as u8);

        // Write exceptions
        for (idx, &val) in block.iter().enumerate().filter(|&(_, &v)| v >= threshold) {
            output.extend_from_slice(&(idx as u16).to_be_bytes());
            output.extend_from_slice(&val.to_be_bytes());
        }

        // Write packed values into a zeroed area so exception slots stay zero.
        let base = output.len();
        output.resize(base + (block.len() * width + 7) / 8, 0);
        for (idx, &val) in block.iter().enumerate() {
            if val >= threshold {
                continue; // Exception already written
            }
            let mut bit = idx * width;
            let mut remaining = width;
            let mut bits = val;
            while remaining > 0 {
                let offset = bit % 8;
                let take = remaining.min(8 - offset);
                let mask = (1u32 << take) - 1;
                output[base + bit / 8] |= ((bits & mask) as u8) << offset;
                bits >>= take;
                bit += take;
                remaining -= take;
            }
        }

        exception_count += block_exceptions;
    }

    (output, exception_count)
}

// Merge N-gram lists
/// Merges `(ngram, file_ids)` pairs into one stream ordered by n-gram.
///
/// Pairs sharing the same n-gram are combined into a single entry. Every
/// emitted `file_ids` list is sorted ascending with duplicates removed, so it
/// can be delta-encoded directly. Empty input yields an empty iterator.
fn merge_ngram_lists(
    mut lists: Vec<(u64, Vec<u32>)>,
) -> impl Iterator<Item = (u64, Vec<u32>)> {
    lists.sort_by_key(|(ngram, _)| *ngram);

    let mut merged: Vec<(u64, Vec<u32>)> = Vec::with_capacity(lists.len());
    for (ngram, file_ids) in lists {
        match merged.last_mut() {
            // Same n-gram as the previous pair: fold the IDs into it.
            Some((last_ngram, accumulated)) if *last_ngram == ngram => {
                accumulated.extend(file_ids);
            }
            _ => merged.push((ngram, file_ids)),
        }
    }

    for (_, file_ids) in &mut merged {
        file_ids.sort_unstable();
        file_ids.dedup();
    }

    merged.into_iter()
}

// Writer thread for compressed index
fn write_index(
    rx: Receiver<CompressedEntry>,
    header: &IndexHeader,
    hints: &mut Vec<HintEntry>,
) -> Result<()> {
    let mut file = File::create(&format!("{}.bgi", header.magic))?;

    // Placeholder for header
    let header_size = std::mem::size_of::<IndexHeader>();
    file.seek(io::SeekFrom::Start(header_size as u64))?;

    let mut current_offset = header_size as u64;
    let mut last_ngram: Option<u64> = None;
    let mut hint_mask = match header.hint_type {
        0 => 0xFFFFFFFFFFFFFFFFu64, // 4-gram, no hint
        1 => 0xFFFFFFFFFFu64,       // 3-gram, 16 N-grams
        2 => 0xFFFFFFFFFFFFFFu64,  // 3-gram, 256 N-grams
        _ => 0xFFFFFFFFFFu64,
    };

    let mut ngram_count = 0;
    let mut hints_written = 0;

    for entry in rx.iter() {
        match entry {
            CompressedEntry::VarByte { ngram, data } => {
                // Check if we need a hint
                if last_ngram.is_none() || (ngram & hint_mask) != (last_ngram.unwrap() & hint_mask) {
                    hints.push(HintEntry {
                        prefix: ngram & hint_mask,
                        offset: current_offset,
                    });
                    hints_written += 1;
                }

                // Write size (VarByte encoded, with flag)
                let size = encode_varbyte(data.len() as u32 | ((VARBYTE_FLAG as u32) << 24));
                file.write_all(&size)?;

                // Write data
                file.write_all(&data)?;

                current_offset += size.len() as u64 + data.len() as u64;
                last_ngram = Some(ngram);
                ngram_count += 1;
            }
            CompressedEntry::Pfor { ngram, data, .. } => {
                // Check if we need a hint
                if last_ngram.is_none() || (ngram & hint_mask) != (last_ngram.unwrap() & hint_mask) {
                    hints.push(HintEntry {
                        prefix: ngram & hint_mask,
                        offset: current_offset,
                    });
                    hints_written += 1;
                }

                // Write size (VarByte encoded, with PFOR flag)
                let size = encode_varbyte(data.len() as u32 | ((PFOR_FLAG as u32) << 24));
                file.write_all(&size)?;

                // Write data
                file.write_all(&data)?;

                current_offset += size.len() as u64 + data.len() as u64;
                last_ngram = Some(ngram);
                ngram_count += 1;
            }
        }
    }

    // Write hints section
    let hints_offset = current_offset;
    for hint in hints {
        file.write_u64::<BigEndian>(hint.prefix)?;
        file.write_u64::<BigEndian>(hint.offset)?;
    }
    current_offset += (hints.len() * 16) as u64;

    // Write fileid_map placeholder
    let fileid_map_offset = current_offset;
    current_offset += 1024; // Placeholder

    // Update header
    let mut final_header = *header;
    final_header.num_ngrams = ngram_count;
    final_header.hints_offset = hints_offset;
    final_header.fileid_map_offset = fileid_map_offset;
    final_header.index_offset = header_size as u64;
    final_header.index_size = hints_offset - (header_size as u64);

    file.seek(io::SeekFrom::Start(0))?;
    file.write_u64::<BigEndian>(final_header.magic)?;
    file.write_u32::<BigEndian>(final_header.version)?;
    file.write_u8(final_header.ngram_size)?;
    file.write_u64::<BigEndian>(final_header.num_ngrams)?;
    file.write_u32::<BigEndian>(final_header.num_files)?;
    file.write_u32::<BigEndian>(final_header.blocksize)?;
    file.write_u32::<BigEndian>(final_header.exceptions)?;
    file.write_u32::<BigEndian>(final_header.minimum)?;
    file.write_u8(final_header.hint_type)?;
    file.write_u8(if final_header.compressed { 1 } else { 0 })?;
    file.write_u64::<BigEndian>(final_header.fileid_map_offset)?;
    file.write_u64::<BigEndian>(final_header.hints_offset)?;
    file.write_u64::<BigEndian>(final_header.index_offset)?;
    file.write_u64::<BigEndian>(final_header.index_size)?;

    info!("Index written successfully:");
    info!("  Total N-grams: {}", ngram_count);
    info!("  Hints: {}", hints_written);
    info!("  Size: {} bytes", current_offset);

    Ok(())
}

// Process file list from stdin
/// Reads the list of files to index from standard input, one per line.
///
/// A line is either `file_id:path` or just `path`; in the latter case the
/// zero-based line index is used as the file ID. Blank lines are skipped.
/// Files whose metadata cannot be read are skipped with a warning, and files
/// smaller than `args.minimum` bytes are skipped silently (debug log only).
///
/// Returns the shingling tasks, the `(file_id, path)` map for the accepted
/// files, and the freshly created overflow file if `args.overflow` was given.
///
/// # Errors
/// Returns an error if stdin cannot be read, the overflow file cannot be
/// created, a line containing `:` does not start with a valid `u32` ID, a line
/// index does not fit in a `u32`, or `args.ngram` is neither 3 nor 4.
fn process_file_list(
    args: &Args,
) -> Result<(Vec<ShingleTask>, Vec<(u32, String)>, Option<File>)> {
    let stdin = io::stdin();
    let reader = stdin.lock();

    let mut tasks = Vec::new();
    let mut file_map = Vec::new();

    let overflow_file = match &args.overflow {
        Some(overflow_path) => Some(File::create(overflow_path)?),
        None => None,
    };

    for (line_num, line) in reader.lines().enumerate() {
        let line = line?;
        let trimmed = line.trim();

        if trimmed.is_empty() {
            continue;
        }

        // Parse file_id:path format or just path
        let (file_id, path) = match trimmed.split_once(':') {
            Some((id_text, path_text)) => {
                let file_id = id_text.parse::<u32>().map_err(|_| {
                    anyhow!("Invalid file ID on line {}: {}", line_num + 1, id_text)
                })?;
                (file_id, PathBuf::from(path_text))
            }
            None => {
                // Refuse to wrap around instead of silently reusing IDs.
                if line_num > u32::MAX as usize {
                    return Err(anyhow!(
                        "Line {} cannot be assigned a 32-bit file ID",
                        line_num + 1
                    ));
                }
                (line_num as u32, PathBuf::from(trimmed))
            }
        };

        let path_str = path.to_string_lossy().to_string();

        // Check file size
        let metadata = match std::fs::metadata(&path) {
            Ok(m) => m,
            Err(e) => {
                warn!("Failed to get metadata for {}: {}", path_str, e);
                continue;
            }
        };

        if metadata.len() < u64::from(args.minimum) {
            debug!("Skipping {} (size {} < minimum {})", path_str, metadata.len(), args.minimum);
            continue;
        }

        // NOTE(review): when a unique-N-gram limit is set, files are always
        // shingled as trigrams regardless of `args.ngram`, while the index
        // header built in `main` still records `args.ngram`. Confirm whether
        // this is intended.
        let ngram_size = if args.max_unique_ngrams.is_some() {
            NgramSize::Trigram
        } else {
            match args.ngram {
                3 => NgramSize::Trigram,
                4 => NgramSize::Quadgram,
                _ => return Err(anyhow!("Invalid N-gram size: {}", args.ngram)),
            }
        };

        tasks.push(ShingleTask {
            file_id,
            file_path: path,
            ngram_size,
        });
        file_map.push((file_id, path_str));
    }

    Ok((tasks, file_map, overflow_file))
}

// Shingling worker thread
fn shingle_worker(
    rx: Receiver<ShingleTask>,
    tx: Sender<ShingleResult>,
    args: &Args,
) {
    let _ = thread::spawn(move || {
        while let Ok(task) = rx.recv() {
            debug!("Processing file {}: {:?}", task.file_id, task.file_path);

            match File::open(&task.file_path) {
                Ok(file) => {
                    match MmapReader::new(&file, task.file_id) {
                        Ok(reader) => {
                            match extract_ngrams(
                                &reader,
                                task.ngram_size,
                                args.max_unique_ngrams,
                            ) {
                                Ok(result) => {
                                    if tx.send(result).is_err() {
                                        break;
                                    }
                                }
                                Err(e) => {
                                    error!(
                                        "Error processing file {}: {}",
                                        task.file_id,
                                        e
                                    );
                                }
                            }
                        }
                        Err(e) => {
                            error!("Failed to mmap file {}: {}", task.file_id, e);
                        }
                    }
                }
                Err(e) => {
                    error!("Failed to open file {}: {}", task.file_path.display(), e);
                }
            }
        }
    });
}

// Compression worker thread
/// Spawns a detached thread that compresses posting lists from `rx` and sends
/// the results to `tx`.
///
/// Lists with at least `args.minimum` IDs are PFOR-encoded with
/// `args.blocksize` / `args.exceptions`; shorter lists (or an empty PFOR
/// result) are stored as VarByte-coded deltas. Empty lists are dropped. The
/// per-block exception limit is enforced inside `encode_pfor`, so the total
/// exception count it reports is only carried along, not used as a gate.
///
/// The thread exits when `rx` is closed or `tx` has no receiver left. The
/// needed settings are copied out of `args` up front: a spawned thread must
/// not borrow from the caller's stack.
fn compression_worker(
    rx: Receiver<CompressTask>,
    tx: Sender<CompressedEntry>,
    args: &Args,
) {
    let blocksize = args.blocksize;
    let max_exceptions = args.exceptions;
    let minimum = args.minimum as usize;

    let _ = thread::spawn(move || {
        for task in rx.iter() {
            if task.file_ids.is_empty() {
                continue;
            }

            // Delta encode
            let delta_ids = delta_encode(&task.file_ids);

            // Try PFOR only for lists long enough to be worth it
            let pfor = if task.file_ids.len() >= minimum {
                Some(encode_pfor(&delta_ids, blocksize, max_exceptions))
            } else {
                None
            };

            let entry = match pfor {
                Some((data, exception_count)) if !data.is_empty() => CompressedEntry::Pfor {
                    ngram: task.ngram,
                    data,
                    exception_count,
                },
                _ => {
                    // Fall back to VarByte. The first delta equals the first
                    // file ID, so encoding every delta covers the whole list.
                    let data = delta_ids
                        .iter()
                        .flat_map(|&delta| encode_varbyte(delta))
                        .collect();

                    CompressedEntry::VarByte {
                        ngram: task.ngram,
                        data,
                    }
                }
            };

            if tx.send(entry).is_err() {
                break;
            }
        }
    });
}

fn main() -> Result<()> {
    let args = Args::parse();

    // Initialize logging
    if args.log.is_some() {
        env_logger::Builder::from_default_env()
            .target(env_logger::Target::File(args.log.as_ref().unwrap().clone()))
            .init();
    } else {
        env_logger::Builder::from_default_env().init();
    }

    info!("Starting rs-bgindex");
    info!("Arguments: {:?}", args);

    // Validate arguments
    if ![3, 4].contains(&args.ngram) {
        return Err(anyhow!("N-gram size must be 3 or 4, got {}", args.ngram));
    }

    if args.blocksize % 8 != 0 {
        return Err(anyhow!("Blocksize must be multiple of 8, got {}", args.blocksize));
    }

    // Process file list from stdin
    let (tasks, file_map, overflow_file) = process_file_list(&args)?;

    if tasks.is_empty() {
        return Err(anyhow!("No files to process"));
    }

    info!("Processing {} files", tasks.len());

    // Create channels
    let (shingle_tx, shingle_rx) = if args.lockfree {
        bounded::<ShingleTask>(1000)
    } else {
        bounded::<ShingleTask>(1000)
    };

    let (compress_tx, compress_rx) = if args.lockfree {
        bounded::<CompressTask>(1000)
    } else {
        bounded::<CompressTask>(1000)
    };

    let (compressed_tx, compressed_rx) = unbounded::<CompressedEntry>();

    // Start shingling workers
    for _ in 0..args.sthreads {
        shingle_worker(
            shingle_rx.clone(),
            compress_tx.clone(),
            &args,
        );
    }

    // Start compression workers
    for _ in 0..args.cthreads {
        compression_worker(
            compress_rx.clone(),
            compressed_tx.clone(),
            &args,
        );
    }

    // Send tasks to workers
    thread::spawn(move || {
        for task in tasks {
            if shingle_tx.send(task).is_err() {
                break;
            }
        }
    });

    // Collect results
    info!("Collecting shingle results...");
    let start_time = Instant::now();
    let mut all_ngrams: Vec<(u64, Vec<u32>)> = Vec::new();
    let mut total_unique = 0;

    let mut results = Vec::new();
    while let Ok(result) = compress_rx.recv() {
        results.push(result);
        if results.len() % 1000 == 0 {
            info!("Collected {} results...", results.len());
        }
    }

    info!("Shingling completed in {:?}", start_time.elapsed());

    // Build per-N-gram lists
    let mut ngram_map: BTreeMap<u64, Vec<u32>> = BTreeMap::new();
    for result in results {
        for &ngram in &result.ngrams {
            ngram_map.entry(ngram).or_default().push(result.file_id);
        }
        total_unique += result.unique_count;
    }

    info!("Total unique N-grams: {}", ngram_map.len());

    // Convert to sorted vectors
    let mut ngram_lists: Vec<(u64, Vec<u32>)> = ngram_map.into_iter().collect();
    for (_, ids) in &mut ngram_lists {
        ids.sort_unstable();
        ids.dedup();
    }

    // Merge N-gram lists
    info!("Merging N-gram lists...");
    let merge_start = Instant::now();

    // Create compression tasks
    let mut compress_tasks = Vec::new();
    for (ngram, mut file_ids) in merge_ngram_lists(ngram_lists) {
        file_ids.sort_unstable();
        file_ids.dedup();
        compress_tasks.push(CompressTask {
            ngram,
            file_ids,
        });
    }

    info!("Merging completed in {:?}", merge_start.elapsed());
    info!("Generated {} compression tasks", compress_tasks.len());

    // Send compression tasks
    for task in compress_tasks {
        if compress_tx.send(task).is_err() {
            break;
        }
    }

    // Drop senders to signal completion
    drop(compress_tx);
    drop(compressed_tx);

    // Write index
    let header = IndexHeader {
        magic: 0x424749_494e4445u64, // "BGIINDEX"
        version: 1,
        ngram_size: args.ngram as u8,
        num_ngrams: 0, // Will be filled by writer
        num_files: file_map.len() as u32,
        blocksize: args.blocksize,
        exceptions: args.exceptions,
        minimum: args.minimum,
        hint_type: args.hint_type,
        compressed: true,
        fileid_map_offset: 0,
        hints_offset: 0,
        index_offset: 0,
        index_size: 0,
    };

    info!("Writing index...");
    let mut hints = Vec::new();
    write_index(compressed_rx, &header, &mut hints)?;

    info!("Index build completed in {:?}", start_time.elapsed());

    Ok(())
}
