diff --git a/README.md b/README.md
index fcadadb2..106ff27b 100644
--- a/README.md
+++ b/README.md
@@ -27,16 +27,24 @@ This repository is a work in progress, and still in early development. This repo
 
 ## Installation
 
-To install any component of `gtars`, you must have the rust toolchain installed. You can install it by [following the instructions](https://www.rust-lang.org/tools/install).
+To install `gtars`, you must first [install the rust toolchain](https://www.rust-lang.org/tools/install).
 
 ### Command-line interface
+
 You may build the cli binary locally by navigating to `gtars-cli` and using `cargo build --release`. This will create a binary in `target/release/gtars` at the top level of the workspace. You can then add this to your path, or run it directly.
 
 Alternatively, you can run `cargo install --path gtars-cli` from the top level of the workspace. This will install the binary to your cargo bin directory (usually `~/.cargo/bin`).
 
+We feature-gate binary dependencies to maximize compatibility and minimize install size. You can specify features during installation like so:
+
+```
+cargo install --path gtars-cli --features "uniwig tokenizers"
+```
+
 Finally, you can download precompiled binaries from the [releases page](https://github.com/databio/gtars/releases).
 
 ### Python bindings
+
 You can install the Python bindings via pip. First, ensure you have a recent version of pip installed. Then run:
 
 ```bash
@@ -50,6 +58,11 @@ from gtars import __version__
 print(__version__)
 ```
 
+### Dev Python bindings
+
+To build and install the development version of the Python bindings from source, see [gtars-python/README.md](gtars-python/README.md).
+
 ## Usage
 
 `gtars` provides several useful tools. There are 3 ways to use `gtars`.
diff --git a/gtars-core/src/models/region_set.rs b/gtars-core/src/models/region_set.rs
index d7b5ab07..9dfc59e3 100644
--- a/gtars-core/src/models/region_set.rs
+++ b/gtars-core/src/models/region_set.rs
@@ -22,7 +22,9 @@ use bigtools::{BedEntry, BigBedWrite};
 use crate::models::Region;
 #[cfg(feature = "http")]
 use crate::utils::get_dynamic_reader_from_url;
-use crate::utils::{get_chrom_sizes, get_dynamic_reader};
+use crate::utils::get_dynamic_reader;
+#[cfg(feature = "bigbed")]
+use crate::utils::get_chrom_sizes;
 
 ///
 /// RegionSet struct, the representation of the interval region set file,
diff --git a/gtars-core/src/utils.rs b/gtars-core/src/utils.rs
index ed26d524..2a56701c 100644
--- a/gtars-core/src/utils.rs
+++ b/gtars-core/src/utils.rs
@@ -2,14 +2,19 @@ use std::collections::HashMap;
 use std::ffi::OsStr;
 use std::fs::File;
 use std::io::prelude::*;
-use std::io::{BufRead, BufReader, Cursor};
+use std::io::{BufRead, BufReader};
+#[cfg(feature = "http")]
+use std::io::Cursor;
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
 
 use anyhow::{Context, Result};
-use flate2::read::{GzDecoder, MultiGzDecoder};
+#[cfg(feature = "http")]
+use flate2::read::GzDecoder;
+use flate2::read::MultiGzDecoder;
 #[cfg(feature = "http")]
 use reqwest::blocking::Client;
+#[cfg(feature = "http")]
 use std::error::Error;
 
 use crate::models::region::Region;
diff --git a/gtars-python/README.md b/gtars-python/README.md
new file mode 100644
index 00000000..aff2af7f
--- /dev/null
+++ b/gtars-python/README.md
@@ -0,0 +1,45 @@
+# gtars
+
+This is a Python package that wraps the `gtars` crate so you can call gtars code from Python.
+
+Documentation for Python bindings is hosted at: https://docs.bedbase.org/gtars/
+
+## Brief instructions
+
+To install the development version, you'll have to build it locally.
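+If you don't have a local checkout yet, clone the repository first and work from the `gtars-python` directory (a sketch; the URL matches the releases link in the top-level README):
+
+```console
+git clone https://github.com/databio/gtars
+cd gtars/gtars-python
+```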
+The easiest way to build and install is with pip, which reads the build configuration in `pyproject.toml`:
+
+```
+python -m pip install -e .
+```
+
+Or, you can have more control over it in two steps using `maturin` directly.
+Build Python bindings like this:
+
+```console
+cd gtars-python
+python_version=$(python --version | awk '{print $2}' | cut -d '.' -f1-2 )
+maturin build --interpreter $python_version --release
+```
+
+
+Then install the local wheel that was just built:
+
+```console
+gtars_version=`grep '^version =' Cargo.toml | cut -d '"' -f 2`
+python_version_nodot=$(python --version | awk '{print $2}' | cut -d '.' -f1-2 | tr -d '.')
+ls -l ../target/wheels/gtars*
+wheel_path=$(find ../target/wheels/gtars-${gtars_version}-cp${python_version_nodot}-cp${python_version_nodot}-manylinux*.whl)
+echo $wheel_path
+pip install --force-reinstall ${wheel_path}
+```
+
+
+## Importing into Python
+
+Once installed, you can import and use the package in Python. For example:
+
+```
+from gtars import refget
+sc = refget.digest_fasta("../tests/data/fasta/base.fa")
+sc2 = refget.load_fasta("../tests/data/fasta/base.fa")
+```
diff --git a/gtars-python/py_src/gtars/refget/__init__.pyi b/gtars-python/py_src/gtars/refget/__init__.pyi
index 3aa41f12..c6536955 100644
--- a/gtars-python/py_src/gtars/refget/__init__.pyi
+++ b/gtars-python/py_src/gtars/refget/__init__.pyi
@@ -1,3 +1,18 @@
+"""Type stubs and documentation for the gtars.refget module.
+
+This file serves two purposes:
+
+1. **Type Hints**: Provides type annotations for IDE autocomplete and static
+   type checking tools like mypy.
+
+2. **Documentation**: Contains Google-style docstrings that mkdocstrings uses
+   to generate the API reference documentation website.
+
+Note: The actual implementation is in Rust (gtars-python/src/refget/mod.rs)
+and compiled via PyO3. This stub file provides the Python interface definition
+and structured documentation that tools can parse properly.
+"""
+
 from typing import Union, Optional, List, Type
 from enum import Enum
 from os import PathLike
@@ -93,74 +108,422 @@ class StorageMode(Enum):
     Encoded: int
 
 class GlobalRefgetStore:
-    """
-    A global store for refget sequences, allowing import, retrieval, and storage operations.
+    """A global store for GA4GH refget sequences with lazy-loading support.
+
+    GlobalRefgetStore provides content-addressable storage for reference genome
+    sequences following the GA4GH refget specification. Supports both local and
+    remote stores with on-demand sequence loading.
+
+    Attributes:
+        cache_path: Local directory path where the store is located or cached.
+            None for in-memory stores.
+        remote_url: Remote URL of the store if loaded remotely, None otherwise.
+
+    Examples:
+        Create a new store and import sequences::
+
+            from gtars.refget import GlobalRefgetStore, StorageMode
+            store = GlobalRefgetStore(StorageMode.Encoded)
+            store.import_fasta("genome.fa")
+
+        Load an existing local store::
+
+            store = GlobalRefgetStore.load_local("/data/hg38")
+            seq = store.get_substring("chr1_digest", 0, 1000)
+
+        Load a remote store with caching::
+
+            store = GlobalRefgetStore.load_remote(
+                "/local/cache",
+                "https://example.com/hg38"
+            )
     """
-    def __init__(self, mode: StorageMode) -> None: ...
-    def import_fasta(self, file_path: Union[str, PathLike]) -> None:
+
+    cache_path: Optional[str]
+    remote_url: Optional[str]
+
+    def __init__(self, mode: StorageMode) -> None:
+        """Create a new empty GlobalRefgetStore.
+ + Args: + mode: Storage mode - StorageMode.Raw (uncompressed) or + StorageMode.Encoded (bit-packed, space-efficient). + + Example:: + + store = GlobalRefgetStore(StorageMode.Encoded) """ - Import a fasta into the GlobalRefgetStore + ... + + @classmethod + def load_local(cls, cache_path: Union[str, PathLike]) -> "GlobalRefgetStore": + """Load a local RefgetStore from a directory. + + Loads metadata from the local store immediately; sequence data is loaded + on-demand when first accessed. This is efficient for large genomes where + you may only need specific sequences. + + Args: + cache_path: Local directory containing the refget store (must have + index.json and sequences.farg files). + + Returns: + GlobalRefgetStore with metadata loaded, sequences lazy-loaded. + + Raises: + IOError: If the store directory or index files cannot be read. + + Example:: + + store = GlobalRefgetStore.load_local("/data/hg38_store") + seq = store.get_substring("chr1_digest", 0, 1000) """ ... - def get_sequence_by_id(self, digest: str) -> Optional[SequenceRecord]: + + @classmethod + def load_remote( + cls, cache_path: Union[str, PathLike], remote_url: str + ) -> "GlobalRefgetStore": + """Load a remote RefgetStore with local caching. + + Fetches metadata (index.json, sequences.farg) from a remote URL immediately. + Sequence data (.seq files) are downloaded on-demand when first accessed and + cached locally. This is ideal for working with large remote genomes where + you only need specific sequences. + + Args: + cache_path: Local directory to cache downloaded metadata and sequences. + Created if it doesn't exist. + remote_url: Base URL of the remote refget store (e.g., + "https://example.com/hg38" or "s3://bucket/hg38"). + + Returns: + GlobalRefgetStore with metadata loaded, sequences fetched on-demand. + + Raises: + IOError: If remote metadata cannot be fetched or cache cannot be written. + + Example:: + + store = GlobalRefgetStore.load_remote( + "/data/cache/hg38", + "https://refget-server.com/hg38" + ) + # First access fetches from remote and caches + seq = store.get_substring("chr1_digest", 0, 1000) + # Second access uses cache + seq2 = store.get_substring("chr1_digest", 1000, 2000) """ - Retrieves a sequence record by its SHA512t24u or MD5 digest. + ... + + def import_fasta(self, file_path: Union[str, PathLike]) -> None: + """Import sequences from a FASTA file into the store. + + Reads all sequences from a FASTA file and adds them to the store. + Computes GA4GH digests and creates a sequence collection. + + Args: + file_path: Path to the FASTA file. + + Raises: + IOError: If the file cannot be read or parsed. + + Example:: + + store = GlobalRefgetStore(StorageMode.Encoded) + store.import_fasta("genome.fa") """ ... + + def get_sequence_by_id(self, digest: str) -> Optional[SequenceRecord]: + """Retrieve a sequence record by its digest (SHA-512/24u or MD5). + + Searches for a sequence by its GA4GH SHA-512/24u digest. If not found + and the input looks like an MD5 digest (32 hex characters), tries MD5 lookup. + + Args: + digest: Sequence digest (SHA-512/24u base64url or MD5 hex string). + + Returns: + The sequence record if found, None otherwise. + + Example:: + + record = store.get_sequence_by_id("aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2") + if record: + print(f"Found: {record.metadata.name}") + """ + ... 
+ def get_sequence_by_collection_and_name( self, collection_digest: str, sequence_name: str ) -> Optional[SequenceRecord]: - """ - Retrieve a SequenceRecord from the store by its collection digest and name - """ + """Retrieve a sequence by collection digest and sequence name. + Looks up a sequence within a specific collection using its name + (e.g., "chr1", "chrM"). This is useful when you know the genome assembly + (collection) and chromosome name. + + Args: + collection_digest: The collection's SHA-512/24u digest. + sequence_name: Name of the sequence within that collection. + + Returns: + The sequence record if found, None otherwise. + + Example:: + + record = store.get_sequence_by_collection_and_name( + "uC_UorBNf3YUu1YIDainBhI94CedlNeH", + "chr1" + ) + """ ... + def get_substring(self, seq_digest: str, start: int, end: int) -> Optional[str]: - """ - Retrieves a substring from an encoded sequence by its SHA512t24u digest. + """Extract a substring from a sequence. + + Retrieves a specific region from a sequence using 0-based, half-open + coordinates [start, end). Automatically loads sequence data if not + already cached (for lazy-loaded stores). + Args: - seq_digest - str - the path to import from - start - int - The start index of the substring (inclusive) - end - int - The end index of the substring (exclusive) + seq_digest: Sequence digest (SHA-512/24u). + start: Start position (0-based, inclusive). + end: End position (0-based, exclusive). + Returns: - substring - str - returns substring if found, None if not found + The substring sequence if found, None otherwise. + + Example:: + + # Get first 1000 bases of chr1 + seq = store.get_substring("chr1_digest", 0, 1000) + print(f"First 50bp: {seq[:50]}") + """ + ... + + def list_sequences(self) -> List[SequenceMetadata]: + """List all sequence metadata in the store. + Returns: + List of metadata for all sequences in the store. """ ... + + def list_collections(self) -> List[SequenceCollection]: + """List all sequence collections in the store. + + Returns: + List of all sequence collections. + """ + ... + def write_store_to_directory( self, root_path: Union[str, PathLike], seqdata_path_template: str ) -> None: + """Write the store to a directory on disk. + + Persists the store with all sequences and metadata to disk using the + RefgetStore directory format. + + Args: + root_path: Directory path to write the store to. + seqdata_path_template: Path template for sequence files (e.g., + "sequences/%s2/%s.seq" where %s2 = first 2 chars of digest, + %s = full digest). + + Example:: + + store.write_store_to_directory( + "/data/my_store", + "sequences/%s2/%s.seq" + ) """ - Write a GlobalRefgetStore object to a directory + ... + + def get_seqs_bed_file( + self, + collection_digest: str, + bed_file_path: Union[str, PathLike], + output_fasta_path: Union[str, PathLike], + ) -> None: + """Extract sequences for BED regions and write to FASTA. + + Args: + collection_digest: Collection digest to look up sequence names. + bed_file_path: Path to BED file with regions. + output_fasta_path: Path to write output FASTA file. """ ... - @classmethod - def load_from_directory( - cls: Type["GlobalRefgetStore"], root_path: Union[str, PathLike] - ) -> "GlobalRefgetStore": + + def get_seqs_bed_file_to_vec( + self, collection_digest: str, bed_file_path: Union[str, PathLike] + ) -> List[RetrievedSequence]: + """Extract sequences for BED regions and return as list. + + Args: + collection_digest: Collection digest to look up sequence names. 
+ bed_file_path: Path to BED file with regions. + + Returns: + List of retrieved sequence segments. """ - Load a GlobalRefgetStore from a directory path + ... + + def export_fasta( + self, + collection_digest: str, + output_path: Union[str, PathLike], + sequence_names: Optional[List[str]] = None, + line_width: Optional[int] = None, + ) -> None: + """Export sequences from a collection to a FASTA file. + + Args: + collection_digest: Collection to export from. + output_path: Path to write FASTA file. + sequence_names: Optional list of sequence names to export. If None, + exports all sequences in the collection. + line_width: Optional line width for wrapping sequences. If None, + uses default of 80. """ + ... + def export_fasta_by_digests( + self, + digests: List[str], + output_path: Union[str, PathLike], + line_width: Optional[int] = None, + ) -> None: + """Export sequences by their digests to a FASTA file. + + Args: + digests: List of sequence digests to export. + output_path: Path to write FASTA file. + line_width: Optional line width for wrapping sequences. If None, + uses default of 80. + """ ... + def __str__(self) -> str: ... def __repr__(self) -> str: ... def sha512t24u_digest(readable: Union[str, bytes]) -> str: - """ - Computes the GA4GH SHA512t24u digest for a given string or bytes. + """Compute the GA4GH SHA-512/24u digest for a sequence. + + This function computes the GA4GH refget standard digest (truncated SHA-512, + base64url encoded) for a given sequence string or bytes. + + Args: + readable: Input sequence as str or bytes. + + Returns: + The SHA-512/24u digest (32 character base64url string). + + Raises: + TypeError: If input is not str or bytes. + + Example:: + from gtars.refget import sha512t24u_digest + digest = sha512t24u_digest("ACGT") + print(digest) + # Output: 'aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2' """ ... def md5_digest(readable: Union[str, bytes]) -> str: - """ - Computes the MD5 digest for a given string or bytes. + """Compute the MD5 digest for a sequence. + + This function computes the MD5 hash for a given sequence string or bytes. + MD5 is supported for backward compatibility with legacy systems. + + Args: + readable: Input sequence as str or bytes. + + Returns: + The MD5 digest (32 character hexadecimal string). + + Raises: + TypeError: If input is not str or bytes. + + Example:: + from gtars.refget import md5_digest + digest = md5_digest("ACGT") + print(digest) + # Output: 'f1f8f4bf413b16ad135722aa4591043e' """ ... def digest_fasta(fasta: Union[str, PathLike]) -> SequenceCollection: + """Digest all sequences in a FASTA file and compute collection-level digests. + + This function reads a FASTA file and computes GA4GH-compliant digests for + each sequence, as well as collection-level digests (Level 1 and Level 2) + following the GA4GH refget specification. + + Args: + fasta: Path to FASTA file (str or PathLike). + + Returns: + Collection containing all sequences with their metadata and computed digests. + + Raises: + IOError: If the FASTA file cannot be read or parsed. + + Example:: + from gtars.refget import digest_fasta + collection = digest_fasta("genome.fa") + print(f"Collection digest: {collection.digest}") + print(f"Number of sequences: {len(collection)}") + """ + ... + +def compute_fai(fasta: Union[str, PathLike]) -> List["FaiRecord"]: + """Compute FASTA index (FAI) metadata for all sequences in a FASTA file. 
+
+    This function computes the FAI index metadata (offset, line_bases, line_bytes)
+    for each sequence in a FASTA file, compatible with samtools faidx format.
+    Only works with uncompressed FASTA files.
+
+    Args:
+        fasta: Path to FASTA file (str or PathLike). Must be uncompressed.
+
+    Returns:
+        List of FAI records, one per sequence, containing name, length,
+        and FAI metadata (offset, line_bases, line_bytes).
+
+    Raises:
+        IOError: If the FASTA file cannot be read or is compressed.
+
+    Example::
+        from gtars.refget import compute_fai
+        fai_records = compute_fai("genome.fa")
+        for record in fai_records:
+            print(f"{record.name}: {record.length} bp")
     """
-    Digests a FASTA file and returns a SequenceCollection object.
+    ...
+
+def load_fasta(fasta: Union[str, PathLike]) -> SequenceCollection:
+    """Load a FASTA file with sequence data into a SequenceCollection.
+
+    This function reads a FASTA file and loads all sequences with their data
+    into memory. Unlike digest_fasta(), this includes the actual sequence data,
+    not just metadata.
+
+    Args:
+        fasta: Path to FASTA file (str or PathLike).
+
+    Returns:
+        Collection containing all sequences with their metadata and sequence data loaded.
+
+    Raises:
+        IOError: If the FASTA file cannot be read or parsed.
+
+    Example::
+        from gtars.refget import load_fasta
+        collection = load_fasta("genome.fa")
+        first_seq = collection[0]
+        print(f"Sequence: {first_seq.sequence[:50]}...")
     """
     ...
diff --git a/gtars-python/src/refget/mod.rs b/gtars-python/src/refget/mod.rs
index 28d3746e..27de93cb 100644
--- a/gtars-python/src/refget/mod.rs
+++ b/gtars-python/src/refget/mod.rs
@@ -8,13 +8,18 @@ use pyo3::types::{PyBytes, PyString, PyType};
 
 use gtars_refget::alphabet::AlphabetType;
 use gtars_refget::collection::{
-    SeqColDigestLvl1, SequenceCollection, SequenceMetadata, SequenceRecord,
+    FaiMetadata, SeqColDigestLvl1, SequenceCollection, SequenceMetadata, SequenceRecord,
 };
 use gtars_refget::digest::{md5, sha512t24u};
+use gtars_refget::fasta::FaiRecord;
 use gtars_refget::store::GlobalRefgetStore;
 use gtars_refget::store::StorageMode;
 // use gtars::refget::store::RetrievedSequence; // This is the Rust-native struct
 
+/// Compute the GA4GH SHA-512/24u digest for a sequence.
+///
+/// Accepts either a string or bytes and returns the truncated SHA-512 digest
+/// as a 32-character base64url string.
 #[pyfunction]
 pub fn sha512t24u_digest(readable: &Bound<'_, PyAny>) -> PyResult<String> {
     if let Ok(s) = readable.cast::<PyString>() {
@@ -26,6 +31,10 @@ pub fn md5_digest(readable: &Bound<'_, PyAny>) -> PyResult<String> {
     }
 }
 
+/// Compute the MD5 digest for a sequence.
+///
+/// Accepts either a string or bytes and returns the MD5 hash as a 32-character
+/// hexadecimal string. Supported for backward compatibility with legacy systems.
 #[pyfunction]
 pub fn md5_digest(readable: &Bound<'_, PyAny>) -> PyResult<String> {
     if let Ok(s) = readable.cast::<PyString>() {
@@ -37,8 +46,10 @@
     }
 }
 
-// This can take either a PosixPath or a string
-// The `&Bound<'_, PyAny>` references any Python object, bound to the Python runtime.
+/// Read a FASTA file and compute GA4GH digests for all sequences.
+///
+/// Returns a SequenceCollection with computed sequence-level and collection-level
+/// digests following the GA4GH refget specification.
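+///
+/// A minimal Python usage sketch (mirrors the stub docstring; the path is
+/// illustrative):
+///
+/// ```python
+/// from gtars.refget import digest_fasta
+/// collection = digest_fasta("genome.fa")
+/// print(collection.digest)
+/// ```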
 #[pyfunction]
 pub fn digest_fasta(fasta: &Bound<'_, PyAny>) -> PyResult<PySequenceCollection> {
     let fasta = fasta.to_string();
@@ -51,6 +62,38 @@
     }
 }
 
+/// Compute FASTA index (FAI) metadata for all sequences in a FASTA file.
+///
+/// Returns a list of FAI records compatible with samtools faidx format.
+/// Only works with uncompressed FASTA files.
+#[pyfunction]
+pub fn compute_fai(fasta: &Bound<'_, PyAny>) -> PyResult<Vec<PyFaiRecord>> {
+    let fasta = fasta.to_string();
+    match gtars_refget::fasta::compute_fai(&fasta) {
+        Ok(fai_records) => Ok(fai_records.into_iter().map(PyFaiRecord::from).collect()),
+        Err(e) => Err(PyErr::new::<pyo3::exceptions::PyIOError, _>(format!(
+            "Error computing FAI: {}",
+            e
+        ))),
+    }
+}
+
+/// Load a FASTA file with sequence data into a SequenceCollection.
+///
+/// Unlike digest_fasta(), this loads the actual sequence data into memory,
+/// not just metadata and digests.
+#[pyfunction]
+pub fn load_fasta(fasta: &Bound<'_, PyAny>) -> PyResult<PySequenceCollection> {
+    let fasta = fasta.to_string();
+    match gtars_refget::fasta::load_fasta(&fasta) {
+        Ok(sequence_collection) => Ok(PySequenceCollection::from(sequence_collection)),
+        Err(e) => Err(PyErr::new::<pyo3::exceptions::PyIOError, _>(format!(
+            "Error loading FASTA file: {}",
+            e
+        ))),
+    }
+}
+
 #[pyclass(name = "AlphabetType")]
 #[derive(Clone)]
 pub enum PyAlphabetType {
@@ -75,6 +118,30 @@ pub struct PySequenceMetadata {
     pub md5: String,
     #[pyo3(get, set)]
     pub alphabet: PyAlphabetType,
+    #[pyo3(get, set)]
+    pub fai: Option<PyFaiMetadata>,
+}
+
+#[pyclass(name = "FaiMetadata")]
+#[derive(Clone)]
+pub struct PyFaiMetadata {
+    #[pyo3(get, set)]
+    pub offset: u64,
+    #[pyo3(get, set)]
+    pub line_bases: u32,
+    #[pyo3(get, set)]
+    pub line_bytes: u32,
+}
+
+#[pyclass(name = "FaiRecord")]
+#[derive(Clone)]
+pub struct PyFaiRecord {
+    #[pyo3(get, set)]
+    pub name: String,
+    #[pyo3(get, set)]
+    pub length: usize,
+    #[pyo3(get, set)]
+    pub fai: Option<PyFaiMetadata>,
 }
 
 #[pyclass(name = "SequenceRecord")]
@@ -83,7 +150,7 @@ pub struct PySequenceRecord {
     #[pyo3(get, set)]
     pub metadata: PySequenceMetadata,
     #[pyo3(get, set)]
-    pub data: Option<Vec<u8>>,
+    pub sequence: Option<Vec<u8>>,
 }
 
 #[pyclass(name = "SeqColDigestLvl1")]
@@ -108,8 +175,6 @@ pub struct PySequenceCollection {
     pub lvl1: PySeqColDigestLvl1,
     #[pyo3(get, set)]
     pub file_path: Option<String>,
-    #[pyo3(get, set)]
-    pub has_data: bool,
 }
 
 #[pyclass(name = "RetrievedSequence")]
@@ -182,7 +247,10 @@ impl PyAlphabetType {
 #[pymethods]
 impl PySequenceMetadata {
     fn __repr__(&self) -> String {
-        format!("<SequenceMetadata for {}>", self.name)
+        format!(
+            "SequenceMetadata(name='{}', length={}, sha512t24u='{}', md5='{}', alphabet={})",
+            self.name, self.length, self.sha512t24u, self.md5, self.alphabet.__str__()
+        )
     }
 
     fn __str__(&self) -> PyResult<String> {
@@ -193,21 +261,111 @@
     }
 }
 
+#[pymethods]
+impl PyFaiMetadata {
+    fn __repr__(&self) -> String {
+        format!("<FaiMetadata offset={} line_bases={} line_bytes={}>",
+            self.offset, self.line_bases, self.line_bytes)
+    }
+
+    fn __str__(&self) -> String {
+        format!(
+            "FaiMetadata:\n offset: {}\n line_bases: {}\n line_bytes: {}",
+            self.offset, self.line_bases, self.line_bytes
+        )
+    }
+}
+
+#[pymethods]
+impl PyFaiRecord {
+    fn __repr__(&self) -> String {
+        format!("<FaiRecord name={} length={}>", self.name, self.length)
+    }
+
+    fn __str__(&self) -> String {
+        let fai_str = if let Some(ref fai) = self.fai {
+            format!("\n FAI offset: {}\n FAI line_bases: {}\n FAI line_bytes: {}",
+                fai.offset, fai.line_bases, fai.line_bytes)
+        } else {
+            "\n FAI: None (gzipped file)".to_string()
+        };
+        format!("FaiRecord:\n name: {}\n length: {}{}",
+            self.name, self.length, fai_str)
+    }
+}
+
 #[pymethods]
 impl PySequenceRecord {
     fn __repr__(&self) -> String {
-        format!("<SequenceRecord for {}>", self.metadata.name)
+        format!(
+            "SequenceRecord(name='{}', length={}, sha512t24u='{}', has_data={})",
+            self.metadata.name,
+            self.metadata.length,
+            self.metadata.sha512t24u,
+            self.sequence.is_some()
+        )
     }
 
     fn __str__(&self) -> String {
         format!("SequenceRecord for {}", self.metadata.name)
     }
+
+    /// Decode the sequence data to a string.
+    ///
+    /// This method decodes the sequence data stored in this record. It handles
+    /// both raw (uncompressed UTF-8) and encoded (bit-packed) data automatically
+    /// based on the alphabet type.
+    ///
+    /// Returns:
+    ///     Optional[str]: The decoded sequence string if data is loaded, None otherwise.
+    ///
+    /// Example:
+    ///     >>> record = store.get_sequence_by_collection_and_name(digest, "chr1")
+    ///     >>> sequence = record.decode()
+    ///     >>> if sequence:
+    ///     ...     print(f"First 50 bases: {sequence[:50]}")
    pub fn decode(&self) -> Option<String> {
+        // Convert PySequenceRecord to Rust SequenceRecord
+        let metadata = SequenceMetadata {
+            name: self.metadata.name.clone(),
+            length: self.metadata.length,
+            sha512t24u: self.metadata.sha512t24u.clone(),
+            md5: self.metadata.md5.clone(),
+            alphabet: match self.metadata.alphabet {
+                PyAlphabetType::Dna2bit => AlphabetType::Dna2bit,
+                PyAlphabetType::Dna3bit => AlphabetType::Dna3bit,
+                PyAlphabetType::DnaIupac => AlphabetType::DnaIupac,
+                PyAlphabetType::Protein => AlphabetType::Protein,
+                PyAlphabetType::Ascii => AlphabetType::Ascii,
+                PyAlphabetType::Unknown => AlphabetType::Unknown,
+            },
+            fai: self.metadata.fai.as_ref().map(|fai| FaiMetadata {
+                offset: fai.offset,
+                line_bases: fai.line_bases,
+                line_bytes: fai.line_bytes,
+            }),
+        };
+
+        let rust_record = match &self.sequence {
+            None => SequenceRecord::Stub(metadata),
+            Some(seq) => SequenceRecord::Full {
+                metadata,
+                sequence: seq.clone(),
+            },
+        };
+
+        // Call the Rust decode method
+        rust_record.decode()
+    }
 }
 
 #[pymethods]
 impl PySeqColDigestLvl1 {
     fn __repr__(&self) -> String {
-        "<SeqColDigestLvl1>".to_string()
+        format!(
+            "SeqColDigestLvl1(sequences='{}', names='{}', lengths='{}')",
+            self.sequences_digest, self.names_digest, self.lengths_digest
+        )
     }
 
     fn __str__(&self) -> String {
@@ -222,8 +380,9 @@
 impl PySequenceCollection {
     fn __repr__(&self) -> String {
         format!(
-            "<SequenceCollection with {} sequences>",
-            self.sequences.len()
+            "SequenceCollection(n_sequences={}, digest='{}')",
+            self.sequences.len(),
+            self.digest
         )
     }
 
@@ -253,6 +412,103 @@
         ))
     }
+
+    /// Write the collection to a FASTA file.
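+    ///
+    /// Note: every record must have sequence data loaded (a Full record, e.g.
+    /// produced by load_fasta()); records without data make this raise IOError.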
+    ///
+    /// Args:
+    ///     file_path (str): Path to the output FASTA file
+    ///     line_width (int, optional): Number of bases per line (default: 70)
+    ///
+    /// Raises:
+    ///     IOError: If any sequence doesn't have data loaded
+    ///
+    /// Example:
+    ///     >>> collection = load_fasta("genome.fa")
+    ///     >>> collection.write_fasta("output.fa")
+    ///     >>> collection.write_fasta("output.fa", line_width=60)
+    fn write_fasta(&self, file_path: &str, line_width: Option<usize>) -> PyResult<()> {
+        // Convert Python sequences back to Rust SequenceCollection
+        let rust_collection = SequenceCollection {
+            sequences: self.sequences.iter().map(|py_rec| {
+                let metadata = SequenceMetadata {
+                    name: py_rec.metadata.name.clone(),
+                    length: py_rec.metadata.length,
+                    sha512t24u: py_rec.metadata.sha512t24u.clone(),
+                    md5: py_rec.metadata.md5.clone(),
+                    alphabet: match &py_rec.metadata.alphabet {
+                        PyAlphabetType::Dna2bit => AlphabetType::Dna2bit,
+                        PyAlphabetType::Dna3bit => AlphabetType::Dna3bit,
+                        PyAlphabetType::DnaIupac => AlphabetType::DnaIupac,
+                        PyAlphabetType::Protein => AlphabetType::Protein,
+                        PyAlphabetType::Ascii => AlphabetType::Ascii,
+                        PyAlphabetType::Unknown => AlphabetType::Unknown,
+                    },
+                    fai: py_rec.metadata.fai.as_ref().map(|f| FaiMetadata {
+                        offset: f.offset,
+                        line_bases: f.line_bases,
+                        line_bytes: f.line_bytes,
+                    }),
+                };
+
+                match &py_rec.sequence {
+                    None => SequenceRecord::Stub(metadata),
+                    Some(seq) => SequenceRecord::Full {
+                        metadata,
+                        sequence: seq.clone(),
+                    },
+                }
+            }).collect(),
+            digest: self.digest.clone(),
+            lvl1: SeqColDigestLvl1 {
+                sequences_digest: self.lvl1.sequences_digest.clone(),
+                names_digest: self.lvl1.names_digest.clone(),
+                lengths_digest: self.lvl1.lengths_digest.clone(),
+            },
+            file_path: None,
+        };
+
+        rust_collection
+            .write_fasta(file_path, line_width)
+            .map_err(|e| PyErr::new::<pyo3::exceptions::PyIOError, _>(format!(
+                "Failed to write FASTA: {}",
+                e
+            )))
+    }
+
+    /// Iterate over sequences in the collection.
+    ///
+    /// Allows Pythonic iteration: `for seq in collection:`
+    ///
+    /// Yields:
+    ///     SequenceRecord: Each sequence record in the collection.
+    ///
+    /// Example:
+    ///     >>> collection = digest_fasta("genome.fa")
+    ///     >>> for seq in collection:
+    ///     ...     print(f"{seq.metadata.name}: {seq.metadata.length} bp")
+    fn __iter__(slf: PyRef<'_, Self>) -> PyResult<Py<PySequenceCollectionIterator>> {
+        let py = slf.py();
+        Py::new(py, PySequenceCollectionIterator {
+            iter: slf.sequences.clone().into_iter(),
+        })
+    }
 }
 
+/// Iterator for SequenceCollection - simple wrapper around Vec iterator.
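+/// Created by PySequenceCollection::__iter__; each __next__ call hands one
+/// cloned PySequenceRecord back to Python.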
+#[pyclass]
+pub struct PySequenceCollectionIterator {
+    iter: std::vec::IntoIter<PySequenceRecord>,
+}
+
+#[pymethods]
+impl PySequenceCollectionIterator {
+    fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+        slf
+    }
+
+    fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<PySequenceRecord> {
+        slf.iter.next()
+    }
+}
 
 // Conversion from Rust AlphabetType to Python PyAlphabetType
@@ -269,6 +525,28 @@
     }
 }
 
+// Conversion from Rust FaiMetadata to Python PyFaiMetadata
+impl From<FaiMetadata> for PyFaiMetadata {
+    fn from(value: FaiMetadata) -> Self {
+        PyFaiMetadata {
+            offset: value.offset,
+            line_bases: value.line_bases,
+            line_bytes: value.line_bytes,
+        }
+    }
+}
+
+// Conversion from Rust FaiRecord to Python PyFaiRecord
+impl From<FaiRecord> for PyFaiRecord {
+    fn from(value: FaiRecord) -> Self {
+        PyFaiRecord {
+            name: value.name,
+            length: value.length,
+            fai: value.fai.map(PyFaiMetadata::from),
+        }
+    }
+}
+
 // Conversion from Rust SequenceMetadata to Python PySequenceMetadata
 impl From<SequenceMetadata> for PySequenceMetadata {
     fn from(value: SequenceMetadata) -> Self {
@@ -278,6 +556,7 @@
             sha512t24u: value.sha512t24u,
             md5: value.md5,
             alphabet: PyAlphabetType::from(value.alphabet),
+            fai: value.fai.map(PyFaiMetadata::from),
         }
     }
 }
@@ -285,9 +564,15 @@
 // Conversion from Rust SequenceRecord to Python PySequenceRecord
 impl From<SequenceRecord> for PySequenceRecord {
     fn from(value: SequenceRecord) -> Self {
-        PySequenceRecord {
-            metadata: PySequenceMetadata::from(value.metadata),
-            data: value.data,
+        match value {
+            SequenceRecord::Stub(metadata) => PySequenceRecord {
+                metadata: PySequenceMetadata::from(metadata),
+                sequence: None,
+            },
+            SequenceRecord::Full { metadata, sequence } => PySequenceRecord {
+                metadata: PySequenceMetadata::from(metadata),
+                sequence: Some(sequence),
+            },
         }
     }
 }
@@ -315,7 +600,6 @@
             digest: value.digest,
             lvl1: PySeqColDigestLvl1::from(value.lvl1),
             file_path: value.file_path.map(|p| p.to_string_lossy().to_string()),
-            has_data: value.has_data,
         }
     }
 }
@@ -352,20 +636,137 @@ pub struct PyGlobalRefgetStore {
 
 #[pymethods]
 impl PyGlobalRefgetStore {
-    #[new]
-    fn new(mode: PyStorageMode) -> Self {
+    /// Create a disk-backed RefgetStore.
+    ///
+    /// Sequences are written to disk immediately and loaded on-demand (lazy loading).
+    /// Only metadata is kept in memory.
+    ///
+    /// Args:
+    ///     cache_path (str or Path): Directory for storing sequences and metadata
+    ///
+    /// Returns:
+    ///     GlobalRefgetStore: A configured disk-backed store
+    ///
+    /// Example:
+    ///     >>> from gtars.refget import GlobalRefgetStore
+    ///     >>> store = GlobalRefgetStore.on_disk("/data/store")
+    ///     >>> store.add_sequence_collection_from_fasta("genome.fa")
+    #[classmethod]
+    fn on_disk(_cls: &Bound<'_, PyType>, cache_path: &Bound<'_, PyAny>) -> PyResult<Self> {
+        let cache_path = cache_path.to_string();
+        let store = GlobalRefgetStore::on_disk(cache_path).map_err(|e| {
+            PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("Error with disk-backed store: {}", e))
+        })?;
+        Ok(Self { inner: store })
+    }
+
+    /// Create an in-memory RefgetStore.
+    ///
+    /// All sequences kept in RAM for fast access.
+    /// Defaults to Encoded storage mode (2-bit packing for space efficiency).
+    /// Use set_mode() to change storage mode after creation.
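+    /// (Encoded mode packs DNA at 2 bits per base, roughly a quarter of the raw
+    /// byte size; see AlphabetType::bits_per_symbol and SequenceMetadata::disk_size.)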
+    ///
+    /// Returns:
+    ///     GlobalRefgetStore: A new in-memory store
+    ///
+    /// Example:
+    ///     >>> from gtars.refget import GlobalRefgetStore
+    ///     >>> store = GlobalRefgetStore.in_memory()
+    ///     >>> store.add_sequence_collection_from_fasta("genome.fa")
+    #[classmethod]
+    fn in_memory(_cls: &Bound<'_, PyType>) -> Self {
         Self {
-            inner: GlobalRefgetStore::new(mode.into()),
+            inner: GlobalRefgetStore::in_memory(),
         }
     }
 
-    fn import_fasta(&mut self, file_path: &str) -> PyResult<()> {
-        self.inner.import_fasta(file_path).map_err(|e| {
+    /// Change the storage mode, re-encoding/decoding existing sequences as needed.
+    ///
+    /// When switching from Raw to Encoded:
+    /// - All Full sequences in memory are encoded (2-bit packed)
+    ///
+    /// When switching from Encoded to Raw:
+    /// - All Full sequences in memory are decoded back to raw bytes
+    ///
+    /// Args:
+    ///     mode: The storage mode to switch to (StorageMode.Raw or StorageMode.Encoded)
+    ///
+    /// Example:
+    ///     >>> from gtars.refget import GlobalRefgetStore, StorageMode
+    ///     >>> store = GlobalRefgetStore.in_memory()
+    ///     >>> store.set_mode(StorageMode.Raw)
+    fn set_mode(&mut self, mode: PyStorageMode) {
+        self.inner.set_mode(mode.into());
+    }
+
+    /// Enable 2-bit encoding for space efficiency.
+    /// Re-encodes any existing Raw sequences in memory.
+    ///
+    /// Example:
+    ///     >>> store = GlobalRefgetStore.in_memory()
+    ///     >>> store.disable_encoding()  # Switch to Raw
+    ///     >>> store.enable_encoding()   # Back to Encoded
+    fn enable_encoding(&mut self) {
+        self.inner.enable_encoding();
+    }
+
+    /// Disable encoding, use raw byte storage.
+    /// Decodes any existing Encoded sequences in memory.
+    ///
+    /// Example:
+    ///     >>> store = GlobalRefgetStore.in_memory()
+    ///     >>> store.disable_encoding()  # Switch to Raw mode
+    fn disable_encoding(&mut self) {
+        self.inner.disable_encoding();
+    }
+
+    /// Add a sequence collection from a FASTA file.
+    ///
+    /// Reads a FASTA file, digests the sequences, creates a SequenceCollection,
+    /// and adds it to the store along with all its sequences.
+    ///
+    /// Args:
+    ///     file_path (str or Path): Path to the FASTA file to import.
+    ///     force (bool, optional): If True, overwrite existing collections/sequences.
+    ///         If False (default), skip duplicates.
+    ///
+    /// Raises:
+    ///     IOError: If the file cannot be read or processed.
+    ///
+    /// Example:
+    ///     >>> store = GlobalRefgetStore.in_memory()
+    ///     >>> store.add_sequence_collection_from_fasta("genome.fa")  # skip duplicates
+    ///     >>> store.add_sequence_collection_from_fasta("genome.fa", force=True)  # overwrite
+    #[pyo3(signature = (file_path, force=false))]
+    fn add_sequence_collection_from_fasta(&mut self, file_path: &Bound<'_, PyAny>, force: bool) -> PyResult<()> {
+        let file_path = file_path.to_string();
+        let result = if force {
+            self.inner.add_sequence_collection_from_fasta_force(file_path)
+        } else {
+            self.inner.add_sequence_collection_from_fasta(file_path)
+        };
+        result.map_err(|e| {
             PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("Error importing FASTA: {}", e))
         })
     }
 
-    fn get_sequence_by_id(&self, digest: &str) -> PyResult<Option<PySequenceRecord>> {
+    /// Retrieve a sequence record by its digest (SHA-512/24u or MD5).
+    ///
+    /// Searches for a sequence by its GA4GH SHA-512/24u digest. If not found
+    /// and the input looks like an MD5 digest (32 hex characters), tries MD5 lookup.
+    ///
+    /// Args:
+    ///     digest: Sequence digest (SHA-512/24u base64url or MD5 hex string).
+    ///
+    /// Returns:
+    ///     Optional[SequenceRecord]: The sequence record if found, None otherwise.
+    ///
+    /// Example:
+    ///     >>> record = store.get_sequence_by_id("aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2")
+    ///     >>> if record:
+    ///     ...     print(f"Found: {record.metadata.name}")
+    fn get_sequence_by_id(&mut self, digest: &str) -> PyResult<Option<PySequenceRecord>> {
         // Try as SHA512t24u first (32 bytes)
         let result = self
             .inner
@@ -383,8 +784,26 @@
         Ok(result)
     }
 
+    /// Retrieve a sequence by collection digest and sequence name.
+    ///
+    /// Looks up a sequence within a specific collection using its name
+    /// (e.g., "chr1", "chrM"). This is useful when you know the genome assembly
+    /// (collection) and chromosome name.
+    ///
+    /// Args:
+    ///     collection_digest: The collection's SHA-512/24u digest.
+    ///     sequence_name: Name of the sequence within that collection.
+    ///
+    /// Returns:
+    ///     Optional[SequenceRecord]: The sequence record if found, None otherwise.
+    ///
+    /// Example:
+    ///     >>> record = store.get_sequence_by_collection_and_name(
+    ///     ...     "uC_UorBNf3YUu1YIDainBhI94CedlNeH",
+    ///     ...     "chr1"
+    ///     ... )
     fn get_sequence_by_collection_and_name(
-        &self,
+        &mut self,
         collection_digest: &str,
         sequence_name: &str,
     ) -> Option<PySequenceRecord> {
@@ -393,82 +812,477 @@
             .map(|record| PySequenceRecord::from(record.clone()))
     }
 
-    fn get_substring(&self, seq_digest: &str, start: usize, end: usize) -> Option<String> {
+    /// Extract a substring from a sequence.
+    ///
+    /// Retrieves a specific region from a sequence using 0-based, half-open
+    /// coordinates [start, end). Automatically loads sequence data if not
+    /// already cached (for lazy-loaded stores).
+    ///
+    /// Args:
+    ///     seq_digest: Sequence digest (SHA-512/24u).
+    ///     start: Start position (0-based, inclusive).
+    ///     end: End position (0-based, exclusive).
+    ///
+    /// Returns:
+    ///     Optional[str]: The substring sequence if found, None otherwise.
+    ///
+    /// Example:
+    ///     >>> # Get first 1000 bases of chr1
+    ///     >>> seq = store.get_substring("chr1_digest", 0, 1000)
+    ///     >>> print(f"First 50bp: {seq[:50]}")
+    fn get_substring(&mut self, seq_digest: &str, start: usize, end: usize) -> Option<String> {
         self.inner.get_substring(seq_digest, start, end)
     }
 
-    fn write_store_to_directory(
+    #[getter]
+    fn cache_path(&self) -> Option<String> {
+        self.inner.local_path().map(|p| p.display().to_string())
+    }
+
+    #[getter]
+    fn remote_url(&self) -> Option<String> {
+        self.inner.remote_source().map(|s| s.to_string())
+    }
+
+    #[getter]
+    fn storage_mode(&self) -> PyStorageMode {
+        self.inner.storage_mode().into()
+    }
+
+    /// Returns a list of sequence metadata for all sequences in the store.
+    ///
+    /// This is a lightweight operation that returns only metadata (name, length, digests)
+    /// without loading sequence data. Use `sequence_records()` if you need full records.
+    ///
+    /// Returns:
+    ///     list[SequenceMetadata]: List of sequence metadata objects.
+    ///
+    /// Example:
+    ///     >>> for metadata in store.sequence_metadata():
+    ///     ...     print(f"{metadata.name}: {metadata.length} bp")
+    fn sequence_metadata(&self) -> Vec<PySequenceMetadata> {
+        self.inner
+            .sequence_metadata()
+            .map(|meta| PySequenceMetadata::from(meta.clone()))
+            .collect()
+    }
+
+    /// Returns a list of complete sequence records from the store.
+    ///
+    /// This returns full `SequenceRecord` objects including both metadata and sequence data.
+    /// Use `sequence_metadata()` if you only need metadata (more efficient).
+    ///
+    /// Returns:
+    ///     list[SequenceRecord]: List of sequence record objects.
+    ///
+    /// Example:
+    ///     >>> for record in store.sequence_records():
+    ///     ...     print(f"{record.metadata.name}: {record.decode()}")
+    fn sequence_records(&self) -> Vec<PySequenceRecord> {
+        self.inner
+            .sequence_records()
+            .map(|rec| PySequenceRecord::from(rec.clone()))
+            .collect()
+    }
+
+    fn collections(&self) -> Vec<PySequenceCollection> {
+        self.inner
+            .collections()
+            .map(|col| PySequenceCollection::from(col.clone()))
+            .collect()
+    }
+
+    /// Returns statistics about the store.
+    ///
+    /// Returns:
+    ///     dict: Dictionary with keys 'n_sequences', 'n_collections', 'storage_mode'
+    ///
+    /// Example:
+    ///     >>> stats = store.stats()
+    ///     >>> print(f"Store has {stats['n_sequences']} sequences in {stats['n_collections']} collections")
+    ///     >>> print(f"Storage mode: {stats['storage_mode']}")
+    fn stats(&self) -> std::collections::HashMap<String, String> {
+        let (n_sequences, n_collections, mode_str) = self.inner.stats();
+        let mut stats = std::collections::HashMap::new();
+        stats.insert("n_sequences".to_string(), n_sequences.to_string());
+        stats.insert("n_collections".to_string(), n_collections.to_string());
+        stats.insert("storage_mode".to_string(), mode_str.to_string());
+        stats
+    }
+
+    /// Write the store using its configured paths.
+    ///
+    /// Convenience method for disk-backed stores.
+    /// Uses the store's own local_path and seqdata_path_template.
+    fn write(&self) -> PyResult<()> {
+        self.inner.write().map_err(|e| {
+            PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("Error writing store: {}", e))
+        })
+    }
+
+    #[pyo3(signature = (root_path, seqdata_path_template=None))]
+    fn write_store_to_dir(
         &self,
-        root_path: &str,
-        seqdata_path_template: &str,
+        root_path: &Bound<'_, PyAny>,
+        seqdata_path_template: Option<&str>,
     ) -> PyResult<()> {
+        let root_path = root_path.to_string();
         self.inner
-            .write_store_to_directory(root_path, seqdata_path_template)
+            .write_store_to_dir(root_path, seqdata_path_template)
            .map_err(|e| {
                 PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("Error writing store: {}", e))
             })
     }
 
+    /// Load a local RefgetStore from a directory.
+    ///
+    /// Loads metadata from the local store immediately; sequence data is loaded
+    /// on-demand when first accessed. This is efficient for large genomes where
+    /// you may only need specific sequences.
+    ///
+    /// Args:
+    ///     cache_path (str or Path): Local directory containing the refget store (must have
+    ///         index.json and sequences.farg files).
+    ///
+    /// Returns:
+    ///     GlobalRefgetStore: Store with metadata loaded, sequences lazy-loaded.
+    ///
+    /// Raises:
+    ///     IOError: If the store directory or index files cannot be read.
+    ///
+    /// Example:
+    ///     >>> from gtars.refget import GlobalRefgetStore
+    ///     >>> store = GlobalRefgetStore.load_local("/data/hg38_store")
+    ///     >>> seq = store.get_substring("chr1_digest", 0, 1000)
     #[classmethod]
-    fn load_from_directory(_cls: &Bound<'_, PyType>, root_path: &str) -> PyResult<Self> {
-        let store = GlobalRefgetStore::load_from_directory(root_path).map_err(|e| {
-            PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("Error loading store: {}", e))
+    fn load_local(_cls: &Bound<'_, PyType>, cache_path: &Bound<'_, PyAny>) -> PyResult<Self> {
+        let cache_path = cache_path.to_string();
+        let store = GlobalRefgetStore::load_local(cache_path).map_err(|e| {
+            PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("Error loading local store: {}", e))
         })?;
         Ok(Self { inner: store })
     }
 
-    fn get_seqs_bed_file(
-        &self,
+    /// Load a remote RefgetStore with local caching.
+    ///
+    /// Fetches metadata (index.json, sequences.farg) from a remote URL immediately.
+    /// Sequence data (.seq files) are downloaded on-demand when first accessed and
+    /// cached locally. This is ideal for working with large remote genomes where
+    /// you only need specific sequences.
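+    /// Remote fetching is backed by the lightweight `ureq` HTTP client that this
+    /// change adds to gtars-refget's Cargo.toml.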
+    ///
+    /// Args:
+    ///     cache_path (str or Path): Local directory to cache downloaded metadata and sequences.
+    ///         Created if it doesn't exist.
+    ///     remote_url (str): Base URL of the remote refget store (e.g.,
+    ///         "https://example.com/hg38" or "s3://bucket/hg38").
+    ///     cache_to_disk (bool, optional): If True (default), cache sequence data to disk.
+    ///         If False, keep only in memory.
+    ///
+    /// Returns:
+    ///     GlobalRefgetStore: Store with metadata loaded, sequences fetched on-demand.
+    ///
+    /// Raises:
+    ///     IOError: If remote metadata cannot be fetched or cache cannot be written.
+    ///
+    /// Example:
+    ///     >>> from gtars.refget import GlobalRefgetStore
+    ///     >>> # With disk caching (default)
+    ///     >>> store = GlobalRefgetStore.load_remote(
+    ///     ...     "/data/cache/hg38",
+    ///     ...     "https://refget-server.com/hg38"
+    ///     ... )
+    ///     >>> # Memory-only mode (no sequence data caching to disk)
+    ///     >>> store = GlobalRefgetStore.load_remote(
+    ///     ...     "/tmp/cache",
+    ///     ...     "https://refget-server.com/hg38",
+    ///     ...     cache_to_disk=False
+    ///     ... )
+    #[classmethod]
+    #[pyo3(signature = (cache_path, remote_url, cache_to_disk=true))]
+    fn load_remote(_cls: &Bound<'_, PyType>, cache_path: &Bound<'_, PyAny>, remote_url: &Bound<'_, PyAny>, cache_to_disk: bool) -> PyResult<Self> {
+        let cache_path = cache_path.to_string();
+        let remote_url = remote_url.to_string();
+        let store = GlobalRefgetStore::load_remote(cache_path, remote_url, cache_to_disk).map_err(|e| {
+            PyErr::new::<pyo3::exceptions::PyIOError, _>(format!("Error loading remote store: {}", e))
+        })?;
+        Ok(Self { inner: store })
+    }
+
+    /// Export sequences from BED file regions to a FASTA file.
+    ///
+    /// Reads a BED file defining genomic regions and exports the sequences
+    /// for those regions to a FASTA file.
+    ///
+    /// Args:
+    ///     collection_digest: The collection's SHA-512/24u digest.
+    ///     bed_file_path: Path to BED file defining regions.
+    ///     output_file_path: Path to write the output FASTA file.
+    ///
+    /// Raises:
+    ///     IOError: If files cannot be read/written or sequences not found.
+    ///
+    /// Example:
+    ///     >>> store.export_fasta_from_regions(
+    ///     ...     "uC_UorBNf3YUu1YIDainBhI94CedlNeH",
+    ///     ...     "regions.bed",
+    ///     ...     "output.fa"
+    ///     ... )
+    fn export_fasta_from_regions(
+        &mut self,
         collection_digest: &str,
-        bed_file_path: &str,
-        output_file_path: &str,
+        bed_file_path: &Bound<'_, PyAny>,
+        output_file_path: &Bound<'_, PyAny>,
     ) -> PyResult<()> {
-        // Rust function expects K: AsRef<[u8]>, &str works directly
+        let bed_file_path = bed_file_path.to_string();
+        let output_file_path = output_file_path.to_string();
         self.inner
-            .get_seqs_bed_file(collection_digest, bed_file_path, output_file_path)
+            .export_fasta_from_regions(collection_digest, &bed_file_path, &output_file_path)
             .map_err(|e| {
                 PyErr::new::<pyo3::exceptions::PyIOError, _>(format!(
-                    "Error retrieving sequences and writing to file: {}",
+                    "Error exporting FASTA from regions: {}",
                     e
                 ))
             })
     }
 
-    fn get_seqs_bed_file_to_vec(
-        &self,
+    /// Get substrings for BED file regions as a list.
+    ///
+    /// Reads a BED file and returns a list of sequences for each region.
+    /// This is a convenience wrapper around the iterator for cases where
+    /// you want all results in memory at once.
+    ///
+    /// Note: For large BED files, consider using the Rust API's iterator
+    /// directly to avoid loading all results into memory.
+    ///
+    /// Args:
+    ///     collection_digest: The collection's SHA-512/24u digest.
+    ///     bed_file_path: Path to BED file defining regions.
+    ///
+    /// Returns:
+    ///     list[RetrievedSequence]: List of sequences with region metadata.
+    ///
+    /// Raises:
+    ///     IOError: If files cannot be read or sequences not found.
+    ///
+    /// Example:
+    ///     >>> sequences = store.substrings_from_regions(
+    ///     ...     "uC_UorBNf3YUu1YIDainBhI94CedlNeH",
+    ///     ...     "regions.bed"
+    ///     ... )
+    ///     >>> for seq in sequences:
+    ///     ...     print(f"{seq.chrom_name}:{seq.start}-{seq.end}")
+    fn substrings_from_regions(
+        &mut self,
         collection_digest: &str,
-        bed_file_path: &str,
+        bed_file_path: &Bound<'_, PyAny>,
     ) -> PyResult<Vec<PyRetrievedSequence>> {
-        // Corrected: use `let ... = ...?;` to bind the result
-        let rust_results = self
+        let bed_file_path = bed_file_path.to_string();
+        // Get iterator and collect results
+        let iter = self
             .inner
-            .get_seqs_bed_file_to_vec(collection_digest, bed_file_path)
+            .substrings_from_regions(collection_digest, &bed_file_path)
             .map_err(|e| {
                 PyErr::new::<pyo3::exceptions::PyIOError, _>(format!(
-                    "Error retrieving sequences to list: {}",
+                    "Error getting substrings from regions: {}",
                     e
                 ))
             })?;
 
-        // Now `rust_results` is available
-        let py_results: Vec<PyRetrievedSequence> = rust_results
-            .into_iter()
+        // Collect results, filtering out errors
+        let py_results: Vec<PyRetrievedSequence> = iter
+            .filter_map(Result::ok)
             .map(PyRetrievedSequence::from)
             .collect();
 
         Ok(py_results)
     }
 
+    /// Export sequences from a collection to a FASTA file.
+    ///
+    /// This method exports sequences from a specific sequence collection (genome assembly)
+    /// using the collection digest. You can optionally filter which sequences to export
+    /// by providing sequence names.
+    ///
+    /// **Use this method when:** You have a collection digest and want to export sequences
+    /// from that specific collection, optionally filtering by chromosome/sequence names.
+    ///
+    /// **Contrast with export_fasta_by_digests:** That method exports sequences by their
+    /// individual sequence digests (not collection digest) and bypasses collection
+    /// information entirely.
+    ///
+    /// Args:
+    ///     collection_digest: The collection's SHA-512/24u digest (identifies the genome
+    ///         assembly/sequence collection).
+    ///     output_path: Path to write the output FASTA file.
+    ///     sequence_names: Optional list of sequence names to export (e.g., ["chr1", "chr2"]).
+    ///         If None, exports all sequences in the collection.
+    ///     line_width: Number of bases per line in output FASTA (default: 80).
+    ///
+    /// Raises:
+    ///     IOError: If file cannot be written or collection/sequences not found.
+    ///
+    /// Example:
+    ///     >>> # Export all sequences from a collection
+    ///     >>> store.export_fasta("uC_UorBNf3YUu1YIDainBhI94CedlNeH", "output.fa", None, None)
+    ///     >>> # Export only chr1 and chr2
+    ///     >>> store.export_fasta("uC_UorBNf3YUu1YIDainBhI94CedlNeH", "output.fa", ["chr1", "chr2"], None)
+    fn export_fasta(
+        &mut self,
+        collection_digest: &str,
+        output_path: &Bound<'_, PyAny>,
+        sequence_names: Option<Vec<String>>,
+        line_width: Option<usize>,
+    ) -> PyResult<()> {
+        let output_path = output_path.to_string();
+        let sequence_names_refs = sequence_names.as_ref().map(|names| {
+            names.iter().map(|s| s.as_str()).collect::<Vec<&str>>()
+        });
+        self.inner
+            .export_fasta(collection_digest, &output_path, sequence_names_refs, line_width)
+            .map_err(|e| {
+                PyErr::new::<pyo3::exceptions::PyIOError, _>(format!(
+                    "Error exporting FASTA: {}",
+                    e
+                ))
+            })
+    }
+
+    /// Export sequences by their individual sequence digests to a FASTA file.
+    ///
+    /// This method exports sequences directly by their SHA-512/24u sequence digests,
+    /// bypassing collection information.
+    /// Useful when you have specific sequence identifiers and want to retrieve
+    /// them regardless of which collection(s) they belong to.
+    ///
+    /// **Use this method when:** You have a list of sequence digests (SHA-512/24u)
+    /// and want to export those specific sequences from the global sequence store.
+    ///
+    /// **Contrast with export_fasta:** That method uses a collection digest and
+    /// optionally filters by sequence names within that collection.
+    ///
+    /// Args:
+    ///     seq_digests: List of SHA-512/24u sequence digests (not collection digests)
+    ///         to export. Each digest identifies an individual sequence.
+    ///     output_path: Path to write the output FASTA file.
+    ///     line_width: Number of bases per line in output FASTA (default: 80).
+    ///
+    /// Raises:
+    ///     IOError: If file cannot be written or sequences not found.
+    ///
+    /// Example:
+    ///     >>> # Export specific sequences by their digests
+    ///     >>> seq_digests = [
+    ///     ...     "aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2",
+    ///     ...     "bXE123dAxcJAqme6QYQ7EZ07-fiw8Kw2"
+    ///     ... ]
+    ///     >>> store.export_fasta_by_digests(seq_digests, "output.fa", None)
+    fn export_fasta_by_digests(
+        &mut self,
+        seq_digests: Vec<String>,
+        output_path: &Bound<'_, PyAny>,
+        line_width: Option<usize>,
+    ) -> PyResult<()> {
+        let output_path = output_path.to_string();
+        let digests_refs: Vec<&str> = seq_digests.iter().map(|s| s.as_str()).collect();
+        self.inner
+            .export_fasta_by_digests(digests_refs, &output_path, line_width)
+            .map_err(|e| {
+                PyErr::new::<pyo3::exceptions::PyIOError, _>(format!(
+                    "Error exporting FASTA by digests: {}",
+                    e
+                ))
+            })
+    }
+
     fn __str__(&self) -> String {
         format!("{}", self.inner)
     }
 
     fn __repr__(&self) -> String {
+        let (n_sequences, n_collections, mode_str) = self.inner.stats();
+
+        let location = if let Some(remote) = self.inner.remote_source() {
+            let cache = self.inner.local_path()
+                .map(|p| p.display().to_string())
+                .unwrap_or_else(|| "None".to_string());
+            format!("cache='{}', remote='{}'", cache, remote)
+        } else if let Some(path) = self.inner.local_path() {
+            format!("cache='{}'", path.display())
+        } else {
+            "memory-only".to_string()
+        };
+
         format!(
-            "<GlobalRefgetStore with {} sequences>",
-            self.inner.list_sequence_digests().len()
+            "GlobalRefgetStore(n_sequences={}, n_collections={}, mode={}, {})",
+            n_sequences, n_collections, mode_str, location
         )
     }
+
+    /// Return the number of sequences in the store.
+    ///
+    /// Allows using `len(store)` in Python to get the total number of sequences
+    /// across all collections.
+    ///
+    /// Returns:
+    ///     int: Total number of sequences in the store.
+    ///
+    /// Example:
+    ///     >>> store = GlobalRefgetStore.in_memory()
+    ///     >>> store.add_sequence_collection_from_fasta("genome.fa")
+    ///     >>> print(f"Store contains {len(store)} sequences")
+    fn __len__(&self) -> usize {
+        self.inner.sequence_digests().count()
+    }
+
+    /// Iterate over all sequences in the store.
+    ///
+    /// Allows using `for sequence in store:` in Python to iterate over all
+    /// sequence metadata in the store.
+    ///
+    /// Yields:
+    ///     SequenceMetadata: Metadata for each sequence in the store.
+    ///
+    /// Example:
+    ///     >>> store = GlobalRefgetStore.in_memory()
+    ///     >>> store.add_sequence_collection_from_fasta("genome.fa")
+    ///     >>> for seq_meta in store:
+    ///     ...     print(f"{seq_meta.name}: {seq_meta.length} bp")
+    fn __iter__(slf: PyRef<'_, Self>) -> PyResult<PyRefgetStoreIterator> {
+        let sequences = slf.inner.sequence_metadata()
+            .map(|meta| PySequenceMetadata::from(meta.clone()))
+            .collect();
+        Ok(PyRefgetStoreIterator {
+            sequences,
+            index: 0,
+        })
+    }
 }
 
+/// Iterator for GlobalRefgetStore that yields SequenceMetadata.
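+/// Items are snapshotted into a Vec when iteration starts, so sequences added
+/// to the store afterwards are not seen by an existing iterator.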
+#[pyclass]
+pub struct PyRefgetStoreIterator {
+    sequences: Vec<PySequenceMetadata>,
+    index: usize,
+}
+
+#[pymethods]
+impl PyRefgetStoreIterator {
+    fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+        slf
+    }
+
+    fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<PySequenceMetadata> {
+        if slf.index < slf.sequences.len() {
+            let item = slf.sequences[slf.index].clone();
+            slf.index += 1;
+            Some(item)
+        } else {
+            None
+        }
+    }
+}
 
 // This represents the Python module to be created
@@ -477,8 +1291,12 @@ pub fn refget(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(sha512t24u_digest, m)?)?;
     m.add_function(wrap_pyfunction!(md5_digest, m)?)?;
     m.add_function(wrap_pyfunction!(digest_fasta, m)?)?;
+    m.add_function(wrap_pyfunction!(compute_fai, m)?)?;
+    m.add_function(wrap_pyfunction!(load_fasta, m)?)?;
     m.add_class::<PyStorageMode>()?;
     m.add_class::<PyAlphabetType>()?;
+    m.add_class::<PyFaiMetadata>()?;
+    m.add_class::<PyFaiRecord>()?;
     m.add_class::<PySequenceMetadata>()?;
     m.add_class::<PySequenceRecord>()?;
     m.add_class::<PySequenceCollection>()?;
diff --git a/gtars-python/tests/test_refget.py b/gtars-python/tests/test_refget.py
index 2e26e54d..aa0b3cf7 100644
--- a/gtars-python/tests/test_refget.py
+++ b/gtars-python/tests/test_refget.py
@@ -3,6 +3,7 @@
     GlobalRefgetStore,
     StorageMode,
     digest_fasta,
+    load_fasta,
     sha512t24u_digest,
     md5_digest,
     RetrievedSequence,
@@ -25,7 +26,6 @@ def test_digest_fasta_basic(self):
         assert hasattr(result, "digest")
         assert hasattr(result, "lvl1")
         assert hasattr(result, "file_path")
-        assert hasattr(result, "has_data")
 
         # Test that sequences is a list
         assert isinstance(result.sequences, list)
@@ -79,9 +79,11 @@ def test_global_refget_store_basic(self):
         store_encoded = GlobalRefgetStore(StorageMode.Encoded)
 
         # Test string representations
-        # print(store_raw.__repr__)
-        assert repr(store_raw).startswith("<GlobalRefgetStore")
+        assert repr(store_raw).startswith("GlobalRefgetStore(")
+        assert repr(store_encoded).startswith("GlobalRefgetStore(")
@@ -161,7 +163,7 @@ def test_store_collection_operations(self):
         fasta_path = "../../gtars/tests/data/fasta/base.fa"
 
         # Import sequences and get sequence by collection and name
-        store.import_fasta(fasta_path)
+        store.add_sequence_collection_from_fasta(fasta_path)
 
         # Get sequence from default collection
         result = digest_fasta(fasta_path)
@@ -175,8 +177,8 @@
 
     def test_get_seqs_from_bed_file_bindings(self):
         """
-        Test both get_seqs_bed_file (writes to file)
-        and get_seqs_bed_file_to_vec (returns list) bindings.
+        Test both export_fasta_from_regions (writes to file)
+        and substrings_from_regions (returns list) bindings.
""" with tempfile.TemporaryDirectory() as temp_dir_str: @@ -189,7 +191,7 @@ def test_get_seqs_from_bed_file_bindings(self): store = GlobalRefgetStore(StorageMode.Encoded) - imported_collection = store.import_fasta(temp_fasta_path) + imported_collection = store.add_sequence_collection_from_fasta(temp_fasta_path) result = digest_fasta(temp_fasta_path) collection_digest = result.digest @@ -212,7 +214,7 @@ def test_get_seqs_from_bed_file_bindings(self): f.write(bed_content) temp_output_fa_path = os.path.join(temp_dir, "output.fa") - store.get_seqs_bed_file( + store.export_fasta_from_regions( collection_digest, temp_bed_path, temp_output_fa_path ) @@ -228,9 +230,9 @@ def test_get_seqs_from_bed_file_bindings(self): assert ( output_fa_content.strip() == expected_fa_content.strip() ), "Output FASTA file content mismatch" - print("✓ get_seqs_bed_file binding test passed.") + print("✓ export_fasta_from_regions binding test passed.") - vec_result = store.get_seqs_bed_file_to_vec( + vec_result = store.substrings_from_regions( collection_digest, temp_bed_path ) @@ -249,4 +251,103 @@ def test_get_seqs_from_bed_file_bindings(self): assert vec_result[i].start == expected_vec[i].start assert vec_result[i].end == expected_vec[i].end - print("✓ get_seqs_bed_file_to_vec binding test passed.") + print("✓ substrings_from_regions binding test passed.") + + def test_decode_with_no_data(self): + """Test that decode() returns None when sequence data is not loaded""" + fasta_path = "../../gtars/tests/data/fasta/base.fa" + + # digest_fasta should not load sequence data + result = digest_fasta(fasta_path) + + for seq_record in result.sequences: + assert seq_record.data is None, "digest_fasta should not load sequence data" + assert seq_record.decode() is None, "decode() should return None when data is None" + + def test_decode_with_loaded_data(self): + """Test that decode() returns correct sequences when data is loaded""" + fasta_path = "../../gtars/tests/data/fasta/base.fa" + + # load_fasta should load sequence data + result = load_fasta(fasta_path) + + # Expected sequences from base.fa + expected_sequences = [ + ("chrX", "TTGGGGAA"), + ("chr1", "GGAA"), + ("chr2", "GCGC"), + ] + + assert len(result.sequences) == len(expected_sequences) + + for seq_record, (expected_name, expected_seq) in zip(result.sequences, expected_sequences): + assert seq_record.metadata.name == expected_name + assert seq_record.data is not None, "load_fasta should load sequence data" + + decoded = seq_record.decode() + assert decoded is not None, "decode() should return Some when data is present" + assert decoded == expected_seq, f"Decoded sequence for {expected_name} should match expected" + + def test_decode_with_store_sequences(self): + """Test decode() with sequences retrieved from a store""" + store = GlobalRefgetStore(StorageMode.Encoded) + fasta_path = "../../gtars/tests/data/fasta/base.fa" + store.add_sequence_collection_from_fasta(fasta_path) + + # Get sequence by ID + sha512 = "iYtREV555dUFKg2_agSJW6suquUyPpMw" + seq = store.get_sequence_by_id(sha512) + + assert seq is not None + assert seq.data is not None, "Store should have loaded sequence data" + + decoded = seq.decode() + assert decoded is not None + assert decoded == "TTGGGGAA", "Should correctly decode sequence from encoded store" + + def test_decode_raw_vs_encoded_storage(self): + """Test that decode() works with both Raw and Encoded storage modes""" + fasta_path = "../../gtars/tests/data/fasta/base.fa" + + # Test with Raw storage mode + store_raw = 
GlobalRefgetStore(StorageMode.Raw) + store_raw.add_sequence_collection_from_fasta(fasta_path) + sha512 = "iYtREV555dUFKg2_agSJW6suquUyPpMw" + seq_raw = store_raw.get_sequence_by_id(sha512) + decoded_raw = seq_raw.decode() + + # Test with Encoded storage mode + store_encoded = GlobalRefgetStore(StorageMode.Encoded) + store_encoded.add_sequence_collection_from_fasta(fasta_path) + seq_encoded = store_encoded.get_sequence_by_id(sha512) + decoded_encoded = seq_encoded.decode() + + # Both should produce the same decoded sequence + assert decoded_raw == decoded_encoded + assert decoded_raw == "TTGGGGAA" + + def test_load_fasta_function(self): + """Test the new load_fasta() function""" + fasta_path = "../../gtars/tests/data/fasta/base.fa" + + # Test that load_fasta returns a SequenceCollection with data + result = load_fasta(fasta_path) + + # Should have same structure as digest_fasta + assert hasattr(result, "sequences") + assert hasattr(result, "digest") + assert hasattr(result, "lvl1") + assert hasattr(result, "file_path") + + # But sequences should have data loaded + assert len(result.sequences) == 3 + for seq_record in result.sequences: + assert seq_record.data is not None, "load_fasta should load sequence data" + assert seq_record.decode() is not None, "Should be able to decode loaded data" + + # Verify digests match digest_fasta + digest_result = digest_fasta(fasta_path) + assert result.digest == digest_result.digest + assert result.lvl1.sequences_digest == digest_result.lvl1.sequences_digest + assert result.lvl1.names_digest == digest_result.lvl1.names_digest + assert result.lvl1.lengths_digest == digest_result.lvl1.lengths_digest diff --git a/gtars-r/src/rust/src/refget.rs b/gtars-r/src/rust/src/refget.rs index e598510a..07c56888 100644 --- a/gtars-r/src/rust/src/refget.rs +++ b/gtars-r/src/rust/src/refget.rs @@ -287,8 +287,7 @@ fn sequence_collection_to_list(collection: SequenceCollection) -> List { file_path = collection .file_path .map(|p| p.to_string_lossy().to_string()) - .unwrap_or_default(), - has_data = collection.has_data + .unwrap_or_default() ) } diff --git a/gtars-refget/Cargo.toml b/gtars-refget/Cargo.toml index 9c687039..f1e75aa6 100644 --- a/gtars-refget/Cargo.toml +++ b/gtars-refget/Cargo.toml @@ -13,6 +13,7 @@ md-5 = "0.10.5" base64-url = "2.0.0" memmap2 = "0.5" # for memory-mapped file I/O chrono = { version = "0.4", features = ["serde"] } # For timestamps +ureq = "2.10" # Lightweight HTTP for remote fetching # shared workspace dependencies anyhow = { workspace = true } diff --git a/gtars-refget/src/alphabet.rs b/gtars-refget/src/alphabet.rs index f6c66ed9..9558473e 100644 --- a/gtars-refget/src/alphabet.rs +++ b/gtars-refget/src/alphabet.rs @@ -101,6 +101,20 @@ impl FromStr for AlphabetType { } } +impl AlphabetType { + /// Returns the number of bits used per symbol for this alphabet type + pub fn bits_per_symbol(&self) -> usize { + match self { + AlphabetType::Dna2bit => 2, + AlphabetType::Dna3bit => 3, + AlphabetType::DnaIupac => 4, + AlphabetType::Protein => 5, + AlphabetType::Ascii => 8, + AlphabetType::Unknown => 8, // Default to 8 bits for unknown + } + } +} + /// A lookup table that maps ASCII characters representing DNA bases /// (T, C, A, G, and their lowercase counterparts) to their 2-bit encoding. 
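+/// (Each entry is two bits wide, matching `bits_per_symbol()` for
+/// `AlphabetType::Dna2bit` defined above.)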
/// This constant is used for efficient conversion of DNA sequences into a
diff --git a/gtars-refget/src/collection.rs b/gtars-refget/src/collection.rs
index 94d5a3f0..63d57910 100644
--- a/gtars-refget/src/collection.rs
+++ b/gtars-refget/src/collection.rs
@@ -26,10 +26,6 @@ pub struct SequenceCollection {
     /// Optional path to the source file from which this collection was loaded.
     /// Used for caching and reference purposes.
     pub file_path: Option<PathBuf>,
-
-    /// Flag indicating whether sequence data is actually loaded in memory.
-    /// When false, only metadata is available.
-    pub has_data: bool,
 }
 
 /// A struct representing the first level of digests for a refget sequence collection.
@@ -105,11 +101,20 @@ impl SeqColDigestLvl1 {
 }
 
 /// A representation of a single sequence that includes metadata and optionally data.
-/// Combines sequence metadata with optional raw/encoded data
+/// Combines sequence metadata with optional raw/encoded data.
+///
+/// This enum has two variants:
+/// - `Stub`: Contains only metadata, no sequence data loaded
+/// - `Full`: Contains both metadata and the actual sequence data
 #[derive(Clone, Debug)]
-pub struct SequenceRecord {
-    pub metadata: SequenceMetadata,
-    pub data: Option<Vec<u8>>,
+pub enum SequenceRecord {
+    /// A sequence record with only metadata, no sequence data
+    Stub(SequenceMetadata),
+    /// A sequence record with both metadata and sequence data
+    Full {
+        metadata: SequenceMetadata,
+        sequence: Vec<u8>,
+    },
 }
 
 use std::fs::{self, File};
@@ -121,22 +126,157 @@ pub struct SequenceMetadata {
     pub sha512t24u: String,
     pub md5: String,
     pub alphabet: AlphabetType,
+    pub fai: Option<FaiMetadata>,
+}
+
+/// FASTA index (FAI) metadata for a sequence.
+/// This data is only present when a sequence was loaded from a FASTA file.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct FaiMetadata {
+    pub offset: u64,     // byte offset to first base of sequence data
+    pub line_bases: u32, // number of bases per line
+    pub line_bytes: u32, // number of bytes per line (including newline chars)
+}
+
+impl SequenceMetadata {
+    /// Calculate the disk size in bytes for this sequence
+    ///
+    /// # Arguments
+    /// * `mode` - The storage mode (Raw or Encoded)
+    ///
+    /// # Returns
+    /// The number of bytes this sequence occupies on disk
+    ///
+    /// # Examples
+    /// ```ignore
+    /// // For a 1000bp DNA sequence in Encoded mode with Dna2bit alphabet:
+    /// // disk_size = (1000 * 2 bits).div_ceil(8) = 250 bytes
+    /// let size = metadata.disk_size(&StorageMode::Encoded);
+    /// ```
+    pub fn disk_size(&self, mode: &crate::store::StorageMode) -> usize {
+        match mode {
+            crate::store::StorageMode::Raw => self.length,
+            crate::store::StorageMode::Encoded => {
+                let bits_per_symbol = self.alphabet.bits_per_symbol();
+                let total_bits = self.length * bits_per_symbol;
+                total_bits.div_ceil(8)
+            }
+        }
+    }
 }
 
 impl SequenceRecord {
+    /// Get metadata regardless of variant
+    pub fn metadata(&self) -> &SequenceMetadata {
+        match self {
+            SequenceRecord::Stub(meta) => meta,
+            SequenceRecord::Full { metadata, .. } => metadata,
+        }
+    }
+
+    /// Get sequence data if present
+    pub fn sequence(&self) -> Option<&[u8]> {
+        match self {
+            SequenceRecord::Stub(_) => None,
+            SequenceRecord::Full { sequence, .. } => Some(sequence),
+        }
+    }
+
+    /// Check if data is loaded
+    pub fn has_data(&self) -> bool {
+        matches!(self, SequenceRecord::Full { .. })
+    }
+
+    /// Load data into a Stub record, or replace data in a Full record.
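+    ///
+    /// # Example
+    ///
+    /// A minimal sketch (the starting record here is hypothetical):
+    ///
+    /// ```rust,ignore
+    /// # use gtars_refget::collection::SequenceRecord;
+    /// # let stub: SequenceRecord = todo!();
+    /// // Upgrading a metadata-only Stub into a Full record:
+    /// let full = stub.with_data(b"ACGT".to_vec());
+    /// assert!(full.has_data());
+    /// assert_eq!(full.sequence(), Some(&b"ACGT"[..]));
+    /// ```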
+    pub fn with_data(self, sequence: Vec<u8>) -> Self {
+        let metadata = match self {
+            SequenceRecord::Stub(m) => m,
+            SequenceRecord::Full { metadata, .. } => metadata,
+        };
+        SequenceRecord::Full { metadata, sequence }
+    }
+
     /// Utility function to write a single sequence to a file
     pub fn to_file<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
+        let data = match self {
+            SequenceRecord::Stub(_) => {
+                return Err(anyhow::anyhow!("Cannot write file: sequence data not loaded"));
+            }
+            SequenceRecord::Full { sequence, .. } => sequence,
+        };
+
         // Create parent directories if they don't exist
         if let Some(parent) = path.as_ref().parent() {
             fs::create_dir_all(parent)?;
         }
 
         let mut file = File::create(path)?;
-        if let Some(data) = &self.data {
-            file.write_all(data)?;
-        }
+        file.write_all(data)?;
         Ok(())
     }
+
+    /// Decodes the sequence data to a string.
+    ///
+    /// This method attempts to decode the sequence data stored in this record.
+    /// It handles both raw (uncompressed UTF-8) and encoded (bit-packed) data.
+    /// The decoding strategy depends on the alphabet type:
+    /// - For the ASCII alphabet: data is always stored raw, just convert to string
+    /// - For other alphabets: try raw UTF-8 first (when the byte length matches
+    ///   the sequence length), then fall back to bit-packed decoding
+    ///
+    /// # Returns
+    ///
+    /// * `Some(String)` - The decoded sequence if data is loaded
+    /// * `None` - If no data is loaded in this record
+    ///
+    /// # Example
+    ///
+    /// ```rust,no_run
+    /// # use gtars_refget::collection::SequenceRecord;
+    /// # let record: SequenceRecord = todo!();
+    /// let sequence = record.decode();
+    /// if let Some(seq) = sequence {
+    ///     println!("Sequence: {}", seq);
+    /// }
+    /// ```
+    pub fn decode(&self) -> Option<String> {
+        use crate::alphabet::lookup_alphabet;
+        use crate::encoder::decode_substring_from_bytes;
+
+        let (metadata, data) = match self {
+            SequenceRecord::Stub(_) => return None,
+            SequenceRecord::Full { metadata, sequence } => (metadata, sequence),
+        };
+
+        // For ASCII alphabet (8 bits per symbol), the data is always stored raw
+        if metadata.alphabet == crate::alphabet::AlphabetType::Ascii {
+            return String::from_utf8(data.clone()).ok();
+        }
+
+        // Try to detect if data is raw or encoded
+        // Heuristic: for encoded data, the size should be approximately length * bits_per_symbol / 8
+        // For raw data, the size should be approximately equal to length
+        let alphabet = lookup_alphabet(&metadata.alphabet);
+
+        // If data size matches the expected length (not the encoded size), it's probably raw
+        if data.len() == metadata.length {
+            // Try to decode as UTF-8
+            if let Ok(raw_string) = String::from_utf8(data.clone()) {
+                // Data appears to be raw UTF-8
+                return Some(raw_string);
+            }
+        }
+
+        // Data is probably encoded (size matches expected encoded size), try to decode it
+        let decoded_bytes = decode_substring_from_bytes(
+            data,
+            0,
+            metadata.length,
+            alphabet
+        );
+
+        // Convert to string
+        String::from_utf8(decoded_bytes).ok()
+    }
 }
 
 impl SequenceCollection {
@@ -164,7 +304,7 @@ impl SequenceCollection {
     /// Create a SequenceCollection from a vector of SequenceRecords.
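+    ///
+    /// Level-1 and collection digests are recomputed from the records' metadata.
+    /// A minimal sketch, reusing records produced by `load_fasta`:
+    ///
+    /// ```rust,ignore
+    /// use gtars_refget::collection::SequenceCollection;
+    /// use gtars_refget::fasta::load_fasta;
+    ///
+    /// let records = load_fasta("../tests/data/fasta/base.fa")?.sequences;
+    /// let collection = SequenceCollection::from_records(records);
+    /// println!("collection digest: {}", collection.digest);
+    /// ```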
     pub fn from_records(records: Vec<SequenceRecord>) -> Self {
         // Compute lvl1 digests from the metadata
-        let metadata_refs: Vec<&SequenceMetadata> = records.iter().map(|r| &r.metadata).collect();
+        let metadata_refs: Vec<&SequenceMetadata> = records.iter().map(|r| r.metadata()).collect();
         let lvl1 = SeqColDigestLvl1::from_metadata(&metadata_refs);
 
         // Compute collection digest from lvl1 digests
@@ -175,7 +315,6 @@ impl SequenceCollection {
             digest: collection_digest,
             lvl1,
             file_path: None,
-            has_data: true,
         }
     }
 
@@ -214,7 +353,7 @@ impl SequenceCollection {
 
         // Write the SequenceCollection to the FARG file
         if write_cache && !farg_file_path.exists() {
-            seqcol.to_farg()?;
+            seqcol.write_farg()?;
             println!("Farg file written to {:?}", farg_file_path);
         } else {
             println!(
@@ -225,24 +364,37 @@ impl SequenceCollection {
         Ok(seqcol)
     }
 
-    /// Write the SequenceCollection to a FASTA refget (FARG) file
-    /// * `file_path` - The path to the FARG file to be written.
-    pub fn to_farg_path<P: AsRef<Path>>(&self, file_path: P) -> Result<()> {
-        // Write the FARGI file
+    /// Write the SequenceCollection to a collection FARG file.
+    ///
+    /// Creates a FARG file with collection-level digest headers followed by
+    /// sequence metadata for all sequences in this collection.
+    ///
+    /// # Arguments
+    /// * `file_path` - The path to the FARG file to be written
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    ///
+    /// # Format
+    /// The file includes:
+    /// - Collection digest headers (##seqcol_digest, ##names_digest, etc.)
+    /// - Column header (#name, length, alphabet, sha512t24u, md5)
+    /// - One line per sequence with metadata
+    pub fn write_collection_farg<P: AsRef<Path>>(&self, file_path: P) -> Result<()> {
         let file_path = file_path.as_ref();
-        println!("Writing farg file: {:?}", file_path);
+        println!("Writing collection farg file: {:?}", file_path);
         let mut file = std::fs::File::create(file_path)?;
 
-        // Write header with digest metadata
+        // Write collection digest headers
         writeln!(file, "##seqcol_digest={}", self.digest)?;
         writeln!(file, "##names_digest={}", self.lvl1.names_digest)?;
         writeln!(file, "##sequences_digest={}", self.lvl1.sequences_digest)?;
         writeln!(file, "##lengths_digest={}", self.lvl1.lengths_digest)?;
         writeln!(file, "#name\tlength\talphabet\tsha512t24u\tmd5")?;
 
-        // Write sequence data
+        // Write sequence metadata
         for result_sr in &self.sequences {
-            let result = result_sr.metadata.clone();
+            let result = result_sr.metadata().clone();
             writeln!(
                 file,
                 "{}\t{}\t{}\t{}\t{}",
@@ -252,17 +404,65 @@ impl SequenceCollection {
         Ok(())
     }
 
-    /// Write the SeqColDigest to a FARG file, using the file path stored in the struct.
-    pub fn to_farg(&self) -> Result<()> {
+    /// Write the SequenceCollection to a FARG file, using the file path stored in the struct.
+    pub fn write_farg(&self) -> Result<()> {
         if let Some(ref file_path) = self.file_path {
             let farg_file_path = file_path.replace_exts_with("farg");
-            self.to_farg_path(farg_file_path)
+            self.write_collection_farg(farg_file_path)
         } else {
             Err(anyhow::anyhow!(
-                "No file path specified for FARG output. Use `to_farg_path` to specify a file path."
+                "No file path specified for FARG output. Use `write_collection_farg` to specify a file path."
             ))
         }
     }
+
+    /// Write the SequenceCollection to a FASTA file.
+    ///
+    /// Writes all sequences in this collection to a FASTA file with the specified line width.
+    /// Every sequence must have its data loaded (`Full` variant); otherwise an error is returned.
+    ///
+    /// # Arguments
+    /// * `file_path` - The path to the output FASTA file
+    /// * `line_width` - Number of bases per line (default: 70)
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    ///
+    /// # Errors
+    /// Returns an error if any sequence doesn't have data loaded (Stub variant)
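+    ///
+    /// # Example
+    ///
+    /// A round-trip sketch (the output path is illustrative):
+    ///
+    /// ```rust,ignore
+    /// use gtars_refget::fasta::load_fasta;
+    ///
+    /// // load_fasta produces Full records, so they can be written back out.
+    /// let seqcol = load_fasta("../tests/data/fasta/base.fa")?;
+    /// seqcol.write_fasta("roundtrip.fa", Some(60))?;
+    /// ```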
+    pub fn write_fasta<P: AsRef<Path>>(&self, file_path: P, line_width: Option<usize>) -> Result<()> {
+        let line_width = line_width.unwrap_or(70);
+        let mut output_file = File::create(file_path)?;
+
+        for record in &self.sequences {
+            // Check if record has data
+            if !record.has_data() {
+                return Err(anyhow::anyhow!(
+                    "Cannot write FASTA: sequence '{}' does not have data loaded",
+                    record.metadata().name
+                ));
+            }
+
+            // Decode the sequence
+            let decoded_sequence = record.decode().ok_or_else(|| {
+                anyhow::anyhow!(
+                    "Failed to decode sequence '{}'",
+                    record.metadata().name
+                )
+            })?;
+
+            // Write FASTA header
+            writeln!(output_file, ">{}", record.metadata().name)?;
+
+            // Write sequence with line wrapping
+            for chunk in decoded_sequence.as_bytes().chunks(line_width) {
+                output_file.write_all(chunk)?;
+                output_file.write_all(b"\n")?;
+            }
+        }
+
+        Ok(())
+    }
 }
 
 impl Display for SequenceCollection {
@@ -286,12 +486,265 @@ impl Display for SequenceRecord {
         write!(
             f,
             "SequenceRecord: {} (length: {}, alphabet: {}, ga4gh: {:02x?}, md5: {:02x?})",
-            &self.metadata.name,
-            &self.metadata.length,
-            &self.metadata.alphabet,
-            &self.metadata.sha512t24u,
-            &self.metadata.md5
+            &self.metadata().name,
+            &self.metadata().length,
+            &self.metadata().alphabet,
+            &self.metadata().sha512t24u,
+            &self.metadata().md5
         )?;
         Ok(())
     }
 }
+
+// Iterator implementations for SequenceCollection
+// Allows: for seq in &collection { ... }
+impl<'a> IntoIterator for &'a SequenceCollection {
+    type Item = &'a SequenceRecord;
+    type IntoIter = std::slice::Iter<'a, SequenceRecord>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.sequences.iter()
+    }
+}
+
+// Consuming iterator
+// Allows: for seq in collection { ... } (consumes the collection)
+impl IntoIterator for SequenceCollection {
+    type Item = SequenceRecord;
+    type IntoIter = std::vec::IntoIter<SequenceRecord>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.sequences.into_iter()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::fasta::{digest_fasta, load_fasta};
+    use crate::encoder::encode_sequence;
+
+    #[test]
+    fn test_decode_returns_none_when_no_data() {
+        // Test that decode() returns None when data is None
+        let seqcol = digest_fasta("../tests/data/fasta/base.fa")
+            .expect("Failed to load test FASTA file");
+
+        for seq_record in &seqcol.sequences {
+            assert!(!seq_record.has_data(), "digest_fasta should not load sequence data");
+            assert!(matches!(seq_record, SequenceRecord::Stub(_)), "digest_fasta should create Stub records");
+            assert_eq!(seq_record.decode(), None, "decode() should return None for Stub records");
+        }
+    }
+
+    #[test]
+    fn test_decode_with_loaded_data() {
+        // Test that decode() returns the correct sequence when data is loaded
+        let seqcol = load_fasta("../tests/data/fasta/base.fa")
+            .expect("Failed to load test FASTA file");
+
+        // Expected sequences from base.fa
+        let expected_sequences = vec![
+            ("chrX", "TTGGGGAA"),
+            ("chr1", "GGAA"),
+            ("chr2", "GCGC"),
+        ];
+
+        for (seq_record, (expected_name, expected_seq)) in seqcol.sequences.iter().zip(expected_sequences.iter()) {
+            assert_eq!(seq_record.metadata().name, *expected_name);
+            assert!(seq_record.has_data(), "load_fasta should load sequence data");
+            assert!(matches!(seq_record, SequenceRecord::Full { ..
}), "load_fasta should create Full records"); + + let decoded = seq_record.decode().expect("decode() should return Some when data is present"); + assert_eq!(decoded, *expected_seq, + "Decoded sequence for {} should match expected sequence", expected_name); + } + } + + #[test] + fn test_decode_handles_encoded_data() { + // Test that decode() correctly handles bit-packed encoded data + use crate::alphabet::{lookup_alphabet, AlphabetType}; + + let sequence = b"ACGT"; + let alphabet = lookup_alphabet(&AlphabetType::Dna2bit); + let encoded_data = encode_sequence(sequence, alphabet); + + let record = SequenceRecord::Full { + metadata: SequenceMetadata { + name: "test_seq".to_string(), + length: 4, + sha512t24u: "test_digest".to_string(), + md5: "test_md5".to_string(), + alphabet: AlphabetType::Dna2bit, + fai: None, + }, + sequence: encoded_data, + }; + + let decoded = record.decode().expect("Should decode encoded data"); + assert_eq!(decoded, "ACGT", "Should correctly decode bit-packed DNA sequence"); + } + + #[test] + fn test_decode_handles_raw_utf8_data() { + // Test that decode() handles raw UTF-8 data (not encoded) + let raw_sequence = b"ACGTACGT".to_vec(); + + let record = SequenceRecord::Full { + metadata: SequenceMetadata { + name: "test_seq".to_string(), + length: 8, + sha512t24u: "test_digest".to_string(), + md5: "test_md5".to_string(), + alphabet: AlphabetType::Ascii, + fai: None, + }, + sequence: raw_sequence, + }; + + let decoded = record.decode().expect("Should decode raw UTF-8 data"); + assert_eq!(decoded, "ACGTACGT", "Should correctly decode raw sequence data"); + } + + #[test] + fn test_decode_with_iupac_alphabet() { + // Test decode with IUPAC DNA alphabet (4-bit encoding) + use crate::alphabet::{lookup_alphabet, AlphabetType}; + + let sequence = b"ACGTRYMK"; + let alphabet = lookup_alphabet(&AlphabetType::DnaIupac); + let encoded_data = encode_sequence(sequence, alphabet); + + let record = SequenceRecord::Full { + metadata: SequenceMetadata { + name: "iupac_test".to_string(), + length: 8, + sha512t24u: "test_digest".to_string(), + md5: "test_md5".to_string(), + alphabet: AlphabetType::DnaIupac, + fai: None, + }, + sequence: encoded_data, + }; + + let decoded = record.decode().expect("Should decode IUPAC encoded data"); + assert_eq!(decoded, "ACGTRYMK", "Should correctly decode IUPAC DNA sequence"); + } + + #[test] + fn test_decode_with_protein_alphabet() { + // Test decode with protein alphabet (5-bit encoding) + use crate::alphabet::{lookup_alphabet, AlphabetType}; + + let sequence = b"ACDEFGHIKLMNPQRSTVWY"; + let alphabet = lookup_alphabet(&AlphabetType::Protein); + let encoded_data = encode_sequence(sequence, alphabet); + + let record = SequenceRecord::Full { + metadata: SequenceMetadata { + name: "protein_test".to_string(), + length: 20, + sha512t24u: "test_digest".to_string(), + md5: "test_md5".to_string(), + alphabet: AlphabetType::Protein, + fai: None, + }, + sequence: encoded_data, + }; + + let decoded = record.decode().expect("Should decode protein encoded data"); + assert_eq!(decoded, "ACDEFGHIKLMNPQRSTVWY", "Should correctly decode protein sequence"); + } + + #[test] + fn test_decode_empty_sequence() { + // Test decode with empty data + let record = SequenceRecord::Full { + metadata: SequenceMetadata { + name: "empty_seq".to_string(), + length: 0, + sha512t24u: "test_digest".to_string(), + md5: "test_md5".to_string(), + alphabet: AlphabetType::Dna2bit, + fai: None, + }, + sequence: Vec::new(), + }; + + let decoded = record.decode().expect("Should handle empty 
sequence"); + assert_eq!(decoded, "", "Empty sequence should decode to empty string"); + } + + #[test] + fn test_decode_detects_raw_vs_encoded() { + // Test that decode() correctly distinguishes between raw and encoded data + use crate::alphabet::{lookup_alphabet, AlphabetType}; + + // Create raw UTF-8 data that matches the sequence length (heuristic for raw data) + let raw_sequence = b"GGGGGGGG"; // 8 bytes for 8-symbol sequence + let record_raw = SequenceRecord::Full { + metadata: SequenceMetadata { + name: "raw_test".to_string(), + length: 8, + sha512t24u: "test_digest".to_string(), + md5: "test_md5".to_string(), + alphabet: AlphabetType::Dna2bit, + fai: None, + }, + sequence: raw_sequence.to_vec(), + }; + + let decoded_raw = record_raw.decode().expect("Should decode raw data"); + assert_eq!(decoded_raw, "GGGGGGGG"); + + // Create encoded data (2 bits per symbol, so 8 symbols = 2 bytes) + let alphabet = lookup_alphabet(&AlphabetType::Dna2bit); + let encoded_data = encode_sequence(b"GGGGGGGG", alphabet); + assert_eq!(encoded_data.len(), 2, "Encoded 2-bit DNA should be 2 bytes for 8 symbols"); + + let record_encoded = SequenceRecord::Full { + metadata: SequenceMetadata { + name: "encoded_test".to_string(), + length: 8, + sha512t24u: "test_digest".to_string(), + md5: "test_md5".to_string(), + alphabet: AlphabetType::Dna2bit, + fai: None, + }, + sequence: encoded_data, + }; + + let decoded_encoded = record_encoded.decode().expect("Should decode encoded data"); + assert_eq!(decoded_encoded, "GGGGGGGG"); + } + + #[test] + fn test_sequence_collection_iterator() { + // Test that SequenceCollection can be iterated over + let collection = load_fasta("../tests/data/fasta/base.fa") + .expect("Failed to load test FASTA file"); + + // Test borrowing iterator (&collection) + let mut count = 0; + for seq in &collection { + assert!(seq.metadata().length > 0, "Sequence should have length"); + count += 1; + } + assert_eq!(count, 3, "base.fa should have 3 sequences"); + + // Collection should still be usable after borrowing iteration + assert_eq!(collection.sequences.len(), 3); + + // Test consuming iterator (collection) + let names: Vec = collection + .into_iter() + .map(|seq| seq.metadata().name.clone()) + .collect(); + + assert_eq!(names.len(), 3); + assert!(names.contains(&"chrX".to_string())); + assert!(names.contains(&"chr1".to_string())); + assert!(names.contains(&"chr2".to_string())); + } +} diff --git a/gtars-refget/src/encoder.rs b/gtars-refget/src/encoder.rs index 94f6ddc3..ead77310 100644 --- a/gtars-refget/src/encoder.rs +++ b/gtars-refget/src/encoder.rs @@ -80,11 +80,16 @@ impl SequenceEncoder { /// is specified by the `bits_per_symbol` parameter. The function returns /// a bit-packed vector of bytes containing the encoded sequence. 
 ///
+/// **Bit Ordering: MSB-first (Most Significant Bit first)**
+///
+/// Example with 2-bit DNA encoding (A=00, C=01, G=10, T=11):
+/// - Sequence "ACGT" → byte 0x1B (00011011)
+/// - A (00) in bits 7-6, C (01) in bits 5-4, G (10) in bits 3-2, T (11) in bits 1-0
+///
 /// # Arguments
 ///
 /// * `sequence` - The sequence to encode
-/// * `encoding_array` - The encoding array to use
-/// * `bits_per_symbol` - The number of bits used to represent each symbol
+/// * `alphabet` - The alphabet defining the encoding
 ///
 /// # Returns
 ///
diff --git a/gtars-refget/src/fasta.rs b/gtars-refget/src/fasta.rs
index 91d27749..3ca5e1a3 100644
--- a/gtars-refget/src/fasta.rs
+++ b/gtars-refget/src/fasta.rs
@@ -1,16 +1,22 @@
-use anyhow::{Context, Result};
+use anyhow::Result;
 use std::io::BufRead;
 use std::path::Path;
 
 use super::alphabet::{AlphabetGuesser, AlphabetType};
-use super::collection::{SeqColDigestLvl1, SequenceCollection, SequenceMetadata, SequenceRecord};
-
-use gtars_core::utils::get_dynamic_reader;
+use super::collection::{FaiMetadata, SeqColDigestLvl1, SequenceCollection, SequenceMetadata, SequenceRecord};
 use md5::Md5;
-use seq_io::fasta::{Reader, Record};
 use sha2::{Digest, Sha512};
 
+/// A lightweight record containing only FAI (FASTA index) metadata for a sequence.
+/// Used by `compute_fai()` for fast FAI-only computation without digest overhead.
+#[derive(Clone, Debug)]
+pub struct FaiRecord {
+    pub name: String,
+    pub length: usize,
+    pub fai: Option<FaiMetadata>,
+}
+
 /// Processes a FASTA file to compute the digests of each sequence in the file.
 ///
 /// This function reads a FASTA file, computes the SHA-512 and MD5 digests for each sequence,
@@ -37,58 +43,520 @@ use sha2::{Digest, Sha512};
 ///
 ///
 pub fn digest_fasta<T: AsRef<Path>>(file_path: T) -> Result<SequenceCollection> {
+    use gtars_core::utils::get_dynamic_reader;
+
+    println!("Processing FASTA file: {}", file_path.as_ref().display());
+
+    // Detect if file is gzipped
+    let is_gzipped = file_path.as_ref().extension()
+        .and_then(|s| s.to_str())
+        .map(|s| s == "gz")
+        .unwrap_or(false);
+
+    // Skip FAI data for gzipped files (can't seek into compressed files)
+    let fai_enabled = !is_gzipped;
+
+    let mut byte_position: u64 = 0;
+
     let file_reader = get_dynamic_reader(file_path.as_ref())?;
-    let mut fasta_reader = Reader::new(file_reader);
+    let mut reader = std::io::BufReader::new(file_reader);
     let mut results = Vec::new();
-    println!("Processing FASTA file: {}", file_path.as_ref().display());
-    while let Some(record) = fasta_reader.next() {
-        // returns a RefRecord object
-        let record = record.with_context(|| {
-            format!(
-                "Failed to read FASTA record from file: {}",
-                file_path.as_ref().display()
-            )
-        })?;
-        let id = record.id().with_context(|| {
-            format!(
-                "FASTA record #{} is missing a sequence ID",
-                results.len() + 1
-            )
-        })?;
-        let mut sha512_hasher = Sha512::new();
-        let mut md5_hasher = Md5::new();
-        let mut length = 0;
-        let mut sequence = Vec::new();
-        let mut alphabet_guesser = AlphabetGuesser::new();
-        for seq_line in record.seq_lines() {
-            let seq_line = seq_line.to_ascii_uppercase();
-            sha512_hasher.update(&seq_line);
-            md5_hasher.update(&seq_line);
-            length += seq_line.len();
-            sequence.extend_from_slice(&seq_line);
-            alphabet_guesser.update(&seq_line);
+    let mut line = String::new();
+
+    let mut current_id: Option<String> = None;
+    let mut current_offset: u64 = 0;
+    let mut current_line_bases: Option<u32> = None;
+    let mut current_line_bytes: Option<u32> = None;
+    let mut sha512_hasher = Sha512::new();
+    let mut md5_hasher = Md5::new();
+    let mut length = 0;
+    let mut 
alphabet_guesser = AlphabetGuesser::new(); + + loop { + let bytes_read = reader.read_line(&mut line)?; + if bytes_read == 0 { + // EOF - finalize the last sequence if any + if let Some(id) = current_id.take() { + let sha512 = base64_url::encode(&sha512_hasher.finalize_reset()[0..24]); + let md5 = format!("{:x}", md5_hasher.finalize_reset()); + let alphabet = alphabet_guesser.guess(); + + let fai = if fai_enabled { + if let (Some(lb), Some(lby)) = (current_line_bases, current_line_bytes) { + Some(super::collection::FaiMetadata { + offset: current_offset, + line_bases: lb, + line_bytes: lby, + }) + } else { + None + } + } else { + None + }; + + let metadata = SequenceMetadata { + name: id.to_string(), + length, + sha512t24u: sha512, + md5, + alphabet, + fai, + }; + + results.push(SequenceRecord::Stub(metadata)); + } + break; } - let sha512 = base64_url::encode(&sha512_hasher.finalize_reset()[0..24]); - let md5 = format!("{:x}", md5_hasher.finalize_reset()); - let alphabet = alphabet_guesser.guess(); - - // Create SequenceRecord - let metadata = SequenceMetadata { - name: id.to_string(), - length, - sha512t24u: sha512, - md5, - alphabet, - }; - results.push(SequenceRecord { - metadata, - data: None, - }); + if line.starts_with('>') { + // Save previous sequence if any + if let Some(id) = current_id.take() { + let sha512 = base64_url::encode(&sha512_hasher.finalize_reset()[0..24]); + let md5 = format!("{:x}", md5_hasher.finalize_reset()); + let alphabet = alphabet_guesser.guess(); + + let fai = if fai_enabled { + if let (Some(lb), Some(lby)) = (current_line_bases, current_line_bytes) { + Some(super::collection::FaiMetadata { + offset: current_offset, + line_bases: lb, + line_bytes: lby, + }) + } else { + None + } + } else { + None + }; + + let metadata = SequenceMetadata { + name: id.to_string(), + length, + sha512t24u: sha512, + md5, + alphabet, + fai, + }; + + results.push(SequenceRecord::Stub(metadata)); + } + + // Start new sequence + let id = line[1..].trim().to_string(); + current_id = Some(id); + + // Track position for FAI + if fai_enabled { + byte_position += bytes_read as u64; + current_offset = byte_position; + } + current_line_bases = None; + current_line_bytes = None; + sha512_hasher = Sha512::new(); + md5_hasher = Md5::new(); + length = 0; + alphabet_guesser = AlphabetGuesser::new(); + } else if current_id.is_some() && !line.trim().is_empty() { + // Sequence line + let trimmed = line.trim_end(); + let line_len_bytes = bytes_read as u32; + let line_len_bases = trimmed.len() as u32; + + // Record line dimensions from first sequence line + if current_line_bases.is_none() { + current_line_bases = Some(line_len_bases); + current_line_bytes = Some(line_len_bytes); + } + + // Update digests and length + let seq_upper = trimmed.to_ascii_uppercase(); + sha512_hasher.update(seq_upper.as_bytes()); + md5_hasher.update(seq_upper.as_bytes()); + length += trimmed.len(); + alphabet_guesser.update(seq_upper.as_bytes()); + + // Track position for FAI + if fai_enabled { + byte_position += bytes_read as u64; + } + } else { + // Track position for empty lines or other content + if fai_enabled { + byte_position += bytes_read as u64; + } + } + + line.clear(); } // Compute lvl1 digests from the sequence records - let metadata_refs: Vec<&SequenceMetadata> = results.iter().map(|r| &r.metadata).collect(); + let metadata_refs: Vec<&SequenceMetadata> = results.iter().map(|r| r.metadata()).collect(); + let lvl1 = SeqColDigestLvl1::from_metadata(&metadata_refs); + + // Compute collection digest from lvl1 
digests
+    let collection_digest = lvl1.to_digest();
+
+    Ok(SequenceCollection {
+        sequences: results,
+        digest: collection_digest,
+        lvl1,
+        file_path: Some(file_path.as_ref().to_path_buf()),
+    })
+}
+
+/// Computes FAI (FASTA index) metadata for sequences in a FASTA file without computing digests.
+///
+/// This is a lightweight alternative to `digest_fasta()` for cases where only FAI metadata
+/// is needed. It skips the expensive digest computation (SHA512, MD5) and alphabet detection,
+/// making it much faster for FAI-only indexing (similar to `samtools faidx`).
+///
+/// # Arguments
+///
+/// * `file_path` - Path to the FASTA file to index
+///
+/// # Returns
+///
+/// A vector of `FaiRecord` structs containing sequence name, length, and FAI metadata.
+/// For gzipped files, the `fai` field will be `None` since byte offsets are not meaningful
+/// for compressed data.
+///
+/// # Examples
+///
+/// ```
+/// use gtars_refget::fasta::compute_fai;
+///
+/// // Using a test file from the repository
+/// let fai_records = compute_fai("../tests/data/fasta/base.fa").expect("Failed to compute FAI");
+/// assert_eq!(fai_records.len(), 3);
+///
+/// for record in fai_records {
+///     println!("{}: {} bp", record.name, record.length);
+///     if let Some(fai) = record.fai {
+///         println!("  offset: {}, line_bases: {}, line_bytes: {}",
+///              fai.offset, fai.line_bases, fai.line_bytes);
+///     }
+/// }
+/// ```
+pub fn compute_fai<T: AsRef<Path>>(file_path: T) -> Result<Vec<FaiRecord>> {
+    use gtars_core::utils::get_dynamic_reader;
+
+    // Detect if file is gzipped
+    let is_gzipped = file_path.as_ref().extension()
+        .and_then(|s| s.to_str())
+        .map(|s| s == "gz")
+        .unwrap_or(false);
+
+    // Skip FAI data for gzipped files (can't seek into compressed files)
+    let fai_enabled = !is_gzipped;
+
+    let mut byte_position: u64 = 0;
+
+    let file_reader = get_dynamic_reader(file_path.as_ref())?;
+    let mut reader = std::io::BufReader::new(file_reader);
+    let mut results = Vec::new();
+    let mut line = String::new();
+
+    let mut current_id: Option<String> = None;
+    let mut current_offset: u64 = 0;
+    let mut current_line_bases: Option<u32> = None;
+    let mut current_line_bytes: Option<u32> = None;
+    let mut length = 0;
+
+    loop {
+        let bytes_read = reader.read_line(&mut line)?;
+        if bytes_read == 0 {
+            // EOF - finalize the last sequence if any
+            if let Some(id) = current_id.take() {
+                let fai = if fai_enabled {
+                    if let (Some(lb), Some(lby)) = (current_line_bases, current_line_bytes) {
+                        Some(FaiMetadata {
+                            offset: current_offset,
+                            line_bases: lb,
+                            line_bytes: lby,
+                        })
+                    } else {
+                        None
+                    }
+                } else {
+                    None
+                };
+
+                results.push(FaiRecord {
+                    name: id,
+                    length,
+                    fai,
+                });
+            }
+            break;
+        }
+
+        if line.starts_with('>') {
+            // Save previous sequence if any
+            if let Some(id) = current_id.take() {
+                let fai = if fai_enabled {
+                    if let (Some(lb), Some(lby)) = (current_line_bases, current_line_bytes) {
+                        Some(FaiMetadata {
+                            offset: current_offset,
+                            line_bases: lb,
+                            line_bytes: lby,
+                        })
+                    } else {
+                        None
+                    }
+                } else {
+                    None
+                };
+
+                results.push(FaiRecord {
+                    name: id,
+                    length,
+                    fai,
+                });
+            }
+
+            // Start new sequence
+            let id = line[1..].trim().to_string();
+            current_id = Some(id);
+
+            // Track position for FAI
+            if fai_enabled {
+                byte_position += bytes_read as u64;
+                current_offset = byte_position;
+            }
+            current_line_bases = None;
+            current_line_bytes = None;
+            length = 0;
+        } else if current_id.is_some() && !line.trim().is_empty() {
+            // Sequence line
+            let trimmed = line.trim_end();
+            let line_len_bytes = bytes_read as u32;
+            let line_len_bases = trimmed.len() as u32;
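+            // NOTE: as with `samtools faidx`, these dimensions are only
+            // meaningful when every sequence line of a record (except the
+            // last) has the same width; they are taken from the first line.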
+
+            // Record line dimensions from first sequence line
+            if current_line_bases.is_none() {
+                current_line_bases = Some(line_len_bases);
+                current_line_bytes = Some(line_len_bytes);
+            }
+
+            // Update length (no need for uppercase or hashing)
+            length += trimmed.len();
+
+            // Track position for FAI
+            if fai_enabled {
+                byte_position += bytes_read as u64;
+            }
+        } else {
+            // Track position for empty lines or other content
+            if fai_enabled {
+                byte_position += bytes_read as u64;
+            }
+        }
+
+        line.clear();
+    }
+
+    Ok(results)
+}
+
+/// Loads a FASTA file with sequence data in memory (not just metadata).
+///
+/// This function reads a FASTA file and returns a SequenceCollection where each
+/// SequenceRecord contains both metadata AND the actual sequence data. This is useful
+/// when you need direct access to sequences without going through a RefgetStore.
+///
+/// # Arguments
+///
+/// * `file_path` - A path to the FASTA file to be loaded.
+///
+/// # Returns
+///
+/// A SequenceCollection whose records are `Full` variants, each carrying:
+/// - complete `SequenceMetadata` (name, length, digests, alphabet)
+/// - FAI metadata when the input is not gzipped
+/// - the raw sequence bytes (uppercased), available via `sequence()`
+///
+/// # Errors
+///
+/// This function will return an error if:
+/// - The file cannot be opened or read
+/// - The FASTA format is invalid or corrupted
+/// - A sequence record is missing an ID
+///
+/// # Examples
+///
+/// ```
+/// use gtars_refget::fasta::load_fasta;
+///
+/// let seqcol = load_fasta("../tests/data/fasta/base.fa").expect("Failed to load FASTA");
+/// for seq_record in &seqcol.sequences {
+///     println!("Sequence: {}", seq_record.metadata().name);
+///     if let Some(data) = seq_record.sequence() {
+///         println!("  Data: {} bytes", data.len());
+///     }
+/// }
+/// ```
+pub fn load_fasta<P: AsRef<Path>>(file_path: P) -> Result<SequenceCollection> {
+    use gtars_core::utils::get_dynamic_reader;
+
+    println!("Loading FASTA file with data: {}", file_path.as_ref().display());
+
+    // Detect if file is gzipped
+    let is_gzipped = file_path.as_ref().extension()
+        .and_then(|s| s.to_str())
+        .map(|s| s == "gz")
+        .unwrap_or(false);
+
+    // Skip FAI data for gzipped files (can't seek into compressed files)
+    let fai_enabled = !is_gzipped;
+
+    let mut byte_position: u64 = 0;
+
+    let file_reader = get_dynamic_reader(file_path.as_ref())?;
+    let mut reader = std::io::BufReader::new(file_reader);
+    let mut results = Vec::new();
+    let mut line = String::new();
+
+    let mut current_id: Option<String> = None;
+    let mut current_offset: u64 = 0;
+    let mut current_line_bases: Option<u32> = None;
+    let mut current_line_bytes: Option<u32> = None;
+    let mut sha512_hasher = Sha512::new();
+    let mut md5_hasher = Md5::new();
+    let mut length = 0;
+    let mut alphabet_guesser = AlphabetGuesser::new();
+    let mut sequence_data = Vec::new(); // Accumulate sequence data
+
+    loop {
+        let bytes_read = reader.read_line(&mut line)?;
+        if bytes_read == 0 {
+            // EOF - finalize the last sequence if any
+            if let Some(id) = current_id.take() {
+                let sha512 = base64_url::encode(&sha512_hasher.finalize_reset()[0..24]);
+                let md5 = format!("{:x}", md5_hasher.finalize_reset());
+                let alphabet = alphabet_guesser.guess();
+
+                let fai = if fai_enabled {
+                    if let (Some(lb), Some(lby)) = (current_line_bases, current_line_bytes) {
+                        Some(super::collection::FaiMetadata {
+                            offset: current_offset,
+                            line_bases: lb,
+                            line_bytes: lby,
+                        })
+                    } else {
+                        None
+                    }
+                } else {
+                    None
+                };
+
+                let metadata = SequenceMetadata {
+                    name: id.to_string(),
+                    length,
+                    sha512t24u: sha512,
+                    md5,
alphabet, + fai, + }; + + results.push(SequenceRecord::Full { + metadata, + sequence: sequence_data.clone(), + }); + } + break; + } + + if line.starts_with('>') { + // Save previous sequence if any + if let Some(id) = current_id.take() { + let sha512 = base64_url::encode(&sha512_hasher.finalize_reset()[0..24]); + let md5 = format!("{:x}", md5_hasher.finalize_reset()); + let alphabet = alphabet_guesser.guess(); + + let fai = if fai_enabled { + if let (Some(lb), Some(lby)) = (current_line_bases, current_line_bytes) { + Some(super::collection::FaiMetadata { + offset: current_offset, + line_bases: lb, + line_bytes: lby, + }) + } else { + None + } + } else { + None + }; + + let metadata = SequenceMetadata { + name: id.to_string(), + length, + sha512t24u: sha512, + md5, + alphabet, + fai, + }; + + results.push(SequenceRecord::Full { + metadata, + sequence: sequence_data.clone(), + }); + } + + // Start new sequence + let id = line[1..].trim().to_string(); + current_id = Some(id); + + // Track position for FAI + if fai_enabled { + byte_position += bytes_read as u64; + current_offset = byte_position; + } + current_line_bases = None; + current_line_bytes = None; + sha512_hasher = Sha512::new(); + md5_hasher = Md5::new(); + length = 0; + alphabet_guesser = AlphabetGuesser::new(); + sequence_data = Vec::new(); // Reset for new sequence + } else if current_id.is_some() && !line.trim().is_empty() { + // Sequence line + let trimmed = line.trim_end(); + let line_len_bytes = bytes_read as u32; + let line_len_bases = trimmed.len() as u32; + + // Record line dimensions from first sequence line + if current_line_bases.is_none() { + current_line_bases = Some(line_len_bases); + current_line_bytes = Some(line_len_bytes); + } + + // Update digests and length + let seq_upper = trimmed.to_ascii_uppercase(); + sha512_hasher.update(seq_upper.as_bytes()); + md5_hasher.update(seq_upper.as_bytes()); + length += trimmed.len(); + alphabet_guesser.update(seq_upper.as_bytes()); + + // Accumulate sequence data + sequence_data.extend_from_slice(seq_upper.as_bytes()); + + // Track position for FAI + if fai_enabled { + byte_position += bytes_read as u64; + } + } else { + // Track position for empty lines or other content + if fai_enabled { + byte_position += bytes_read as u64; + } + } + + line.clear(); + } + + // Compute lvl1 digests from the sequence records + let metadata_refs: Vec<&SequenceMetadata> = results.iter().map(|r| r.metadata()).collect(); let lvl1 = SeqColDigestLvl1::from_metadata(&metadata_refs); // Compute collection digest from lvl1 digests @@ -99,7 +567,6 @@ pub fn digest_fasta>(file_path: T) -> Result digest: collection_digest, lvl1, file_path: Some(file_path.as_ref().to_path_buf()), - has_data: true, }) } @@ -124,15 +591,15 @@ pub fn read_fasta_refget_file>(file_path: T) -> Result seqcol_digest = value.to_string(), - "names_digest" => names_digest = value.to_string(), - "sequences_digest" => sequences_digest = value.to_string(), - "lengths_digest" => lengths_digest = value.to_string(), - _ => {} // Ignore unknown metadata keys + if let Some(stripped) = line.strip_prefix("##") { + if let Some((key, value)) = stripped.split_once('=') { + match key { + "seqcol_digest" => seqcol_digest = value.to_string(), + "names_digest" => names_digest = value.to_string(), + "sequences_digest" => sequences_digest = value.to_string(), + "lengths_digest" => lengths_digest = value.to_string(), + _ => {} // Ignore unknown metadata keys + } } } continue; @@ -155,12 +622,10 @@ pub fn read_fasta_refget_file>(file_path: T) -> 
Result>(file_path: T) -> Result = results.iter().map(|r| &r.metadata).collect(); + let metadata_vec: Vec<&SequenceMetadata> = results.iter().map(|r| r.metadata()).collect(); lvl1 = SeqColDigestLvl1::from_metadata(&metadata_vec); } else { lvl1 = SeqColDigestLvl1 { @@ -190,7 +655,6 @@ pub fn read_fasta_refget_file>(file_path: T) -> ResultchrX\n" + assert_eq!(fai0.line_bases, 8); + assert_eq!(fai0.line_bytes, 9); // Includes \n + + assert!(results[1].metadata().fai.is_some()); + let fai1 = results[1].metadata().fai.as_ref().unwrap(); + assert_eq!(fai1.offset, 21); // After ">chr1\n" + assert_eq!(fai1.line_bases, 4); + assert_eq!(fai1.line_bytes, 5); + + assert!(results[2].metadata().fai.is_some()); + let fai2 = results[2].metadata().fai.as_ref().unwrap(); + assert_eq!(fai2.offset, 32); // After ">chr2\n" + assert_eq!(fai2.line_bases, 4); + assert_eq!(fai2.line_bytes, 5); } #[test] @@ -234,13 +717,13 @@ mod tests { digest_fasta("../tests/data/fasta/base.fa.gz").expect("Can't open test fasta file"); let results = seqcol.sequences; println!("{:?}", results); - assert_eq!(results[0].metadata.length, 8); + assert_eq!(results[0].metadata().length, 8); assert_eq!( - results[0].metadata.sha512t24u, + results[0].metadata().sha512t24u, "iYtREV555dUFKg2_agSJW6suquUyPpMw" ); - assert_eq!(results[0].metadata.md5, "5f63cfaa3ef61f88c9635fb9d18ec945"); - assert_eq!(results[0].metadata.alphabet, AlphabetType::Dna2bit); + assert_eq!(results[0].metadata().md5, "5f63cfaa3ef61f88c9635fb9d18ec945"); + assert_eq!(results[0].metadata().alphabet, AlphabetType::Dna2bit); } #[test] @@ -253,7 +736,7 @@ mod tests { fn digests_fa_to_farg() { let seqcol = SequenceCollection::from_path_no_cache("../tests/data/fasta/base.fa") .expect("Failed to create SequenceCollection from FASTA file"); - seqcol.to_farg().expect("Failed to write farg file"); + seqcol.write_farg().expect("Failed to write farg file"); let loaded_seqcol = read_fasta_refget_file("../tests/data/fasta/base.farg") .expect("Failed to read refget file"); @@ -261,11 +744,11 @@ mod tests { println!("Loaded SequenceCollection: {}", loaded_seqcol); // Test round-trip integrity for (original, read) in seqcol.sequences.iter().zip(loaded_seqcol.sequences.iter()) { - assert_eq!(original.metadata.name, read.metadata.name); - assert_eq!(original.metadata.length, read.metadata.length); - assert_eq!(original.metadata.sha512t24u, read.metadata.sha512t24u); - assert_eq!(original.metadata.md5, read.metadata.md5); - assert_eq!(original.metadata.alphabet, read.metadata.alphabet); + assert_eq!(original.metadata().name, read.metadata().name); + assert_eq!(original.metadata().length, read.metadata().length); + assert_eq!(original.metadata().sha512t24u, read.metadata().sha512t24u); + assert_eq!(original.metadata().md5, read.metadata().md5); + assert_eq!(original.metadata().alphabet, read.metadata().alphabet); } } @@ -289,4 +772,434 @@ mod tests { "cGRMZIb3AVgkcAfNv39RN7hnT5Chk7RX" ); } + + // Helper function to parse samtools FAI file + fn parse_fai_file(fai_path: &str) -> Vec<(String, u64, u64, u32, u32)> { + use std::fs::File; + use std::io::{BufRead, BufReader}; + + let file = File::open(fai_path).expect("Failed to open FAI file"); + let reader = BufReader::new(file); + let mut results = Vec::new(); + + for line in reader.lines() { + let line = line.expect("Failed to read line"); + let parts: Vec<&str> = line.split('\t').collect(); + if parts.len() >= 5 { + let name = parts[0].to_string(); + let length = parts[1].parse::().expect("Failed to parse length"); + let offset = 
parts[2].parse::().expect("Failed to parse offset"); + let line_bases = parts[3].parse::().expect("Failed to parse line_bases"); + let line_bytes = parts[4].parse::().expect("Failed to parse line_bytes"); + results.push((name, length, offset, line_bases, line_bytes)); + } + } + results + } + + #[test] + fn fai_wrapped_40() { + let seqcol = digest_fasta("../tests/data/fasta/wrapped_40.fa") + .expect("Failed to digest FASTA file"); + let fai_data = parse_fai_file("../tests/data/fasta/wrapped_40.fa.fai"); + + assert_eq!(seqcol.sequences.len(), fai_data.len()); + + for (seq, (name, length, offset, line_bases, line_bytes)) in + seqcol.sequences.iter().zip(fai_data.iter()) + { + assert_eq!(seq.metadata().name, *name); + assert_eq!(seq.metadata().length, *length as usize); + assert!(seq.metadata().fai.is_some(), "FAI data should be present"); + let fai = seq.metadata().fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {}", name); + } + } + + #[test] + fn fai_wrapped_20() { + let seqcol = digest_fasta("../tests/data/fasta/wrapped_20.fa") + .expect("Failed to digest FASTA file"); + let fai_data = parse_fai_file("../tests/data/fasta/wrapped_20.fa.fai"); + + assert_eq!(seqcol.sequences.len(), fai_data.len()); + + for (seq, (name, length, offset, line_bases, line_bytes)) in + seqcol.sequences.iter().zip(fai_data.iter()) + { + assert_eq!(seq.metadata().name, *name); + assert_eq!(seq.metadata().length, *length as usize); + assert!(seq.metadata().fai.is_some(), "FAI data should be present"); + let fai = seq.metadata().fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {}", name); + } + } + + #[test] + fn fai_unwrapped() { + let seqcol = digest_fasta("../tests/data/fasta/unwrapped.fa") + .expect("Failed to digest FASTA file"); + let fai_data = parse_fai_file("../tests/data/fasta/unwrapped.fa.fai"); + + assert_eq!(seqcol.sequences.len(), fai_data.len()); + + for (seq, (name, length, offset, line_bases, line_bytes)) in + seqcol.sequences.iter().zip(fai_data.iter()) + { + assert_eq!(seq.metadata().name, *name); + assert_eq!(seq.metadata().length, *length as usize); + assert!(seq.metadata().fai.is_some(), "FAI data should be present"); + let fai = seq.metadata().fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {}", name); + } + } + + #[test] + fn fai_base_mixed_lengths() { + // base.fa has mixed sequence lengths (8bp, 4bp, 4bp) on single lines + let seqcol = digest_fasta("../tests/data/fasta/base.fa") + .expect("Failed to digest FASTA file"); + let fai_data = parse_fai_file("../tests/data/fasta/base.fa.fai"); + + assert_eq!(seqcol.sequences.len(), fai_data.len()); + assert_eq!(seqcol.sequences.len(), 3, "base.fa should have 3 sequences"); + + for (seq, (name, length, offset, line_bases, line_bytes)) in + seqcol.sequences.iter().zip(fai_data.iter()) + { + assert_eq!(seq.metadata().name, *name); + assert_eq!(seq.metadata().length, *length as usize); + assert!(seq.metadata().fai.is_some(), "FAI data should be present"); + let fai = 
seq.metadata().fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {}", name); + } + } + + #[test] + fn fai_gzipped_returns_none() { + let seqcol = digest_fasta("../tests/data/fasta/base.fa.gz") + .expect("Failed to digest gzipped FASTA file"); + + // All sequences should have None for FAI data + for seq in seqcol.sequences.iter() { + assert!(seq.metadata().fai.is_none(), + "Gzipped files should not have FAI data for sequence {}", + seq.metadata().name); + } + } + + #[test] + fn fai_crlf_endings() { + let seqcol = digest_fasta("../tests/data/fasta/crlf_endings.fa") + .expect("Failed to digest FASTA file with CRLF endings"); + let fai_data = parse_fai_file("../tests/data/fasta/crlf_endings.fa.fai"); + + assert_eq!(seqcol.sequences.len(), fai_data.len()); + + for (seq, (name, length, offset, line_bases, line_bytes)) in + seqcol.sequences.iter().zip(fai_data.iter()) + { + assert_eq!(seq.metadata().name, *name); + assert_eq!(seq.metadata().length, *length as usize); + assert!(seq.metadata().fai.is_some(), "FAI data should be present"); + let fai = seq.metadata().fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {} (should be +2 for CRLF)", name); + // Verify CRLF: line_bytes should be line_bases + 2 + assert_eq!(fai.line_bytes, fai.line_bases + 2, + "CRLF files should have line_bytes = line_bases + 2 for {}", + name); + } + } + + #[test] + fn fai_same_sequence_different_wrapping() { + // Test that wrapped_40, wrapped_20, and unwrapped all contain identical sequences + // despite different line wrapping styles + let seqcol_40 = digest_fasta("../tests/data/fasta/wrapped_40.fa") + .expect("Failed to digest wrapped_40.fa"); + let seqcol_20 = digest_fasta("../tests/data/fasta/wrapped_20.fa") + .expect("Failed to digest wrapped_20.fa"); + let seqcol_unwrapped = digest_fasta("../tests/data/fasta/unwrapped.fa") + .expect("Failed to digest unwrapped.fa"); + + // All should have same number of sequences + assert_eq!(seqcol_40.sequences.len(), seqcol_20.sequences.len()); + assert_eq!(seqcol_40.sequences.len(), seqcol_unwrapped.sequences.len()); + + // Compare each sequence across all three files + for ((seq40, seq20), seq_unwrap) in seqcol_40.sequences.iter() + .zip(seqcol_20.sequences.iter()) + .zip(seqcol_unwrapped.sequences.iter()) + { + // Same name + assert_eq!(seq40.metadata().name, seq20.metadata().name); + assert_eq!(seq40.metadata().name, seq_unwrap.metadata().name); + + // Same length + assert_eq!(seq40.metadata().length, seq20.metadata().length, + "Length mismatch for {}", seq40.metadata().name); + assert_eq!(seq40.metadata().length, seq_unwrap.metadata().length, + "Length mismatch for {}", seq40.metadata().name); + + // Same digests (proves sequences are identical) + assert_eq!(seq40.metadata().sha512t24u, seq20.metadata().sha512t24u, + "SHA512 digest mismatch for {} - sequences not identical", + seq40.metadata().name); + assert_eq!(seq40.metadata().sha512t24u, seq_unwrap.metadata().sha512t24u, + "SHA512 digest mismatch for {} - sequences not identical", + seq40.metadata().name); + assert_eq!(seq40.metadata().md5, seq20.metadata().md5, + "MD5 digest mismatch for {} - sequences not identical", + seq40.metadata().name); + 
assert_eq!(seq40.metadata().md5, seq_unwrap.metadata().md5, + "MD5 digest mismatch for {} - sequences not identical", + seq40.metadata().name); + + // But different FAI data (different wrapping) + assert!(seq40.metadata().fai.is_some() && seq20.metadata().fai.is_some() && seq_unwrap.metadata().fai.is_some()); + let fai40 = seq40.metadata().fai.as_ref().unwrap(); + let fai20 = seq20.metadata().fai.as_ref().unwrap(); + let fai_unwrap = seq_unwrap.metadata().fai.as_ref().unwrap(); + + // Different line_bases due to different wrapping (for multi-line sequences) + // Note: short sequences like chr3 that fit on one line may have same line_bases + // across different wrapping styles, which is correct behavior + if seq40.metadata().length > 40 { + // Long enough to require wrapping in wrapped_40 + assert_ne!(fai40.line_bases, fai20.line_bases, + "wrapped_40 and wrapped_20 should have different line_bases for {}", + seq40.metadata().name); + } + if seq40.metadata().length > 20 { + // Long enough to require wrapping in wrapped_20 + assert_ne!(fai20.line_bases, fai_unwrap.line_bases, + "wrapped_20 and unwrapped should have different line_bases for {}", + seq40.metadata().name); + } + } + } + + // Tests for compute_fai() function + + #[test] + fn test_compute_fai_wrapped_40() { + let fai_records = compute_fai("../tests/data/fasta/wrapped_40.fa") + .expect("Failed to compute FAI"); + let fai_data = parse_fai_file("../tests/data/fasta/wrapped_40.fa.fai"); + + assert_eq!(fai_records.len(), fai_data.len()); + + for (record, (name, length, offset, line_bases, line_bytes)) in + fai_records.iter().zip(fai_data.iter()) + { + assert_eq!(record.name, *name); + assert_eq!(record.length, *length as usize); + assert!(record.fai.is_some(), "FAI data should be present"); + let fai = record.fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {}", name); + } + } + + #[test] + fn test_compute_fai_wrapped_20() { + let fai_records = compute_fai("../tests/data/fasta/wrapped_20.fa") + .expect("Failed to compute FAI"); + let fai_data = parse_fai_file("../tests/data/fasta/wrapped_20.fa.fai"); + + assert_eq!(fai_records.len(), fai_data.len()); + + for (record, (name, length, offset, line_bases, line_bytes)) in + fai_records.iter().zip(fai_data.iter()) + { + assert_eq!(record.name, *name); + assert_eq!(record.length, *length as usize); + assert!(record.fai.is_some(), "FAI data should be present"); + let fai = record.fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {}", name); + } + } + + #[test] + fn test_compute_fai_unwrapped() { + let fai_records = compute_fai("../tests/data/fasta/unwrapped.fa") + .expect("Failed to compute FAI"); + let fai_data = parse_fai_file("../tests/data/fasta/unwrapped.fa.fai"); + + assert_eq!(fai_records.len(), fai_data.len()); + + for (record, (name, length, offset, line_bases, line_bytes)) in + fai_records.iter().zip(fai_data.iter()) + { + assert_eq!(record.name, *name); + assert_eq!(record.length, *length as usize); + assert!(record.fai.is_some(), "FAI data should be present"); + let fai = record.fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line 
bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {}", name); + } + } + + #[test] + fn test_compute_fai_base() { + let fai_records = compute_fai("../tests/data/fasta/base.fa") + .expect("Failed to compute FAI"); + let fai_data = parse_fai_file("../tests/data/fasta/base.fa.fai"); + + assert_eq!(fai_records.len(), fai_data.len()); + assert_eq!(fai_records.len(), 3, "base.fa should have 3 sequences"); + + for (record, (name, length, offset, line_bases, line_bytes)) in + fai_records.iter().zip(fai_data.iter()) + { + assert_eq!(record.name, *name); + assert_eq!(record.length, *length as usize); + assert!(record.fai.is_some(), "FAI data should be present"); + let fai = record.fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {}", name); + } + } + + #[test] + fn test_compute_fai_crlf() { + let fai_records = compute_fai("../tests/data/fasta/crlf_endings.fa") + .expect("Failed to compute FAI"); + let fai_data = parse_fai_file("../tests/data/fasta/crlf_endings.fa.fai"); + + assert_eq!(fai_records.len(), fai_data.len()); + + for (record, (name, length, offset, line_bases, line_bytes)) in + fai_records.iter().zip(fai_data.iter()) + { + assert_eq!(record.name, *name); + assert_eq!(record.length, *length as usize); + assert!(record.fai.is_some(), "FAI data should be present"); + let fai = record.fai.as_ref().unwrap(); + assert_eq!(fai.offset, *offset, "Offset mismatch for {}", name); + assert_eq!(fai.line_bases, *line_bases, "Line bases mismatch for {}", name); + assert_eq!(fai.line_bytes, *line_bytes, "Line bytes mismatch for {} (should be +2 for CRLF)", name); + // Verify CRLF: line_bytes should be line_bases + 2 + assert_eq!(fai.line_bytes, fai.line_bases + 2, + "CRLF files should have line_bytes = line_bases + 2 for {}", + name); + } + } + + #[test] + fn test_compute_fai_gzipped() { + let fai_records = compute_fai("../tests/data/fasta/base.fa.gz") + .expect("Failed to compute FAI for gzipped file"); + + // All sequences should have None for FAI data + for record in fai_records.iter() { + assert!(record.fai.is_none(), + "Gzipped files should not have FAI data for sequence {}", + record.name); + } + } + + #[test] + fn test_load_fasta_with_data() { + let seqcol = load_fasta("../tests/data/fasta/base.fa") + .expect("Failed to load FASTA with data"); + + assert_eq!(seqcol.sequences.len(), 3); + + // Check first sequence (chrX) + let seq0 = &seqcol.sequences[0]; + assert_eq!(seq0.metadata().name, "chrX"); + assert_eq!(seq0.metadata().length, 8); + assert!(seq0.has_data(), "Sequence should have data"); + + if let Some(data) = seq0.sequence() { + assert_eq!(data.len(), 8); + assert_eq!(std::str::from_utf8(data).unwrap(), "TTGGGGAA"); + } + + // Check second sequence (chr1) + let seq1 = &seqcol.sequences[1]; + assert_eq!(seq1.metadata().name, "chr1"); + assert_eq!(seq1.metadata().length, 4); + assert!(seq1.has_data(), "Sequence should have data"); + + if let Some(data) = seq1.sequence() { + assert_eq!(data.len(), 4); + assert_eq!(std::str::from_utf8(data).unwrap(), "GGAA"); + } + + // Check third sequence (chr2) + let seq2 = &seqcol.sequences[2]; + assert_eq!(seq2.metadata().name, "chr2"); + assert_eq!(seq2.metadata().length, 4); + assert!(seq2.has_data(), "Sequence should have data"); + + if let Some(data) = seq2.sequence() { + assert_eq!(data.len(), 4); + 
assert_eq!(std::str::from_utf8(data).unwrap(), "GCGC");
+        }
+
+        // Verify digests match digest_fasta() output
+        let digest_seqcol = digest_fasta("../tests/data/fasta/base.fa")
+            .expect("Failed to digest FASTA");
+
+        for (loaded, digested) in seqcol.sequences.iter().zip(digest_seqcol.sequences.iter()) {
+            assert_eq!(loaded.metadata().name, digested.metadata().name);
+            assert_eq!(loaded.metadata().length, digested.metadata().length);
+            assert_eq!(loaded.metadata().sha512t24u, digested.metadata().sha512t24u);
+            assert_eq!(loaded.metadata().md5, digested.metadata().md5);
+            assert_eq!(loaded.metadata().alphabet, digested.metadata().alphabet);
+        }
+    }
+
+    #[test]
+    fn test_load_fasta_wrapped() {
+        let seqcol = load_fasta("../tests/data/fasta/wrapped_40.fa")
+            .expect("Failed to load wrapped FASTA");
+
+        // Verify all sequences have data
+        for seq in &seqcol.sequences {
+            assert!(seq.has_data(), "Sequence {} should have data", seq.metadata().name);
+
+            if let Some(data) = seq.sequence() {
+                assert_eq!(data.len(), seq.metadata().length,
+                    "Data length mismatch for {}", seq.metadata().name);
+
+                // Verify data is uppercase
+                let data_str = std::str::from_utf8(data).unwrap();
+                assert_eq!(data_str, data_str.to_uppercase(),
+                    "Sequence data should be uppercased");
+            }
+        }
+    }
+
+    #[test]
+    fn test_load_fasta_gzipped() {
+        let seqcol = load_fasta("../tests/data/fasta/base.fa.gz")
+            .expect("Failed to load gzipped FASTA");
+
+        // Should still load data from gzipped files
+        assert_eq!(seqcol.sequences.len(), 3);
+        assert!(seqcol.sequences[0].has_data(), "Should have sequence data");
+
+        for seq in &seqcol.sequences {
+            assert!(seq.has_data(), "Gzipped file should still have sequence data");
+            assert!(seq.metadata().fai.is_none(), "Gzipped file should not have FAI metadata");
+        }
+    }
 }
diff --git a/gtars-refget/src/lib.rs b/gtars-refget/src/lib.rs
index 3d0e0245..d6448083 100644
--- a/gtars-refget/src/lib.rs
+++ b/gtars-refget/src/lib.rs
@@ -94,16 +94,17 @@ mod tests {
         // Create a new sequence store, and add sequences to the store
         println!("Adding sequences from FASTA file...");
         let start = Instant::now();
-        let mut store = GlobalRefgetStore::new(StorageMode::Encoded);
-        store.import_fasta(&fasta_path).unwrap();
+        let mut store = GlobalRefgetStore::in_memory();
+        store.add_sequence_collection_from_fasta(&fasta_path).unwrap();
         let duration = start.elapsed();
         println!("⏱️ Time taken to load: {:.2?}", duration);
-        let mut store2 = GlobalRefgetStore::new(StorageMode::Raw);
-        store2.import_fasta(&fasta_path).unwrap();
+        let mut store2 = GlobalRefgetStore::in_memory();
+        store2.disable_encoding(); // Switch to Raw mode
+        store2.add_sequence_collection_from_fasta(&fasta_path).unwrap();
         // Get list of sequences
-        let sequences = store.list_sequence_digests();
+        let sequences: Vec<_> = store.sequence_digests().collect();
         assert!(!sequences.is_empty(), "No sequences found in the store");
         // Look up the first sequence by digest
@@ -147,16 +148,16 @@ mod tests {
         let temp_dir = tempdir().expect("Failed to create temporary directory");
         let temp_path = temp_dir.path();
         // Create a new sequence store
-        let mut store = GlobalRefgetStore::new(StorageMode::Encoded);
+        let mut store = GlobalRefgetStore::in_memory();
         // let fasta_path = "../tests/data/subset.fa.gz";
         let fasta_path = "../tests/data/fasta/base.fa.gz";
         let temp_fasta = temp_path.join("base.fa.gz");
         std::fs::copy(fasta_path, &temp_fasta).expect("Failed to copy base.fa.gz to tempdir");
         // Add sequences to the store
-        store.import_fasta(temp_fasta).unwrap();
+        store.add_sequence_collection_from_fasta(temp_fasta).unwrap();
        println!("Listing sequences in the store...");
-        // let sequences = store.list_sequence_digests();
+        // let sequences = store.sequence_digests();
         // let digest = &sequences[0];
         // let digest_str = String::from_utf8(digest.to_vec()).expect("Invalid ASCII data");
         // let digest = "Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"; // from subset.fa.gz
diff --git a/gtars-refget/src/store.rs b/gtars-refget/src/store.rs
index 900a2eb3..5b07a669 100644
--- a/gtars-refget/src/store.rs
+++ b/gtars-refget/src/store.rs
@@ -1,23 +1,26 @@
 use super::alphabet::{AlphabetType, lookup_alphabet};
 use seq_io::fasta::{Reader, Record};
 use std::collections::HashMap;
+use std::ffi::OsStr;
 use std::fmt::{Display, Formatter};
 use std::path::{Path, PathBuf};
+use std::time::Instant;
 use super::encoder::SequenceEncoder;
-use super::encoder::decode_substring_from_bytes;
+use super::encoder::{decode_substring_from_bytes, decode_string_from_bytes, encode_sequence};
 use crate::fasta::read_fasta_refget_file;
 use crate::hashkeyable::HashKeyable;
 use anyhow::anyhow;
 use anyhow::{Context, Result};
 use chrono::Utc;
 use flate2::read::GzDecoder;
+use flate2::write::GzEncoder;
+use flate2::Compression;
 use gtars_core::utils::{get_dynamic_reader, get_file_info, parse_bedlike_file};
-use memmap2::Mmap;
 use serde::{Deserialize, Serialize};
-use std::fs::{self, File, OpenOptions, create_dir_all};
+use std::fs::{self, File, create_dir_all};
 use std::io::{BufRead, BufReader, Read, Write};
-use std::{io, str};
+use std::str;
 // Import the HashKeyable trait for converting types to a 32-byte key
 // Import collection types
@@ -26,9 +29,10 @@ use super::collection::{SequenceCollection, SequenceMetadata, SequenceRecord};
 // const DEFAULT_COLLECTION_ID: [u8; 32] = [0u8; 32]; // Default collection ID for the name lookup table
 const DEFAULT_COLLECTION_ID: &str = "DEFAULT_REFGET_SEQUENCE_COLLECTION"; // Default collection ID for the name lookup table
+const DEFAULT_SEQDATA_PATH_TEMPLATE: &str = "sequences/%s2/%s.seq"; // Default template for sequence file paths
 /// Enum storing whether sequences will be stored in Raw or Encoded form
-#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq)]
 pub enum StorageMode {
     Raw,
     Encoded,
@@ -60,6 +64,14 @@ pub struct GlobalRefgetStore {
     collections: HashMap<[u8; 32], SequenceCollection>,
     /// Storage strategy for sequences
     mode: StorageMode,
+    /// Where the store lives on disk (local store or cache directory)
+    local_path: Option<PathBuf>,
+    /// Where to pull sequences from (if remote-backed)
+    remote_source: Option<String>,
+    /// Template for sequence file paths (e.g., "sequences/%s2/%s.seq")
+    seqdata_path_template: Option<String>,
+    /// Whether to cache fetched remote files to disk
+    cache_to_disk: bool,
 }
 /// Metadata for the entire store.
@@ -80,11 +92,11 @@ struct StoreMetadata {
     created_at: String,
 }
-pub struct GetSeqsBedFileIter<'a, K>
+pub struct SubstringsFromRegions<'a, K>
 where
     K: AsRef<[u8]>,
 {
-    store: &'a GlobalRefgetStore,
+    store: &'a mut GlobalRefgetStore,
     reader: BufReader<Box<dyn Read>>,
     collection_digest: K,
     previous_parsed_chr: String,
     line_num: usize,
 }
-impl<K> Iterator for GetSeqsBedFileIter<'_, K>
+impl<K> Iterator for SubstringsFromRegions<'_, K>
 where
     K: AsRef<[u8]>,
 {
@@ -153,7 +165,7 @@ where
             }
         };
-        self.current_seq_digest = result.metadata.sha512t24u.clone();
+        self.current_seq_digest = result.metadata().sha512t24u.clone();
         }
         let retrieved_substring = match self.store.get_substring(
@@ -185,7 +197,8 @@ impl GlobalRefgetStore {
     /// Generic constructor. Creates a new, empty `GlobalRefgetStore`.
-    pub fn new(mode: StorageMode) -> Self {
+    /// This is a private helper - use `on_disk()` or `in_memory()` instead.
+    fn new(mode: StorageMode) -> Self {
         // Initialize the name lookup with a default collection
         let mut name_lookup = HashMap::new();
         name_lookup.insert(DEFAULT_COLLECTION_ID.to_key(), HashMap::new());
@@ -196,18 +209,147 @@ impl GlobalRefgetStore {
             name_lookup,
             collections: HashMap::new(),
             mode,
+            local_path: None,
+            remote_source: None,
+            seqdata_path_template: None,
+            cache_to_disk: false, // Default to false - users should explicitly choose
         }
     }
+    /// Create a disk-backed RefgetStore
+    ///
+    /// Sequences are written to disk immediately and loaded on-demand (lazy loading).
+    /// Only metadata is kept in memory. New stores default to Encoded storage mode.
+    ///
+    /// # Arguments
+    /// * `cache_path` - Directory for storing sequences and metadata
+    ///
+    /// # Returns
+    /// Result with a configured disk-backed store
+    ///
+    /// # Example
+    /// ```ignore
+    /// let mut store = GlobalRefgetStore::on_disk("/data/store")?;
+    /// store.add_sequence_collection_from_fasta("genome.fa")?;
+    /// ```
+    pub fn on_disk<P: AsRef<Path>>(cache_path: P) -> Result<Self> {
+        let cache_path = cache_path.as_ref();
+        let index_path = cache_path.join("index.json");
+
+        if index_path.exists() {
+            // Load existing store
+            Self::load_local(cache_path)
+        } else {
+            // Create new store with default Encoded mode
+            let mode = StorageMode::Encoded;
+            create_dir_all(cache_path)?;
+
+            // Use private new() helper
+            let mut store = Self::new(mode);
+            store.local_path = Some(cache_path.to_path_buf());
+            store.seqdata_path_template = Some(DEFAULT_SEQDATA_PATH_TEMPLATE.to_string());
+            store.cache_to_disk = true; // Always true for on_disk
+
+            // Create directory structure
+            create_dir_all(cache_path.join("sequences"))?;
+            create_dir_all(cache_path.join("collections"))?;
+
+            Ok(store)
+        }
+    }
+
+    /// Create an in-memory RefgetStore
+    ///
+    /// All sequences kept in RAM for fast access.
+    /// Defaults to Encoded storage mode (2-bit packing for space efficiency).
+    /// Use set_mode() to change storage mode after creation.
+    ///
+    /// # Example
+    /// ```ignore
+    /// let mut store = GlobalRefgetStore::in_memory();
+    /// store.add_sequence_collection_from_fasta("genome.fa")?;
+    /// ```
+    pub fn in_memory() -> Self {
+        Self::new(StorageMode::Encoded)
+    }
+
+    /// Change the storage mode, re-encoding/decoding existing sequences as needed.
+    ///
+    /// When switching from Raw to Encoded:
+    /// - All Full sequences in memory are encoded (2-bit packed)
+    ///
+    /// When switching from Encoded to Raw:
+    /// - All Full sequences in memory are decoded back to raw bytes
+    ///
+    /// Note: Stub sequences (lazy-loaded from disk) are not affected.
+    /// They will be loaded in the NEW mode when accessed.
+    ///
+    /// # Arguments
+    /// * `new_mode` - The storage mode to switch to
+    pub fn set_mode(&mut self, new_mode: StorageMode) {
+        if self.mode == new_mode {
+            return; // No change needed
+        }
+
+        // Re-encode/decode all Full sequences in memory
+        for record in self.sequence_store.values_mut() {
+            match record {
+                SequenceRecord::Full { metadata, sequence } => {
+                    match (self.mode, new_mode) {
+                        (StorageMode::Raw, StorageMode::Encoded) => {
+                            // Encode: raw bytes -> 2-bit packed
+                            let alphabet = lookup_alphabet(&metadata.alphabet);
+                            *sequence = encode_sequence(&*sequence, alphabet);
+                        }
+                        (StorageMode::Encoded, StorageMode::Raw) => {
+                            // Decode: 2-bit packed -> raw bytes
+                            let alphabet = lookup_alphabet(&metadata.alphabet);
+                            *sequence = decode_string_from_bytes(
+                                &*sequence,
+                                metadata.length,
+                                alphabet
+                            );
+                        }
+                        _ => {} // Same mode, no conversion needed
+                    }
+                }
+                SequenceRecord::Stub(_) => {
+                    // Stubs don't hold sequence data, nothing to convert
+                }
+            }
+        }
+
+        self.mode = new_mode;
+    }
+
+    /// Enable 2-bit encoding for space efficiency.
+    /// Re-encodes any existing Raw sequences in memory.
+    pub fn enable_encoding(&mut self) {
+        self.set_mode(StorageMode::Encoded);
+    }
+
+    /// Disable encoding, use raw byte storage.
+    /// Decodes any existing Encoded sequences in memory.
+    pub fn disable_encoding(&mut self) {
+        self.set_mode(StorageMode::Raw);
+    }
     /// Adds a sequence to the Store
     /// Ensure that it is added to the appropriate collection.
     /// If no collection is specified, it will be added to the default collection.
+    ///
+    /// # Arguments
+    /// * `sequence_record` - The sequence to add
+    /// * `collection_digest` - Collection to add to (or None for default)
+    /// * `force` - If true, overwrite existing sequences. If false, skip duplicates.
     // Using Into here instead of the Option directly allows us to accept
     // either None or [u8; 32], without having to wrap it in Some().
     pub fn add_sequence<T: Into<Option<[u8; 32]>>>(
         &mut self,
         sequence_record: SequenceRecord,
         collection_digest: T,
+        force: bool,
     ) -> Result<()> {
         // Ensure collection exists; otherwise use the default collection
         let collection_digest = collection_digest
@@ -217,31 +359,72 @@ impl GlobalRefgetStore {
             anyhow::anyhow!("Collection not found for digest: {:?}", collection_digest)
         })?
+        // Get metadata from the record (works for both Stub and Full variants)
+        let metadata = sequence_record.metadata();
+
         // Add to name lookup for the collection
         self.name_lookup
             .entry(collection_digest)
             .or_default()
             .insert(
-                sequence_record.metadata.name.clone(),
-                sequence_record.metadata.sha512t24u.to_key(),
+                metadata.name.clone(),
+                metadata.sha512t24u.to_key(),
             );
         // Finally, add SequenceRecord to store (consuming the object)
-        self.add_sequence_record(sequence_record)?;
+        self.add_sequence_record(sequence_record, force)?;
         Ok(())
     }
     /// Adds a collection, and all sequences in it, to the store.
+    ///
+    /// Skips collections and sequences that already exist.
+    /// Use `add_sequence_collection_force()` to overwrite existing data.
+    ///
+    /// # Arguments
+    /// * `collection` - The sequence collection to add
     pub fn add_sequence_collection(&mut self, collection: SequenceCollection) -> Result<()> {
+        self.add_sequence_collection_internal(collection, false)
+    }
+
+    /// Adds a collection, and all sequences in it, to the store, overwriting existing data.
+    ///
+    /// Forces overwrite of collections and sequences that already exist.
+    /// Use `add_sequence_collection()` to skip duplicates (safer default).
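+    /// A minimal sketch of a forced re-import (assumes a mutable `store` in scope and a
+    /// `SequenceCollection` built elsewhere, e.g. via `SequenceCollection::from_fasta`):
+    /// ```ignore
+    /// let coll = SequenceCollection::from_fasta("genome.fa")?;
+    /// store.add_sequence_collection_force(coll)?;
+    /// ```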
+ /// + /// # Arguments + /// * `collection` - The sequence collection to add + pub fn add_sequence_collection_force(&mut self, collection: SequenceCollection) -> Result<()> { + self.add_sequence_collection_internal(collection, true) + } + + /// Internal implementation for adding a sequence collection. + fn add_sequence_collection_internal(&mut self, collection: SequenceCollection, force: bool) -> Result<()> { let coll_digest = collection.digest.to_key(); + // Check if collection already exists + if !force && self.collections.contains_key(&coll_digest) { + // Skip - collection already exists and force=false + return Ok(()); + } + + // Write collection to disk if cache_to_disk is enabled (before moving sequences) + if self.cache_to_disk && self.local_path.is_some() { + self.write_collection_to_disk_single(&collection)?; + } + // Register the collection self.collections.insert(coll_digest, collection.clone()); // Add all sequences in the collection to the store for sequence_record in collection.sequences { - self.add_sequence(sequence_record, coll_digest)?; + self.add_sequence(sequence_record, coll_digest, force)?; + } + + // Write index files so store is immediately loadable + if self.cache_to_disk && self.local_path.is_some() { + self.write_index_files()?; } Ok(()) @@ -249,43 +432,133 @@ impl GlobalRefgetStore { // Adds SequenceRecord to the store. // Should only be used internally, via `add_sequence`, which ensures sequences are added to collections. - fn add_sequence_record(&mut self, sr: SequenceRecord) -> Result<()> { + // If the store is disk-backed (cache_to_disk=true), Full records are written to disk and replaced with Stubs. + fn add_sequence_record(&mut self, sr: SequenceRecord, force: bool) -> Result<()> { + let metadata = sr.metadata(); + let key = metadata.sha512t24u.to_key(); + + // Check if sequence already exists + if !force && self.sequence_store.contains_key(&key) { + // Skip - sequence already exists and force=false + return Ok(()); + } + self.md5_lookup - .insert(sr.metadata.md5.to_key(), sr.metadata.sha512t24u.to_key()); - self.sequence_store - .insert(sr.metadata.sha512t24u.to_key(), sr); + .insert(metadata.md5.to_key(), metadata.sha512t24u.to_key()); + + // Check if we should write Full records to disk + if self.cache_to_disk && self.local_path.is_some() { + match &sr { + SequenceRecord::Full { metadata, sequence } => { + // Write to disk + self.write_sequence_to_disk_single(metadata, sequence)?; + // Store as stub instead + let stub = SequenceRecord::Stub(metadata.clone()); + self.sequence_store.insert(key, stub); + return Ok(()); + } + SequenceRecord::Stub(_) => { + // Already a stub, just add it normally below + } + } + } + + // Add as-is (either memory-only mode, or already a Stub) + self.sequence_store.insert(key, sr); Ok(()) } - // Loading the sequence data requires 2 passes through the FASTA file, because we - // have to guess the alphabet before we can encode. So, the first pass is the digesting - // function, which digests and guesses the alphabet, and calculates length, to produce the - // SequenceDigest object. Then, using this object, we can go through the sequence a second time and encode it. - pub fn import_fasta>(&mut self, file_path: P) -> Result<()> { + /// Add a sequence collection from a FASTA file. + /// + /// Skips sequences and collections that already exist in the store. + /// Use `add_sequence_collection_from_fasta_force()` to overwrite existing data. 
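+    /// A typical call, as a minimal sketch (the FASTA path is a placeholder):
+    /// ```ignore
+    /// let mut store = GlobalRefgetStore::in_memory();
+    /// store.add_sequence_collection_from_fasta("genome.fa")?;
+    /// ```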
+    ///
+    /// # Arguments
+    /// * `file_path` - Path to the FASTA file
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    ///
+    /// # Notes
+    /// Loading sequence data requires 2 passes through the FASTA file:
+    /// 1. First pass digests and guesses the alphabet to produce SequenceMetadata
+    /// 2. Second pass encodes the sequences based on the detected alphabet
+    pub fn add_sequence_collection_from_fasta<P: AsRef<Path>>(&mut self, file_path: P) -> Result<()> {
+        self.add_sequence_collection_from_fasta_internal(file_path, false)
+    }
+
+    /// Add a sequence collection from a FASTA file, overwriting existing data.
+    ///
+    /// Forces overwrite of collections and sequences that already exist in the store.
+    /// Use `add_sequence_collection_from_fasta()` to skip duplicates (safer default).
+    ///
+    /// # Arguments
+    /// * `file_path` - Path to the FASTA file
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    pub fn add_sequence_collection_from_fasta_force<P: AsRef<Path>>(&mut self, file_path: P) -> Result<()> {
+        self.add_sequence_collection_from_fasta_internal(file_path, true)
+    }
+
+    /// Internal implementation for adding a sequence collection from FASTA.
+    fn add_sequence_collection_from_fasta_internal<P: AsRef<Path>>(&mut self, file_path: P, force: bool) -> Result<()> {
         println!("Loading farg index...");
         let seqcol = SequenceCollection::from_fasta(&file_path)?;
+        // Check if collection already exists and skip if not forcing
+        if !force && self.collections.contains_key(&seqcol.digest.to_key()) {
+            println!("Collection already exists, skipping (use force=true to overwrite)");
+            return Ok(());
+        }
+
         // Register the collection
-        self.add_sequence_collection(seqcol.clone())?;
+        self.add_sequence_collection_internal(seqcol.clone(), force)?;
         // Local hashmap to store SequenceMetadata (digests)
         let mut seqmeta_hashmap: HashMap<String, SequenceMetadata> = HashMap::new();
         let seqcol_sequences = seqcol.sequences.clone(); // Clone to avoid partial move
         for record in seqcol_sequences {
-            let seqmeta = record.metadata;
+            let seqmeta = record.metadata().clone();
             seqmeta_hashmap.insert(seqmeta.name.clone(), seqmeta);
         }
         let file_reader = get_dynamic_reader(file_path.as_ref())?;
         let mut fasta_reader = Reader::new(file_reader);
-        println!("Preparing to load sequences into refget SeqColStore...");
+        println!("Loading sequences into GlobalRefgetStore...");
+        let start_time = Instant::now();
+        let mut seq_count = 0;
         while let Some(record) = fasta_reader.next() {
             let record = record?;
-            let id = record.id()?;
-            let dr = seqmeta_hashmap[id].clone();
-            println!("Digest result: {:?}", dr);
+            let id = std::str::from_utf8(record.head())?;
+            let dr = seqmeta_hashmap.get(id)
+                .ok_or_else(|| {
+                    let available_keys: Vec<_> = seqmeta_hashmap.keys().collect();
+                    let total = available_keys.len();
+                    let sample: Vec<_> = available_keys.iter().take(3).collect();
+                    anyhow::anyhow!(
+                        "Sequence '{}' not found in metadata. Available ({} total): {:?}{}",
+                        id,
+                        total,
+                        sample,
+                        if total > 3 { " ..." } else { "" }
+                    )
+                })?
+                .clone();
+
+            seq_count += 1;
+            if seq_count <= 3 {
+                let display_name = if dr.name.len() > 120 {
+                    format!("{}...", &dr.name[..117])
+                } else {
+                    dr.name.clone()
+                };
+                println!("  [{}] {} ({} bp)", seq_count, display_name, dr.length);
+            } else if seq_count == 4 {
+                println!("  ...");
+            }
             match self.mode {
                 StorageMode::Raw => {
@@ -294,17 +567,15 @@ impl GlobalRefgetStore {
                     for seq_line in record.seq_lines() {
                         raw_sequence.extend(seq_line);
                     }
-                    println!(
-                        "Storing raw sequence. Name: {}; Alphabet: {}; Digest: {}",
-                        id, dr.alphabet, dr.sha512t24u
-                    );
+                    // Always replace Stubs with Full sequences from FASTA
                     self.add_sequence(
-                        SequenceRecord {
+                        SequenceRecord::Full {
                             metadata: dr,
-                            data: Some(raw_sequence),
+                            sequence: raw_sequence,
                         },
                         seqcol.digest.to_key(),
+                        true, // Always replace Stubs with Full
                     )?;
                 }
                 StorageMode::Encoded => {
@@ -316,57 +587,196 @@
                     // let encoded_sequence = BitVec::::from_vec(encoder.finalize());
                     let encoded_sequence = encoder.finalize();
-                    println!(
-                        "Storing encoded sequence. Name: {}; Alphabet: {}; Digest: {}",
-                        id, dr.alphabet, dr.sha512t24u
-                    );
+                    // Always replace Stubs with Full sequences from FASTA
                     self.add_sequence(
-                        SequenceRecord {
+                        SequenceRecord::Full {
                             metadata: dr,
-                            data: Some(encoded_sequence),
+                            sequence: encoded_sequence,
                         },
                         seqcol.digest.to_key(),
+                        true, // Always replace Stubs with Full
                     )?;
                 }
             }
         }
-        println!("Finished loading sequences into refget SeqColStore.");
+        let elapsed = start_time.elapsed();
+        let mode_str = match self.mode {
+            StorageMode::Raw => "Raw",
+            StorageMode::Encoded => "Encoded",
+        };
+        println!("Loaded {} sequences into GlobalRefgetStore ({}) in {:.2}s.", seq_count, mode_str, elapsed.as_secs_f64());
+
+        // Note: If cache_to_disk=true, sequences were already written to disk
+        // and replaced with stubs by add_sequence_record()
+
         Ok(())
     }
-    /// Returns a list of all sequence names in the store
-    pub fn list_sequence_digests(&self) -> Vec<[u8; 32]> {
-        self.sequence_store.keys().cloned().collect()
+    /// Returns an iterator over all sequence digests in the store
+    pub fn sequence_digests(&self) -> impl Iterator<Item = [u8; 32]> + '_ {
+        self.sequence_store.keys().cloned()
+    }
+
+    /// Returns an iterator over sequence metadata for all sequences in the store.
+    ///
+    /// This is a lightweight operation that returns only metadata (name, length, digests)
+    /// without loading sequence data.
+    ///
+    /// # Returns
+    /// An iterator over `SequenceMetadata` references.
+    ///
+    /// # Example
+    /// ```ignore
+    /// for metadata in store.sequence_metadata() {
+    ///     println!("{}: {} bp", metadata.name, metadata.length);
+    /// }
+    /// ```
+    pub fn sequence_metadata(&self) -> impl Iterator<Item = &SequenceMetadata> + '_ {
+        self.sequence_store.values().map(|rec| rec.metadata())
+    }
+
+    /// Calculate the total disk size of all sequences in the store
+    ///
+    /// This computes the disk space used by sequence data based on:
+    /// - Sequence length
+    /// - Alphabet type (bits per symbol)
+    /// - Storage mode (Raw or Encoded)
+    ///
+    /// # Returns
+    /// Total bytes used for sequence data on disk
+    ///
+    /// # Note
+    /// This only accounts for sequence data files (.seq), not metadata files
+    /// like FARG files, index.json, or directory overhead.
+    ///
+    /// # Examples
+    /// ```ignore
+    /// let mut store = GlobalRefgetStore::on_disk("store")?;
+    /// store.add_sequence_collection_from_fasta("genome.fa")?;
+    /// let disk_size = store.total_disk_size();
+    /// println!("Sequences use {} bytes on disk", disk_size);
+    /// ```
+    pub fn total_disk_size(&self) -> usize {
+        self.sequence_store
+            .values()
+            .map(|rec| rec.metadata().disk_size(&self.mode))
+            .sum()
+    }
+
+    /// Returns an iterator over all complete sequence records in the store.
+    ///
+    /// This returns full `SequenceRecord` objects including both metadata and sequence data.
+    /// Use `sequence_metadata()` if you only need metadata.
+    ///
+    /// # Returns
+    /// An iterator over `SequenceRecord` references.
+    ///
+    /// # Example
+    /// ```ignore
+    /// for record in store.sequence_records() {
+    ///     println!("{}: has_data={}", record.metadata().name, record.has_data());
+    /// }
+    /// ```
+    pub fn sequence_records(&self) -> impl Iterator<Item = &SequenceRecord> + '_ {
+        self.sequence_store.values()
+    }
+
+    /// Returns an iterator over all collection digests
+    pub fn collection_digests(&self) -> impl Iterator<Item = [u8; 32]> + '_ {
+        self.collections.keys().cloned()
+    }
+
+    /// Returns an iterator over all collections
+    pub fn collections(&self) -> impl Iterator<Item = &SequenceCollection> + '_ {
+        self.collections.values()
+    }
+
+    /// Returns the local path where the store is located (if any)
+    pub fn local_path(&self) -> Option<&PathBuf> {
+        self.local_path.as_ref()
+    }
+
+    /// Returns the remote source URL (if any)
+    pub fn remote_source(&self) -> Option<&str> {
+        self.remote_source.as_deref()
+    }
+
+    /// Returns the storage mode used by this store
+    pub fn storage_mode(&self) -> StorageMode {
+        self.mode
     }
     /// Retrieve a SequenceRecord from the store by its SHA512t24u digest
-    pub fn get_sequence_by_id<K: AsRef<[u8]>>(&self, seq_digest: K) -> Option<&SequenceRecord> {
-        self.sequence_store.get(&seq_digest.to_key())
+    pub fn get_sequence_by_id<K: AsRef<[u8]>>(&mut self, seq_digest: K) -> Option<&SequenceRecord> {
+        let digest_key = seq_digest.to_key();
+
+        // Ensure the sequence data is loaded
+        if let Err(e) = self.ensure_sequence_loaded(&digest_key) {
+            eprintln!("Failed to load sequence: {}", e);
+            return None;
+        }
+
+        self.sequence_store.get(&digest_key)
     }
     /// Retrieve a SequenceRecord from the store by its collection digest and name
     pub fn get_sequence_by_collection_and_name<K: AsRef<[u8]>>(
-        &self,
+        &mut self,
         collection_digest: K,
         sequence_name: &str,
     ) -> Option<&SequenceRecord> {
+        let collection_key = collection_digest.to_key();
+
+        // Try to ensure collection is loaded (lazy-load from remote if needed)
+        if let Err(e) = self.ensure_collection_loaded(&collection_key) {
+            eprintln!("Failed to load collection: {}", e);
+            return None;
+        }
+
         // Look up the collection by digest
-        if let Some(name_map) = self.name_lookup.get(&collection_digest.to_key()) {
+        let digest_key = if let Some(name_map) = self.name_lookup.get(&collection_key) {
             // Look up the sequence name in the collection's name map
-            if let Some(sha512t24u) = name_map.get(sequence_name) {
-                // Retrieve the sequence record from the store
-                return self.sequence_store.get(sha512t24u);
-            }
+            name_map.get(sequence_name).cloned()?
+        } else {
+            return None;
+        };
+
+        // Ensure the sequence data is loaded
+        if let Err(e) = self.ensure_sequence_loaded(&digest_key) {
+            eprintln!("Failed to load sequence: {}", e);
+            return None;
         }
-        None
+
+        // Retrieve the sequence record from the store
+        self.sequence_store.get(&digest_key)
     }
-    pub fn get_seqs_in_bed_file_iter<'a, K: AsRef<[u8]>>(
-        &'a self,
+    /// Get an iterator over substrings defined by BED file regions.
+    ///
+    /// Reads a BED file line-by-line and yields substrings for each region.
+    /// This is memory-efficient for large BED files as it streams results.
+    ///
+    /// # Arguments
+    /// * `collection_digest` - The collection digest containing the sequences
+    /// * `bed_file_path` - Path to the BED file defining regions
+    ///
+    /// # Returns
+    /// Iterator yielding a `Result` for each BED region
+    ///
+    /// # Example
+    /// ```ignore
+    /// let iter = store.substrings_from_regions(digest, "regions.bed")?;
+    /// for result in iter {
+    ///     let seq = result?;
+    ///     println!("{}:{}-{}: {}", seq.chrom_name, seq.start, seq.end, seq.sequence);
+    /// }
+    /// ```
+    pub fn substrings_from_regions<'a, K: AsRef<[u8]>>(
+        &'a mut self,
         collection_digest: K,
         bed_file_path: &str,
-    ) -> Result<GetSeqsBedFileIter<'a, K>, Box<dyn Error>> {
+    ) -> Result<SubstringsFromRegions<'a, K>, Box<dyn Error>> {
         let path = Path::new(bed_file_path);
         let file_info = get_file_info(path);
         let is_gzipped = file_info.is_gzipped;
@@ -379,7 +789,7 @@ impl GlobalRefgetStore {
         };
         let reader = BufReader::new(reader);
-        Ok(GetSeqsBedFileIter {
+        Ok(SubstringsFromRegions {
             store: self,
             reader,
             collection_digest,
@@ -389,31 +799,77 @@ impl GlobalRefgetStore {
         })
     }
-    /// Given a Sequence Collection digest and a bed file path
-    /// retrieve sequences and write to a file
-    pub fn get_seqs_bed_file<K: AsRef<[u8]>>(
-        &self,
+    /// Export sequences from BED file regions to a FASTA file.
+    ///
+    /// Reads a BED file defining genomic regions and exports the sequences
+    /// for those regions to a FASTA file. This is useful for extracting
+    /// specific regions of interest from a genome.
+    ///
+    /// # Arguments
+    /// * `collection_digest` - The collection digest containing the sequences
+    /// * `bed_file_path` - Path to the BED file defining regions
+    /// * `output_file_path` - Path to write the output FASTA file
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    ///
+    /// # Example
+    /// ```ignore
+    /// store.export_fasta_from_regions(
+    ///     digest,
+    ///     "regions.bed",
+    ///     "output.fa"
+    /// )?;
+    /// ```
+    pub fn export_fasta_from_regions<K: AsRef<[u8]>>(
+        &mut self,
         collection_digest: K,
         bed_file_path: &str,
         output_file_path: &str,
     ) -> Result<(), Box<dyn Error>> {
         // Set up the output path and create directories if they don't exist
-        let output_path = Path::new(output_file_path).parent().ok_or_else(|| {
-            io::Error::new(
-                io::ErrorKind::InvalidInput,
-                "Invalid output file path: parent directory not found",
-            )
-        })?;
+        let output_path_obj = Path::new(output_file_path);
+        if let Some(parent) = output_path_obj.parent() {
+            create_dir_all(parent)?;
+        }
-        create_dir_all(output_path)?;
+        // Create output file with optional gzip compression
+        let file = File::create(output_file_path)?;
-        // Open file for writing to
-        let mut output_file = OpenOptions::new()
-            .create(true)
-            .append(true)
-            .open(output_file_path)?;
+        let mut writer: Box<dyn Write> = if output_path_obj.extension() == Some(OsStr::new("gz")) {
+            Box::new(GzEncoder::new(file, Compression::default()))
+        } else {
+            Box::new(file)
+        };
+
+        // Pre-fetch all sequence metadata from the collection to avoid borrowing issues
+        let collection_key = collection_digest.as_ref().to_key();
+        let name_to_metadata: HashMap<String, (String, usize, AlphabetType, String, String)> = self
+            .name_lookup
+            .get(&collection_key)
+            .map(|name_map| {
+                name_map
+                    .iter()
+                    .filter_map(|(name, seq_digest)| {
+                        self.sequence_store.get(seq_digest).map(|record| {
+                            let metadata = record.metadata();
+                            (
+                                name.clone(),
+                                (
+                                    metadata.name.clone(),
+                                    metadata.length,
+                                    metadata.alphabet,
+                                    metadata.sha512t24u.clone(),
+                                    metadata.md5.clone(),
+                                ),
+                            )
+                        })
+                    })
+                    .collect()
+            })
+            .unwrap_or_default();
-        let seq_iter = self.get_seqs_in_bed_file_iter(&collection_digest, bed_file_path)?;
+        let seq_iter = self.substrings_from_regions(&collection_digest, bed_file_path)?;
        let mut previous_parsed_chr = String::new();
        let mut current_header: String = String::new();
@@ -429,18 +885,15 @@
             if previous_parsed_chr != rs.chrom_name {
                 previous_parsed_chr = rs.chrom_name.clone();
-                // If we get this far, the metadata **must** exist, so we will unwrap the present
-                let seq_info = self
-                    .get_sequence_by_collection_and_name(&collection_digest, &rs.chrom_name)
-                    .unwrap();
-                current_header = format!(
-                    ">{} {} {} {} {}",
-                    seq_info.metadata.name,
-                    seq_info.metadata.length,
-                    seq_info.metadata.alphabet,
-                    seq_info.metadata.sha512t24u,
-                    seq_info.metadata.md5
-                );
+                // Look up metadata from our pre-fetched map
+                if let Some((name, length, alphabet, sha512, md5)) =
+                    name_to_metadata.get(&rs.chrom_name)
+                {
+                    current_header = format!(
+                        ">{} {} {} {} {}",
+                        name, length, alphabet, sha512, md5
+                    );
+                }
             }
             let retrieved_substring = rs.sequence;
@@ -452,47 +905,31 @@
             // Combine the prefix, current_header, and a trailing newline
             let header_to_be_written = format!("{}{}\n", prefix, current_header);
-            output_file.write_all(header_to_be_written.as_bytes())?;
+            writer.write_all(header_to_be_written.as_bytes())?;
             }
-            output_file.write_all(retrieved_substring.as_ref())?;
+            writer.write_all(retrieved_substring.as_ref())?;
         }
-        Ok(())
-    }
+        // Ensure all data is flushed (important for gzip)
+        writer.flush()?;
-    /// Given a Sequence Collection digest and a bed file path,
-    /// retrieve sequences and return them as a Vec.
-    /// Lines that cannot be parsed or sequences that cannot be retrieved will be skipped
-    pub fn get_seqs_bed_file_to_vec<K: AsRef<[u8]>>(
-        &self,
-        collection_digest: K,
-        bed_file_path: &str,
-    ) -> Result<Vec<RetrievedSequence>, Box<dyn Error>> {
-        let seq_iter = self.get_seqs_in_bed_file_iter(collection_digest, bed_file_path)?;
-
-        let retrieved_sequences = seq_iter
-            .into_iter()
-            .filter_map(|rs| match rs {
-                Ok(rs) => Some(rs),
-                Err(err) => {
-                    eprintln!("{err}");
-                    None
-                }
-            })
-            .collect::<Vec<_>>();
-
-        Ok(retrieved_sequences)
+        Ok(())
     }
     /// Retrieve a SequenceRecord from the store by its MD5 digest
-    pub fn get_sequence_by_md5<K: AsRef<[u8]>>(&self, seq_md5: K) -> Option<&SequenceRecord> {
+    pub fn get_sequence_by_md5<K: AsRef<[u8]>>(&mut self, seq_md5: K) -> Option<&SequenceRecord> {
         // Look up the SHA512t24u digest using the MD5 lookup
-        if let Some(sha512_digest) = self.md5_lookup.get(&seq_md5.to_key()) {
-            // Retrieve the sequence record from the store
-            return self.sequence_store.get(sha512_digest);
+        let sha512_digest = self.md5_lookup.get(&seq_md5.to_key()).cloned()?;
+
+        // Ensure the sequence data is loaded
+        if let Err(e) = self.ensure_sequence_loaded(&sha512_digest) {
+            eprintln!("Failed to load sequence: {}", e);
+            return None;
        }
-        None
+
+        // Retrieve the sequence record from the store
+        self.sequence_store.get(&sha512_digest)
    }
    /// Retrieves a substring from an encoded sequence by its SHA512t24u digest.
@@ -507,25 +944,36 @@ impl GlobalRefgetStore {
     ///
     /// The substring if the sequence is found, or None if not found
     pub fn get_substring<K: AsRef<[u8]>>(
-        &self,
+        &mut self,
         sha512_digest: K,
         start: usize,
         end: usize,
     ) -> Option<String> {
-        let record = self.sequence_store.get(&sha512_digest.to_key())?;
-        let sequence = record.data.as_ref()?;
+        let digest_key = sha512_digest.to_key();
+
+        // Ensure the sequence data is loaded
+        if let Err(e) = self.ensure_sequence_loaded(&digest_key) {
+            eprintln!("Failed to load sequence: {}", e);
+            return None;
+        }
-        if start >= record.metadata.length || end > record.metadata.length || start >= end {
+        let record = self.sequence_store.get(&digest_key)?;
+        let (metadata, sequence) = match record {
+            SequenceRecord::Stub(_) => return None,
+            SequenceRecord::Full { metadata, sequence } => (metadata, sequence),
+        };
+
+        if start >= metadata.length || end > metadata.length || start >= end {
             println!(
                 "Invalid substring range: start={}, end={}, sequence length={}",
-                start, end, record.metadata.length
+                start, end, metadata.length
             );
             return None;
         }
         match self.mode {
             StorageMode::Encoded => {
-                let alphabet = lookup_alphabet(&record.metadata.alphabet);
+                let alphabet = lookup_alphabet(&metadata.alphabet);
                 let decoded_sequence = decode_substring_from_bytes(sequence, start, end, alphabet);
                 Some(String::from_utf8(decoded_sequence).expect("Invalid UTF-8"))
             }
@@ -543,6 +991,196 @@
         }
     }
+    /// Export sequences from a collection to a FASTA file
+    ///
+    /// # Arguments
+    /// * `collection_digest` - The digest of the collection to export from
+    /// * `output_path` - Path to write the FASTA file
+    /// * `sequence_names` - Optional list of sequence names to export.
+    ///   If None, exports all sequences in the collection.
+    /// * `line_width` - Optional line width for wrapping sequences (default: 80)
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    pub fn export_fasta<K: AsRef<[u8]>, P: AsRef<Path>>(
+        &mut self,
+        collection_digest: K,
+        output_path: P,
+        sequence_names: Option<Vec<String>>,
+        line_width: Option<usize>,
+    ) -> Result<()> {
+        let line_width = line_width.unwrap_or(80);
+        let output_path = output_path.as_ref();
+        let collection_key = collection_digest.as_ref().to_key();
+
+        // Get the name map for this collection and build a map of name -> digest
+        let name_to_digest: HashMap<String, [u8; 32]> = self
+            .name_lookup
+            .get(&collection_key)
+            .ok_or_else(|| {
+                anyhow!("Collection not found: {:?}", String::from_utf8_lossy(collection_digest.as_ref()))
+            })?
+            .clone();
+
+        // Determine which sequences to export
+        let names_to_export: Vec<String> = if let Some(names) = sequence_names {
+            // Filter to only requested names
+            names.iter().map(|s| s.to_string()).collect()
+        } else {
+            // Export all sequences in the collection
+            name_to_digest.keys().cloned().collect()
+        };
+
+        // Create output file with optional gzip compression
+        let file = File::create(output_path)
+            .context(format!("Failed to create output file: {}", output_path.display()))?;
+
+        let mut writer: Box<dyn Write> = if output_path.extension() == Some(OsStr::new("gz")) {
+            Box::new(GzEncoder::new(file, Compression::default()))
+        } else {
+            Box::new(file)
+        };
+
+        // Export each sequence
+        for seq_name in names_to_export {
+            // Get the sequence digest from the name map
+            let seq_digest = name_to_digest.get(&seq_name).ok_or_else(|| {
+                anyhow!("Sequence '{}' not found in collection", seq_name)
+            })?;
+
+            // Ensure sequence is loaded
+            self.ensure_sequence_loaded(seq_digest)?;
+
+            // Get the sequence record
+            let record = self.sequence_store.get(seq_digest).ok_or_else(|| {
+                anyhow!("Sequence record not found for digest: {:?}", seq_digest)
+            })?;
+
+            // Get the sequence data
+            let (metadata, sequence_data) = match record {
+                SequenceRecord::Stub(_) => {
+                    return Err(anyhow!("Sequence data not loaded for '{}'", seq_name));
+                }
+                SequenceRecord::Full { metadata, sequence } => (metadata, sequence),
+            };
+
+            // Decode the sequence based on storage mode
+            let decoded_sequence = match self.mode {
+                StorageMode::Encoded => {
+                    let alphabet = lookup_alphabet(&metadata.alphabet);
+                    let decoded = decode_substring_from_bytes(
+                        sequence_data,
+                        0,
+                        metadata.length,
+                        alphabet,
+                    );
+                    String::from_utf8(decoded).context("Failed to decode sequence as UTF-8")?
+                }
+                StorageMode::Raw => {
+                    String::from_utf8(sequence_data.clone())
+                        .context("Failed to decode raw sequence as UTF-8")?
+                }
+            };
+
+            // Write FASTA header
+            writeln!(writer, ">{}", seq_name)?;
+
+            // Write sequence with line wrapping
+            for chunk in decoded_sequence.as_bytes().chunks(line_width) {
+                writer.write_all(chunk)?;
+                writer.write_all(b"\n")?;
+            }
+        }
+
+        // Ensure all data is flushed (important for gzip)
+        writer.flush()?;
+
+        Ok(())
+    }
+
+    /// Export sequences by their sequence digests to a FASTA file
+    ///
+    /// Bypasses collection information and exports sequences directly via sequence digests.
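+    /// A usage sketch; the digest value below is illustrative:
+    /// ```ignore
+    /// store.export_fasta_by_digests(
+    ///     vec!["Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"],
+    ///     "selected.fa.gz",
+    ///     Some(60),
+    /// )?;
+    /// ```
+    ///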
+    /// # Arguments
+    /// * `seq_digests` - List of SHA512t24u sequence digests (not collection digests) to export
+    /// * `output_path` - Path to write the FASTA file
+    /// * `line_width` - Optional line width for wrapping sequences (default: 80)
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    pub fn export_fasta_by_digests<P: AsRef<Path>>(
+        &mut self,
+        seq_digests: Vec<&str>,
+        output_path: P,
+        line_width: Option<usize>,
+    ) -> Result<()> {
+        let line_width = line_width.unwrap_or(80);
+        let output_path = output_path.as_ref();
+
+        // Create output file with optional gzip compression
+        let file = File::create(output_path)
+            .context(format!("Failed to create output file: {}", output_path.display()))?;
+
+        let mut writer: Box<dyn Write> = if output_path.extension() == Some(OsStr::new("gz")) {
+            Box::new(GzEncoder::new(file, Compression::default()))
+        } else {
+            Box::new(file)
+        };
+
+        // Export each sequence
+        for digest_str in seq_digests {
+            let digest_key = digest_str.as_bytes().to_key();
+
+            // Ensure sequence is loaded
+            self.ensure_sequence_loaded(&digest_key)?;
+
+            // Get the sequence record
+            let record = self.sequence_store.get(&digest_key).ok_or_else(|| {
+                anyhow!("Sequence record not found for digest: {}", digest_str)
+            })?;
+
+            // Get the sequence data
+            let (metadata, sequence_data) = match record {
+                SequenceRecord::Stub(_) => {
+                    return Err(anyhow!("Sequence data not loaded for digest: {}", digest_str));
+                }
+                SequenceRecord::Full { metadata, sequence } => (metadata, sequence),
+            };
+
+            // Decode the sequence based on storage mode
+            let decoded_sequence = match self.mode {
+                StorageMode::Encoded => {
+                    let alphabet = lookup_alphabet(&metadata.alphabet);
+                    let decoded = decode_substring_from_bytes(
+                        sequence_data,
+                        0,
+                        metadata.length,
+                        alphabet,
+                    );
+                    String::from_utf8(decoded).context("Failed to decode sequence as UTF-8")?
+                }
+                StorageMode::Raw => {
+                    String::from_utf8(sequence_data.clone())
+                        .context("Failed to decode raw sequence as UTF-8")?
+                }
+            };
+
+            // Write FASTA header with sequence name
+            writeln!(writer, ">{}", metadata.name)?;
+
+            // Write sequence with line wrapping
+            for chunk in decoded_sequence.as_bytes().chunks(line_width) {
+                writer.write_all(chunk)?;
+                writer.write_all(b"\n")?;
+            }
+        }
+
+        // Ensure all data is flushed (important for gzip)
+        writer.flush()?;
+
+        Ok(())
+    }
+
     /// Helper function to get the relative path for a sequence based on its SHA512t24u digest string
     fn get_sequence_path(digest_str: &str, template: &str) -> PathBuf {
         let path_str = template
@@ -552,9 +1190,153 @@ impl GlobalRefgetStore {
         PathBuf::from(path_str)
     }
-    /// Load a GlobalRefgetStore from a directory
-    pub fn load_from_directory<P: AsRef<Path>>(root_path: P) -> Result<Self> {
-        let root_path = root_path.as_ref();
+    /// Write a single sequence to disk using the configured path template
+    fn write_sequence_to_disk_single(
+        &self,
+        metadata: &SequenceMetadata,
+        sequence: &[u8],
+    ) -> Result<()> {
+        let template = self.seqdata_path_template.as_ref()
+            .context("seqdata_path_template not set")?;
+        let local_path = self.local_path.as_ref()
+            .context("local_path not set")?;
+
+        // Build path using template
+        let seq_file_path = Self::get_sequence_path(&metadata.sha512t24u, template);
+        let full_path = local_path.join(&seq_file_path);
+
+        // Create parent directory
+        if let Some(parent) = full_path.parent() {
+            create_dir_all(parent)?;
+        }
+
+        // Write sequence data
+        let mut file = File::create(&full_path)?;
+        file.write_all(sequence)?;
+
+        Ok(())
+    }
+
+    /// Write a single collection FARG file to disk
+    /// Used when cache_to_disk=true to persist collections incrementally
+    fn write_collection_to_disk_single(&self, collection: &SequenceCollection) -> Result<()> {
+        let local_path = self.local_path.as_ref()
+            .context("local_path not set")?;
+
+        // Build path: collections/{digest}.farg
+        let coll_file_path = format!("collections/{}.farg", collection.digest);
+        let full_path = local_path.join(&coll_file_path);
+
+        // Create parent directory
+        if let Some(parent) = full_path.parent() {
+            create_dir_all(parent)?;
+        }
+
+        // Write collection FARG file
+        collection.write_collection_farg(&full_path)?;
+
+        Ok(())
+    }
+
+    /// Write index files (sequences.farg and index.json) to disk
+    ///
+    /// This allows the store to be loaded later via load_local().
+    /// Called automatically when adding collections in disk-backed mode.
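+    ///
+    /// The resulting `index.json` is shaped roughly like this (values illustrative):
+    /// ```ignore
+    /// {
+    ///   "version": 1,
+    ///   "seqdata_path_template": "sequences/%s2/%s.seq",
+    ///   "collections_path_template": "collections/%s.farg",
+    ///   "sequence_index": "sequences.farg",
+    ///   "mode": "Encoded",
+    ///   "created_at": "2025-01-01T00:00:00+00:00"
+    /// }
+    /// ```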
+    fn write_index_files(&self) -> Result<()> {
+        let local_path = self.local_path.as_ref()
+            .context("local_path not set")?;
+        let template = self.seqdata_path_template.as_ref()
+            .context("seqdata_path_template not set")?;
+
+        // Write the sequence metadata index file
+        let sequence_index_path = local_path.join("sequences.farg");
+        self.write_sequences_farg(&sequence_index_path)?;
+
+        // Create the metadata structure
+        let metadata = StoreMetadata {
+            version: 1,
+            seqdata_path_template: template.clone(),
+            collections_path_template: "collections/%s.farg".to_string(),
+            sequence_index: "sequences.farg".to_string(),
+            mode: self.mode,
+            created_at: Utc::now().to_rfc3339(),
+        };
+
+        // Write metadata to index.json
+        let json = serde_json::to_string_pretty(&metadata)
+            .context("Failed to serialize metadata to JSON")?;
+        fs::write(local_path.join("index.json"), json)
+            .context("Failed to write index.json")?;
+
+        Ok(())
+    }
+
+    /// Helper function to fetch a file from local path or remote source
+    /// Returns the file contents as Vec<u8>
+    fn fetch_file(
+        local_path: &Option<PathBuf>,
+        remote_source: &Option<String>,
+        relative_path: &str,
+        cache_to_disk: bool,
+    ) -> Result<Vec<u8>> {
+        // Check if file exists locally (only if caching is enabled and path exists)
+        if cache_to_disk {
+            if let Some(local_path) = local_path {
+                let full_local_path = local_path.join(relative_path);
+                if full_local_path.exists() {
+                    return fs::read(&full_local_path)
+                        .context(format!("Failed to read local file: {}", full_local_path.display()));
+                }
+            }
+        }
+
+        // If not local and we have a remote source, fetch from remote
+        if let Some(remote_url) = remote_source {
+            let full_remote_url = if remote_url.ends_with('/') {
+                format!("{}{}", remote_url, relative_path)
+            } else {
+                format!("{}/{}", remote_url, relative_path)
+            };
+
+            let response = ureq::get(&full_remote_url)
+                .call()
+                .map_err(|e| anyhow!("Failed to fetch from remote: {}", e))?;
+
+            let mut data = Vec::new();
+            response
+                .into_reader()
+                .read_to_end(&mut data)
+                .context("Failed to read response body")?;
+
+            // Save to local cache only if caching is enabled
+            if cache_to_disk {
+                if let Some(local_path) = local_path {
+                    let full_local_path = local_path.join(relative_path);
+
+                    // Create parent directory if needed
+                    if let Some(parent) = full_local_path.parent() {
+                        create_dir_all(parent)?;
+                    }
+
+                    // Save to disk
+                    fs::write(&full_local_path, &data)
+                        .context(format!("Failed to cache file to: {}", full_local_path.display()))?;
+                }
+            }
+
+            Ok(data)
+        } else {
+            Err(anyhow!(
+                "File not found locally and no remote source configured: {}",
+                relative_path
+            ))
+        }
+    }
+
+    /// Load a local RefgetStore from a directory
+    /// This loads metadata only; sequence data is loaded on-demand
+    pub fn load_local<P: AsRef<Path>>(cache_path: P) -> Result<Self> {
+        let root_path = cache_path.as_ref();
         // Read and parse index.json
         let index_path = root_path.join("index.json");
@@ -568,8 +1350,11 @@ impl GlobalRefgetStore {
         // Create a new empty store with the correct mode
         let mut store = GlobalRefgetStore::new(metadata.mode);
+        store.local_path = Some(root_path.to_path_buf());
+        store.seqdata_path_template = Some(metadata.seqdata_path_template.clone());
+        store.cache_to_disk = true; // Local stores always use disk
-        // Load sequence metadata from the sequence index file
+        // Load sequence metadata from the sequence index file (metadata only, no data)
         let sequence_index_path = root_path.join(&metadata.sequence_index);
         if sequence_index_path.exists() {
             let file = std::fs::File::open(&sequence_index_path)?;
@@ -595,33 +1380,11 @@ impl GlobalRefgetStore {
                     alphabet: parts[2].parse().unwrap_or(AlphabetType::Unknown),
                     sha512t24u: parts[3].to_string(),
                     md5: parts[4].to_string(),
+                    fai: None, // Lazy loading doesn't preserve FAI data
                 };
-                // Generate the path using the template
-                let path_str = metadata
-                    .seqdata_path_template
-                    .replace("%s2", &seq_metadata.sha512t24u[0..2])
-                    .replace("%s4", &seq_metadata.sha512t24u[0..4])
-                    .replace("%s", &seq_metadata.sha512t24u);
-                let seq_path = root_path.join(path_str);
-
-                // Open and memory-map the sequence file
-                let file = File::open(&seq_path).context(format!(
-                    "Failed to open sequence file: {}",
-                    seq_path.display()
-                ))?;
-
-                let mmap =
-                    unsafe { Mmap::map(&file) }.context("Failed to memory-map sequence file")?;
-
-                // Convert to Vec for storage
-                let data = mmap.to_vec();
-
-                // Create a SequenceRecord
-                let record = SequenceRecord {
-                    metadata: seq_metadata.clone(),
-                    data: Some(data),
-                };
+                // Create a SequenceRecord with no data (lazy loading)
+                let record = SequenceRecord::Stub(seq_metadata.clone());
                 // Add to store
                 let sha512_key = seq_metadata.sha512t24u.to_key();
@@ -653,8 +1416,127 @@ impl GlobalRefgetStore {
             // Build name lookup for this collection
             let mut name_map = HashMap::new();
             for sequence_record in &collection.sequences {
-                let sha512_key = sequence_record.metadata.sha512t24u.to_key();
-                name_map.insert(sequence_record.metadata.name.clone(), sha512_key);
+                let metadata = sequence_record.metadata();
+                let sha512_key = metadata.sha512t24u.to_key();
+                name_map.insert(metadata.name.clone(), sha512_key);
             }
             store.name_lookup.insert(collection_digest, name_map);
         }
     }
 }

 Ok(store)
 }

+    /// Load a remote-backed RefgetStore
+    /// This loads metadata from remote and caches sequence data on-demand
+    ///
+    /// # Arguments
+    /// * `cache_path` - Local directory for caching (used even if cache_to_disk is false for temp operations)
+    /// * `remote_url` - Remote URL to fetch data from
+    /// * `cache_to_disk` - If true, cache fetched data to disk; if false, keep only in memory
+    pub fn load_remote<P: AsRef<Path>, S: AsRef<str>>(
+        cache_path: P,
+        remote_url: S,
+        cache_to_disk: bool,
+    ) -> Result<Self> {
+        let cache_path = cache_path.as_ref();
+        let remote_url = remote_url.as_ref().to_string();
+
+        // Create cache directory (needed for metadata even in memory-only mode)
+        create_dir_all(cache_path)?;
+
+        // Fetch index.json from remote (always cache metadata - it's small)
+        let index_data = Self::fetch_file(&Some(cache_path.to_path_buf()), &Some(remote_url.clone()), "index.json", true)?;
+        let json = String::from_utf8(index_data)
+            .context("index.json contains invalid UTF-8")?;
+
+        let metadata: StoreMetadata =
+            serde_json::from_str(&json).context("Failed to parse index.json")?;
+
+        // Create a new empty store with the correct mode
+        let mut store = GlobalRefgetStore::new(metadata.mode);
+        store.local_path = Some(cache_path.to_path_buf());
+        store.remote_source = Some(remote_url.clone());
+        store.seqdata_path_template = Some(metadata.seqdata_path_template.clone());
+        store.cache_to_disk = cache_to_disk;
+
+        // Fetch sequence index from remote (always cache metadata - it's small)
+        let sequence_index_data = Self::fetch_file(
+            &Some(cache_path.to_path_buf()),
+            &Some(remote_url.clone()),
+            &metadata.sequence_index,
+            true, // Always cache metadata
+        )?;
+        let sequence_index_str = String::from_utf8(sequence_index_data)
+            .context("sequence index contains invalid UTF-8")?;
+
+        // Parse sequence metadata (metadata only, no data)
+        for line in
sequence_index_str.lines() { + // Skip comment lines + if line.starts_with('#') { + continue; + } + + // Parse sequence metadata lines + let parts: Vec<&str> = line.split('\t').collect(); + if parts.len() != 5 { + continue; + } + + let seq_metadata = SequenceMetadata { + name: parts[0].to_string(), + length: parts[1].parse().unwrap_or(0), + alphabet: parts[2].parse().unwrap_or(AlphabetType::Unknown), + sha512t24u: parts[3].to_string(), + md5: parts[4].to_string(), + fai: None, // Lazy loading doesn't preserve FAI data + }; + + // Create a SequenceRecord with no data (lazy loading) + let record = SequenceRecord::Stub(seq_metadata.clone()); + + // Add to store + let sha512_key = seq_metadata.sha512t24u.to_key(); + store.sequence_store.insert(sha512_key, record); + + // Add to MD5 lookup + let md5_key = seq_metadata.md5.to_key(); + store.md5_lookup.insert(md5_key, sha512_key); + } + + // Load collections from the collections directory + let collections_dir_path = "collections"; + let local_collections_dir = cache_path.join(collections_dir_path); + + // Try to read from local cache first, or create if doesn't exist + if !local_collections_dir.exists() { + create_dir_all(&local_collections_dir)?; + } + + // Note: We'll try to fetch collection files on-demand when needed + // For now, just check if any exist locally + if local_collections_dir.exists() { + for entry in fs::read_dir(&local_collections_dir)? { + let entry = entry?; + let path = entry.path(); + + if path.is_file() && path.extension() == Some(std::ffi::OsStr::new("farg")) { + // Load the collection from the .farg file + let collection = read_fasta_refget_file(&path)?; + let collection_digest = collection.digest.to_key(); + + // Add collection to store + store + .collections + .insert(collection_digest, collection.clone()); + + // Build name lookup for this collection + let mut name_map = HashMap::new(); + for sequence_record in &collection.sequences { + let metadata = sequence_record.metadata(); + let sha512_key = metadata.sha512t24u.to_key(); + name_map.insert(metadata.name.clone(), sha512_key); } store.name_lookup.insert(collection_digest, name_map); } @@ -664,20 +1546,124 @@ impl GlobalRefgetStore { Ok(store) } - /// Write the sequence_store component to a FARG file (without collection headers). - /// * `file_path` - The path to the FARG file to be written. 
- pub fn to_farg>(&self, file_path: P) -> Result<()> { - // Write the FARGI file + /// Ensure a collection is loaded into the store + /// If the collection is not already loaded, try to fetch it from local or remote + fn ensure_collection_loaded(&mut self, collection_digest: &[u8; 32]) -> Result<()> { + // Check if collection is already loaded + if self.name_lookup.contains_key(collection_digest) { + return Ok(()); + } + + // Collection not found - try to fetch it if we have a remote source + // Convert the collection digest to a string for the path + let digest_str = String::from_utf8_lossy(collection_digest); + + // Build the collection file path using the template + // Default template is "collections/%s.farg" + let relative_path = format!("collections/{}.farg", digest_str); + + // Try to fetch the collection file + // Always cache metadata files (they're small), even when cache_to_disk is false + // The memory-only benefit comes from not caching large sequence data files + let _collection_data = Self::fetch_file(&self.local_path, &self.remote_source, &relative_path, true)?; + + // Read the collection from the cached file + let collection_file_path = self.local_path + .as_ref() + .ok_or_else(|| anyhow!("No local path configured"))? + .join(&relative_path); + + let collection = read_fasta_refget_file(&collection_file_path)?; + + // Verify the collection digest matches what we requested + let loaded_digest = collection.digest.to_key(); + if loaded_digest != *collection_digest { + eprintln!( + "Warning: Collection digest mismatch. Expected {:?}, got {:?}", + String::from_utf8_lossy(collection_digest), + String::from_utf8_lossy(&loaded_digest) + ); + } + + // Add collection to store + self.collections.insert(*collection_digest, collection.clone()); + + // Build name lookup for this collection + let mut name_map = HashMap::new(); + for sequence_record in &collection.sequences { + let metadata = sequence_record.metadata(); + let sha512_key = metadata.sha512t24u.to_key(); + name_map.insert(metadata.name.clone(), sha512_key); + } + self.name_lookup.insert(*collection_digest, name_map); + + Ok(()) + } + + /// Ensure a sequence is loaded into memory + /// If the sequence data is not already loaded, fetch it from local or remote + fn ensure_sequence_loaded(&mut self, digest: &[u8; 32]) -> Result<()> { + // Check if sequence exists + let record = self + .sequence_store + .get(digest) + .ok_or_else(|| anyhow!("Sequence not found in store"))?; + + // If data is already loaded, return early + if matches!(record, SequenceRecord::Full { .. }) { + return Ok(()); + } + + // Get the necessary information before borrowing mutably + let digest_str = &record.metadata().sha512t24u; + let template = self + .seqdata_path_template + .as_ref() + .ok_or_else(|| anyhow!("No sequence data path template configured"))?; + + // Build the relative path using the template + let relative_path = template + .replace("%s2", &digest_str[0..2]) + .replace("%s4", &digest_str[0..4]) + .replace("%s", digest_str); + + // Fetch the sequence data + // Use cache_to_disk flag - this is where memory-only mode saves disk I/O + let data = Self::fetch_file(&self.local_path, &self.remote_source, &relative_path, self.cache_to_disk)?; + + // Update the record with the loaded data + self.sequence_store.entry(*digest).and_modify(|r| { + *r = r.clone().with_data(data); + }); + + Ok(()) + } + + /// Write all sequence metadata to a FARG file (without collection headers). 
+    ///
+    /// Creates a global sequence index file containing metadata for all sequences
+    /// in the store across all collections. Does not include collection-level digest headers.
+    ///
+    /// # Arguments
+    /// * `file_path` - The path to the FARG file to be written
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    ///
+    /// # Notes
+    /// For writing individual collection FARG files with collection headers,
+    /// use `SequenceCollection::write_collection_farg()` instead.
+    pub fn write_sequences_farg<P: AsRef<Path>>(&self, file_path: P) -> Result<()> {
         let file_path = file_path.as_ref();
-        println!("Writing farg file: {:?}", file_path);
+        println!("Writing sequences farg file: {:?}", file_path);
         let mut file = std::fs::File::create(file_path)?;
-        // Write header with digest metadata
+        // Write header with column names
         writeln!(file, "#name\tlength\talphabet\tsha512t24u\tmd5")?;
-        // Write sequence data
+        // Write sequence metadata for all sequences
         for result_sr in self.sequence_store.values() {
-            let result = result_sr.metadata.clone();
+            let result = result_sr.metadata().clone();
             writeln!(
                 file,
                 "{}\t{}\t{}\t{}\t{}",
@@ -687,17 +1673,50 @@
         Ok(())
     }
+    /// Write the store using its configured paths
+    ///
+    /// For disk-backed stores (on_disk), this updates index files only since
+    /// sequences/collections are already written incrementally.
+    /// For in-memory stores, this is not supported (use write_store_to_dir instead).
+    ///
+    /// # Returns
+    /// Result indicating success or error
+    ///
+    /// # Errors
+    /// Returns an error if the store is not disk-backed (`cache_to_disk` is false).
+    ///
+    /// # Example
+    /// ```ignore
+    /// let mut store = GlobalRefgetStore::on_disk("/data/store")?;
+    /// store.add_sequence_collection_from_fasta("genome.fa")?;
+    /// store.write()?; // Updates index files
+    /// ```
+    pub fn write(&self) -> Result<()> {
+        if !self.cache_to_disk {
+            return Err(anyhow!("write() only works with disk-backed stores - use write_store_to_dir() instead"));
+        }
+
+        // For disk-backed stores, just update indexes (sequences/collections already written)
+        self.write_index_files()
+    }
+
     /// Write a GlobalRefgetStore object to a directory
-    pub fn write_store_to_directory<P: AsRef<Path>>(
+    pub fn write_store_to_dir<P: AsRef<Path>>(
         &self,
         root_path: P,
-        seqdata_path_template: &str,
+        seqdata_path_template: Option<&str>,
     ) -> Result<()> {
         let root_path = root_path.as_ref();
+
+        // Use provided template, or store's template, or default
+        let template = seqdata_path_template
+            .or(self.seqdata_path_template.as_deref())
+            .unwrap_or(DEFAULT_SEQDATA_PATH_TEMPLATE);
+
         println!(
             "Writing store to directory: {}; Using seqdata path template: {}",
             root_path.display(),
-            seqdata_path_template
+            template
         );
         // Create the root directory if it doesn't exist
@@ -713,19 +1732,20 @@
         // Write each sequence to its own file
         for record in self.sequence_store.values() {
-            if record.data.is_some() {
-                // Get the path for this sequence using the template and base64url-encoded digest
-                let rel_path =
-                    Self::get_sequence_path(&record.metadata.sha512t24u, seqdata_path_template);
-                let full_path = root_path.join(&rel_path);
-
-                // Write the sequence data to file
-                record.to_file(full_path)?;
-            } else {
-                return Err(anyhow!(
-                    "Sequence data is missing for digest: {}",
-                    &record.metadata.sha512t24u
-                ));
+            match record {
+                SequenceRecord::Full { metadata, ..
} => { + // Get the path for this sequence using the template and base64url-encoded digest + let rel_path = + Self::get_sequence_path(&metadata.sha512t24u, template); + let full_path = root_path.join(&rel_path); + + // Write the sequence data to file + record.to_file(full_path)?; + } + SequenceRecord::Stub(_metadata) => { + // Stub means sequence already on disk - skip writing + continue; + } } } @@ -733,17 +1753,17 @@ impl GlobalRefgetStore { for collection in self.collections.values() { let collection_file_path = root_path.join(format!("collections/{}.farg", collection.digest)); - collection.to_farg_path(&collection_file_path)?; + collection.write_collection_farg(&collection_file_path)?; } // Write the sequence metadata index file let sequence_index_path = root_path.join("sequences.farg"); - self.to_farg(&sequence_index_path)?; + self.write_sequences_farg(&sequence_index_path)?; // Create the metadata structure let metadata = StoreMetadata { version: 1, - seqdata_path_template: seqdata_path_template.to_string(), + seqdata_path_template: template.to_string(), collections_path_template: "collections/%s.farg".to_string(), sequence_index: "sequences.farg".to_string(), mode: self.mode, @@ -757,30 +1777,71 @@ impl GlobalRefgetStore { Ok(()) } + + /// Returns statistics about the store + /// + /// # Returns + /// A tuple of (n_sequences, n_collections, storage_mode_str) + pub fn stats(&self) -> (usize, usize, &'static str) { + let n_sequences = self.sequence_store.len(); + let n_collections = self.collections.len(); + let mode_str = match self.mode { + StorageMode::Raw => "Raw", + StorageMode::Encoded => "Encoded", + }; + (n_sequences, n_collections, mode_str) + } +} + +/// Format bytes into human-readable size (KB, MB, GB, etc.) +fn format_bytes(bytes: usize) -> String { + const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"]; + let mut size = bytes as f64; + let mut unit_idx = 0; + + while size >= 1024.0 && unit_idx < UNITS.len() - 1 { + size /= 1024.0; + unit_idx += 1; + } + + if unit_idx == 0 { + format!("{} {}", bytes, UNITS[0]) + } else { + format!("{:.2} {}", size, UNITS[unit_idx]) + } } impl Display for GlobalRefgetStore { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let total_size = self.total_disk_size(); + let size_str = format_bytes(total_size); writeln!(f, "SeqColStore object:")?; + writeln!(f, " Mode: {:?}", self.mode)?; + writeln!(f, " Disk size: {} ({} bytes)", size_str, total_size)?; writeln!(f, ">Sequences (n={}):", self.sequence_store.len())?; // Print out the sequences in the store for (i, (sha512_digest, sequence_record)) in self.sequence_store.iter().take(10).enumerate() { - let seq = sequence_record.data.as_ref().unwrap(); - // Extract the first 3 characters of the sequence (or fewer if the sequence is shorter) - let first_8_chars = match self.mode { - StorageMode::Encoded => { - let alphabet = lookup_alphabet(&sequence_record.metadata.alphabet); - let decoded = decode_substring_from_bytes( - seq, - 0, - 8.min(sequence_record.metadata.length), - alphabet, - ); - String::from_utf8(decoded).unwrap_or_else(|_| "???".to_string()) + let metadata = sequence_record.metadata(); + let first_8_chars = match sequence_record { + SequenceRecord::Stub(_) => "".to_string(), + SequenceRecord::Full { metadata, sequence: seq } => { + // Extract the first 8 characters of the sequence (or fewer if the sequence is shorter) + match self.mode { + StorageMode::Encoded => { + let alphabet = lookup_alphabet(&metadata.alphabet); + let decoded = decode_substring_from_bytes( + seq, + 
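// --- Editorial example (not part of this diff): expected behavior of
// format_bytes() above, derived directly from its loop and format strings ---
//
//     assert_eq!(format_bytes(512), "512 B");    // < 1024 stays in whole bytes
//     assert_eq!(format_bytes(1536), "1.50 KB"); // 1536 / 1024 = 1.5
//     assert_eq!(format_bytes(3 * 1024 * 1024), "3.00 MB");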
0, + 8.min(metadata.length), + alphabet, + ); + String::from_utf8(decoded).unwrap_or_else(|_| "???".to_string()) + } + StorageMode::Raw => String::from_utf8(seq[0..8.min(seq.len())].to_vec()) + .unwrap_or_else(|_| "???".to_string()), + } } - StorageMode::Raw => String::from_utf8(seq[0..8.min(seq.len())].to_vec()) - .unwrap_or_else(|_| "???".to_string()), }; writeln!( @@ -788,9 +1849,9 @@ impl Display for GlobalRefgetStore { " - {}. {:02x?}, MD5: {:02x?}, Length: {}, Alphabet: {:?}, Start: {}", i + 1, std::str::from_utf8(sha512_digest).unwrap(), - &sequence_record.metadata.md5, - &sequence_record.metadata.length, - &sequence_record.metadata.alphabet, + &metadata.md5, + &metadata.length, + &metadata.alphabet, first_8_chars )?; } @@ -801,15 +1862,20 @@ impl Display for GlobalRefgetStore { let seqcol_digest_str = String::from_utf8_lossy(digest); writeln!( f, - " {}. Collection Digest: {:02x?}", + " {}. Collection Digest: {:02x?} ({} sequences)", i + 1, - seqcol_digest_str + seqcol_digest_str, + name_map.len() )?; - for (name, sha512_digest) in name_map { + // Only show first 5 sequences in each collection + for (j, (name, sha512_digest)) in name_map.iter().enumerate().take(5) { // Convert the sha512_digest to a hex string let sha512_str = String::from_utf8_lossy(sha512_digest); writeln!(f, " - Name: {}, SHA512: {:02x?}", name, sha512_str)?; } + if name_map.len() > 5 { + writeln!(f, " - ... and {} more", name_map.len() - 5)?; + } } Ok(()) @@ -844,18 +1910,18 @@ mod tests { .expect("Failed to create SeqColDigest from FASTA file"); // Write FARG to temporary directory - seqcol.to_farg().expect("Failed to write farg file"); + seqcol.write_farg().expect("Failed to write farg file"); // Load and verify let loaded_seqcol = read_fasta_refget_file(&temp_farg).expect("Failed to read refget file"); // Test round-trip integrity for (original, loaded) in seqcol.sequences.iter().zip(loaded_seqcol.sequences.iter()) { - assert_eq!(original.metadata.name, loaded.metadata.name); - assert_eq!(original.metadata.length, loaded.metadata.length); - assert_eq!(original.metadata.sha512t24u, loaded.metadata.sha512t24u); - assert_eq!(original.metadata.md5, loaded.metadata.md5); - assert_eq!(original.metadata.alphabet, loaded.metadata.alphabet); + assert_eq!(original.metadata().name, loaded.metadata().name); + assert_eq!(original.metadata().length, loaded.metadata().length); + assert_eq!(original.metadata().sha512t24u, loaded.metadata().sha512t24u); + assert_eq!(original.metadata().md5, loaded.metadata().md5); + assert_eq!(original.metadata().alphabet, loaded.metadata().alphabet); } } @@ -864,6 +1930,107 @@ mod tests { (sha512t24u(sequence), md5(sequence)) } + #[test] + fn test_mode_basics() { + // Test default mode and convenience methods (no sequences needed) + let mut store = GlobalRefgetStore::in_memory(); + + // Default is Encoded + assert_eq!(store.mode, StorageMode::Encoded); + + // Convenience methods + store.disable_encoding(); + assert_eq!(store.mode, StorageMode::Raw); + store.enable_encoding(); + assert_eq!(store.mode, StorageMode::Encoded); + + // set_mode() also works + store.set_mode(StorageMode::Raw); + assert_eq!(store.mode, StorageMode::Raw); + store.set_mode(StorageMode::Encoded); + assert_eq!(store.mode, StorageMode::Encoded); + } + + #[test] + fn test_mode_switching_raw_to_encoded() { + let temp_dir = tempdir().expect("Failed to create temporary directory"); + let temp_path = temp_dir.path(); + let fasta_content = ">chr1\nATGCATGCATGC\n>chr2\nGGGGAAAA\n"; + let temp_fasta_path = 
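// --- Editorial note (not part of this diff): the size assertions in these
// mode-switching tests follow from 2-bit packing of the DNA alphabet ---
//
//     encoded_bytes = ceil(n_bases * 2 / 8)
//     "ATGCATGCATGC" has 12 bases -> ceil(24 / 8) = 3 bytes (asserted below)
//     Raw mode stores one ASCII byte per base -> 12 bytes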
temp_path.join("test.fa"); + fs::write(&temp_fasta_path, fasta_content).expect("Failed to write test FASTA file"); + + // Create store in Raw mode and load sequences + let mut store = GlobalRefgetStore::in_memory(); + store.disable_encoding(); + assert_eq!(store.mode, StorageMode::Raw); + + store.add_sequence_collection_from_fasta(&temp_fasta_path).unwrap(); + + let (chr1_sha, _) = calculate_test_digests(b"ATGCATGCATGC"); + let chr1_key = chr1_sha.as_bytes().to_key(); + + // Verify raw bytes in Raw mode - plain ASCII + if let Some(SequenceRecord::Full { sequence, .. }) = store.sequence_store.get(&chr1_key) { + assert_eq!(sequence, b"ATGCATGCATGC"); + assert_eq!(sequence.len(), 12); + } + + let seq1 = store.get_sequence_by_id(&chr1_sha).unwrap().decode().unwrap(); + + // Switch to Encoded + store.set_mode(StorageMode::Encoded); + + // Verify raw bytes are now bit-packed + if let Some(SequenceRecord::Full { sequence, .. }) = store.sequence_store.get(&chr1_key) { + assert_eq!(sequence.len(), 3); // 12 bases * 2 bits = 3 bytes + assert!(sequence.len() < 12); + } + + // Verify decoded sequence is identical + let seq2 = store.get_sequence_by_id(&chr1_sha).unwrap().decode().unwrap(); + assert_eq!(seq1, seq2); + } + + #[test] + fn test_mode_switching_encoded_to_raw() { + let temp_dir = tempdir().expect("Failed to create temporary directory"); + let temp_path = temp_dir.path(); + let fasta_content = ">chr1\nATGCATGCATGC\n>chr2\nGGGGAAAA\n"; + let temp_fasta_path = temp_path.join("test.fa"); + fs::write(&temp_fasta_path, fasta_content).expect("Failed to write test FASTA file"); + + // Create store in default Encoded mode and load sequences + let mut store = GlobalRefgetStore::in_memory(); + assert_eq!(store.mode, StorageMode::Encoded); + + store.add_sequence_collection_from_fasta(&temp_fasta_path).unwrap(); + + let (chr1_sha, _) = calculate_test_digests(b"ATGCATGCATGC"); + let chr1_key = chr1_sha.as_bytes().to_key(); + + // Verify raw bytes in Encoded mode - bit-packed + if let Some(SequenceRecord::Full { sequence, .. }) = store.sequence_store.get(&chr1_key) { + assert_eq!(sequence.len(), 3); + assert!(sequence.len() < 12); + } + + let seq1 = store.get_sequence_by_id(&chr1_sha).unwrap().decode().unwrap(); + + // Switch to Raw + store.disable_encoding(); + + // Verify raw bytes are now plain ASCII + if let Some(SequenceRecord::Full { sequence, .. }) = store.sequence_store.get(&chr1_key) { + assert_eq!(sequence, b"ATGCATGCATGC"); + assert_eq!(sequence.len(), 12); + } + + // Verify decoded sequence is identical + let seq2 = store.get_sequence_by_id(&chr1_sha).unwrap().decode().unwrap(); + assert_eq!(seq1, seq2); + assert!(!seq2.is_empty()); + } + #[test] fn test_refget_store_retrieve_seq_and_vec() { // Create temporary directory for all test files @@ -882,8 +2049,8 @@ GGGGAAAA fs::write(&temp_fasta_path, fasta_content).expect("Failed to write test FASTA file"); // --- 2. 
Initialize GlobalRefgetStore and import FASTA --- - let mut store = GlobalRefgetStore::new(StorageMode::Encoded); - store.import_fasta(&temp_fasta_path).unwrap(); + let mut store = GlobalRefgetStore::in_memory(); + store.add_sequence_collection_from_fasta(&temp_fasta_path).unwrap(); let sequence_keys: Vec<[u8; 32]> = store.sequence_store.keys().cloned().collect(); @@ -912,12 +2079,12 @@ chr1\t-5\t100 let temp_output_fa_path = temp_path.join("output.fa"); store - .get_seqs_bed_file( + .export_fasta_from_regions( collection_digest_ref, temp_bed_path.to_str().unwrap(), temp_output_fa_path.to_str().unwrap(), ) - .expect("get_seqs_bed_file failed"); + .expect("export_fasta_from_regions failed"); // Read the output FASTA file and verify its content let output_fa_content = @@ -933,12 +2100,14 @@ chr1\t-5\t100 expected_fa_content.trim(), "Output FASTA file content mismatch" ); - println!("✓ get_seqs_bed_file test passed."); + println!("✓ export_fasta_from_regions test passed."); - // --- Test get_seqs_bed_file_to_vec (returns Vec) --- - let vec_result = store - .get_seqs_bed_file_to_vec(collection_digest_ref, temp_bed_path.to_str().unwrap()) - .expect("get_seqs_bed_file_to_vec failed"); + // --- Test substrings_from_regions iterator (returns iterator of RetrievedSequence) --- + let vec_result: Vec<_> = store + .substrings_from_regions(collection_digest_ref, temp_bed_path.to_str().unwrap()) + .expect("substrings_from_regions failed") + .filter_map(Result::ok) // Skip errors + .collect(); // Define the expected vector of RetrievedSequence structs let expected_vec = vec![ @@ -967,7 +2136,7 @@ chr1\t-5\t100 vec_result, expected_vec, "Retrieved sequence vector mismatch" ); - println!("✓ get_seqs_bed_file_to_vec test passed."); + println!("✓ substrings_from_regions test passed."); } #[test] @@ -986,7 +2155,6 @@ chr1\t-5\t100 lengths_digest: "test".to_string(), }, file_path: None, - has_data: false, }; // Create a sequence record @@ -996,17 +2164,18 @@ chr1\t-5\t100 sha512t24u: sha512t24u(sequence), md5: md5(sequence), alphabet: AlphabetType::Dna2bit, + fai: None, }; - let record = SequenceRecord { + let record = SequenceRecord::Full { metadata: seq_metadata.clone(), - data: Some(sequence.to_vec()), + sequence: sequence.to_vec(), }; collection.sequences.push(record); // Add the sequence to the store - let mut store = GlobalRefgetStore::new(StorageMode::Encoded); + let mut store = GlobalRefgetStore::in_memory(); store.add_sequence_collection(collection.clone()).unwrap(); // Verify the store has the sequence @@ -1017,44 +2186,44 @@ chr1\t-5\t100 store.get_sequence_by_collection_and_name(&collection.digest, name); assert!(retrieved_by_name_str.is_some()); let retrieved_record = retrieved_by_name_str.unwrap(); - assert_eq!(retrieved_record.metadata.name, name); - assert_eq!(retrieved_record.data.as_ref().unwrap(), sequence); + assert_eq!(retrieved_record.metadata().name, name); + assert_eq!(retrieved_record.sequence().unwrap(), sequence); // Test sequence lookup by collection+name (using [u8; 32] digest) let retrieved_by_name_key = store.get_sequence_by_collection_and_name(collection.digest.to_key(), name); assert!(retrieved_by_name_key.is_some()); let retrieved_record = retrieved_by_name_key.unwrap(); - assert_eq!(retrieved_record.metadata.name, name); - assert_eq!(retrieved_record.data.as_ref().unwrap(), sequence); + assert_eq!(retrieved_record.metadata().name, name); + assert_eq!(retrieved_record.sequence().unwrap(), sequence); // Test sequence lookup by SHA512 digest (using string) let 
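// --- Editorial note (not part of this diff): `to_key()` in these lookups
// converts a digest string into the [u8; 32] key type used by the store's
// maps, so the string and key forms are interchangeable; that equivalence is
// what the paired assertions below verify. A hedged sketch:
//
//     let key: [u8; 32] = seq_metadata.sha512t24u.to_key();
//     assert!(store.get_sequence_by_id(key).is_some());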
retrieved_by_sha512_str = store.get_sequence_by_id(&seq_metadata.sha512t24u); assert!(retrieved_by_sha512_str.is_some()); let retrieved_record = retrieved_by_sha512_str.unwrap(); - assert_eq!(retrieved_record.metadata.name, name); - assert_eq!(retrieved_record.data.as_ref().unwrap(), sequence); + assert_eq!(retrieved_record.metadata().name, name); + assert_eq!(retrieved_record.sequence().unwrap(), sequence); // Test sequence lookup by SHA512 digest (using [u8; 32]) let retrieved_by_sha512_key = store.get_sequence_by_id(seq_metadata.sha512t24u.to_key()); assert!(retrieved_by_sha512_key.is_some()); let retrieved_record = retrieved_by_sha512_key.unwrap(); - assert_eq!(retrieved_record.metadata.name, name); - assert_eq!(retrieved_record.data.as_ref().unwrap(), sequence); + assert_eq!(retrieved_record.metadata().name, name); + assert_eq!(retrieved_record.sequence().unwrap(), sequence); // Test sequence lookup by MD5 digest (using string) let retrieved_by_md5_str = store.get_sequence_by_md5(&seq_metadata.md5); assert!(retrieved_by_md5_str.is_some()); let retrieved_record = retrieved_by_md5_str.unwrap(); - assert_eq!(retrieved_record.metadata.name, name); - assert_eq!(retrieved_record.data.as_ref().unwrap(), sequence); + assert_eq!(retrieved_record.metadata().name, name); + assert_eq!(retrieved_record.sequence().unwrap(), sequence); // Test sequence lookup by MD5 digest (using [u8; 32]) let retrieved_by_md5_key = store.get_sequence_by_md5(seq_metadata.md5.to_key()); assert!(retrieved_by_md5_key.is_some()); let retrieved_record = retrieved_by_md5_key.unwrap(); - assert_eq!(retrieved_record.metadata.name, name); - assert_eq!(retrieved_record.data.as_ref().unwrap(), sequence); + assert_eq!(retrieved_record.metadata().name, name); + assert_eq!(retrieved_record.sequence().unwrap(), sequence); } #[test] @@ -1068,10 +2237,10 @@ chr1\t-5\t100 std::fs::copy(test_fa, &temp_fa).expect("Failed to copy test FASTA file"); - let mut store = GlobalRefgetStore::new(StorageMode::Encoded); + let mut store = GlobalRefgetStore::in_memory(); // Import the FASTA file - store.import_fasta(temp_fa).unwrap(); + store.add_sequence_collection_from_fasta(temp_fa).unwrap(); // Check that the store has sequences assert!(!store.sequence_store.is_empty()); @@ -1080,7 +2249,7 @@ chr1\t-5\t100 let seq_template = "sequences/%s2/%s.seq"; // let col_template = "collections/%s.farg"; store - .write_store_to_directory(temp_path.to_str().unwrap(), seq_template) + .write_store_to_dir(temp_path.to_str().unwrap(), Some(seq_template)) .unwrap(); } @@ -1094,11 +2263,11 @@ chr1\t-5\t100 .expect("Failed to copy base.fa.gz to tempdir"); // Create a new sequence store - let mut store = GlobalRefgetStore::new(StorageMode::Encoded); + let mut store = GlobalRefgetStore::in_memory(); // Import a FASTA file into the store - // store.import_fasta("../tests/data/subset.fa.gz").unwrap(); - store.import_fasta(&temp_fasta).unwrap(); + // store.add_sequence_collection_from_fasta("../tests/data/subset.fa.gz").unwrap(); + store.add_sequence_collection_from_fasta(&temp_fasta).unwrap(); // Get the sequence keys for verification (assuming we know the test file contains 3 sequences) let sequence_keys: Vec<[u8; 32]> = store.sequence_store.keys().cloned().collect(); @@ -1118,7 +2287,7 @@ chr1\t-5\t100 // Write the store to the temporary directory let seq_template = "sequences/%s2/%s.seq"; store - .write_store_to_directory(temp_path, seq_template) + .write_store_to_dir(temp_path, Some(seq_template)) .unwrap(); // Verify that the files were created @@ -1129,7 
+2298,7 @@ chr1\t-5\t100 assert!(temp_path.join("collections").exists()); // Load the store from disk - let loaded_store = GlobalRefgetStore::load_from_directory(temp_path).unwrap(); + let mut loaded_store = GlobalRefgetStore::load_local(temp_path).unwrap(); // Verify that the loaded store has the same sequences assert_eq!(loaded_store.sequence_store.len(), 3); @@ -1143,25 +2312,25 @@ chr1\t-5\t100 let loaded_seq2 = loaded_store.sequence_store.get(&sha512_key2).unwrap(); // Check metadata equality - assert_eq!(original_seq1.metadata.name, loaded_seq1.metadata.name); - assert_eq!(original_seq1.metadata.length, loaded_seq1.metadata.length); + assert_eq!(original_seq1.metadata().name, loaded_seq1.metadata().name); + assert_eq!(original_seq1.metadata().length, loaded_seq1.metadata().length); assert_eq!( - original_seq1.metadata.sha512t24u, - loaded_seq1.metadata.sha512t24u + original_seq1.metadata().sha512t24u, + loaded_seq1.metadata().sha512t24u ); - assert_eq!(original_seq1.metadata.md5, loaded_seq1.metadata.md5); + assert_eq!(original_seq1.metadata().md5, loaded_seq1.metadata().md5); - assert_eq!(original_seq2.metadata.name, loaded_seq2.metadata.name); - assert_eq!(original_seq2.metadata.length, loaded_seq2.metadata.length); + assert_eq!(original_seq2.metadata().name, loaded_seq2.metadata().name); + assert_eq!(original_seq2.metadata().length, loaded_seq2.metadata().length); assert_eq!( - original_seq2.metadata.sha512t24u, - loaded_seq2.metadata.sha512t24u + original_seq2.metadata().sha512t24u, + loaded_seq2.metadata().sha512t24u ); - assert_eq!(original_seq2.metadata.md5, loaded_seq2.metadata.md5); + assert_eq!(original_seq2.metadata().md5, loaded_seq2.metadata().md5); - // Check data equality - assert_eq!(original_seq1.data, loaded_seq1.data); - assert_eq!(original_seq2.data, loaded_seq2.data); + // Check data is not loaded initially (lazy loading) + assert_eq!(loaded_seq1.has_data(), false, "Data should not be loaded initially with lazy loading"); + assert_eq!(loaded_seq2.has_data(), false, "Data should not be loaded initially with lazy loading"); // Verify MD5 lookup is preserved assert_eq!(loaded_store.md5_lookup.len(), 3); @@ -1172,15 +2341,15 @@ chr1\t-5\t100 // Test sequence retrieval functionality for (digest, original_record) in &store.sequence_store { let loaded_record = loaded_store.get_sequence_by_id(*digest).unwrap(); - assert_eq!(original_record.metadata.name, loaded_record.metadata.name); + assert_eq!(original_record.metadata().name, loaded_record.metadata().name); assert_eq!( - original_record.metadata.length, - loaded_record.metadata.length + original_record.metadata().length, + loaded_record.metadata().length ); // Test substring retrieval works on loaded store - if original_record.metadata.length > 0 { - let substring_len = std::cmp::min(5, original_record.metadata.length); + if original_record.metadata().length > 0 { + let substring_len = std::cmp::min(5, original_record.metadata().length); let substring = loaded_store.get_substring(digest, 0, substring_len); assert!( substring.is_some(), @@ -1192,4 +2361,430 @@ chr1\t-5\t100 println!("✓ Disk persistence test passed - all data preserved correctly"); } + + #[test] + fn test_export_fasta_all_sequences() { + let temp_dir = tempdir().expect("Failed to create temporary directory"); + let temp_path = temp_dir.path(); + + // Create test FASTA + let fasta_content = "\ +>chr1 +ATGCATGCATGC +>chr2 +GGGGAAAA +>chr3 +TTTTCCCC +"; + let temp_fasta_path = temp_path.join("test.fa"); + fs::write(&temp_fasta_path, 
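// --- Editorial sketch (not part of this diff): the lazy-loading contract
// asserted above, using only calls that appear in this diff. load_local()
// restores metadata-only stubs; the first data access fetches bytes on demand:
//
//     let mut store = GlobalRefgetStore::load_local(temp_path).unwrap();
//     assert!(!store.get_sequence_by_id(digest).unwrap().has_data()); // stub
//     let sub = store.get_substring(digest, 0, 5); // triggers ensure_sequence_loaded
//     assert!(sub.is_some());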
fasta_content).expect("Failed to write test FASTA file"); + + // Import into store + let mut store = GlobalRefgetStore::in_memory(); + store.add_sequence_collection_from_fasta(&temp_fasta_path).unwrap(); + + // Get the collection digest + let collections: Vec<_> = store.collections.keys().cloned().collect(); + assert_eq!(collections.len(), 1, "Should have exactly one collection"); + let collection_digest = collections[0]; + + // Export all sequences + let output_path = temp_path.join("exported_all.fa"); + store + .export_fasta(&collection_digest, &output_path, None, Some(80)) + .expect("Failed to export FASTA"); + + // Read and verify the exported file + let exported_content = fs::read_to_string(&output_path).expect("Failed to read exported file"); + + // Check that all sequences are present + assert!(exported_content.contains(">chr1"), "Should contain chr1"); + assert!(exported_content.contains(">chr2"), "Should contain chr2"); + assert!(exported_content.contains(">chr3"), "Should contain chr3"); + assert!(exported_content.contains("ATGCATGCATGC"), "Should contain chr1 sequence"); + assert!(exported_content.contains("GGGGAAAA"), "Should contain chr2 sequence"); + assert!(exported_content.contains("TTTTCCCC"), "Should contain chr3 sequence"); + + println!("✓ Export all sequences test passed"); + } + + #[test] + fn test_export_fasta_subset_sequences() { + let temp_dir = tempdir().expect("Failed to create temporary directory"); + let temp_path = temp_dir.path(); + + // Create test FASTA + let fasta_content = "\ +>chr1 +ATGCATGCATGC +>chr2 +GGGGAAAA +>chr3 +TTTTCCCC +"; + let temp_fasta_path = temp_path.join("test.fa"); + fs::write(&temp_fasta_path, fasta_content).expect("Failed to write test FASTA file"); + + // Import into store + let mut store = GlobalRefgetStore::in_memory(); + store.add_sequence_collection_from_fasta(&temp_fasta_path).unwrap(); + + // Get the collection digest + let collections: Vec<_> = store.collections.keys().cloned().collect(); + let collection_digest = collections[0]; + + // Export only chr1 and chr3 + let output_path = temp_path.join("exported_subset.fa"); + let subset_names = vec!["chr1", "chr3"]; + store + .export_fasta(&collection_digest, &output_path, Some(subset_names), Some(80)) + .expect("Failed to export subset FASTA"); + + // Read and verify the exported file + let exported_content = fs::read_to_string(&output_path).expect("Failed to read exported file"); + + // Check that only selected sequences are present + assert!(exported_content.contains(">chr1"), "Should contain chr1"); + assert!(!exported_content.contains(">chr2"), "Should NOT contain chr2"); + assert!(exported_content.contains(">chr3"), "Should contain chr3"); + assert!(exported_content.contains("ATGCATGCATGC"), "Should contain chr1 sequence"); + assert!(!exported_content.contains("GGGGAAAA"), "Should NOT contain chr2 sequence"); + assert!(exported_content.contains("TTTTCCCC"), "Should contain chr3 sequence"); + + println!("✓ Export subset sequences test passed"); + } + + #[test] + fn test_export_fasta_roundtrip() { + let temp_dir = tempdir().expect("Failed to create temporary directory"); + let temp_path = temp_dir.path(); + + // Create test FASTA with longer sequences + let fasta_content = "\ +>seq1 +ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC +ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC +>seq2 +GGGGAAAACCCCTTTTGGGGAAAACCCCTTTTGGGGAAAACCCCTTTTGGGGAAAACCCC +"; + let temp_fasta_path = temp_path.join("original.fa"); + fs::write(&temp_fasta_path, 
fasta_content).expect("Failed to write test FASTA file");
+
+        // Import into store
+        let mut store1 = GlobalRefgetStore::in_memory();
+        store1.add_sequence_collection_from_fasta(&temp_fasta_path).unwrap();
+
+        // Get original digests
+        let original_digests: Vec<String> = store1
+            .sequence_store
+            .values()
+            .map(|r| r.metadata().sha512t24u.clone())
+            .collect();
+
+        // Export to new FASTA
+        let collections: Vec<_> = store1.collections.keys().cloned().collect();
+        let collection_digest = collections[0];
+        let exported_path = temp_path.join("exported.fa");
+        store1
+            .export_fasta(&collection_digest, &exported_path, None, Some(60))
+            .expect("Failed to export FASTA");
+
+        // Re-import the exported FASTA
+        let mut store2 = GlobalRefgetStore::in_memory();
+        store2.add_sequence_collection_from_fasta(&exported_path).unwrap();
+
+        // Verify digests match (same sequences)
+        let new_digests: Vec<String> = store2
+            .sequence_store
+            .values()
+            .map(|r| r.metadata().sha512t24u.clone())
+            .collect();
+
+        assert_eq!(original_digests.len(), new_digests.len(), "Should have same number of sequences");
+        for digest in original_digests {
+            assert!(
+                new_digests.contains(&digest),
+                "Digest {} should be present after roundtrip",
+                digest
+            );
+        }
+
+        println!("✓ Export/import roundtrip test passed");
+    }
+
+    #[test]
+    fn test_export_fasta_by_digests() {
+        let temp_dir = tempdir().expect("Failed to create temporary directory");
+        let temp_path = temp_dir.path();
+
+        // Create test FASTA
+        let fasta_content = "\
+>chr1
+ATGCATGCATGC
+>chr2
+GGGGAAAA
+";
+        let temp_fasta_path = temp_path.join("test.fa");
+        fs::write(&temp_fasta_path, fasta_content).expect("Failed to write test FASTA file");
+
+        // Import into store
+        let mut store = GlobalRefgetStore::in_memory();
+        store.add_sequence_collection_from_fasta(&temp_fasta_path).unwrap();
+
+        // Get digests
+        let digests: Vec<String> = store
+            .sequence_store
+            .values()
+            .map(|r| r.metadata().sha512t24u.clone())
+            .collect();
+
+        // Export by digests
+        let output_path = temp_path.join("exported_by_digests.fa");
+        let digest_refs: Vec<&str> = digests.iter().map(|s| s.as_str()).collect();
+        store
+            .export_fasta_by_digests(digest_refs, &output_path, Some(80))
+            .expect("Failed to export FASTA by digests");
+
+        // Read and verify the exported file
+        let exported_content = fs::read_to_string(&output_path).expect("Failed to read exported file");
+
+        // Check that all sequences are present
+        assert!(exported_content.contains(">chr1"), "Should contain chr1");
+        assert!(exported_content.contains(">chr2"), "Should contain chr2");
+        assert!(exported_content.contains("ATGCATGCATGC"), "Should contain chr1 sequence");
+        assert!(exported_content.contains("GGGGAAAA"), "Should contain chr2 sequence");
+
+        println!("✓ Export by digests test passed");
+    }
+
+    #[test]
+    fn test_export_fasta_error_handling() {
+        let temp_dir = tempdir().expect("Failed to create temporary directory");
+        let temp_path = temp_dir.path();
+
+        // Create test FASTA
+        let fasta_content = "\
+>chr1
+ATGCATGCATGC
+";
+        let temp_fasta_path = temp_path.join("test.fa");
+        fs::write(&temp_fasta_path, fasta_content).expect("Failed to write test FASTA file");
+
+        // Import into store
+        let mut store = GlobalRefgetStore::in_memory();
+        store.add_sequence_collection_from_fasta(&temp_fasta_path).unwrap();
+
+        // Test with a non-existent collection
+        let output_path = temp_path.join("should_fail.fa");
+        let fake_collection = b"fake_collection_digest_12345678";
+        let result = store.export_fasta(fake_collection, &output_path, None, Some(80));
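// --- Editorial note (not part of this diff): the fabricated digest above
// does not correspond to any collection loaded into the store, so
// export_fasta() should fail before writing any output; the assertion
// immediately below checks exactly that.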
+ assert!(result.is_err(), "Should fail with non-existent collection"); + + // Test with non-existent sequence name + let collections: Vec<_> = store.collections.keys().cloned().collect(); + let collection_digest = collections[0]; + let result = store.export_fasta( + &collection_digest, + &output_path, + Some(vec!["nonexistent_chr"]), + Some(80), + ); + assert!(result.is_err(), "Should fail with non-existent sequence name"); + + println!("✓ Error handling test passed"); + } + + #[test] + fn test_sequence_names_with_spaces() { + // Ensure FASTA headers with spaces work correctly + // This is common in real-world files like pangenome assemblies + let temp_dir = tempdir().expect("Failed to create temporary directory"); + let temp_path = temp_dir.path(); + + // Create test FASTA with sequence names containing spaces + // This mimics the structure from HPRC pangenome files + let fasta_content = "\ +>JAHKSE010000016.1 unmasked:primary_assembly HG002.alt.pat.f1_v2:JAHKSE010000016.1:1:100:1 +ATGCATGCATGCATGCATGCATGCATGCATGCATGC +ATGCATGCATGCATGCATGCATGCATGCATGCATGC +>JAHKSE010000012.1 unmasked:primary_assembly HG002.alt.pat.f1_v2:JAHKSE010000012.1:1:100:1 +GGGGAAAACCCCTTTTGGGGAAAACCCCTTTTGGGG +GGGGAAAACCCCTTTTGGGGAAAACCCCTTTTGGGG +"; + let temp_fasta_path = temp_path.join("spaces_in_names.fa"); + fs::write(&temp_fasta_path, fasta_content).expect("Failed to write test FASTA file"); + + // Import FASTA with sequence names containing spaces + let mut store = GlobalRefgetStore::in_memory(); + store.add_sequence_collection_from_fasta(&temp_fasta_path) + .expect("Should handle sequence names with spaces"); + + // Verify the sequences were loaded with full names including spaces + assert_eq!(store.sequence_store.len(), 2); + + let full_name1 = "JAHKSE010000016.1 unmasked:primary_assembly HG002.alt.pat.f1_v2:JAHKSE010000016.1:1:100:1"; + let full_name2 = "JAHKSE010000012.1 unmasked:primary_assembly HG002.alt.pat.f1_v2:JAHKSE010000012.1:1:100:1"; + + // Get the collection + let collections: Vec<_> = store.collections.keys().cloned().collect(); + assert_eq!(collections.len(), 1); + let collection_digest = collections[0]; + + // Verify we can retrieve sequences by their full names (with spaces) + let seq1 = store.get_sequence_by_collection_and_name(&collection_digest, full_name1); + assert!(seq1.is_some(), "Should retrieve sequence by full name with spaces"); + + let seq2 = store.get_sequence_by_collection_and_name(&collection_digest, full_name2); + assert!(seq2.is_some(), "Should retrieve sequence by full name with spaces"); + + println!("✓ Sequence names with spaces test passed"); + } + + #[test] + fn test_farg_filename_with_dots() { + // Test that FARG filenames preserve dots in the base name + // Real HPRC files like "HG002.alt.pat.f1_v2.unmasked.fa.gz" + // should create "HG002.alt.pat.f1_v2.unmasked.farg", NOT "HG002.farg" + + let temp_dir = tempdir().expect("Failed to create temporary directory"); + let temp_path = temp_dir.path(); + + // Copy test file to temp (so .farg file gets created there, not in test data) + let test_file = "../tests/data/fasta/HG002.alt.pat.f1_v2.unmasked.fa"; + let temp_fasta = temp_path.join("HG002.alt.pat.f1_v2.unmasked.fa"); + fs::copy(test_file, &temp_fasta).expect("Failed to copy test file"); + + // Load the FASTA - this creates a .farg file + let mut store = GlobalRefgetStore::in_memory(); + store.add_sequence_collection_from_fasta(&temp_fasta) + .expect("Should load FASTA"); + + // Check which .farg file was created + let correct_farg = 
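// --- Editorial note (not part of this diff): the fix under test strips only
// known FASTA extensions (see the gtars-refget/src/utils.rs hunk later in
// this diff), so dots in the base name survive:
//
//     "HG002.alt.pat.f1_v2.unmasked.fa" -> "HG002.alt.pat.f1_v2.unmasked.farg"
//     old behavior cut at the first dot -> "HG002.farg"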
temp_path.join("HG002.alt.pat.f1_v2.unmasked.farg"); + let wrong_farg = temp_path.join("HG002.farg"); + + let files: Vec<_> = std::fs::read_dir(temp_path).unwrap() + .map(|e| e.unwrap().file_name().to_string_lossy().to_string()) + .collect(); + + assert!( + correct_farg.exists(), + "Expected 'HG002.alt.pat.f1_v2.unmasked.farg' but found: {:?}", + files + ); + + assert!( + !wrong_farg.exists(), + "Should NOT create 'HG002.farg' (strips too many dots)" + ); + + println!("✓ FARG filename with dots test passed"); + } + + #[test] + fn test_on_disk_collection_written_incrementally() { + // Test that collection FARG files are written to disk immediately + // when using on_disk() store, not just when write_store_to_dir() is called + let temp_dir = tempdir().unwrap(); + let temp_path = temp_dir.path(); + let temp_fasta = temp_path.join("base.fa.gz"); + std::fs::copy("../tests/data/fasta/base.fa.gz", &temp_fasta) + .expect("Failed to copy base.fa.gz to tempdir"); + + let cache_path = temp_path.join("cache"); + let mut store = GlobalRefgetStore::on_disk(&cache_path).unwrap(); + + // Load FASTA file into the store + store.add_sequence_collection_from_fasta(&temp_fasta).unwrap(); + + // BEFORE calling write_store_to_dir, verify collection FARG files exist + let collections_dir = cache_path.join("collections"); + assert!(collections_dir.exists(), "Collections directory should exist"); + + let farg_files: Vec<_> = std::fs::read_dir(&collections_dir) + .unwrap() + .map(|e| e.unwrap().file_name().to_string_lossy().to_string()) + .collect(); + + assert!(!farg_files.is_empty(), "Collection FARG files should be written incrementally, found: {:?}", farg_files); + assert!(farg_files.iter().any(|f| f.ends_with(".farg")), "Should have .farg files"); + + println!("✓ On-disk collection incremental write test passed"); + } + + #[test] + fn test_disk_size_calculation() { + let mut store = GlobalRefgetStore::in_memory(); + store.add_sequence_collection_from_fasta("../tests/data/fasta/base.fa.gz").unwrap(); + + let disk_size = store.total_disk_size(); + assert!(disk_size > 0, "Disk size should be greater than 0"); + + // Verify against manual calculation + let manual: usize = store.sequence_metadata() + .map(|m| (m.length * m.alphabet.bits_per_symbol()).div_ceil(8)) + .sum(); + assert_eq!(disk_size, manual); + } + + #[test] + fn test_incremental_index_writing() { + let temp_dir = tempdir().unwrap(); + let cache_path = temp_dir.path().join("store"); + let mut store = GlobalRefgetStore::on_disk(&cache_path).unwrap(); + + store.add_sequence_collection_from_fasta("../tests/data/fasta/base.fa.gz").unwrap(); + + // Index files should exist immediately + assert!(cache_path.join("index.json").exists(), "index.json should exist"); + assert!(cache_path.join("sequences.farg").exists(), "sequences.farg should exist"); + + // Store should be loadable (mode ignored for existing store) + let _loaded = GlobalRefgetStore::on_disk(&cache_path).unwrap(); + } + + #[test] + fn test_write_method() { + let temp_dir = tempdir().unwrap(); + let cache_path = temp_dir.path().join("store"); + let mut store = GlobalRefgetStore::on_disk(&cache_path).unwrap(); + + store.add_sequence_collection_from_fasta("../tests/data/fasta/base.fa.gz").unwrap(); + store.write().unwrap(); // Should succeed + + assert!(cache_path.join("index.json").exists()); + } + + #[test] + fn test_on_disk_smart_constructor() { + let temp_dir = tempdir().unwrap(); + let cache_path = temp_dir.path().join("store"); + + // Create new store (defaults to Encoded mode) + let mut store1 
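// --- Editorial sketch (not part of this diff): the on_disk() contract
// exercised below, as implied by these tests -- create-if-missing,
// load-if-present, with the storage mode persisted via index.json:
//
//     let store = GlobalRefgetStore::on_disk("/data/store")?; // new: Encoded
//     // ... add sequences; index.json records "mode": "Encoded"
//     let again = GlobalRefgetStore::on_disk("/data/store")?; // loads, keeps mode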
= GlobalRefgetStore::on_disk(&cache_path).unwrap();
+        assert_eq!(store1.mode, StorageMode::Encoded);
+        store1.add_sequence_collection_from_fasta("../tests/data/fasta/base.fa.gz").unwrap();
+
+        // Load the existing store - it should preserve Encoded mode
+        let store2 = GlobalRefgetStore::on_disk(&cache_path).unwrap();
+        assert_eq!(store2.sequence_store.len(), store1.sequence_store.len());
+        assert_eq!(store2.mode, StorageMode::Encoded, "Loaded store should preserve Encoded mode");
+
+        // Test with Raw mode
+        let cache_path_raw = temp_dir.path().join("store_raw");
+        let mut store3 = GlobalRefgetStore::on_disk(&cache_path_raw).unwrap();
+        store3.disable_encoding(); // Switch to Raw
+        assert_eq!(store3.mode, StorageMode::Raw);
+        store3.add_sequence_collection_from_fasta("../tests/data/fasta/base.fa.gz").unwrap();
+
+        // Load and verify Raw mode is persisted
+        let store4 = GlobalRefgetStore::on_disk(&cache_path_raw).unwrap();
+        assert_eq!(store4.mode, StorageMode::Raw, "Loaded store should preserve Raw mode");
+
+        // Verify index.json contains the mode
+        let index_path = cache_path_raw.join("index.json");
+        let json = fs::read_to_string(&index_path).unwrap();
+        assert!(json.contains("\"mode\":\"Raw\"") || json.contains("\"mode\": \"Raw\""),
+            "index.json should contain mode: Raw");
+    }
 }
diff --git a/gtars-refget/src/utils.rs b/gtars-refget/src/utils.rs
index c2d552ae..7f1b4a08 100644
--- a/gtars-refget/src/utils.rs
+++ b/gtars-refget/src/utils.rs
@@ -11,12 +11,14 @@ impl<T: AsRef<Path>> PathExtension for T {
         let path = self.as_ref();
         if let Some(file_name) = path.file_name() {
             let file_name_str = file_name.to_string_lossy();
-            // Find the first dot to get the base name without any extensions
-            let base_name = if let Some(dot_pos) = file_name_str.find('.') {
-                &file_name_str[..dot_pos]
-            } else {
-                &file_name_str
-            };
+
+            // Strip only known FASTA extensions, preserving dots in the base name.
+            // Check the longest extensions first (.fa.gz, .fasta.gz) before shorter ones.
+            let known_extensions = [".fa.gz", ".fasta.gz", ".fa", ".fasta"];
+
+            let base_name = known_extensions.iter()
+                .find_map(|ext| file_name_str.strip_suffix(ext))
+                .unwrap_or(&file_name_str);

             if let Some(parent) = path.parent() {
                 parent.join(format!("{}.{}", base_name, new_extension))
diff --git a/gtars-refget/tests/test_decode.rs b/gtars-refget/tests/test_decode.rs
new file mode 100644
index 00000000..18d15c1f
--- /dev/null
+++ b/gtars-refget/tests/test_decode.rs
@@ -0,0 +1,105 @@
+//! Integration tests for the decode() workflow through GlobalRefgetStore.
+//!
+//! These tests validate the end-to-end workflow of importing FASTA files,
+//! storing sequences, retrieving them, and decoding them through the public API.
+//!
+//! Note: Unit tests for the decode() method itself (including edge cases,
+//!
different alphabets, encoding schemes) are in src/collection.rs + +use gtars_refget::store::{GlobalRefgetStore, StorageMode}; +use std::io::Write; +use tempfile::NamedTempFile; + +/// Helper function to create a temporary FASTA file with test sequences +fn create_test_fasta() -> NamedTempFile { + let mut file = NamedTempFile::new().expect("Failed to create temp file"); + writeln!(file, ">seq1").expect("Failed to write"); + writeln!(file, "ACGTACGTACGT").expect("Failed to write"); + writeln!(file, ">seq2").expect("Failed to write"); + writeln!(file, "TTGGCCAA").expect("Failed to write"); + writeln!(file, ">seq3").expect("Failed to write"); + writeln!(file, "NNNNAAAA").expect("Failed to write"); + file +} + +#[test] +fn test_decode_workflow_encoded() { + // Test the complete workflow: import FASTA → store (encoded) → retrieve → decode + let fasta_file = create_test_fasta(); + let fasta_path = fasta_file.path(); + + // Create an encoded (bit-packed) store (default mode) + let mut store = GlobalRefgetStore::in_memory(); + store + .add_sequence_collection_from_fasta(fasta_path) + .expect("Failed to import FASTA"); + + // Get the collection digest + let collections: Vec<_> = store.collections().collect(); + assert!(!collections.is_empty(), "No collections found"); + let collection_digest = collections[0].digest.clone(); + + // Expected sequences + let expected = vec![ + ("seq1", "ACGTACGTACGT"), + ("seq2", "TTGGCCAA"), + ("seq3", "NNNNAAAA"), // Tests IUPAC 'N' handling + ]; + + // Verify all sequences can be retrieved and decoded correctly + for (name, expected_seq) in expected.iter() { + let record = store + .get_sequence_by_collection_and_name(&collection_digest, name) + .unwrap_or_else(|| panic!("Failed to retrieve {}", name)); + + assert!(record.has_data(), "Record should have data"); + + let decoded = record + .decode() + .unwrap_or_else(|| panic!("decode() returned None for {}", name)); + + assert_eq!( + &decoded, expected_seq, + "Decoded sequence for {} doesn't match", + name + ); + } +} + +#[test] +fn test_decode_workflow_raw() { + // Test the same workflow with raw (uncompressed) storage mode + let fasta_file = create_test_fasta(); + let fasta_path = fasta_file.path(); + + // Create a raw store + let mut store = GlobalRefgetStore::in_memory(); + store.disable_encoding(); // Switch to Raw mode + store + .add_sequence_collection_from_fasta(fasta_path) + .expect("Failed to import FASTA"); + + // Get the collection digest + let collections: Vec<_> = store.collections().collect(); + assert!(!collections.is_empty(), "No collections found"); + let collection_digest = collections[0].digest.clone(); + + // Verify sequences decode correctly with raw storage + let record = store + .get_sequence_by_collection_and_name(&collection_digest, "seq1") + .expect("Failed to retrieve seq1"); + + let decoded = record.decode().expect("decode() returned None"); + assert_eq!( + decoded, "ACGTACGTACGT", + "Decoded sequence from raw store doesn't match" + ); + + // Verify IUPAC sequences work with raw storage too + let record2 = store + .get_sequence_by_collection_and_name(&collection_digest, "seq3") + .expect("Failed to retrieve seq3"); + + let decoded2 = record2.decode().expect("decode() returned None for seq3"); + assert_eq!(decoded2, "NNNNAAAA", "IUPAC sequence doesn't match"); +} diff --git a/tests/README.md b/tests/README.md index 2e713ec2..d7718367 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1 +1,31 @@ -To run tests, run `cargo test` in the root directory of the project. 
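*Editorial note: the commands below are a hedged addition for the integration tests introduced in this diff; the `--test` target name comes from the new file `gtars-refget/tests/test_decode.rs`.*

```bash
# Run only the new decode integration tests
cargo test -p gtars-refget --test test_decode

# Run a single test function from that file
cargo test -p gtars-refget --test test_decode test_decode_workflow_encoded
```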
+# Testing gtars + +## Basic Commands + +```bash +# Run all tests +cargo test + +# Run tests for a specific package/crate +cargo test -p gtars-refget + +# Run tests matching a name pattern +cargo test refget +cargo test test_import_fasta + +# Combine filters +cargo test -p gtars-refget test_import +``` + +## Useful Options + +```bash +# Show output from tests +cargo test -- --nocapture + +# Run ignored tests +cargo test -- --ignored + +# List available tests without running +cargo test -- --list +``` diff --git a/tests/data/fasta/HG002.alt.pat.f1_v2.unmasked.fa b/tests/data/fasta/HG002.alt.pat.f1_v2.unmasked.fa new file mode 100644 index 00000000..dd08063d --- /dev/null +++ b/tests/data/fasta/HG002.alt.pat.f1_v2.unmasked.fa @@ -0,0 +1,6 @@ +>chrX +TTGGGGAA +>chr1 +GGAA +>chr2 +GCGC diff --git a/tests/data/fasta/base.fa.fai b/tests/data/fasta/base.fa.fai new file mode 100644 index 00000000..dc37dcde --- /dev/null +++ b/tests/data/fasta/base.fa.fai @@ -0,0 +1,3 @@ +chrX 8 6 8 9 +chr1 4 21 4 5 +chr2 4 32 4 5 diff --git a/tests/data/fasta/crlf_endings.fa b/tests/data/fasta/crlf_endings.fa new file mode 100644 index 00000000..6089944d --- /dev/null +++ b/tests/data/fasta/crlf_endings.fa @@ -0,0 +1,5 @@ +>chr1 +ACGTACGTACGTACGTACGTACGTACGTACGT +TGCATGCATGCATGCATGCATGCATGCATGCA +>chr2 +GGGGCCCCAAAATTTTGGGGCCCCAAAATTTT diff --git a/tests/data/fasta/crlf_endings.fa.fai b/tests/data/fasta/crlf_endings.fa.fai new file mode 100644 index 00000000..e22f331b --- /dev/null +++ b/tests/data/fasta/crlf_endings.fa.fai @@ -0,0 +1,2 @@ +chr1 64 7 32 34 +chr2 32 82 32 34 diff --git a/tests/data/fasta/unwrapped.fa b/tests/data/fasta/unwrapped.fa new file mode 100644 index 00000000..9279eec7 --- /dev/null +++ b/tests/data/fasta/unwrapped.fa @@ -0,0 +1,6 @@ +>chr1 +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTTGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAGGCCGGCCGGCCGGCCGGCCGGCCGGCCGGCCGGCCGGCC +>chr2 +AAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTT +>chr3 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN diff --git a/tests/data/fasta/unwrapped.fa.fai b/tests/data/fasta/unwrapped.fa.fai new file mode 100644 index 00000000..87cb4a1c --- /dev/null +++ b/tests/data/fasta/unwrapped.fa.fai @@ -0,0 +1,3 @@ +chr1 120 6 120 121 +chr2 64 133 64 65 +chr3 40 204 40 41 diff --git a/tests/data/fasta/wrapped_20.fa b/tests/data/fasta/wrapped_20.fa new file mode 100644 index 00000000..96284947 --- /dev/null +++ b/tests/data/fasta/wrapped_20.fa @@ -0,0 +1,15 @@ +>chr1 +ACGTACGTACGTACGTACGT +ACGTACGTACGTACGTACGT +TGCATGCATGCATGCATGCA +TGCATGCATGCATGCATGCA +GGCCGGCCGGCCGGCCGGCC +GGCCGGCCGGCCGGCCGGCC +>chr2 +AAAACCCCGGGGTTTTAAAA +CCCCGGGGTTTTAAAACCCC +GGGGTTTTAAAACCCCGGGG +TTTT +>chr3 +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN diff --git a/tests/data/fasta/wrapped_20.fa.fai b/tests/data/fasta/wrapped_20.fa.fai new file mode 100644 index 00000000..70a80853 --- /dev/null +++ b/tests/data/fasta/wrapped_20.fa.fai @@ -0,0 +1,3 @@ +chr1 120 6 20 21 +chr2 64 138 20 21 +chr3 40 212 20 21 diff --git a/tests/data/fasta/wrapped_40.fa b/tests/data/fasta/wrapped_40.fa new file mode 100644 index 00000000..9ace5f4c --- /dev/null +++ b/tests/data/fasta/wrapped_40.fa @@ -0,0 +1,9 @@ +>chr1 +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT +TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA +GGCCGGCCGGCCGGCCGGCCGGCCGGCCGGCCGGCCGGCC +>chr2 +AAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCC +GGGGTTTTAAAACCCCGGGGTTTT +>chr3 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN diff --git a/tests/data/fasta/wrapped_40.fa.fai b/tests/data/fasta/wrapped_40.fa.fai new file mode 100644 index 
00000000..71f045db --- /dev/null +++ b/tests/data/fasta/wrapped_40.fa.fai @@ -0,0 +1,3 @@ +chr1 120 6 40 41 +chr2 64 135 40 41 +chr3 40 207 40 41
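*Editorial note: these `.fai` fixtures follow the standard `samtools faidx` layout: NAME, LENGTH (bases), OFFSET (byte offset of the first base), LINEBASES (bases per line), and LINEWIDTH (bytes per line including the line terminator; 34 in `crlf_endings.fa.fai` because of `\r\n`). The offsets in `wrapped_40.fa.fai` can be checked by hand:*

```
chr1: ">chr1\n" header = 6 bytes                  -> offset   6
chr1 body: 3 lines x 41 bytes                     = 123 bytes
chr2: 6 + 123 + 6 (">chr2\n")                     -> offset 135
chr2 body: 41 + 25 bytes (40- and 24-base lines)  =  66 bytes
chr3: 135 + 66 + 6 (">chr3\n")                    -> offset 207
```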