change the "CS" to "cs" #25

akikuno · Nov 23, 2023 · 20dec8d · 20dec8d
1 parent 11ff1a5
commit 20dec8d
Show file tree

Hide file tree

Showing 9 changed files with 50 additions and 48 deletions.
diff --git a/README.md b/README.md
@@ -7,25 +7,25 @@
 
 # cstag
 
-`cstag` is a Python library tailored for the manipulation and handling of [minimap2's CS tags](https://github.com/lh3/minimap2#cs).
+`cstag` is a Python library tailored for manipulating and visualizing [minimap2's cs tags](https://github.com/lh3/minimap2#cs).
 
 
 ## 🌟 Features
 
-- `cstag.call()`: Generate a CS tag
-- `cstag.shorten()`: Convert a CS tag from its long to short format
-- `cstag.lengthen()`: Convert a CS tag from its short to long format
-- `cstag.consensus()`: Create a consensus CS tag from multiple CS tags
-- `cstag.mask()`: Mask low-quality bases within a CS tag
-- `cstag.split()`: Break down a CS tag into its constituent parts
-- `cstag.revcomp()`: Convert a CS tag to its reverse complement
+- `cstag.call()`: Generate a cs tag
+- `cstag.shorten()`: Convert a cs tag from its long to short format
+- `cstag.lengthen()`: Convert a cs tag from its short to long format
+- `cstag.consensus()`: Create a consensus cs tag from multiple cs tags
+- `cstag.mask()`: Mask low-quality bases within a cs tag
+- `cstag.split()`: Break down a cs tag into its constituent parts
+- `cstag.revcomp()`: Convert a cs tag to its reverse complement
 - `cstag.to_sequence()`: Reconstruct a reference subsequence from the alignment
 - `cstag.to_vcf()`: Generate a VCF representation
 - `cstag.to_html()`: Generate an HTML representation
 - `cstag.to_pdf()`: Produce a PDF file
 
 For comprehensive documentation, please visit [our docs](https://akikuno.github.io/cstag/cstag/).  
-To add CS tags to SAM/BAM files, check out [`cstag-cli`](https://github.com/akikuno/cstag-cli).  
+To add cs tags to SAM/BAM files, check out [`cstag-cli`](https://github.com/akikuno/cstag-cli).  
 
 
 ## 🛠 Installation
@@ -44,7 +44,7 @@ conda install -c bioconda cstag
 
 ## 💡 Usage
 
-### Generating CS Tags
+### Generating cs Tags
 
 ```python
 import cstag
@@ -60,19 +60,19 @@ print(cstag.call(cigar, md, seq, long=True))
 # =AC*ag=TACGT-ag=ACGT+ac~nn3nn=G
 ```
 
-### Shortening or Lengthening CS Tags
+### Shortening or Lengthening cs Tags
 
 ```python
 import cstag
 
-# Convert a CS tag from long to short
+# Convert a cs tag from long to short
 cs_tag = "=ACGT*ag=CGT"
 
 print(cstag.shorten(cs_tag))
 # :4*ag:3
 
 
-# Convert a CS tag from short to long
+# Convert a cs tag from short to long
 cs_tag = ":4*ag:3"
 cigar = "8M"
 seq = "ACGTACGT"
@@ -106,7 +106,7 @@ print(cstag.mask(cs_tag, cigar, qual, phred_threshold))
 # =ACNN*an+ng-cc=T
 ```
 
-### Splitting a CS Tag
+### Splitting a cs Tag
 
 ```python
 import cstag
@@ -116,7 +116,7 @@ print(cstag.split(cs_tag))
 # ['=ACGT', '*ac', '+gg', '-cc', '=T']
 ```
 
-### Reverse Complement of a CS Tag
+### Reverse Complement of a cs Tag
 
 ```python
 import cstag
@@ -152,7 +152,7 @@ chr1	5	.	C	CTT	.	.	.
 """
 ```
 
-The multiple CS tags enable reporting of the variant allele frequency (VAF).
+The multiple cs tags enable reporting of the variant allele frequency (VAF).
 
 ```python
 import cstag
@@ -186,7 +186,7 @@ Path("report.html").write_text(cs_tag_html)
 # Output "report.html"
 ```
 
-You can visualize mutations indicated by the CS tag using the generated `report.html` file as shown below:
+You can visualize mutations indicated by the cs tag using the generated `report.html` file as shown below:
 
 <img width="511" alt="image" src="https://user-images.githubusercontent.com/15861316/265405607-a3cc1b76-f6a2-441d-b282-6f2dc06fc03d.png">
 

diff --git a/src/cstag/call.py b/src/cstag/call.py
@@ -74,8 +74,10 @@ def trim_clips(cigar: str, seq: str) -> tuple[str, str]:
 
 
 ###########################################################
-# Generate CS long
+# Generate cs tag in long format
 ###########################################################
+
+
 def expand_cigar_operations(cigar: str) -> list[str]:
     parsed_cigar = parse_cigar(cigar)
     expanded_list = []

diff --git a/src/cstag/consensus.py b/src/cstag/consensus.py
@@ -25,20 +25,20 @@ def expand_deletion_tags(tags_combined: list[str]) -> list[str]:
 
 def split_cs_tags(cs_tags: list[str]) -> list[list[str]]:
     """
-    Split and process each CS tag in cs_tags.
+    Split and process each cs tag in cs_tags.
 
     Args:
-        cs_tags (list[str]): list of CS tags in the long format.
+        cs_tags (list[str]): list of cs tags in the long format.
 
     Returns:
-        list[list[str]]: list of processed CS tags.
+        list[list[str]]: list of processed cs tags.
     """
     cs_tags_splitted = []
     for cs_tag in cs_tags:
         # Remove the prefix "cs:Z:" if present
         cs_tag = cs_tag.replace("cs:Z:", "")
 
-        # Split the CS tag using special symbols (-, *, ~, =)
+        # Split the cs tag using special symbols (-, *, ~, =)
         # insertion symbol (+) is ignored because it is not observed in reference sequence
         tags_splitted = re.split(r"([-*~=])", cs_tag)[1:]
         # Combine the symbol with the corresponding sequence
@@ -70,7 +70,7 @@ def normalize_read_lengths(cs_tags: list[str], positions: list[int]) -> list[lis
     Normalize the lengths of each read in cs_tags based on their start positions. If the length is insufficient, fill in with `None`.
 
     Args:
-        cs_tags (list[str]): list of CS tags.
+        cs_tags (list[str]): list of cs tags.
         positions (list[int]): Starting positions of each read.
 
     Returns:
@@ -109,7 +109,7 @@ def get_consensus(cs_tags: list[list[str]]) -> str:
     for cs in zip(*cs_tags):
         # Remove the None that is compensating for the insufficient lead length.
         cs = [c for c in cs if c]
-        # Get the most common CS tag(s)
+        # Get the most common cs tag(s)
         most_common_tags = Counter(cs).most_common()
 
         # If there's a unique most common tag, return it
@@ -134,13 +134,13 @@ def get_consensus(cs_tags: list[list[str]]) -> str:
 
 
 def consensus(cs_tags: list[str], positions: list[int], prefix: bool = False) -> str:
-    """generate consensus of CS tags
+    """generate consensus of cs tags
     Args:
-        cs_tags (list): CS tags in the **long** format
+        cs_tags (list): cs tags in the **long** format
         positions (list): 1-based leftmost mapping position (4th column in SAM file)
-        prefix (bool, optional): Whether to add the prefix 'cs:Z:' to the CS tag. Defaults to False
+        prefix (bool, optional): Whether to add the prefix 'cs:Z:' to the cs tag. Defaults to False
     Return:
-        str: a consensus of CS tag in the **long** format
+        str: a consensus cs tag in the **long** format
     Example:
         >>> import cstag
         >>> cs_tags = ["=ACGT", "=AC*gt=T", "=C*gt=T", "=C*gt=T", "=ACT+ccc=T"]

diff --git a/src/cstag/to_html.py b/src/cstag/to_html.py
@@ -83,7 +83,7 @@
 
 
 def append_mark_to_n(cs_tag: str) -> str:
-    """Process each CS tag by adding specific markers `@` to `N`."""
+    """Process each cs tag by adding specific markers `@` to `N`."""
 
     def append_mark(cs: str) -> str:
         if cs.startswith("N"):
@@ -138,7 +138,7 @@ def process_cs_tag(cs_tag: str) -> str:
 def to_html(cs_tag: str, description: str = "") -> str:
     """Output HTML string showing a sequence with mutations colored
     Args:
-        cs_tag (str): CS tag in the **long** format
+        cs_tag (str): cs tag in the **long** format
         description (str): (optional) header information in the output string
     Return:
         HTML string

diff --git a/src/cstag/to_pdf.py b/src/cstag/to_pdf.py
@@ -7,15 +7,15 @@
 
 def to_pdf(cs_tag: str, description: str, path_out: str | Path) -> None:
     """
-    Convert a CS tag and its description to a PDF file.
+    Convert a cs tag and its description to a PDF file.
 
-    This function takes a CS (custom string) tag and its description, converts
+    This function takes a cs (custom string) tag and its description, converts
     it to HTML using the `to_html` function, and then writes it to a PDF file
     using WeasyPrint.
 
     Args:
-        cs_tag (str): The CS tag to be converted.
-        description (str): The description associated with the CS tag.
+        cs_tag (str): The cs tag to be converted.
+        description (str): The description associated with the cs tag.
         path_out (str | Path): The path where the output PDF file will be saved.
 
     Returns:

diff --git a/src/cstag/to_sequence.py b/src/cstag/to_sequence.py
@@ -8,10 +8,10 @@ def to_sequence(cs_tag: str) -> str:
     """Reconstruct the reference subsequence in the alignment
 
     Args:
-        cs_tag (str): CS tag in the **long** format
+        cs_tag (str): cs tag in the **long** format
 
     Returns:
-        str: The sequence string derived from the CS tag.
+        str: The sequence string derived from the cs tag.
 
     Example:
         >>> import cstag

diff --git a/src/cstag/to_vcf.py b/src/cstag/to_vcf.py
@@ -96,7 +96,7 @@ def get_variant_annotations(cs_tag_split: list[str], position: int) -> list[Vcf]
 
 
 ###########################################################
-# Format the CS tags
+# Format the cs tags
 ###########################################################
 
 
@@ -146,7 +146,7 @@ def format_cs_tags(cs_tags: list[str], chroms: list[str] | list[int], positions:
 
 
 def group_by_chrom(cs_tags_formatted: list[tuple]) -> dict[str, tuple]:
-    """Group CS tags by chromosomes"""
+    """Group cs tags by chromosomes"""
     cs_tags_grouped = defaultdict(list)
     for cs in cs_tags_formatted:
         cs_tags_grouped[cs.chrom].append(
@@ -234,7 +234,7 @@ def add_vcf_fields(
 
 
 ###########################################################
-# Process CS tag (One)
+# Process cs tag (One)
 ###########################################################
 
 
@@ -259,7 +259,7 @@ def process_cs_tag(cs_tag: str, chrom: str | int, pos: int) -> str:
 
 
 ###########################################################
-# Process CS tags (Many)
+# Process cs tags (Many)
 ###########################################################
 
 
@@ -319,10 +319,10 @@ def process_cs_tags(cs_tags: list[str], chroms: list[str], positions: list[int])
 
 def to_vcf(cs_tags: str | list[str], chroms: str | int | list[str] | list[int], positions: int | list[int]) -> str:
     """
-    Convert CS tag(s) to VCF (Variant Call Format) string.
+    Convert cs tag(s) to VCF (Variant Call Format) string.
 
     Args:
-        cs_tag (str | list[str]): The CS tag representing the sequence alignment.
+        cs_tag (str | list[str]): The cs tag representing the sequence alignment.
         chrom (str | list[str]): The chromosome name.
         pos (int | list[int]): The starting position for the sequence.
 

diff --git a/src/cstag/utils/validator.py b/src/cstag/utils/validator.py
@@ -9,17 +9,17 @@ def validate_cs_tag(cs_tag: str) -> None:
     )
 
     if not pattern.fullmatch(cs_tag.replace("cs:Z:", "")):
-        raise ValueError(f"Invalid CS tag: {cs_tag}")
+        raise ValueError(f"Invalid cs tag: {cs_tag}")
 
 
 def validate_short_format(cs_tag: str) -> None:
     if re.search(r"=[ACGTN]+", cs_tag):
-        raise ValueError("CS tag must be in short format")
+        raise ValueError("cs tag must be in short format")
 
 
 def validate_long_format(cs_tag: str) -> None:
     if re.search(r":[0-9]+", cs_tag):
-        raise ValueError("CS tag must be in long format")
+        raise ValueError("cs tag must be in long format")
 
 
 def validate_threshold(threshold: int) -> None:

diff --git a/tests/test_to_vcf.py b/tests/test_to_vcf.py
@@ -79,7 +79,7 @@ def test_get_variant_annotations():
 
 
 ###########################################################
-# Format the CS tags
+# Format the cs tags
 ###########################################################
 
 
@@ -202,7 +202,7 @@ def test_add_vcf_fields():
 
 
 ###########################################################
-# process_cs_tag: Single CS tag
+# process_cs_tag: Single cs tag
 ###########################################################
 
 
@@ -227,7 +227,7 @@ def test_process_cs_tag():
 
 
 ###########################################################
-# process_cs_tags: Multuple CS tags
+# process_cs_tags: Multiple cs tags
 ###########################################################