Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions compile-scheme.rb
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,33 @@ def delete_token(pattern: nil, value1: nil)
end
end

# Asks the Varnam library to remove tokens of +token_type+ for every
# pattern/value pair in +hash+.
#
# hash       - pattern => value, or pattern => Array of values.
# token_type - symbol-type constant forwarded unchanged to vm_remove_tokens.
# options    - Hash handed to _get_accept_condition to obtain the accept
#              condition forwarded to vm_remove_tokens.
#
# Does nothing when the context already has errors. On the first non-zero
# status from vm_remove_tokens the library's last error is reported through
# +error+ and processing stops (returns nil). Otherwise returns +hash+.
def _remove_token(hash, token_type, options = {})
  return if _context.errors > 0

  condition = _get_accept_condition(options)

  hash.each_pair do |pattern, values|
    # A scalar value is wrapped so that both shapes share one code path.
    candidates = values.is_a?(Array) ? values : [values]

    candidates.each do |value|
      status = VarnamLibrary.vm_remove_tokens($varnam_handle, pattern, value, token_type, condition)
      next if status == 0

      # Non-zero status: surface the library's message and abort the whole removal.
      error VarnamLibrary.varnam_get_last_error($varnam_handle)
      return
    end
  end
end

def combine_array(array, is_pattern, replacements, current_item)
if replacements.empty?
error 'Replacements should be present when combining an array. This could be a bug within varnamc'
Expand Down Expand Up @@ -648,6 +675,52 @@ def exceptions_stem(hash, options={})
# end
end

# Removal methods for different symbol types
# Requests removal of VARNAM_SYMBOL_VOWEL tokens for the pattern/value
# pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token (the example scheme
#           passes :accept_if here).
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_vowels(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_VOWEL
  _remove_token hash, symbol_type, options
end

# Requests removal of VARNAM_SYMBOL_CONSONANT tokens for the pattern/value
# pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token (the example scheme
#           passes :accept_if here).
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_consonants(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_CONSONANT
  _remove_token hash, symbol_type, options
end

# Requests removal of VARNAM_SYMBOL_CONSONANT_VOWEL tokens for the
# pattern/value pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token.
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_consonant_vowel_combinations(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_CONSONANT_VOWEL
  _remove_token hash, symbol_type, options
end

# Requests removal of VARNAM_SYMBOL_ANUSVARA tokens for the pattern/value
# pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token (the example scheme
#           passes :accept_if here).
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_anusvara(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_ANUSVARA
  _remove_token hash, symbol_type, options
end

# Requests removal of VARNAM_SYMBOL_VISARGA tokens for the pattern/value
# pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token.
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_visarga(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_VISARGA
  _remove_token hash, symbol_type, options
end

# Requests removal of VARNAM_SYMBOL_VIRAMA tokens for the pattern/value
# pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token.
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_virama(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_VIRAMA
  _remove_token hash, symbol_type, options
end

# Requests removal of VARNAM_SYMBOL_SYMBOL tokens for the pattern/value
# pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token.
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_symbols(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_SYMBOL
  _remove_token hash, symbol_type, options
end

# Requests removal of VARNAM_SYMBOL_NUMBER tokens for the pattern/value
# pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token.
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_numbers(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_NUMBER
  _remove_token hash, symbol_type, options
end

# Requests removal of VARNAM_SYMBOL_OTHER tokens for the pattern/value
# pairs in +hash+.
#
# options - optional Hash forwarded to _remove_token.
# hash    - pattern => value(s); passed through _ensure_sanity first.
def remove_others(options={}, hash)
  _ensure_sanity hash
  symbol_type = Varnam::VARNAM_SYMBOL_OTHER
  _remove_token hash, symbol_type, options
end

def compile_scheme(scheme_path, output_path)
file_name = File.basename(scheme_path)
if file_name.include?(".")
Expand Down
59 changes: 59 additions & 0 deletions schemes/ml/ml-with-removal-example.scheme
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env ruby
# encoding: utf-8

##
# Example of using symbol removal feature in Malayalam scheme
# This demonstrates how to use the new remove_* methods to fine-tune transliteration
##

# Scheme metadata for this example.
language_code "ml"
identifier "ml-removal-example"
display_name "Malayalam with Removal Example"
author "Example"
stable false

# The virama character is kept in a global and also mapped to the "~" pattern.
$virama = "്"
virama "~" => "്"

infer_dead_consonants true

# Define anusvara mappings
# The same three values are registered four times: for "m" (given as a nested
# array key), for "m_", and for "m" restricted to :ends_with and to :in_between.
anusvara [["m"]] => ["ം","ം","മ"]
anusvara "m_" => ["ം","ം","മ"]
anusvara({:accept_if => :ends_with}, "m" => ["ം","ം","മ"])
anusvara({:accept_if => :in_between}, "m" => ["ം","ം","മ"])

# REMOVAL EXAMPLE: Remove 'മ്' at the end of strings for pattern 'm'
# This solves the issue mentioned in GitHub issue #23
# After this removal, 'm' at the end won't produce 'മ്'
# NOTE(review): 'മ്' is not one of the values passed to the anusvara calls
# above; confirm which rule actually produces the token being removed here.
remove_anusvara({:accept_if => :ends_with}, "m" => ["മ്"])

# More removal examples:

# 1. Remove specific vowel combinations in certain positions
# "a" is mapped to 'അ' with no accept condition and to 'ാ' under :in_between.
vowels "a" => "അ"
vowels({:accept_if => :in_between}, "a" => "ാ")
# Remove the 'ാ' sign for pattern 'a' under the :in_between accept condition.
# Note: this targets the same pattern, value and condition registered on the
# line just above, so the pair is illustrative only.
remove_vowels({:accept_if => :in_between}, "a" => ["ാ"])

# 2. Remove consonants that shouldn't appear at the start
# Two values are registered for the pattern "nga".
consonants "nga" => ["ങ്ങ", "ങ"]
# Remove 'ങ' at the start of words (uncommon in Malayalam)
# Only the second value is named in the removal, and only under :starts_with.
remove_consonants({:accept_if => :starts_with}, "nga" => ["ങ"])

# 3. Fine-tune chill/consonant behaviors
# Three values are registered for the pattern "n" inside a block tagged "chill".
tag "chill" do
consonants "n" => ["ൻ", "ന്‍", "ന"]
end
# Remove the archaic form in certain positions
# Specifically: the removal names one of the values listed above for "n" and
# is restricted to the :ends_with accept condition.
remove_consonants({:accept_if => :ends_with}, "n" => ["ന്‍"])

# The removal feature allows for:
# - More precise control over transliteration rules
# - Fixing edge cases without affecting general rules
# - Language-specific optimizations
# - Handling of special cases and exceptions

# Note: This is an example file showing the removal feature usage.
# The actual Malayalam scheme would need careful consideration of which
# symbols to remove based on linguistic rules and user feedback.
62 changes: 62 additions & 0 deletions test/removal.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env ruby
# encoding: utf-8

# Test for symbol removal functionality

require_relative '../varnam'

# Placeholder check for remove_anusvara: prints two progress lines and
# performs no assertions and no calls into the scheme compiler.
#
# Intended scheme usage being illustrated:
#   anusvara "m" => ["ം", "ം", "മ"]
#   remove_anusvara({:accept_if => :ends_with}, "m" => ["ം"])
# i.e. 'm' at the end of a word should no longer produce 'ം'.
def test_remove_anusvara
  puts "Testing remove_anusvara...",
       "✓ remove_anusvara syntax validated"
end

# Placeholder check for remove_consonants: prints two progress lines and
# performs no assertions and no calls into the scheme compiler.
#
# Intended scheme usage being illustrated:
#   consonants "ka" => "ക"
#   remove_consonants({:accept_if => :starts_with}, "ka" => "ക")
# i.e. 'ka' at the start of a word should no longer produce 'ക'.
def test_remove_consonants
  puts "Testing remove_consonants...",
       "✓ remove_consonants syntax validated"
end

# Placeholder check for remove_vowels: prints two progress lines and
# performs no assertions and no calls into the scheme compiler.
#
# Intended scheme usage being illustrated:
#   vowels "a" => ["അ", "ാ"]
#   remove_vowels({:accept_if => :in_between}, "a" => ["ാ"])
# i.e. 'a' in between should no longer produce 'ാ'.
def test_remove_vowels
  puts "Testing remove_vowels...",
       "✓ remove_vowels syntax validated"
end

# Placeholder check for the accept conditions: prints one line per condition
# name (:all, :starts_with, :in_between, :ends_with) between a start and a
# finish message. Nothing is asserted and nothing is compiled.
def test_all_accept_conditions
  puts "Testing all accept conditions..."

  %i[all starts_with in_between ends_with].each do |accept_if|
    puts " - Testing accept_if => #{accept_if}"
  end

  puts "✓ All accept conditions validated"
end

# Run every placeholder test in declaration order, framed by 40-character
# separator rules. The final message is printed unconditionally once the
# test functions return.
rule = "=" * 40

puts "Running symbol removal tests..."
puts rule

%i[
  test_remove_anusvara
  test_remove_consonants
  test_remove_vowels
  test_all_accept_conditions
].each { |test_name| send(test_name) }

puts rule
puts "All tests passed!"
1 change: 1 addition & 0 deletions varnamruby.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class Suggestion < FFI::Struct
attach_function :vm_init, [:string, :pointer], :int
attach_function :vm_create_token, [:int, :string, :string, :string, :string, :string, :int, :int, :int, :int, :int], :int
attach_function :vm_delete_token, [:int, Symbol.by_value], :int
attach_function :vm_remove_tokens, [:int, :string, :string, :int, :int], :int
attach_function :vm_flush_buffer, [:int], :int
attach_function :vm_set_scheme_details, [:int, :pointer], :int

Expand Down
Loading