Skip to content

Commit

Permalink
Refactor: extract multitone pinyin into MultiPinyin
Browse files Browse the repository at this point in the history
  • Loading branch information
tom2cjp committed Nov 16, 2011
1 parent d51f6db commit f5d9269
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 78 deletions.
41 changes: 41 additions & 0 deletions lib/multi_pinyin.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Pinyin converter that keeps every reading of polyphonic (multi-tone)
# characters instead of only the first one found.
#
# Each public converter returns all possible transliterations of the input,
# joined with M_SPLIT_CHAR. Word formatting (full_word, abbr_word,
# abbr_else_word), zh_cn? and the @@dist dictionary come from Pinyin.
class MultiPinyin < Pinyin
  # Separator placed between the alternative transliterations in a result.
  M_SPLIT_CHAR = "|"

  # Full pinyin for every combination of character readings.
  #
  # value      - String to transliterate; nil yields "".
  # split_char - String inserted between the syllables of one
  #              transliteration, or nil for none.
  #
  # Returns a String such as "yedongkai|xiedongkai".
  def self.full(value, split_char = nil)
    res = etymon_mapping_arr(value, split_char)
    cross_product_arr(res).map { |w| full_word(w, split_char) }.join(M_SPLIT_CHAR)
  end

  # Abbreviated pinyin (first letter of every syllable) for every combination
  # of character readings.
  #
  # value      - String to transliterate; nil yields "".
  # split_char - String inserted between the letters, or nil for none.
  #
  # Returns a String such as "gjp|hjp".
  def self.abbr(value, split_char = nil)
    res = etymon_mapping_arr(value, split_char)
    cross_product_arr(res).map { |w| abbr_word(w, split_char) }.join(M_SPLIT_CHAR)
  end

  # Like abbr, but the first syllable of each alternative is kept in full.
  #
  # value      - String to transliterate; nil yields "".
  # split_char - String inserted between the parts, or nil for none.
  #
  # Returns a String such as "gongyjp|gongxjp|hongyjp|hongxjp".
  def self.abbr_else(value, split_char = nil)
    res = etymon_mapping_arr(value, split_char)
    cross_product_arr(res).map { |w| abbr_else_word(w, split_char) }.join(M_SPLIT_CHAR)
  end

  # Looks up every reading of a character.
  #
  # word - the character to look up.
  #
  # Returns an Array with the key of each @@dist entry whose value matches
  # word; the Array is empty when nothing matches.
  def self.find_etymon(word)
    # select + map (rather than Hash#keys on the selection) works whether
    # Hash#select returns pairs or a Hash.
    @@dist.select { |_k, v| v.match(word) }.map { |k, _v| k }
  end

  # Cartesian product of the given arrays.
  #
  # arr - Array of Arrays, one inner Array of alternatives per position.
  #
  # Returns an Array of combinations, each combination being an Array with
  # one element per position. An empty arr is returned as is.
  def self.cross_product_arr(arr)
    return arr if arr.empty?

    # A single position must still be expanded into one combination per
    # alternative ([["ye", "xie"]] => [["ye"], ["xie"]]); returning it
    # unchanged made callers glue the alternatives into one word.
    # The splat passes the remaining arrays directly, so no eval is needed.
    arr[0].product(*arr[1..-1])
  end

  # Splits value into characters and maps each one to its list of readings.
  #
  # value      - String to transliterate, or nil.
  # split_char - unused here; accepted to mirror Pinyin.etymon_mapping_arr.
  #
  # Returns an Array of Arrays: the readings of a character recognised by
  # zh_cn?, or the character itself wrapped in an Array otherwise.
  def self.etymon_mapping_arr(value, split_char)
    return [] if value.nil?

    value.split(//).map do |w|
      readings = zh_cn?(w) ? find_etymon(w) : []
      # A character without any dictionary entry falls back to itself, as in
      # Pinyin.etymon_mapping_arr; an empty list here would wipe out the
      # whole cross product.
      readings.empty? ? [w] : readings
    end
  end
end
102 changes: 37 additions & 65 deletions lib/pinyin.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,85 +5,57 @@
class Pinyin
@@dist = YAML.load_file(File.dirname(__FILE__) + "/../dist.yml")

def self.full(value, split_char = nil, multitone = false)
res = etymon_mapping_arr(value, split_char, multitone)

if multitone
Pinyin.cross_product_arr(res).map{|w| full_word(w, split_char) }.join("|")
else
full_word(res, split_char)
end
def self.full(value, split_char = nil)
res = etymon_mapping_arr(value, split_char)
full_word(res, split_char)
end

def self.abbr(value, split_char = nil, multitone = false)
res = etymon_mapping_arr(value, split_char, multitone)

if multitone
Pinyin.cross_product_arr(res).map{|w| abbr_word(w, split_char) }.join("|")
else
abbr_word(res, split_char)
end
def self.abbr(value, split_char = nil)
res = etymon_mapping_arr(value, split_char)
abbr_word(res, split_char)
end

def self.abbr_else(value, split_char = nil, multitone = false)
res = etymon_mapping_arr(value, split_char, multitone)

if multitone
Pinyin.cross_product_arr(res).map { |w| abbr_else_word(w, split_char) }.join("|")
else
abbr_else_word(res, split_char)
end
def self.abbr_else(value, split_char = nil)
res = etymon_mapping_arr(value, split_char)
abbr_else_word(res, split_char)
end

def self.find_etymon(word, multitone = false)
if multitone
@@dist.select{ |k, v| v.match(word) }.map{ |k, v| k }
else
@@dist.each{ |k, v| return k if v.match(word) }
nil
end
def self.find_etymon(word)
@@dist.each{ |k, v| return k if v.match(word) }
nil
end

def self.cross_product_arr(arr)
return arr if arr.length <= 1
arg_str = (1..(arr.length - 1)).map{|num| "arr[#{num}]" }.join(', ')

eval("arr[0].product(#{arg_str})")
def self.zh_cn?(w)
w.length != 1
end

private
def self.etymon_mapping_arr(value, split_char)
return [] if value.nil?

def self.zh_cn?(w)
w.length != 1
result = []
value.clone.split(//).each do |w|
etymon = find_etymon(w) if zh_cn?(w)
result << (etymon || w)
end

def self.etymon_mapping_arr(value, split_char, multitone)
return [] if value.nil?

result = []
value.clone.split(//).each do |w|
etymon = find_etymon(w, multitone) if zh_cn?(w)
result << (etymon || (multitone ? [w] : w))
end

result
end
result
end

def self.full_word(word, split_char)
Proc.new { word.join(split_char) }.call
end
def self.full_word(word, split_char)
Proc.new { word.join(split_char) }.call
end

def self.abbr_word(word, split_char)
Proc.new { word.map{|i| i[0..0]}.join(split_char) }.call
end
def self.abbr_word(word, split_char)
Proc.new { word.map{|i| i[0..0]}.join(split_char) }.call
end

def self.abbr_else_word(word, split_char)
Proc.new do
i_index = 0
word.map do |w|
i_index += 1
i_index == 1 ? w : w[0..0]
end.join(split_char)
end.call
end
def self.abbr_else_word(word, split_char)
Proc.new do
i_index = 0
word.map do |w|
i_index += 1
i_index == 1 ? w : w[0..0]
end.join(split_char)
end.call
end
end
24 changes: 24 additions & 0 deletions test/multi_pinyin_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
require 'test_helper'

# Unit tests for MultiPinyin: every converter is expected to return all
# reading combinations of the input, separated by "|".
class MultiPinyinTest < Test::Unit::TestCase
  # One polyphonic character gives two results; two of them give four.
  def test_full
    single = "yedongkai|xiedongkai"
    assert_equal(single, MultiPinyin.full("叶冬开"))

    mixed = "yedongkaiabcyedong|yedongkaiabcxiedong|xiedongkaiabcyedong|xiedongkaiabcxiedong"
    assert_equal(mixed, MultiPinyin.full("叶冬开abc叶冬", nil))
  end

  # Only the first letter of each syllable is kept.
  def test_abbr
    expected = "gjp|hjp"
    assert_equal(expected, MultiPinyin.abbr("红靖鹏"))
  end

  # The first syllable stays complete, the others are reduced to one letter.
  def test_abbr_else
    expected = "gongyjp|gongxjp|hongyjp|hongxjp"
    assert_equal(expected, MultiPinyin.abbr_else("红叶靖鹏"))
  end

  # All readings of a character are returned, not just the first.
  def test_find_etymon
    readings = MultiPinyin.find_etymon("叶")
    assert_equal(["ye", "xie"], readings)
  end

  # Combinations are listed with the first position varying slowest.
  def test_cross_product_arr
    combinations = MultiPinyin.cross_product_arr([['a', 'b'], [1, 2, 3]])
    expected = [["a", 1], ["a", 2], ["a", 3], ["b", 1], ["b", 2], ["b", 3]]
    assert_equal(expected, combinations)
  end
end
13 changes: 0 additions & 13 deletions test/pinyin_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,25 @@ def test_full
assert_equal("yedongkai", Pinyin.full("叶冬开"))
assert_equal("yedongkaiyedong", Pinyin.full("叶冬开叶冬"))
assert_equal("yedongkaiabcyedong", Pinyin.full("叶冬开abc叶冬"))

assert_equal "yedongkai|xiedongkai", Pinyin.full("叶冬开", nil, true)
assert_equal "yedongkaiabcyedong|yedongkaiabcxiedong|xiedongkaiabcyedong|xiedongkaiabcxiedong", Pinyin.full("叶冬开abc叶冬", nil, true)
end

def test_abbr
assert_equal("cjp", Pinyin.abbr("曹靖鹏"))
assert_equal("cjpcj", Pinyin.abbr("曹靖鹏曹靖"))
assert_equal("cjpabccj", Pinyin.abbr("曹靖鹏abc曹靖"))

assert_equal("gjp|hjp", Pinyin.abbr("红靖鹏", nil, true))
end

def test_abbr_else
assert_equal("caojp", Pinyin.abbr_else("曹靖鹏"))
assert_equal("caojpcj", Pinyin.abbr_else("曹靖鹏曹靖"))
assert_equal("caojpabccj", Pinyin.abbr_else("曹靖鹏abc曹靖"))

assert_equal("gongyjp|gongxjp|hongyjp|hongxjp", Pinyin.abbr_else("红叶靖鹏", nil, true))
end

def test_find_etymon
assert_equal("ye", Pinyin.find_etymon("叶"))
assert_equal(nil, Pinyin.find_etymon("a"))

assert_equal(["ye", "xie"], Pinyin.find_etymon("叶", true))
assert_equal([], Pinyin.find_etymon("a", true))
end

def test_cross_product_arr
assert_equal [["a", 1], ["a", 2], ["a", 3], ["b", 1], ["b", 2], ["b", 3]], Pinyin.cross_product_arr([['a', 'b'], [1, 2, 3]])
end

def test_size
assert_equal 3, "叶冬开".split(//).size
Expand Down
1 change: 1 addition & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
require 'rubygems'
require 'test/unit'
require 'pinyin'
require 'multi_pinyin'

0 comments on commit f5d9269

Please sign in to comment.