diff --git a/lib/multi_pinyin.rb b/lib/multi_pinyin.rb
new file mode 100644
index 0000000..fcdd0e2
--- /dev/null
+++ b/lib/multi_pinyin.rb
@@ -0,0 +1,51 @@
+# Pinyin conversion that expands polyphonic characters into every possible
+# reading; the alternatives are joined with M_SPLIT_CHAR.
+class MultiPinyin < Pinyin
+  M_SPLIT_CHAR = "|"
+
+  # Full pinyin of every reading combination, e.g. "yedongkai|xiedongkai".
+  def self.full(value, split_char = nil)
+    res = etymon_mapping_arr(value, split_char)
+    cross_product_arr(res).map{|w| full_word(w, split_char) }.join(M_SPLIT_CHAR)
+  end
+
+  # Initial letter of each syllable for every reading combination, e.g. "gjp|hjp".
+  def self.abbr(value, split_char = nil)
+    res = etymon_mapping_arr(value, split_char)
+    cross_product_arr(res).map{|w| abbr_word(w, split_char) }.join(M_SPLIT_CHAR)
+  end
+
+  # First syllable in full and the remaining ones abbreviated, for every
+  # reading combination, e.g. "gongyjp|gongxjp|hongyjp|hongxjp".
+  def self.abbr_else(value, split_char = nil)
+    res = etymon_mapping_arr(value, split_char)
+    cross_product_arr(res).map { |w| abbr_else_word(w, split_char) }.join(M_SPLIT_CHAR)
+  end
+
+  # Every key of @@dist whose entry matches +word+ ([] when none does).
+  def self.find_etymon(word)
+    @@dist.select{ |k, v| v.match(word) }.map{ |k, v| k }
+  end
+
+  # Cartesian product of the per-character candidate lists, e.g.
+  # [['a', 'b'], [1, 2]] => [['a', 1], ['a', 2], ['b', 1], ['b', 2]].
+  # A single list yields one combination per candidate, so a lone polyphonic
+  # character still expands to all of its readings.
+  def self.cross_product_arr(arr)
+    return arr if arr.empty?
+
+    arr[0].product(*arr[1..-1])
+  end
+
+  # One candidate list per character of +value+: the readings found in @@dist
+  # for a Chinese character, or the character itself when it is not Chinese or
+  # has no entry (an empty list would empty the whole product).
+  def self.etymon_mapping_arr(value, split_char)
+    return [] if value.nil?
+
+    value.split(//).map do |w|
+      etymon = zh_cn?(w) ? find_etymon(w) : []
+      etymon.empty? ? [w] : etymon
+    end
+  end
+end
diff --git a/lib/pinyin.rb b/lib/pinyin.rb
index 9b69623..0f55790 100644
--- a/lib/pinyin.rb
+++ b/lib/pinyin.rb
@@ -5,85 +5,57 @@ class Pinyin
 
   @@dist = YAML.load_file(File.dirname(__FILE__) + "/../dist.yml")
 
-  def self.full(value, split_char = nil, multitone = false)
-    res = etymon_mapping_arr(value, split_char, multitone)
-
-    if multitone
-      Pinyin.cross_product_arr(res).map{|w| full_word(w, split_char) }.join("|")
-    else
-      full_word(res, split_char)
-    end
+  def self.full(value, split_char = nil)
+    res = etymon_mapping_arr(value, split_char)
+    full_word(res, split_char)
   end
 
-  def self.abbr(value, split_char = nil, multitone = false)
-    res = etymon_mapping_arr(value, split_char, multitone)
-
-    if multitone
-      Pinyin.cross_product_arr(res).map{|w| abbr_word(w, split_char) }.join("|")
-    else
-      abbr_word(res, split_char)
-    end
+  def self.abbr(value, split_char = nil)
+    res = etymon_mapping_arr(value, split_char)
+    abbr_word(res, split_char)
   end
 
-  def self.abbr_else(value, split_char = nil, multitone = false)
-    res = etymon_mapping_arr(value, split_char, multitone)
-
-    if multitone
-      Pinyin.cross_product_arr(res).map { |w| abbr_else_word(w, split_char) }.join("|")
-    else
-      abbr_else_word(res, split_char)
-    end
+  def self.abbr_else(value, split_char = nil)
+    res = etymon_mapping_arr(value, split_char)
+    abbr_else_word(res, split_char)
   end
 
-  def self.find_etymon(word, multitone = false)
-    if multitone
-      @@dist.select{ |k, v| v.match(word) }.map{ |k, v| k }
-    else
-      @@dist.each{ |k, v| return k if v.match(word) }
-      nil
-    end
+  def self.find_etymon(word)
+    @@dist.each{ |k, v| return k if v.match(word) }
+    nil
   end
 
-  def self.cross_product_arr(arr)
-    return arr if arr.length <= 1
-    arg_str = (1..(arr.length - 1)).map{|num| "arr[#{num}]" }.join(', ')
-
-    eval("arr[0].product(#{arg_str})")
+  def self.zh_cn?(w)
+    w.length != 1
   end
 
-  private
+  def self.etymon_mapping_arr(value, split_char)
+    return [] if value.nil?
 
-    def self.zh_cn?(w)
-      w.length != 1
+    result = []
+    value.clone.split(//).each do |w|
+      etymon = find_etymon(w) if zh_cn?(w)
+      result << (etymon || w)
     end
 
-    def self.etymon_mapping_arr(value, split_char, multitone)
-      return [] if value.nil?
-
-      result = []
-      value.clone.split(//).each do |w|
-        etymon = find_etymon(w, multitone) if zh_cn?(w)
-        result << (etymon || (multitone ? [w] : w))
-      end
-
-      result
-    end
+    result
+  end
 
-    def self.full_word(word, split_char)
-      Proc.new { word.join(split_char) }.call
-    end
+  def self.full_word(word, split_char)
+    Proc.new { word.join(split_char) }.call
+  end
 
-    def self.abbr_word(word, split_char)
-      Proc.new { word.map{|i| i[0..0]}.join(split_char) }.call
-    end
+  def self.abbr_word(word, split_char)
+    Proc.new { word.map{|i| i[0..0]}.join(split_char) }.call
+  end
 
-    def self.abbr_else_word(word, split_char)
-      Proc.new do
-        i_index = 0
-        word.map do |w|
-          i_index += 1
-          i_index == 1 ? w : w[0..0]
-        end.join(split_char)
-      end.call
-    end
+  def self.abbr_else_word(word, split_char)
+    Proc.new do
+      i_index = 0
+      word.map do |w|
+        i_index += 1
+        i_index == 1 ? w : w[0..0]
+      end.join(split_char)
+    end.call
+  end
 end
diff --git a/test/multi_pinyin_test.rb b/test/multi_pinyin_test.rb
new file mode 100644
index 0000000..7185989
--- /dev/null
+++ b/test/multi_pinyin_test.rb
@@ -0,0 +1,27 @@
+require 'test_helper'
+
+class MultiPinyinTest < Test::Unit::TestCase
+  def test_full
+    assert_equal "yedongkai|xiedongkai", MultiPinyin.full("叶冬开")
+    assert_equal "yedongkaiabcyedong|yedongkaiabcxiedong|xiedongkaiabcyedong|xiedongkaiabcxiedong", MultiPinyin.full("叶冬开abc叶冬", nil)
+    assert_equal "ye|xie", MultiPinyin.full("叶")
+  end
+
+  def test_abbr
+    assert_equal "gjp|hjp", MultiPinyin.abbr("红靖鹏")
+  end
+
+  def test_abbr_else
+    assert_equal "gongyjp|gongxjp|hongyjp|hongxjp", MultiPinyin.abbr_else("红叶靖鹏")
+  end
+
+  def test_find_etymon
+    assert_equal ["ye", "xie"], MultiPinyin.find_etymon("叶")
+  end
+
+  def test_cross_product_arr
+    assert_equal [["a", 1], ["a", 2], ["a", 3], ["b", 1], ["b", 2], ["b", 3]], MultiPinyin.cross_product_arr([['a', 'b'], [1, 2, 3]])
+    assert_equal [["a"], ["b"]], MultiPinyin.cross_product_arr([['a', 'b']])
+    assert_equal [], MultiPinyin.cross_product_arr([])
+  end
+end
diff --git a/test/pinyin_test.rb b/test/pinyin_test.rb
index 39298f9..b0e83e6 100644
--- a/test/pinyin_test.rb
+++ b/test/pinyin_test.rb
@@ -6,38 +6,25 @@ def test_full
     assert_equal("yedongkai", Pinyin.full("叶冬开"))
     assert_equal("yedongkaiyedong", Pinyin.full("叶冬开叶冬"))
     assert_equal("yedongkaiabcyedong", Pinyin.full("叶冬开abc叶冬"))
-
-    assert_equal "yedongkai|xiedongkai", Pinyin.full("叶冬开", nil, true)
-    assert_equal "yedongkaiabcyedong|yedongkaiabcxiedong|xiedongkaiabcyedong|xiedongkaiabcxiedong", Pinyin.full("叶冬开abc叶冬", nil, true)
   end
 
   def test_abbr
     assert_equal("cjp", Pinyin.abbr("曹靖鹏"))
     assert_equal("cjpcj", Pinyin.abbr("曹靖鹏曹靖"))
     assert_equal("cjpabccj", Pinyin.abbr("曹靖鹏abc曹靖"))
-
-    assert_equal("gjp|hjp", Pinyin.abbr("红靖鹏", nil, true))
   end
 
   def test_abbr_else
     assert_equal("caojp", Pinyin.abbr_else("曹靖鹏"))
     assert_equal("caojpcj", Pinyin.abbr_else("曹靖鹏曹靖"))
     assert_equal("caojpabccj", Pinyin.abbr_else("曹靖鹏abc曹靖"))
-
-    assert_equal("gongyjp|gongxjp|hongyjp|hongxjp", Pinyin.abbr_else("红叶靖鹏", nil, true))
   end
 
   def test_find_etymon
     assert_equal("ye", Pinyin.find_etymon("叶"))
     assert_equal(nil, Pinyin.find_etymon("a"))
-
-    assert_equal(["ye", "xie"], Pinyin.find_etymon("叶", true))
-    assert_equal([], Pinyin.find_etymon("a", true))
   end
 
-  def test_cross_product_arr
-    assert_equal [["a", 1], ["a", 2], ["a", 3], ["b", 1], ["b", 2], ["b", 3]], Pinyin.cross_product_arr([['a', 'b'], [1, 2, 3]])
-  end
 
   def test_size
     assert_equal 3, "叶冬开".split(//).size
diff --git a/test/test_helper.rb b/test/test_helper.rb
index bda7b5c..4219969 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -1,3 +1,4 @@
 require 'rubygems'
 require 'test/unit'
 require 'pinyin'
+require 'multi_pinyin'