-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path2mapper.rb
executable file
·65 lines (52 loc) · 1.43 KB
/
2mapper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/ruby
require 'rubygems'
require 'aws/s3'
require 'bigdecimal'
require "bigdecimal/math"
include BigMath
# FIXME(security): TLS peer verification is switched off for every AWS request
# this script makes. The setting is kept so existing hosts keep working, but it
# should be re-enabled once they have a usable CA bundle.
AWS.config(:ssl_verify_peer => false)

# Credentials are read from the environment instead of being embedded in the
# source. ENV.fetch raises when a variable is unset, so a misconfigured run
# fails here rather than on the first S3 request. Any key pair that was ever
# committed with this script must be treated as leaked and rotated.
s3 = AWS::S3.new(
  :access_key_id => ENV.fetch('AWS_ACCESS_KEY_ID'),
  :secret_access_key => ENV.fetch('AWS_SECRET_ACCESS_KEY')
)

# Bucket that the per-line input files are read from further down the script.
bucket = s3.buckets['/cs7960']

# Initialize mapper data: `fw` maps the text before the tab on each line of the
# 'all_words' object to the integer after it. Lines without a tab are ignored.
mybucket = s3.buckets['srikanthraju']
all_words = mybucket.objects['all_words']
fw = {}
all_words.read.each_line do |s|
  match = /(.*)\t([0-9]*)/.match(s)
  fw[match[1]] = match[2].to_i if match
end

# Debug aid: list the keys available under the input prefix.
#bucket.objects.with_prefix('NSFAbstractsSmall').each do |object|
#puts object.key
#end
# Denominator that turns an `fw` count into a corpus-wide relative frequency.
# NOTE(review): presumably the total number of word occurrences behind the
# 'all_words' table -- confirm before pointing the script at other data.
CORPUS_WORD_TOTAL = 4_418_825

# Adds the words of +text+ to +counts+ (a Hash whose default value is 0) and
# returns +counts+. Tokens are maximal runs of ASCII letters/digits, lower-cased.
# Tokens containing a digit are ignored, and so is the empty token that
# String#split produces when the text begins with a separator.
def tally_words(text, counts)
  text.split(/[^a-zA-Z0-9]+/).each do |token|
    word = token.downcase
    next if word.empty? || word =~ /[0-9]/
    counts[word] += 1
  end
  counts
end

# Returns share * ln(share / corpus_share) as a Float, where
# share = count / total and corpus_share = corpus_count / CORPUS_WORD_TOTAL.
# The ratios are formed with BigDecimal; Kernel#BigDecimal is used because
# BigDecimal.new no longer exists in current bigdecimal releases.
def divergence_term(count, total, corpus_count)
  share = BigDecimal(count.to_s) / BigDecimal(total.to_s)
  corpus_share = BigDecimal(corpus_count.to_s) / BigDecimal(CORPUS_WORD_TOTAL.to_s)
  (Math.log(share / corpus_share) * share).to_f
end

# Each stdin line is a comma-separated list of file names. The words of all
# files on a line are counted together, and one "DoubleValueSum:<line>\t<value>"
# record is printed per distinct word.
STDIN.each_line do |line|
  line.chomp!
  word_count = Hash.new(0)
  line.split(',').each do |fname|
    file = bucket.objects.with_prefix('NSFAbstractsSmall')[fname]
    next if file.nil?
    tally_words(file.read, word_count)
  end

  total_words = word_count.values.inject(0) { |sum, n| sum + n }
  word_count.each do |word, count|
    corpus_count = fw[word]
    # A word that is missing from the table (or listed with a zero count) has no
    # corpus share to compare against; the ratio would divide by zero, so skip it.
    next if corpus_count.nil? || corpus_count.zero?
    puts "DoubleValueSum:#{line}\t#{divergence_term(count, total_words, corpus_count)}"
  end
end