Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add #content and #content_csv #7

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions lib/ckan/resource.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require 'csv'
require 'open-uri'
require 'time'

module CKAN
Expand Down Expand Up @@ -145,6 +147,33 @@ def post_body_for_resource

post_body.join
end

# Parses the content of this resource as CSV.
#
# The text comes from #content_sanitized, so it has already been transcoded
# to UTF-8 by the time it reaches the CSV parser.
#
# options - Hash of options for CSV.parse. When omitted it defaults to the
#           same options CSV.table uses (headers: true, converters: :all,
#           header_converters: :symbol). A hash supplied by the caller
#           replaces those defaults entirely; it is not merged with them.
#           :encoding - encoding of the *source* data (e.g. 'iso-8859-1').
#                       It is handed to #content_sanitized and is not
#                       forwarded to CSV.parse.
#
# Returns whatever CSV.parse returns for the given options: a CSV::Table when
# :headers is enabled, otherwise an Array of rows.
def content_csv(options={headers: true, converters: :all, header_converters: :symbol})
  # :encoding describes the source data only; the parser receives UTF-8 text.
  csv_options = options.reject { |key, _value| key == :encoding }
  # Splat the hash: Ruby 3 no longer converts a trailing positional Hash into
  # keyword arguments, and CSV.parse takes its options as keywords.
  CSV.parse(content_sanitized(options[:encoding]), **csv_options)
end

# Gets the raw content of this resource.
#
# When @content has been set it is returned as-is and nothing is fetched.
# Otherwise the data is read from #url_safe on every call; the result is not
# stored in @content.
#
# encoding - optional name of the encoding the fetched data is in
#            (e.g. 'iso-8859-1'); it is appended to the read mode. Ignored
#            when @content is set.
#
# Returns @content when set, otherwise the String read from the URL.
def content(encoding=nil)
  return @content if @content

  mode = encoding ? "r:#{encoding}" : "r"
  # The block form closes the underlying IO as soon as it has been read.
  if URI.respond_to?(:open)
    # open-uri's Kernel#open override stopped handling URLs in Ruby 3.0;
    # URI.open is its replacement.
    URI.open(url_safe, mode, &:read)
  else
    # Older Rubies without URI.open still route URLs through Kernel#open.
    open(url_safe, mode, &:read)
  end
end

# Returns the resource content transcoded to UTF-8.
#
# Byte sequences that are invalid in the source encoding, and characters that
# have no UTF-8 equivalent, are replaced instead of raising an error.
#
# source_encoding - optional encoding name, passed straight through to
#                   #content.
def content_sanitized(source_encoding=nil)
  raw = content(source_encoding)
  raw.encode('UTF-8', undef: :replace, invalid: :replace)
end

# Gets the URL of the resource in a form that is safe to request.
#
# Some CKAN APIs return URLs containing invalid characters such as spaces.
# A URL that URI can parse is returned via URI#to_s; one that URI rejects as
# invalid is percent-escaped instead.
#
# Returns the URL as a String.
def url_safe
  URI.parse(url).to_s
rescue URI::InvalidURIError
  # URI.escape was removed in Ruby 3.0. The default parser's #escape does the
  # same percent-encoding and is also available on older Rubies.
  URI::DEFAULT_PARSER.escape(url)
end

=begin
{
Expand Down
71 changes: 70 additions & 1 deletion spec/ckan/resource_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
end

# Resource instance shared by the examples in this spec.
# NOTE(review): this span comes from a rendered diff — the first constructor
# line looks like the pre-change version and the second its replacement (the
# URL gains a trailing slash). Confirm against the real spec file.
subject do
CKAN::Resource.new(url: 'http://foo.bar', format: 'text/csv', description: 'Fubar', hash: '0xDEADBEEF')
CKAN::Resource.new(url: 'http://foo.bar/', format: 'text/csv', description: 'Fubar', hash: '0xDEADBEEF')
end

it "should return a Resource object" do
Expand Down Expand Up @@ -77,4 +77,73 @@
r = CKAN::Resource.new(name: 'foo')
r.hash_of_metadata_at_index()[:'resources__0__name'].should == 'foo'
end

# Examples for #url_safe: a URL with spaces is percent-escaped, while an
# already-escaped URL is returned unchanged.
describe '#url_safe' do
  # Both examples expect this percent-encoded form.
  escaped_url = 'http://localhost/foo%20bar%20baz.csv'

  it 'should escape URLs containing spaces' do
    subject.stub(:url).and_return('http://localhost/foo bar baz.csv')
    subject.url_safe.should eq(escaped_url)
  end

  it 'should not escape URLs that are already escaped' do
    subject.stub(:url).and_return(escaped_url)
    subject.url_safe.should eq(escaped_url)
  end
end

# Examples for #content and #content_csv against a stubbed HTTP endpoint that
# serves a small CSV document at the resource URL.
context 'when resource URL points to a CSV' do
  before(:each) do
    # VCR is switched off so the WebMock stub below answers the request.
    VCR.turn_off!
    stub_request(:get, subject.url).to_return({
      status: 200,
      headers: {'Content-Type' => 'text/csv'},
      body: "foo,bar,baz\nWibble,Wobble,Woo\n"
    })
  end

  # Undo the global VCR.turn_off! so it does not leak into later examples.
  after(:each) { VCR.turn_on! }

  describe '#content' do
    context 'when @content is unset' do
      it 'should return the content from the resource' do
        subject.content.should eq("foo,bar,baz\nWibble,Wobble,Woo\n")
      end
    end

    context 'when @content is set' do
      before(:each) { subject.content = "foo,bar,baz\nOne,Two,Three\n" }

      it 'should return @content' do
        subject.content.should eq("foo,bar,baz\nOne,Two,Three\n")
      end
    end
  end

  describe '#content_csv' do
    context 'when @content is unset' do
      it 'should return a CSV::Table' do
        subject.content_csv.should be_a(CSV::Table)
      end

      it 'should return the content from the resource' do
        subject.content_csv.first[:foo].should eq('Wibble')
      end

      it 'can be called with overridden options' do
        subject.content_csv(headers: false).first.first.should eq('foo')
      end

      context 'iso-8859-1 data' do
        before(:each) do
          # \xe9 is "é" in ISO-8859-1; the body is deliberately not UTF-8.
          stub_request(:get, subject.url).to_return({
            status: 200,
            headers: {'Content-Type' => 'text/csv'},
            body: "foo,bar,baz\nWibbl\xe9,Wobbl\xe9,Woo\xe9\n"
          })
        end

        it 'can get content in a different encoding' do
          # Passing options replaces the defaults, so headers are String keys.
          data = subject.content_csv(headers: true, encoding: 'iso-8859-1')
          data.first['foo'].should eq("Wibblé")
          data.first['baz'].should eq("Wooé")
        end
      end
    end

    context 'when @content is set' do
      before(:each) { subject.content = "foo,bar,baz\nOne,Two,Three\n" }

      it 'should return @content' do
        subject.content_csv.first[:foo].should eq('One')
      end
    end
  end
end

end