Skip to content

Commit

Permalink
Downloaded all missing student profile pics
Browse files Browse the repository at this point in the history
  • Loading branch information
gglin committed Jun 25, 2013
1 parent 417663c commit 9d9d92e
Show file tree
Hide file tree
Showing 281 changed files with 33,955 additions and 6 deletions.
31 changes: 31 additions & 0 deletions lib/concerns/findable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Class-level query helpers for SQLite-backed models.
#
# The extending class must provide three things that this module calls:
# +db+ (an object responding to +execute+), +table_name+, and
# +new_from_db(row)+, which builds an instance from a result row.
module Findable
  module ClassMethods

    # Loads every row of the class's own table.
    #
    # Returns an Array of objects built by new_from_db (empty when the table
    # has no rows).
    def all
      # Use table_name (like find_by and where do) rather than a hard-coded
      # table, so the mixin works for any extending class.
      all_rows = db.execute "SELECT * FROM #{table_name};"
      all_rows.collect { |row| new_from_db(row) }
    end

    # Defines one singleton finder, find_by_<attribute>(value), per attribute
    # given. Accepts attribute names as separate arguments or as arrays.
    #
    # Each generated finder returns the first matching row as an object, or
    # nil when nothing matches. The attribute name is interpolated into the
    # SQL text, so only pass trusted column names; the value is bound.
    def find_by(*args)
      args.flatten.each do |arg|
        define_singleton_method("find_by_#{arg}") do |value|
          rows_found = db.execute "SELECT * FROM #{table_name} WHERE #{arg} = ?", value
          new_from_db(rows_found.first) unless rows_found.empty?
        end
      end
    end
    # find_by must be called by the class extending it to initialize all find_by_attribute methods

    # Looks a record up by id. Relies on find_by_id having been generated
    # through find_by(:id) by the extending class.
    def find(id)
      find_by_id(id)
    end

    # Finds all rows matching every column/value pair in +conditions+.
    #
    # conditions - Hash of column name => value. Column names become named
    #              placeholders (":column") and the hash itself is passed to
    #              db.execute as the bind parameters.
    #
    # Returns an Array of objects, or nil when no row matches.
    def where(conditions)
      clause = conditions.keys.collect { |column| "#{column} = :#{column}" }.join(" AND ")
      rows_found = db.execute("SELECT * FROM #{table_name} WHERE #{clause}", conditions)
      rows_found.collect { |row| new_from_db(row) } unless rows_found.empty?
    end

  end
end
6 changes: 0 additions & 6 deletions lib/concerns/persistable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,6 @@ def save
persisted? ? update : insert
end

# Returns the current value of every attribute declared by the class, in the
# key order of the class-level attributes collection. Each value is read by
# sending the attribute name to this object.
def attributes
  attribute_names = self.class.attributes.keys
  attribute_names.map { |name| send(name) }
end

# Returns the attribute values without the first one (everything from
# index 1 onward), for use when building SQL statements.
def attributes_for_sql
  every_value = attributes
  every_value.slice(1..-1)
end
Expand Down
Binary file added lib/models/flatiron.db
Binary file not shown.
168 changes: 168 additions & 0 deletions lib/models/scraper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Columns to add to students table:
# Name
# Profile Pic
# Social media links/ Twitter, LinkedIn, GitHub, Blog(RSS)
# Quote
# About/ Bio, Education, Work
# Coder Cred / Treehouse, Codeschool, Coderwall .reject(Github)

require 'nokogiri'
require 'open-uri'
require 'sqlite3'

require 'pp'
require 'json'

# Scrapes the student index page and the individual student profile pages,
# then stores the collected data in the "students" table of a SQLite database.
#
# Intended call order: create_database, scrape_index, scrape_students,
# insert_into_db.
class Scraper

  attr_accessor :db, :db_name, :students_html_array, :students

  # Keys assigned, by position, to the links found in "div.social-icons".
  SOCIAL_NETWORKS = [:twitter, :linkedin, :github, :blog].freeze

  # Creates (or opens) the database file and drops any existing students
  # table so a fresh scrape starts clean.
  #
  # db_name - path of the SQLite database file; also remembered in @db_name.
  #
  # SQLite errors are reported on stdout instead of being raised.
  def create_database(db_name)
    @db_name = db_name
    begin
      db = SQLite3::Database.new db_name
      db.execute("DROP TABLE IF EXISTS students")
    rescue SQLite3::Exception => e
      puts "Exception occurred"
      puts e
    ensure
      db.close if db
    end
  end

  # Scrapes name, profile URL, image, tagline and bio for every student
  # listed on the index page.
  #
  # index_html - URL (or path) of the index page.
  #
  # Fills @students_html_array with the profile hrefs (placeholder "#" links
  # removed) and @students with { 1 => {:name=>..., :url=>..., ...}, ... },
  # numbered in page order after the "#" entries are dropped.
  #
  # Raises RuntimeError when the page does not yield the same number of
  # names, urls, images, taglines and bios.
  def scrape_index(index_html)
    page = fetch_document(index_html)

    # div.big-comment before "a" won't select Matt's profile
    student_links = page.css("li.home-blog-post div.blog-title h3 a")
    names = student_links.collect { |link| link.content }
    urls = student_links.collect { |link| link.attr("href").downcase }
    @students_html_array = urls.reject { |url| url == "#" }
    urls = urls.collect { |url| url.sub("students/", "") }

    imgs = page.css("li.home-blog-post div.blog-thumb img").collect do |img|
      src = img.attr("src").downcase
      # relative image paths are made root-relative; absolute URLs are kept
      src.start_with?("http") ? src : "/#{src}"
    end

    taglines = page.css("li.home-blog-post p.home-blog-post-meta").collect { |node| node.content }
    bios = page.css("li.home-blog-post div.excerpt p").collect { |node| node.content }

    # the lists are combined by position, so they must all have the same size
    unless [names, imgs, taglines, bios].all? { |list| list.size == urls.size }
      raise "Index page produced mismatched numbers of names, urls, images, taglines and bios"
    end

    records = names.zip(urls, imgs, taglines, bios).collect do |name, url, img, tagline, bio|
      {:name=>name, :url=>url, :img=>img, :tagline=>tagline, :bio=>bio}
    end

    # drop placeholder entries, then number the remaining students from 1
    records.delete_if { |info| info[:url] == "#" }
    @students ||= {}
    records.each_with_index { |info, position| @students[position + 1] = info }
    # {1 => {:name=> "Avi Flombaum", :img=> "http://avi.com"}, 2 => {:name=> "Ashley Williams", :img=> "http://ashley.com"}}

    puts "\nThe students_html_array looks like this:\n #{students_html_array.inspect}"
    puts "There are #{students_html_array.size} elements in the array"
  end

  # Scrapes each profile page listed in @students_html_array and merges the
  # result into the matching @students entry (profile values win over index
  # values; differences are printed).
  #
  # index_html - base URL that each profile href is appended to.
  #
  # Pages without an "h4.ib_main_header" element are skipped, and HTTP errors
  # for a single profile are reported without stopping the run. Social links
  # missing from a profile are stored as nil.
  def scrape_students(index_html)
    @students ||= {}
    (@students_html_array || []).each_with_index do |student_html, index|
      begin
        puts
        student_page = fetch_document("#{index_html}/#{student_html}")

        # nil when the page does not use the expected profile template
        name_tag = student_page.css("h4.ib_main_header").first

        if name_tag
          puts "...scraping: #{student_html}"

          info = {}
          info[:name] = name_tag.content
          info[:prof_pic] = student_page.css("div .student_pic").attr("src").to_s

          # Query once and look links up by position; a profile with fewer
          # links than expected yields nil instead of raising on nil.attr.
          social_links = student_page.css("div.social-icons a")
          SOCIAL_NETWORKS.each_with_index do |network, position|
            link = social_links[position]
            info[network] = link && link.attr("href")
          end

          info[:quote] = student_page.css("div.textwidget").text

          student = {(index + 1) => info}
          p student

          # merge into existing hash
          @students.merge!(student) do |_id, oldinfo, newinfo|
            oldinfo.merge!(newinfo) do |_attribute, oldvalue, newvalue|
              puts "in index: #{oldvalue}; in profile: #{newvalue}" if oldvalue != newvalue
              newvalue
            end
          end
        else
          puts "#{student_html} isn't the correct template. Page will not be scraped."
        end

      rescue OpenURI::HTTPError => e
        puts "No profile found at " + student_html
        puts e
      end
    end
  end

  # Writes every entry of @students to the students table, replacing any
  # existing row with the same name.
  #
  # db_name - path of the SQLite database file.
  #
  # The table is created with one TEXT column per attribute found on ANY
  # student, so students that carry extra profile attributes can still be
  # inserted when the first student lacks them. A single connection is used
  # for the whole run and is always closed.
  def insert_into_db(db_name)
    students = @students || {}

    unless students.empty?
      columns = students.values.flat_map { |student| student.keys }.uniq
      column_definitions = columns.collect { |column| "#{column} TEXT" }.join(", ")

      db = SQLite3::Database.open db_name
      begin
        db.execute("CREATE TABLE IF NOT EXISTS students(id INTEGER PRIMARY KEY AUTOINCREMENT, #{column_definitions})")

        students.each_value do |student|
          student_attributes = student.keys.join(",")
          # named placeholders (:name, :url, ...) bound from the student hash
          placeholders = student.keys.collect { |key| ":#{key}" }.join(", ")

          # insert specific student into students table if it doesn't exist
          db.execute("DELETE FROM students WHERE name=?", student[:name])
          db.execute("INSERT INTO students(#{student_attributes}) VALUES (#{placeholders})", student)
        end
      ensure
        db.close if db
      end
    end

    puts "\nSuccessfully updated database!"
  end

  private

  # Fetches +url+ and parses it into a Nokogiri HTML document.
  #
  # Prefers URI.open (provided by open-uri on Ruby 2.5+) because Kernel#open
  # no longer opens URLs on Ruby 3.0+; falls back to Kernel#open on older
  # Rubies where URI.open is not public.
  def fetch_document(url)
    source = URI.respond_to?(:open) ? URI.open(url) : open(url)
    Nokogiri::HTML(source)
  end

end


# Script entry point: reset the database, scrape the index page and every
# student profile, then write the results to the database.
database_file = "flatiron.db"
site_root = "http://students.flatironschool.com"

Scraper.new.tap do |scraper|
  scraper.create_database(database_file)

  scraper.scrape_index(site_root)
  scraper.scrape_students(site_root)
  # pp scraper.students

  scraper.insert_into_db(database_file)
end

6 changes: 6 additions & 0 deletions lib/models/student.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ def self.attributes
ATTRIBUTES
end

# Returns the current value of every attribute declared by the class, in the
# key order of the class-level attributes collection. Each value is read by
# sending the attribute name to this object.
def attributes
  attribute_names = self.class.attributes.keys
  attribute_names.map { |name| send(name) }
end

def self.attr_accessors
self.attributes.keys.each do |k|
attr_accessor k
Expand Down
1 change: 1 addition & 0 deletions public/CNAME
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
students.flatironschool.com
64 changes: 64 additions & 0 deletions public/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
Deploy on Day One Project

Goal: To add yourself to the student section of FlatironSchool.com and create your profile page.

Steps:
- Clone the students website to your code directory.
- git@github.com:flatiron-school/002.students.flatironschool.com.git
- git clone git@github.com:flatiron-school/002.students.flatironschool.com.git
- Create a feature branch for your profile add-profile-aviflombaum, feature-studentgithub
- git co -b add-profile-aviflombaum
- Create your profile page within the students directory aviflombaum.html
- touch students/aviflombaum.html
- Add it, commit, push
- git add .
- git commit -am "Add profile for Avi Flombaum"
- git push
Create a pull request to merge your feature branch, add-profile-aviflombaum to the flatiron-school origin repository
https://github.com/flatiron-school/002.students.flatironschool.com/pull/new/add-profile-aviflombaum

- Squashing commits?

Ping Avi or Tag him in a comment (@aviflombaum) to let him know that your pull request needs to be reviewed
- Admin Merges Pull Request
- Deploy master of main repository
- Our Git Flow
- Anything in master branch is deployable.
- Create feature branches off master.
- Commit locally and regularly push your work to the same named branch on the server.
- When you need help or you think your branch is ready to merge, open a pull request on github.com
- After someone reviews and +1, you can merge that pull request into the master.
- Once a pull request is merged to master, deploy.

Never, ever, do anything in a master branch. It must remain stable.
- Do not merge the upstream develop branch into your feature branch; instead, rebase your branch on top of upstream/master to sync your feature branch with the latest master.
- Issue branches should be prefixed with the issue # and description.

We used these steps:

♕ git add .

♕ git commit -m 'messed with profile'

♕ git push upstream add-profile-adamjonas

♕ git fetch upstream

♕ git co master

♕ git pull upstream master

♕ git co add-profile-adamjonas

♕ git rebase master


http://scottchacon.com/2011/08/31/github-flow.html

https://github.com/diaspora/diaspora/wiki/Git-Workflow

http://mettadore.com/analysis/a-simple-git-rebase-workflow-explained/

http://zachholman.com/talk/how-github-uses-github-to-build-github

https://openshift.redhat.com/community/wiki/github-workflow-for-submitting-pull-requests
Loading

0 comments on commit 9d9d92e

Please sign in to comment.