forked from ashleygwilliams/student-sinatra
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Downloaded all missing student profile pics
- Loading branch information
Showing
281 changed files
with
33,955 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Mixin of class-level finder methods backed by a SQLite-style connection.
#
# A class that extends Findable::ClassMethods must itself provide:
#   db               - object responding to #execute(sql, *bind_vars)
#   table_name       - name of the table to query
#   new_from_db(row) - builds an instance from a single result row
module Findable
  module ClassMethods

    # Returns every row of the table as an array of objects built by
    # new_from_db (empty array when there are no rows).
    #
    # Uses table_name like the other finders; falls back to the historical
    # "students" table when the extending class does not define table_name.
    def all
      table = respond_to?(:table_name, true) ? table_name : "students"
      all_rows = db.execute("SELECT * FROM #{table};")
      all_rows.collect { |row| new_from_db(row) }
    end

    # Defines one singleton finder, find_by_<attribute>(value), per attribute
    # given. Each finder returns the first matching object, or nil when no
    # row matches.
    #
    # find_by must be called by the class extending this module to initialize
    # all find_by_<attribute> methods (e.g. `find_by :id, :name`).
    # The attribute names are interpolated into SQL, so they must come from
    # the class definition, never from user input.
    def find_by(*args)
      args.flatten.each do |arg|
        define_singleton_method("find_by_#{arg}") do |value|
          rows_found = db.execute("SELECT * FROM #{table_name} WHERE #{arg} = ?", value)
          new_from_db(rows_found.first) unless rows_found.empty?
        end
      end
    end

    # Looks a record up by id. Relies on find_by_id, so the extending class
    # must have called `find_by :id` (or similar) beforehand.
    def find(id)
      find_by_id(id)
    end

    # Returns the objects matching every column => value pair in conditions,
    # or nil when no row matches.
    #
    # conditions is a hash; its keys become "column = :column" fragments
    # joined with AND, and the hash itself is passed as the named bind
    # variables. An empty hash selects every row.
    #
    # Raises ArgumentError when a key is not a plain identifier, because keys
    # are interpolated into the SQL text and cannot be bound as parameters.
    def where(conditions)
      columns = conditions.to_a.collect { |condition| condition[0] }
      invalid = columns.reject { |column| column.to_s =~ /\A\w+\z/ }
      unless invalid.empty?
        raise ArgumentError, "invalid column name(s): #{invalid.collect(&:inspect).join(', ')}"
      end

      clause = columns.collect { |column| "#{column} = :#{column}" }.join(" AND ")
      sql = "SELECT * FROM #{table_name}"
      sql += " WHERE #{clause}" unless clause.empty?

      rows_found = db.execute(sql, conditions)
      rows_found.collect { |row| new_from_db(row) } unless rows_found.empty?
    end

  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
# Columns to add to students table: | ||
# Name | ||
# Profile Pic | ||
# Social media links/ Twitter, LinkedIn, GitHub, Blog(RSS) | ||
# Quote | ||
# About/ Bio, Education, Work | ||
# Coder Cred / Treehouse, Codeschool, Coderwall .reject(Github) | ||
|
||
require 'nokogiri' | ||
require 'open-uri' | ||
require 'sqlite3' | ||
|
||
require 'pp' | ||
require 'json' | ||
|
||
# Scrapes the student index page and the individual student profile pages,
# accumulates everything in @students (a hash of id => attribute hash) and
# writes the result into the "students" table of a SQLite database.
#
# Typical sequence: create_database, scrape_index, scrape_students,
# insert_into_db.
class Scraper

  attr_accessor :db, :db_name, :students_html_array, :students

  # Creates (or opens) the SQLite database file db_name and drops the
  # students table if it exists, so a later insert starts from scratch.
  # SQLite errors are printed, not raised.
  def create_database(db_name)
    @db_name = db_name
    begin
      connection = SQLite3::Database.new db_name
      connection.execute("DROP TABLE IF EXISTS students")
    rescue SQLite3::Exception => e
      puts "Exception occurred"
      puts e
    ensure
      connection.close if connection
    end
  end

  # Scrapes the index page at index_html.
  #
  # Sets @students_html_array to the (downcased) profile hrefs, skipping "#"
  # placeholders, and fills @students with
  #   {1 => {:name=>..., :url=>..., :img=>..., :tagline=>..., :bio=>...}, 2 => ...}
  # numbered in page order, again skipping entries whose url is "#".
  #
  # Raises RuntimeError when the page does not yield the same number of
  # names, urls, images, taglines and bios.
  def scrape_index(index_html)
    page = Nokogiri::HTML(fetch_page(index_html))

    # div.big-comment before "a" won't select Matt's profile
    student_links = page.css("li.home-blog-post div.blog-title h3 a")
    names = student_links.collect { |link| link.content }
    urls = student_links.collect { |link| link.attr("href").downcase }
    @students_html_array = urls.reject { |url| url == "#" }
    urls = urls.collect { |url| url.sub("students/", "") }

    imgs = page.css("li.home-blog-post div.blog-thumb img").collect { |img| img.attr("src").downcase }
    # make relative image paths root-relative; leave absolute URLs untouched
    imgs = imgs.collect { |img| (img[0..3] == "http" ? "" : "/") + img }

    taglines = page.css("li.home-blog-post p.home-blog-post-meta").collect { |node| node.content }
    bios = page.css("li.home-blog-post div.excerpt p").collect { |node| node.content }

    # ensure sizes are all same, otherwise the per-index pairing below is wrong
    sizes = [names, imgs, taglines, bios].collect { |list| list.size }
    unless sizes.all? { |size| size == urls.size }
      raise "index page lists are inconsistent: #{urls.size} urls, #{names.size} names, " \
            "#{imgs.size} imgs, #{taglines.size} taglines, #{bios.size} bios"
    end

    # create array of all data
    students_array = []
    names.each_index do |i|
      students_array << {:name=>names[i], :url=>urls[i], :img=>imgs[i], :tagline=>taglines[i], :bio=>bios[i]}
    end

    # convert into hash and store as instance variable
    students_array.delete_if { |info| info[:url] == "#" }
    @students ||= {}
    students_array.each_with_index { |info, i| @students[i + 1] = info }

    puts "\nThe students_html_array looks like this:\n #{students_html_array.inspect}"
    puts "There are #{students_html_array.size} elements in the array"
  end

  # Scrapes the individual student profiles listed in @students_html_array
  # (built by scrape_index) and merges what it finds into @students.
  #
  # A page is only scraped when it contains the h4.ib_main_header name tag;
  # other pages are reported and skipped. HTTP errors for a single profile
  # are printed and do not stop the loop.
  def scrape_students(index_html)
    @students_html_array.each_with_index do |student_html, position|
      begin
        puts
        student_page = Nokogiri::HTML(fetch_page("#{index_html}/#{student_html}"))

        # nil when the selector is not found, i.e. the page uses another template
        html_tag_for_name = student_page.css("h4.ib_main_header").first

        if html_tag_for_name
          puts "...scraping: #{student_html}"

          student = {(position + 1) => profile_info(student_page, html_tag_for_name)}
          p student

          # merge into existing hash; profile values win, differences are reported
          @students.merge!(student) do |_id, oldinfo, newinfo|
            oldinfo.merge!(newinfo) do |_attribute, oldvalue, newvalue|
              puts "in index: #{oldvalue}; in profile: #{newvalue}" if oldvalue != newvalue
              newvalue
            end
          end
        else
          puts "#{student_html} isn't the correct template. Page will not be scraped."
        end

      rescue OpenURI::HTTPError => e
        puts "No profile found at " + student_html
        puts e
      end
    end
  end

  # Writes every student in @students into the students table of db_name,
  # replacing any existing row with the same name.
  #
  # The table gets one TEXT column for every attribute found on ANY student,
  # so students that only have index data and students that also have
  # profile data can live in the same table. A single connection is used for
  # the whole run and is always closed.
  def insert_into_db(db_name)
    unless @students.empty?
      column_names = @students.values.collect { |student| student.keys }.flatten.uniq
      column_definitions = column_names.collect { |column| "#{column} TEXT" }.join(", ")

      connection = SQLite3::Database.open db_name
      begin
        connection.execute("CREATE TABLE IF NOT EXISTS students(id INTEGER PRIMARY KEY AUTOINCREMENT,
          #{column_definitions})"
        )

        @students.each do |_id, student|
          student_attributes = student.keys.join(",")
          # named placeholders (:name, :url, ...) bound from the student hash
          student_values = student.keys.collect { |key| ":#{key}" }.join(", ")

          # insert specific student into students table, replacing an older row
          connection.execute("DELETE FROM students WHERE name=?", student[:name])
          connection.execute("INSERT INTO students(#{student_attributes})
            VALUES (#{student_values})", student
          )
        end
      ensure
        connection.close
      end
    end
    puts "\nSuccessfully updated database!"
  end

  private

  # Opens url for reading. Uses URI.open where open-uri exposes it publicly;
  # Kernel#open no longer accepts URLs on Ruby 3.0+. Falls back to open on
  # older Rubies.
  def fetch_page(url)
    URI.respond_to?(:open) ? URI.open(url) : open(url)
  end

  # Builds the attribute hash for one profile page. Social links are taken
  # by position (twitter, linkedin, github, blog); a link missing from the
  # page yields nil instead of raising.
  def profile_info(student_page, html_tag_for_name)
    info = {}
    info[:name] = html_tag_for_name.content
    info[:prof_pic] = student_page.css("div .student_pic").attr("src").to_s

    social_links = student_page.css("div.social-icons a")
    [:twitter, :linkedin, :github, :blog].each_with_index do |network, i|
      link = social_links[i]
      info[network] = link ? link.attr("href") : nil
    end

    info[:quote] = student_page.css("div.textwidget").text
    info
  end

end
|
||
|
||
# Driver: rebuild the database from scratch, scrape the index page and then
# every student profile, and store the combined result.
database_file = "flatiron.db"
site_root = "http://students.flatironschool.com"

Scraper.new.tap do |scraper|
  scraper.create_database(database_file)

  scraper.scrape_index(site_root)
  scraper.scrape_students(site_root)
  # pp scraper.students

  scraper.insert_into_db(database_file)
end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
students.flatironschool.com |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
Deploy on Day One Project | ||
|
||
Goal: To add yourself to the student section of FlatironSchool.com and create your profile page. | ||
|
||
Steps: | ||
- Clone the students website to your code directory. | ||
- git@github.com:flatiron-school/002.students.flatironschool.com.git | ||
- git clone git@github.com:flatiron-school/002.students.flatironschool.com.git | ||
- Create a feature branch for your profile add-profile-aviflombaum, feature-studentgithub | ||
- git co -b add-profile-aviflombaum | ||
- Create your profile page within the students directory, e.g. aviflombaum.html | ||
- touch students/aviflombaum.html | ||
- Add it, commit, push | ||
- git add . | ||
- git commit -am "Add profile for Avi Flombaum" | ||
- git push | ||
Create a pull request to merge your feature branch, add-profile-aviflombaum to the flatiron-school origin repository | ||
https://github.com/flatiron-school/002.students.flatironschool.com/pull/new/add-profile-aviflombaum | ||
|
||
- Squashing commits? | ||
|
||
Ping Avi or Tag him in a comment (@aviflombaum) to let him know that your pull request needs to be reviewed | ||
- Admin Merges Pull Request | ||
- Deploy master of main repository | ||
- Our Git Flow | ||
- Anything in master branch is deployable. | ||
- Create feature branches off master. | ||
- Commit locally and regularly push your work to the same named branch on the server. | ||
- When you need help or you think your branch is ready to merge, open a pull request on github.com | ||
- After someone reviews and +1, you can merge that pull request into the master. | ||
- Once a pull request is merged to master, deploy. | ||
|
||
Never, ever, do anything in a master branch. It must remain stable. | ||
- Do not merge the upstream develop branch into your feature branch; instead, rebase your branch on top of upstream/master to sync your feature branch with the latest master. | ||
- Issue branches should be prefixed with the issue # and description. | ||
|
||
We used these steps: | ||
|
||
♕ git add . | ||
|
||
♕ git commit -m 'messed with profile' | ||
|
||
♕ git push upstream add-profile-adamjonas | ||
|
||
♕ git fetch upstream | ||
|
||
♕ git co master | ||
|
||
♕ git pull upstream master | ||
|
||
♕ git co add-profile-adamjonas | ||
|
||
♕ git rebase master | ||
|
||
|
||
http://scottchacon.com/2011/08/31/github-flow.html | ||
|
||
https://github.com/diaspora/diaspora/wiki/Git-Workflow | ||
|
||
http://mettadore.com/analysis/a-simple-git-rebase-workflow-explained/ | ||
|
||
http://zachholman.com/talk/how-github-uses-github-to-build-github | ||
|
||
https://openshift.redhat.com/community/wiki/github-workflow-for-submitting-pull-requests |
Oops, something went wrong.