"""Spiders recovered from a deletion diff of ``jedeschule/old/``.

Three Scrapy spiders that harvest school listings from project websites:

- ``SachsenSpider`` ("jugendforscht"): walks the Jugend-forscht site's
  state -> locality -> school hierarchy and emits one item per school.
- ``KlimaschutzSchulenAtlasSpider``: pages through the climate-protection
  school atlas and emits one item per (school, project) pair.
- ``SchuleGegenRassismusSpider``: scrapes the "Schule ohne Rassismus"
  member list, following each school card to its address detail page.
"""
import scrapy


class SachsenSpider(scrapy.Spider):
    """Crawl jugend-forscht.bmbfcluster.de top-down and yield school items."""

    name = "jugendforscht"
    base_url = "http://jugend-forscht.bmbfcluster.de"
    # Query suffix forcing the site's list view; appended to state links.
    list = "&V=list#mpl"

    start_urls = ['http://jugend-forscht.bmbfcluster.de/index.php?M=445&PID=19']

    def parse(self, response):
        # Top level: one link per federal state.
        for li in response.css(".contextcontent li"):
            link = li.css('a::attr(href)').extract_first()
            yield scrapy.Request(self.base_url + link + self.list,
                                 callback=self.parse_state)

    def parse_state(self, response):
        # One link per locality within the state.
        for li in response.css('.geo_list li'):
            link = li.css('a::attr(href)').extract_first()
            yield scrapy.Request(self.base_url + link,
                                 callback=self.parse_locality)

    def parse_locality(self, response):
        # One link per school within the locality.
        for li in response.css('.geo_list li'):
            link = li.css('a::attr(href)').extract_first()
            yield scrapy.Request(self.base_url + link,
                                 callback=self.parse_school)

    def parse_school(self, response):
        # One link per school detail page.
        for li in response.css('.geo_list li'):
            link = li.css('a::attr(href)').extract_first()
            yield scrapy.Request(self.base_url + link,
                                 callback=self.parse_item)

    def parse_item(self, response):
        """Extract school name, place, competition and partner from the
        detail page.  A missing section simply leaves its key out of the
        returned dict.

        FIX: removed the debug-only ``response.h4 = h4`` / ``response.p = p``
        attribute writes and the commented-out ``inspect_response`` calls
        left over from interactive development.
        """
        collection = {}
        h4 = response.css('.even h4')
        p = response.css('.even p')

        if len(h4) > 0:
            collection['Schule'] = h4[0].css('::text').extract_first()
        if len(p) > 0:
            collection['Ort'] = p[0].css('::text').extract_first()
        if len(h4) > 1:
            collection['Wettbewerb'] = h4[1].css('::text').extract_first()
        if len(p) > 1:
            collection['partner'] = p[1].css('::text').extract_first()
        return collection


class KlimaschutzSchulenAtlasSpider(scrapy.Spider):
    """Page through klimaschutzschulenatlas.de and yield one item per
    (school, project) pair found on each school's detail page."""

    name = "klimaschutzschulenatlas"
    start_urls = ['https://www.klimaschutzschulenatlas.de/der-atlas']

    def parse(self, response):
        # The atlas is behind a search form; submit it with defaults to
        # reach the first result page.
        yield scrapy.FormRequest.from_response(
            response, callback=self.parse_projectlist)

    def parse_projectlist(self, response):
        schoollinks = response.css(".media-body > a::attr(href)").extract()
        for link in schoollinks:
            yield scrapy.Request('https://www.klimaschutzschulenatlas.de' + link,
                                 callback=self.parse_school)
        # A full page holds 16 results; a full page implies there may be a
        # next one.  [-2] is the "next" arrow (the last entry is "last").
        if len(schoollinks) == 16:
            next_page = response.css('.pagination a::attr(href)').extract()[-2]
            yield scrapy.Request('https://www.klimaschutzschulenatlas.de' + next_page,
                                 callback=self.parse_projectlist)

    def parse_school(self, response):
        """Build the base school record, then emit one copy per project.

        FIX: the original guarded the address block with
        ``len(school_information) > 4`` while only reading index 3, so
        schools with exactly 4 info entries silently lost their address;
        the guard is now ``> 3``.
        FIX: the original split "PLZ Ort" on every space and kept only the
        second token, truncating multi-word places ("Frankfurt am Main" ->
        "Frankfurt"); ``split(' ', 1)`` keeps the full place name.
        FIX: the original mutated and re-yielded one shared dict per
        project; each yield is now an independent copy.
        """
        school = {}
        school_information = response.css('.school-info li::text').extract()
        school['type'] = school_information[0] if len(school_information) > 0 else ''
        school['state'] = school_information[1] if len(school_information) > 1 else ''
        school['street'] = school_information[2] if len(school_information) > 2 else ''
        if len(school_information) > 3:
            address_information = school_information[3].strip().split(' ', 1)
            school['plz'] = address_information[0]
            if len(address_information) > 1:
                school['place'] = address_information[1]

        projects = response.css('.col-xs-6 a::attr(title)').extract()

        # NOTE(review): schools with no listed project are not yielded at
        # all, matching the original behavior — confirm this is intended.
        for project in projects:
            yield {**school, 'project': project}


class SchuleGegenRassismusSpider(scrapy.Spider):
    """Scrape the "Schule ohne Rassismus" member list, following each
    school card to its detail page for the address."""

    name = "schule-gegen-rassismus"
    start_urls = ['http://www.schule-ohne-rassismus.org/courage-schulen/alle-courage-schulen/']

    def parse(self, response):
        schoolcards = response.css(".news-list-item")
        for schoolcard in schoolcards:
            school = {}
            link = schoolcard.css('#schoolcard_name a')
            school['name'] = link.css('::text').extract_first().strip()
            school['link'] = link.css('::attr(href)').extract_first().strip()
            # The godfather line reads "Pate: <name>"; fall back to the
            # whole text when there is no colon.
            godfather = schoolcard.css('#schoolcard_godparent p::text').extract_first().split(':')
            school['pate'] = godfather[1] if len(godfather) > 1 else godfather[0]
            school['date'] = schoolcard.css('#schoolcard_title .news-list-date::text').extract_first().strip()
            school['category'] = schoolcard.css('#schoolcard_legend::text').extract_first().strip()
            yield scrapy.Request('http://www.schule-ohne-rassismus.org/' + school['link'],
                                 meta={'school': school},
                                 callback=self.parse_detail)
        # A full page holds 20 cards; follow the "nächste" (next) link.
        # FIX: renamed the local from ``next`` — it shadowed the builtin.
        if len(schoolcards) == 20:
            next_page = response.css("div.news-list-browse a:contains('chste')::attr(href)").extract_first()
            yield scrapy.Request('http://www.schule-ohne-rassismus.org/' + next_page,
                                 callback=self.parse)

    def parse_detail(self, response):
        """Attach street, postcode and place from the detail page to the
        school record carried in ``response.meta``.

        FIX: the original split the "PLZ Ort" line on every space and
        rejoined after popping the first token; ``split(' ', 1)`` expresses
        the same result directly.
        """
        school = response.meta['school']

        address = response.css('.news-single-item p::text').extract()
        school['street'] = address[0]
        if len(address) > 1:
            postcode, _, place = address[1].partition(' ')
            school['postcode'] = postcode
            school['place'] = place
        yield school