Skip to content

Commit fadf77a

Browse files
committed
add model, update test
1 parent 0a3e59f commit fadf77a

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package model;
2+
3+
// https://www.zenrows.com/blog/web-scraping-java#extract-data
4+
5+
/**
 * Plain mutable holder for the four string properties of a product:
 * {@code url}, {@code image}, {@code name} and {@code price}.
 *
 * <p>Values are stored exactly as passed to the setters; nothing is parsed or validated.
 * Every field is {@code null} until its setter has been called.
 */
public class PokemonProduct {
    private String url;
    private String image;
    private String name;
    private String price;

    /** Returns the stored url, or {@code null} if none was set. */
    public String getUrl() {
        return url;
    }

    /** Stores the given url as-is. */
    public void setUrl(String url) {
        this.url = url;
    }

    /** Returns the stored image value, or {@code null} if none was set. */
    public String getImage() {
        return image;
    }

    /** Stores the given image value as-is. */
    public void setImage(String image) {
        this.image = image;
    }

    /** Returns the stored name, or {@code null} if none was set. */
    public String getName() {
        return name;
    }

    /** Stores the given name as-is. */
    public void setName(String name) {
        this.name = name;
    }

    /** Returns the stored price text, or {@code null} if none was set. */
    public String getPrice() {
        return price;
    }

    /** Stores the given price text as-is. */
    public void setPrice(String price) {
        this.price = price;
    }

    /**
     * Renders the four fields as a single-line JSON-style object.
     *
     * <p>Double quotes, backslashes and control characters inside a value are escaped so that
     * the quoting of the output stays balanced. An unset ({@code null}) field is rendered as
     * the quoted text {@code "null"}.
     *
     * @return the JSON-style representation of this product
     */
    @Override
    public String toString() {
        // The irregular spacing around keys is kept on purpose so existing output is unchanged.
        return "{ \"url\":\"" + escape(url) + "\", "
                + " \"image\": \"" + escape(image) + "\", "
                + "\"name\":\"" + escape(name) + "\", "
                + "\"price\": \"" + escape(price) + "\" }";
    }

    /**
     * Escapes a value for placement between double quotes in {@link #toString()}.
     *
     * @param value the raw field value, possibly {@code null}
     * @return the escaped text; the literal {@code null} when {@code value} is {@code null}
     */
    private static String escape(String value) {
        if (value == null) {
            // Same text that plain string concatenation would have produced for a null field.
            return "null";
        }
        StringBuilder out = new StringBuilder(value.length());
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters use the generic \\uXXXX form.
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
                    break;
            }
        }
        return out.toString();
    }
}

dev_projects/ScrapingService/src/test/java/com/yen/WebScrapingTest.java

+58
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.yen;
22

3+
import model.PokemonProduct;
34
import org.apache.commons.csv.CSVFormat;
45
import org.apache.commons.csv.CSVPrinter;
56
import org.jsoup.Jsoup;
@@ -17,6 +18,8 @@
1718
import java.net.URL;
1819
import java.nio.file.Files;
1920
import java.nio.file.Paths;
21+
import java.util.ArrayList;
22+
import java.util.List;
2023

2124
public class WebScrapingTest {
2225

@@ -68,4 +71,59 @@ public void webScrapTest1() throws IOException {
6871

6972
}
7073

74+
75+
// https://www.zenrows.com/blog/web-scraping-java#connect-to-target-website
76+
@Test
77+
public void webScrapTest2(){
78+
79+
String URL = "https://scrapeme.live/shop";
80+
81+
// initializing the HTML Document page variable
82+
Document doc;
83+
84+
List<PokemonProduct> pokemonProducts = new ArrayList<>();
85+
86+
try {
87+
// fetching the target website
88+
// doc = Jsoup.connect(URL).get();
89+
doc = Jsoup
90+
.connect(URL)
91+
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36")
92+
.header("Accept-Language", "*")
93+
.get();
94+
//System.out.println("doc = " + doc);
95+
96+
// Elements products = doc.select("li.product");
97+
// System.out.println("products = " + products);
98+
99+
// initializing the list of Java object to store
100+
// the scraped data
101+
//List<PokemonProduct> pokemonProducts = new ArrayList<>();
102+
103+
// retrieving the list of product HTML elements
104+
Elements products = doc.select("li.product");
105+
106+
// iterating over the list of HTML products
107+
for (Element product : products) {
108+
PokemonProduct pokemonProduct = new PokemonProduct();
109+
110+
// extracting the data of interest from the product HTML element
111+
// and storing it in pokemonProduct
112+
pokemonProduct.setUrl(product.selectFirst("a").attr("href"));
113+
pokemonProduct.setImage(product.selectFirst("img").attr("src"));
114+
pokemonProduct.setName(product.selectFirst("h2").text());
115+
pokemonProduct.setPrice(product.selectFirst("span").text());
116+
117+
// adding pokemonProduct to the list of the scraped products
118+
pokemonProducts.add(pokemonProduct);
119+
}
120+
121+
} catch (IOException e) {
122+
throw new RuntimeException(e);
123+
}
124+
125+
System.out.println(">>> pokemonProducts = ");
126+
System.out.println(pokemonProducts);
127+
}
128+
71129
}

0 commit comments

Comments
 (0)