-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathParser.java
80 lines (71 loc) · 2.29 KB
/
Parser.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
package com.yurii.salimov.lesson11.task02;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
/**
 * Downloads the document behind a URL and extracts the links found in its
 * {@code href="..."} attributes.
 *
 * <p>The page text and the extracted links are loaded lazily on first access
 * and cached afterwards. The caching is not synchronized.
 *
 * @author Yuriy Salimov ([email protected])
 * @version 1.0
 */
public final class Parser {

    /** Marker that introduces a double-quoted {@code href} attribute value. */
    private static final String HREF_PREFIX = "href=\"";

    /** Size of the buffer used while reading the response body. */
    private static final int BUFFER_SIZE = 5000;

    /** Address of the page; also the base for resolving relative links. */
    private final URL url;

    /** Cached page text; {@code null} until the first {@link #getHtml()} call. */
    private String html;

    /** Cached links; {@code null} until the first {@link #getUrls()} call. */
    private List<URL> urls;

    /**
     * Creates a parser for the given page address.
     *
     * @param url address of the page to load
     */
    public Parser(final URL url) {
        this.url = url;
    }

    /**
     * Creates a parser for the given page address.
     *
     * @param url textual address of the page to load
     * @throws MalformedURLException if {@code url} is not a valid URL
     */
    public Parser(final String url) throws MalformedURLException {
        this(new URL(url));
    }

    /**
     * Returns the text of the page, loading it on the first call.
     *
     * @return the page text; empty (or partial) if loading failed
     */
    public String getHtml() {
        if (this.html == null) {
            this.html = loadPage(this.url);
        }
        return this.html;
    }

    /**
     * Returns the links found on the page, parsing it on the first call.
     *
     * @return the list of links in document order; empty if none were found
     */
    public List<URL> getUrls() {
        // Cache on null only: an empty result is still a valid result and
        // must not trigger a re-parse on every call.
        if (this.urls == null) {
            this.urls = parseHtml(getHtml());
        }
        return this.urls;
    }

    /**
     * Reads the whole document behind {@code url} and decodes it as UTF-8.
     *
     * <p>Failures are reported on the standard error stream and whatever was
     * read before the failure is returned (best effort).
     *
     * @param url address of the document to read
     * @return the decoded document, possibly empty or partial on failure
     */
    private static String loadPage(final URL url) {
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try {
            final URLConnection connection = url.openConnection();
            try (InputStream stream = connection.getInputStream()) {
                final byte[] buffer = new byte[BUFFER_SIZE];
                int size;
                while ((size = stream.read(buffer)) >= 0) {
                    bytes.write(buffer, 0, size);
                }
            } finally {
                // Only HTTP(S) connections can be disconnected; other
                // protocols (file:, jar:, ...) are released by closing the stream.
                if (connection instanceof HttpURLConnection) {
                    ((HttpURLConnection) connection).disconnect();
                }
            }
        } catch (Exception ex) {
            // Best effort by design: report the problem and return what we have.
            ex.printStackTrace();
        }
        // Decode once, after all bytes are collected, so that multi-byte
        // characters split across read() calls are not corrupted.
        return new String(bytes.toByteArray(), StandardCharsets.UTF_8);
    }

    /**
     * Extracts every double-quoted {@code href} value from {@code html} and
     * resolves it against the page address.
     *
     * <p>Values that cannot be turned into a URL are reported on the standard
     * error stream and skipped. An {@code href} whose closing quote is missing
     * ends the scan.
     *
     * @param html the page text to scan
     * @return the resolved links in document order
     */
    private List<URL> parseHtml(final String html) {
        final List<URL> list = new ArrayList<>();
        int start = html.indexOf(HREF_PREFIX);
        while (start >= 0) {
            start += HREF_PREFIX.length();
            final int end = html.indexOf('"', start);
            if (end < 0) {
                // Unterminated attribute: nothing reliable left to extract.
                break;
            }
            final String link = html.substring(start, end);
            try {
                // Resolve against the page URL so that absolute links stay
                // intact and relative links follow the standard rules.
                list.add(new URL(this.url, link));
            } catch (MalformedURLException ex) {
                ex.printStackTrace();
            }
            start = html.indexOf(HREF_PREFIX, end + 1);
        }
        return list;
    }
}