Skip to content

Commit 8522d1d

Browse files
committed
Minor improvements on the tests.
1 parent 33244f3 commit 8522d1d

File tree

3 files changed: 16 additions and 8 deletions.

java-warc/src/main/java/com/github/bottomlessarchive/warc/service/WarcRecordStreamFactory.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNu
2424
return WarcRecordStreamFactory.streamOf(url, EVERY_WARC_RECORD_TYPE);
2525
}
2626

27+
/**
 * Varargs convenience overload: opens the WARC resource behind {@code url} and returns a stream of its records.
 *
 * <p>The stream obtained from {@code url.openStream()} is wrapped in a {@link BufferedInputStream} and then in an
 * {@code AvailableInputStream}, and handed to another {@code streamOf} overload together with
 * {@code WarcReader.DEFAULT_CHARSET}, the flag {@code true} and the record types copied into a list with
 * {@code List.of(requiredRecordTypes)}.
 *
 * <p>NOTE(review): the meaning of the {@code true} flag is not visible here (presumably it marks the input as
 * compressed) - confirm against the overload being called.
 *
 * @param url                 the location of the WARC resource to open
 * @param requiredRecordTypes the record types passed on to the delegate overload; presumably only records of
 *                            these types are emitted - TODO confirm. {@code List.of} rejects {@code null} elements
 *                            with a {@link NullPointerException}.
 * @param <T>                 the content block type of the returned records
 * @return the record stream produced by the delegate {@code streamOf} overload
 * @throws WarcNetworkException if an {@link IOException} is raised inside the try block, e.g. while opening the
 *                              URL; the original exception is kept as the cause
 */
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final URL url,
28+
@NotNull @NonNull final WarcRecordType... requiredRecordTypes) {
29+
try {
30+
return streamOf(new AvailableInputStream(new BufferedInputStream(url.openStream())),
31+
WarcReader.DEFAULT_CHARSET, true, List.of(requiredRecordTypes));
32+
} catch (IOException e) {
33+
throw new WarcNetworkException("Unable to open WARC location: " + url + "!", e);
34+
}
35+
}
36+
2737
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final URL url,
2838
@NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
2939
try {

java-warc/src/test/java/com/github/bottomlessarchive/warc/test/TestFileWarcReader.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,14 @@
66
import com.github.bottomlessarchive.warc.service.content.response.domain.ResponseContentBlock;
77
import com.github.bottomlessarchive.warc.service.record.domain.WarcRecord;
88

9-
import java.io.File;
109
import java.io.FileInputStream;
1110
import java.util.Optional;
1211

1312
public class TestFileWarcReader {
1413

1514
public static void main(final String... arg) throws Exception {
1615
final WarcReader warcReader = new WarcReader(new FileInputStream(
17-
new File("C:\\warc-test\\CC-MAIN-20180716232549-20180717012549-00001.warc.gz")));
16+
"C:\\warc-test\\CC-MAIN-20180716232549-20180717012549-00001.warc.gz"));
1817

1918
boolean hasNext = true;
2019
while (hasNext) {
@@ -23,8 +22,7 @@ public static void main(final String... arg) throws Exception {
2322

2423
optionalWarcRecord
2524
.filter(WarcRecord::isResponse)
26-
.map(warcRecord -> ((ResponseContentBlock) warcRecord.getWarcContentBlock())
27-
.getPayloadAsString())
25+
.map(warcRecord -> ((ResponseContentBlock) warcRecord.getContentBlock()).getPayloadAsString())
2826
.ifPresent(System.out::println);
2927

3028
hasNext = optionalWarcRecord.isPresent();

java-warc/src/test/java/com/github/bottomlessarchive/warc/test/TestUrlWarcReader.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
import com.github.bottomlessarchive.warc.service.WarcRecordStreamFactory;
44
import com.github.bottomlessarchive.warc.service.content.response.domain.ResponseContentBlock;
5-
import com.github.bottomlessarchive.warc.service.record.domain.WarcRecord;
5+
import com.github.bottomlessarchive.warc.service.record.domain.WarcRecordType;
6+
67
import java.net.URL;
78

89
public class TestUrlWarcReader {
@@ -11,9 +12,8 @@ public static void main(final String... arg) throws Exception {
1112
final URL warcUrl = new URL(
1213
"https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2018-43/segments/1539583508988.18/warc/CC-MAIN-20181015080248-20181015101748-00000.warc.gz");
1314

14-
WarcRecordStreamFactory.streamOf(warcUrl)
15-
.filter(WarcRecord::isResponse)
16-
.map(entry -> ((ResponseContentBlock) entry.getWarcContentBlock()).getPayloadAsString())
15+
WarcRecordStreamFactory.streamOf(warcUrl, WarcRecordType.RESPONSE)
16+
.map(entry -> ((ResponseContentBlock) entry.getContentBlock()).getPayloadAsString())
1717
.forEach(System.out::println);
1818
}
1919
}

0 commit comments

Comments
 (0)