diff --git a/be/src/clucene b/be/src/clucene index ffc2051f090ff4..3526de75334f64 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit ffc2051f090ff4a2e36e9a243b7650dd822f00d3 +Subproject commit 3526de75334f64aea3f299d395c8a460a9054e37 diff --git a/be/test/olap/rowset/segment_v2/inverted_index/data/data1.csv b/be/test/olap/rowset/segment_v2/inverted_index/data/data1.csv new file mode 100644 index 00000000000000..fa4e2129584999 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/data/data1.csv @@ -0,0 +1,1000 @@ +89,fine,https://musicstream.com,97 +44,good,https://yourblog.net,74 +72,ok,https://musicstream.com,80 +14,yes,https://shoponline.com,50 +47,maybe,https://github.com,16 +97,maybe,http://forum.com,88 +17,yes,https://musicstream.com,13 +87,good,https://musicstream.com,53 +6,excellent,http://forum.com,49 +4,terrible,https://yourblog.net,58 +56,bad,https://google.com,10 +5,no,http://example.com,93 +30,bad,http://news.com,28 +30,maybe,https://musicstream.com,13 +83,bad,https://yourblog.net,48 +41,great,https://videosite.com,73 +10,terrible,http://news.com,6 +80,no,https://shoponline.com,21 +92,fine,https://github.com,20 +71,terrible,http://forum.com,8 +84,good,http://mysite.org,77 +25,no,http://example.com,71 +52,maybe,http://mysite.org,0 +99,great,http://news.com,74 +59,ok,http://mysite.org,46 +47,ok,http://mysite.org,26 +77,bad,https://musicstream.com,73 +53,excellent,https://github.com,97 +97,ok,http://example.com,69 +88,great,https://musicstream.com,47 +31,fine,https://videosite.com,1 +66,great,https://google.com,67 +16,yes,http://forum.com,43 +31,yes,http://news.com,49 +35,good,https://github.com,48 +11,terrible,https://google.com,56 +53,maybe,http://mysite.org,87 +16,bad,https://google.com,77 +55,terrible,http://news.com,33 +51,yes,https://google.com,68 +71,good,https://yourblog.net,71 +32,great,https://github.com,37 +22,maybe,http://example.com,16 +36,ok,http://forum.com,52 +97,good,https://yourblog.net,100 +54,no,https://videosite.com,90 +38,terrible,https://shoponline.com,17 +81,excellent,https://yourblog.net,11 +66,great,http://mysite.org,4 +27,fine,https://shoponline.com,88 +48,good,https://musicstream.com,16 +78,good,https://shoponline.com,63 +34,bad,https://shoponline.com,77 +19,yes,https://shoponline.com,98 +39,terrible,https://github.com,86 +83,excellent,http://news.com,63 +38,no,https://google.com,64 +43,terrible,https://yourblog.net,30 +30,fine,https://videosite.com,72 +73,great,http://mysite.org,31 +96,good,http://example.com,16 +25,yes,http://mysite.org,80 +98,fine,https://shoponline.com,46 +60,fine,https://musicstream.com,66 +90,yes,http://news.com,80 +41,excellent,https://github.com,56 +86,no,http://forum.com,31 +81,ok,http://forum.com,71 +42,bad,https://musicstream.com,97 +90,terrible,https://videosite.com,57 +22,yes,http://forum.com,15 +34,excellent,https://shoponline.com,77 +30,no,https://yourblog.net,85 +51,ok,http://news.com,52 +28,terrible,http://example.com,37 +23,yes,http://forum.com,83 +42,no,http://forum.com,15 +83,maybe,http://forum.com,10 +59,ok,https://github.com,100 +83,maybe,http://mysite.org,78 +4,good,https://videosite.com,68 +63,good,http://forum.com,71 +61,ok,http://mysite.org,66 +83,great,http://example.com,75 +82,no,https://google.com,95 +71,terrible,https://musicstream.com,19 +42,bad,http://mysite.org,66 +14,great,https://google.com,83 +4,yes,https://yourblog.net,84 +49,great,https://github.com,2 +2,yes,http://mysite.org,84 +33,excellent,http://forum.com,78 +5,good,http://example.com,53 +5,great,https://yourblog.net,86 +80,yes,http://mysite.org,85 +8,great,https://shoponline.com,84 +26,no,https://github.com,56 +82,fine,https://shoponline.com,39 +0,bad,https://videosite.com,49 +30,terrible,https://videosite.com,41 +76,maybe,http://news.com,60 +44,maybe,http://example.com,33 +9,terrible,http://forum.com,93 +25,ok,https://yourblog.net,62 +45,great,https://google.com,17 +5,terrible,http://news.com,95 +49,good,http://example.com,97 +29,great,https://shoponline.com,69 +73,good,https://google.com,23 +94,yes,http://forum.com,25 +58,bad,https://videosite.com,32 +98,excellent,https://musicstream.com,36 +25,good,http://example.com,56 +14,ok,https://yourblog.net,34 +30,fine,http://example.com,38 +47,yes,http://mysite.org,71 +20,fine,http://news.com,1 +89,bad,https://google.com,50 +8,fine,https://google.com,64 +21,maybe,https://musicstream.com,11 +12,fine,https://videosite.com,48 +27,maybe,https://musicstream.com,2 +76,ok,https://musicstream.com,30 +38,excellent,https://videosite.com,54 +57,terrible,https://yourblog.net,22 +21,good,https://google.com,39 +32,good,https://yourblog.net,68 +47,terrible,https://google.com,49 +90,good,http://mysite.org,21 +5,yes,http://news.com,33 +49,no,http://news.com,54 +76,maybe,http://news.com,16 +24,maybe,http://forum.com,17 +77,good,http://example.com,92 +28,excellent,http://example.com,27 +27,no,https://github.com,52 +55,ok,http://forum.com,97 +85,bad,https://shoponline.com,43 +24,terrible,http://news.com,39 +30,excellent,http://mysite.org,72 +70,maybe,http://mysite.org,76 +14,good,https://videosite.com,32 +73,no,http://news.com,25 +29,terrible,http://mysite.org,45 +52,great,http://news.com,11 +26,yes,http://forum.com,17 +30,excellent,http://news.com,88 +95,great,https://musicstream.com,63 +33,great,https://github.com,70 +26,great,https://github.com,17 +76,great,https://shoponline.com,94 +93,bad,https://google.com,29 +48,no,https://yourblog.net,93 +42,yes,https://videosite.com,71 +4,fine,https://github.com,62 +34,maybe,http://forum.com,78 +36,yes,https://google.com,39 +48,bad,https://shoponline.com,17 +84,no,https://videosite.com,11 +87,ok,https://musicstream.com,51 +7,ok,https://google.com,73 +81,bad,https://github.com,91 +92,ok,https://google.com,82 +15,terrible,http://forum.com,64 +6,excellent,https://google.com,70 +35,bad,http://example.com,48 +96,maybe,https://shoponline.com,96 +25,fine,http://news.com,43 +9,no,http://mysite.org,14 +66,yes,https://videosite.com,87 +76,maybe,https://musicstream.com,13 +51,good,http://forum.com,1 +31,great,http://news.com,51 +72,fine,http://news.com,70 +63,no,http://forum.com,5 +73,great,https://yourblog.net,81 +13,terrible,https://github.com,27 +98,excellent,https://videosite.com,29 +71,good,http://news.com,68 +89,no,https://musicstream.com,60 +68,excellent,http://news.com,54 +76,good,https://google.com,79 +69,good,http://mysite.org,99 +35,good,http://news.com,70 +97,yes,https://yourblog.net,63 +44,ok,https://yourblog.net,4 +73,terrible,http://news.com,36 +37,yes,https://github.com,61 +26,fine,https://videosite.com,41 +37,excellent,https://musicstream.com,9 +18,yes,https://github.com,81 +54,excellent,https://shoponline.com,52 +73,great,https://yourblog.net,67 +19,bad,http://example.com,86 +1,terrible,http://mysite.org,16 +62,fine,https://musicstream.com,64 +61,excellent,https://shoponline.com,93 +38,bad,https://videosite.com,61 +55,great,http://news.com,35 +47,great,https://yourblog.net,42 +36,good,https://yourblog.net,40 +26,ok,http://example.com,58 +40,great,https://github.com,0 +81,fine,https://musicstream.com,92 +50,yes,https://github.com,21 +98,maybe,https://google.com,2 +37,bad,https://videosite.com,90 +39,excellent,https://yourblog.net,31 +81,ok,https://musicstream.com,90 +10,bad,http://news.com,18 +93,good,http://example.com,93 +65,fine,http://news.com,72 +10,yes,https://yourblog.net,77 +5,good,http://forum.com,52 +75,fine,https://github.com,67 +40,excellent,http://example.com,27 +21,good,http://example.com,0 +70,fine,http://forum.com,71 +72,no,https://videosite.com,87 +86,no,http://example.com,63 +5,yes,http://example.com,3 +5,fine,http://example.com,68 +36,excellent,https://videosite.com,77 +3,bad,https://github.com,12 +40,bad,http://example.com,53 +33,maybe,https://shoponline.com,3 +80,maybe,https://musicstream.com,49 +42,ok,https://musicstream.com,53 +17,ok,https://yourblog.net,54 +94,yes,https://musicstream.com,20 +16,great,https://videosite.com,42 +32,great,https://shoponline.com,96 +48,ok,http://mysite.org,34 +30,no,http://forum.com,99 +17,excellent,http://forum.com,57 +73,bad,http://forum.com,55 +24,no,https://videosite.com,97 +99,no,http://forum.com,96 +91,excellent,https://musicstream.com,85 +72,excellent,https://videosite.com,60 +74,maybe,http://mysite.org,1 +65,good,https://videosite.com,16 +58,maybe,http://forum.com,9 +97,good,http://example.com,68 +39,good,http://news.com,29 +30,no,http://forum.com,96 +99,maybe,http://mysite.org,64 +66,terrible,https://yourblog.net,0 +72,good,http://example.com,59 +85,terrible,https://yourblog.net,69 +41,great,https://github.com,97 +95,good,https://yourblog.net,42 +56,good,https://shoponline.com,43 +57,good,http://news.com,9 +68,good,http://example.com,80 +80,yes,https://videosite.com,14 +95,great,https://musicstream.com,43 +2,terrible,http://mysite.org,23 +94,terrible,https://shoponline.com,12 +45,maybe,https://github.com,67 +61,maybe,https://shoponline.com,46 +28,excellent,https://musicstream.com,10 +9,no,http://example.com,36 +75,great,https://google.com,43 +54,maybe,http://news.com,16 +49,bad,https://google.com,62 +33,fine,http://example.com,13 +93,excellent,http://example.com,11 +37,ok,https://videosite.com,67 +92,ok,http://forum.com,33 +1,excellent,https://shoponline.com,100 +87,no,http://mysite.org,0 +89,terrible,http://example.com,11 +26,yes,https://github.com,79 +44,terrible,https://yourblog.net,98 +69,excellent,http://news.com,71 +76,excellent,https://yourblog.net,26 +19,fine,https://google.com,16 +71,great,http://mysite.org,100 +99,no,http://forum.com,20 +98,fine,https://google.com,53 +48,fine,http://news.com,42 +29,maybe,http://news.com,29 +6,terrible,https://videosite.com,40 +77,great,http://forum.com,13 +34,terrible,https://yourblog.net,86 +46,terrible,https://musicstream.com,86 +80,great,https://videosite.com,31 +33,fine,https://google.com,80 +49,excellent,http://example.com,70 +80,ok,http://news.com,49 +72,yes,https://musicstream.com,78 +95,ok,https://videosite.com,4 +70,no,https://github.com,100 +1,yes,https://musicstream.com,35 +26,no,http://mysite.org,45 +72,good,https://videosite.com,28 +23,bad,http://mysite.org,56 +4,great,http://example.com,56 +15,great,https://github.com,52 +89,good,http://example.com,77 +85,ok,http://forum.com,86 +50,fine,http://mysite.org,7 +74,bad,https://videosite.com,78 +1,no,https://github.com,43 +71,maybe,https://github.com,11 +79,no,https://musicstream.com,57 +90,maybe,https://github.com,68 +19,bad,https://github.com,53 +53,good,https://google.com,59 +5,fine,http://mysite.org,67 +34,maybe,http://example.com,76 +40,terrible,https://google.com,50 +99,excellent,http://news.com,9 +77,bad,https://google.com,90 +36,bad,https://videosite.com,75 +85,excellent,https://github.com,40 +8,good,http://mysite.org,17 +43,good,http://mysite.org,79 +15,yes,https://musicstream.com,86 +22,maybe,https://yourblog.net,88 +86,yes,https://github.com,65 +18,terrible,http://news.com,48 +4,terrible,https://yourblog.net,85 +100,fine,https://yourblog.net,3 +41,good,http://forum.com,65 +64,terrible,https://github.com,34 +92,bad,https://google.com,21 +87,excellent,http://mysite.org,37 +30,excellent,https://github.com,14 +39,good,http://example.com,44 +68,yes,https://videosite.com,38 +31,maybe,https://yourblog.net,97 +36,bad,https://shoponline.com,20 +69,yes,https://shoponline.com,94 +45,good,http://mysite.org,30 +84,bad,http://news.com,73 +71,excellent,https://musicstream.com,26 +46,bad,http://mysite.org,4 +98,fine,https://yourblog.net,51 +17,excellent,https://videosite.com,77 +32,excellent,https://yourblog.net,23 +37,good,http://news.com,98 +60,ok,https://musicstream.com,24 +68,ok,https://videosite.com,44 +96,fine,https://musicstream.com,44 +65,yes,https://github.com,81 +25,terrible,https://musicstream.com,61 +54,terrible,https://shoponline.com,72 +5,fine,https://yourblog.net,93 +27,terrible,http://example.com,3 +30,bad,https://google.com,9 +99,excellent,http://forum.com,6 +31,yes,https://google.com,93 +82,good,https://google.com,56 +38,fine,https://google.com,56 +29,ok,https://yourblog.net,42 +91,no,https://google.com,62 +58,good,http://example.com,80 +75,fine,http://example.com,97 +59,maybe,https://google.com,13 +23,ok,http://mysite.org,38 +50,great,https://shoponline.com,43 +0,no,http://forum.com,98 +4,no,https://github.com,47 +20,fine,https://shoponline.com,52 +38,fine,http://example.com,21 +43,bad,http://mysite.org,15 +39,great,https://google.com,65 +92,yes,https://videosite.com,10 +35,terrible,http://mysite.org,53 +58,terrible,https://videosite.com,92 +56,yes,https://musicstream.com,49 +30,ok,https://shoponline.com,45 +24,excellent,https://github.com,68 +33,bad,http://forum.com,3 +87,no,http://forum.com,88 +31,fine,http://mysite.org,32 +40,yes,http://mysite.org,44 +17,yes,http://mysite.org,53 +4,fine,https://shoponline.com,31 +89,bad,https://musicstream.com,86 +24,excellent,https://github.com,97 +58,no,https://github.com,49 +26,ok,http://example.com,62 +59,great,https://shoponline.com,72 +22,yes,https://videosite.com,82 +27,great,http://example.com,24 +1,terrible,https://github.com,25 +2,bad,https://videosite.com,94 +39,good,http://news.com,16 +90,no,http://news.com,25 +6,no,https://yourblog.net,68 +73,ok,http://mysite.org,27 +68,no,https://google.com,64 +26,maybe,http://forum.com,63 +32,excellent,https://musicstream.com,58 +63,fine,https://videosite.com,35 +94,great,https://videosite.com,6 +8,ok,https://yourblog.net,16 +50,ok,https://videosite.com,46 +15,terrible,http://mysite.org,6 +53,excellent,https://github.com,30 +83,maybe,https://github.com,62 +78,good,https://shoponline.com,10 +96,fine,http://news.com,7 +73,ok,https://google.com,90 +95,great,https://shoponline.com,45 +61,excellent,https://github.com,26 +80,no,https://google.com,89 +80,great,http://forum.com,87 +12,excellent,https://musicstream.com,35 +59,great,https://musicstream.com,60 +27,ok,http://example.com,94 +68,maybe,https://google.com,25 +19,terrible,https://yourblog.net,77 +59,yes,http://mysite.org,67 +65,great,https://musicstream.com,70 +38,good,https://google.com,63 +2,no,http://example.com,40 +7,great,http://forum.com,32 +93,great,https://yourblog.net,47 +66,excellent,http://mysite.org,89 +28,excellent,https://shoponline.com,42 +28,no,https://google.com,71 +83,terrible,https://videosite.com,82 +33,good,https://musicstream.com,87 +24,terrible,http://news.com,30 +87,yes,http://example.com,25 +0,ok,http://example.com,87 +93,bad,https://musicstream.com,24 +16,yes,http://mysite.org,63 +81,yes,http://example.com,0 +85,yes,https://videosite.com,96 +62,bad,http://mysite.org,86 +43,great,https://shoponline.com,56 +62,no,https://musicstream.com,45 +35,terrible,http://forum.com,66 +65,ok,https://videosite.com,95 +70,excellent,https://yourblog.net,13 +50,bad,https://google.com,94 +54,no,http://forum.com,44 +1,bad,https://videosite.com,4 +82,yes,https://yourblog.net,9 +96,maybe,https://videosite.com,3 +87,excellent,https://musicstream.com,14 +99,fine,https://google.com,36 +38,bad,http://example.com,71 +21,terrible,http://mysite.org,91 +23,ok,https://yourblog.net,17 +88,good,http://news.com,23 +83,great,https://musicstream.com,68 +25,good,http://news.com,93 +60,fine,https://google.com,89 +45,excellent,http://example.com,21 +59,bad,https://google.com,38 +22,bad,https://shoponline.com,11 +54,maybe,http://example.com,97 +18,great,http://forum.com,35 +37,ok,https://google.com,11 +59,no,http://news.com,68 +5,great,http://example.com,12 +32,yes,https://yourblog.net,15 +91,fine,http://forum.com,12 +40,no,http://news.com,40 +87,terrible,https://videosite.com,28 +15,no,https://videosite.com,52 +63,yes,http://forum.com,39 +81,terrible,http://news.com,36 +53,good,https://google.com,45 +12,excellent,https://shoponline.com,42 +20,ok,http://news.com,78 +65,excellent,http://forum.com,65 +64,maybe,https://shoponline.com,100 +73,no,https://yourblog.net,86 +44,great,http://example.com,58 +7,good,https://yourblog.net,21 +10,good,https://google.com,33 +11,no,https://musicstream.com,24 +59,no,https://github.com,12 +35,yes,http://news.com,65 +78,good,http://example.com,97 +22,no,https://google.com,2 +64,excellent,http://mysite.org,17 +18,ok,https://shoponline.com,21 +91,fine,https://musicstream.com,50 +92,great,http://news.com,9 +13,terrible,https://github.com,17 +13,excellent,https://yourblog.net,43 +93,no,http://mysite.org,80 +90,fine,http://forum.com,59 +33,great,https://videosite.com,54 +94,bad,http://news.com,87 +52,maybe,https://videosite.com,63 +55,terrible,http://forum.com,4 +12,yes,https://videosite.com,5 +50,terrible,http://forum.com,35 +42,bad,https://google.com,77 +69,great,https://shoponline.com,57 +74,maybe,https://google.com,66 +52,ok,http://example.com,86 +18,yes,https://google.com,7 +7,fine,http://forum.com,94 +11,great,https://google.com,69 +76,excellent,https://shoponline.com,79 +20,excellent,https://yourblog.net,98 +60,no,https://videosite.com,21 +52,no,http://forum.com,33 +52,excellent,http://forum.com,57 +48,ok,https://github.com,33 +32,excellent,https://musicstream.com,74 +39,ok,https://yourblog.net,88 +98,excellent,http://example.com,0 +3,excellent,http://example.com,97 +76,excellent,http://mysite.org,16 +44,good,https://videosite.com,45 +51,great,http://mysite.org,96 +44,ok,http://news.com,62 +26,excellent,http://news.com,54 +69,fine,https://shoponline.com,12 +16,excellent,http://forum.com,24 +36,bad,https://musicstream.com,54 +12,good,http://news.com,58 +72,bad,https://videosite.com,87 +2,yes,http://mysite.org,62 +38,yes,https://shoponline.com,29 +31,good,https://yourblog.net,36 +43,bad,https://google.com,34 +72,fine,https://shoponline.com,96 +8,fine,https://yourblog.net,94 +29,ok,https://github.com,90 +53,ok,http://mysite.org,12 +12,excellent,http://forum.com,41 +25,terrible,http://mysite.org,19 +97,bad,https://yourblog.net,32 +2,good,http://mysite.org,3 +96,bad,https://github.com,35 +67,yes,http://forum.com,15 +37,fine,https://google.com,74 +57,no,https://google.com,62 +50,excellent,http://forum.com,21 +40,terrible,https://google.com,29 +68,good,https://shoponline.com,63 +51,maybe,http://news.com,35 +0,great,http://example.com,96 +71,great,http://mysite.org,46 +33,yes,http://news.com,23 +51,great,https://videosite.com,21 +16,excellent,http://example.com,69 +88,no,https://github.com,73 +7,maybe,https://github.com,76 +33,terrible,https://musicstream.com,39 +92,great,https://yourblog.net,47 +57,maybe,https://videosite.com,32 +37,ok,https://videosite.com,20 +12,maybe,https://musicstream.com,84 +72,bad,https://google.com,44 +35,ok,http://mysite.org,4 +17,yes,http://mysite.org,22 +62,excellent,http://news.com,35 +33,bad,https://yourblog.net,66 +45,excellent,https://shoponline.com,38 +99,fine,https://yourblog.net,93 +83,terrible,https://videosite.com,55 +61,fine,https://videosite.com,56 +91,yes,http://forum.com,72 +91,yes,http://news.com,15 +89,fine,http://example.com,17 +32,ok,http://example.com,86 +50,bad,http://mysite.org,65 +12,fine,https://musicstream.com,30 +45,no,http://news.com,75 +42,terrible,https://shoponline.com,95 +48,excellent,http://mysite.org,50 +30,terrible,https://yourblog.net,34 +54,good,http://forum.com,58 +2,excellent,http://mysite.org,89 +15,no,http://news.com,61 +88,fine,https://google.com,76 +96,no,https://musicstream.com,26 +98,fine,https://yourblog.net,18 +52,maybe,https://videosite.com,17 +34,great,https://yourblog.net,20 +80,ok,https://shoponline.com,70 +13,ok,http://news.com,17 +2,terrible,http://forum.com,3 +86,good,http://news.com,68 +51,bad,http://example.com,7 +29,maybe,https://videosite.com,85 +5,no,http://mysite.org,53 +90,ok,http://forum.com,33 +80,fine,https://google.com,62 +52,yes,http://forum.com,2 +38,yes,http://news.com,90 +96,ok,https://videosite.com,9 +13,ok,https://github.com,98 +18,maybe,http://example.com,59 +22,yes,https://videosite.com,40 +36,great,http://example.com,24 +66,terrible,https://shoponline.com,46 +99,maybe,http://example.com,92 +85,good,http://forum.com,96 +14,fine,https://google.com,68 +7,good,http://news.com,75 +37,no,https://shoponline.com,47 +86,excellent,https://yourblog.net,100 +70,no,http://mysite.org,7 +40,yes,http://mysite.org,36 +92,good,https://shoponline.com,92 +79,excellent,http://example.com,80 +58,no,https://yourblog.net,99 +28,yes,https://yourblog.net,36 +81,great,http://mysite.org,36 +14,excellent,https://musicstream.com,23 +18,no,https://musicstream.com,54 +17,great,https://musicstream.com,3 +21,great,https://shoponline.com,15 +10,terrible,https://musicstream.com,68 +94,terrible,http://example.com,13 +2,good,https://google.com,55 +76,yes,https://musicstream.com,84 +10,maybe,https://google.com,99 +97,yes,http://forum.com,42 +63,excellent,https://videosite.com,11 +13,ok,http://example.com,87 +76,great,http://news.com,40 +75,good,https://yourblog.net,98 +35,great,https://shoponline.com,94 +71,maybe,http://mysite.org,6 +34,no,http://forum.com,59 +29,fine,http://example.com,4 +1,yes,http://mysite.org,27 +74,maybe,https://shoponline.com,14 +26,great,http://news.com,84 +30,no,https://videosite.com,1 +21,excellent,https://google.com,81 +74,yes,http://example.com,24 +23,no,https://videosite.com,88 +46,good,https://musicstream.com,77 +62,ok,https://shoponline.com,97 +37,ok,https://yourblog.net,36 +48,excellent,https://yourblog.net,4 +27,excellent,https://videosite.com,33 +24,terrible,https://google.com,17 +1,yes,http://news.com,28 +57,ok,http://example.com,93 +84,good,https://yourblog.net,88 +92,great,https://musicstream.com,61 +64,yes,https://musicstream.com,85 +94,ok,https://yourblog.net,96 +67,ok,http://forum.com,29 +66,great,https://github.com,67 +91,bad,http://example.com,19 +95,great,http://mysite.org,0 +68,ok,http://example.com,67 +3,excellent,https://github.com,49 +61,maybe,https://yourblog.net,100 +19,bad,https://videosite.com,100 +58,excellent,http://forum.com,25 +45,ok,https://yourblog.net,57 +43,maybe,https://shoponline.com,52 +60,terrible,http://example.com,97 +19,bad,https://yourblog.net,76 +26,fine,https://musicstream.com,76 +51,excellent,https://videosite.com,88 +84,fine,http://mysite.org,31 +99,great,https://github.com,30 +60,terrible,http://forum.com,48 +28,ok,https://yourblog.net,58 +23,terrible,https://musicstream.com,37 +41,great,https://shoponline.com,1 +18,ok,https://github.com,6 +39,ok,http://mysite.org,65 +14,terrible,https://google.com,96 +86,fine,http://forum.com,15 +24,yes,https://yourblog.net,1 +86,good,https://github.com,9 +20,good,http://news.com,3 +94,maybe,https://github.com,52 +19,yes,http://forum.com,88 +0,great,https://google.com,37 +5,ok,https://yourblog.net,69 +29,terrible,https://google.com,56 +26,great,https://musicstream.com,89 +64,fine,https://yourblog.net,5 +51,yes,https://yourblog.net,15 +14,excellent,https://google.com,89 +48,ok,https://github.com,60 +77,great,https://musicstream.com,80 +28,bad,https://musicstream.com,46 +8,no,https://videosite.com,32 +48,great,https://google.com,9 +91,ok,https://github.com,12 +82,great,https://videosite.com,2 +99,terrible,http://forum.com,46 +82,maybe,http://forum.com,21 +73,good,http://forum.com,51 +32,bad,https://google.com,30 +52,maybe,http://mysite.org,72 +53,maybe,https://github.com,100 +100,bad,http://example.com,53 +8,ok,http://example.com,28 +62,bad,http://example.com,56 +61,yes,https://shoponline.com,17 +44,bad,http://mysite.org,32 +54,good,https://shoponline.com,16 +4,bad,http://forum.com,75 +65,excellent,http://news.com,4 +72,yes,https://google.com,25 +68,ok,https://shoponline.com,72 +81,excellent,https://yourblog.net,19 +99,good,https://shoponline.com,9 +21,ok,http://news.com,86 +14,maybe,https://videosite.com,83 +63,bad,http://example.com,20 +14,bad,http://example.com,66 +9,no,https://shoponline.com,61 +53,fine,https://shoponline.com,6 +6,no,https://github.com,86 +69,good,https://videosite.com,23 +59,fine,https://musicstream.com,30 +35,maybe,https://github.com,44 +87,fine,https://musicstream.com,17 +55,excellent,https://musicstream.com,82 +22,fine,https://yourblog.net,88 +65,no,https://musicstream.com,22 +90,great,https://google.com,14 +49,good,https://github.com,77 +84,great,https://musicstream.com,10 +31,great,https://github.com,82 +48,terrible,http://news.com,74 +12,fine,http://mysite.org,74 +13,great,http://forum.com,28 +92,no,https://videosite.com,81 +6,no,https://videosite.com,100 +0,good,http://news.com,73 +33,good,https://google.com,38 +51,terrible,https://shoponline.com,17 +72,terrible,https://yourblog.net,52 +47,bad,https://shoponline.com,59 +41,ok,https://yourblog.net,98 +68,great,https://videosite.com,94 +77,good,http://news.com,87 +39,ok,http://example.com,89 +47,excellent,http://example.com,56 +31,bad,http://mysite.org,56 +53,fine,https://shoponline.com,18 +8,yes,https://github.com,63 +99,yes,http://example.com,70 +7,ok,http://example.com,85 +35,bad,https://shoponline.com,14 +73,terrible,https://yourblog.net,84 +26,no,https://musicstream.com,84 +43,bad,https://videosite.com,44 +10,great,https://google.com,48 +53,terrible,https://github.com,55 +58,good,http://forum.com,14 +27,fine,http://news.com,67 +73,ok,http://example.com,9 +28,ok,https://google.com,84 +78,maybe,https://yourblog.net,59 +79,great,https://shoponline.com,64 +16,ok,https://yourblog.net,67 +75,yes,https://musicstream.com,3 +33,good,https://shoponline.com,12 +78,great,https://yourblog.net,30 +90,yes,https://videosite.com,17 +29,ok,http://example.com,40 +94,good,http://example.com,88 +30,maybe,https://videosite.com,93 +57,no,https://google.com,35 +50,no,https://yourblog.net,55 +65,maybe,https://videosite.com,47 +85,maybe,https://google.com,74 +5,maybe,https://yourblog.net,4 +16,terrible,https://google.com,50 +69,yes,https://musicstream.com,45 +47,terrible,http://example.com,4 +27,ok,http://example.com,60 +17,yes,http://example.com,43 +8,yes,https://musicstream.com,98 +75,no,https://github.com,78 +41,great,https://google.com,34 +88,bad,http://example.com,78 +94,good,http://news.com,41 +26,fine,http://forum.com,37 +23,excellent,https://shoponline.com,72 +76,fine,https://yourblog.net,97 +72,terrible,https://shoponline.com,56 +57,fine,https://videosite.com,80 +42,excellent,https://musicstream.com,40 +96,fine,https://shoponline.com,69 +13,fine,https://yourblog.net,22 +2,fine,https://shoponline.com,43 +39,excellent,https://yourblog.net,41 +18,excellent,http://example.com,55 +14,no,https://google.com,52 +30,no,https://yourblog.net,44 +42,good,https://videosite.com,37 +98,yes,https://videosite.com,44 +77,ok,https://musicstream.com,48 +39,ok,https://yourblog.net,41 +60,yes,http://news.com,41 +30,good,https://musicstream.com,61 +33,fine,https://github.com,98 +25,fine,https://videosite.com,45 +43,yes,http://mysite.org,29 +69,maybe,https://musicstream.com,9 +56,bad,https://github.com,83 +29,ok,https://yourblog.net,43 +57,no,https://musicstream.com,99 +72,bad,http://mysite.org,33 +0,terrible,https://videosite.com,70 +43,ok,https://shoponline.com,79 +97,terrible,http://example.com,2 +45,bad,https://musicstream.com,98 +52,no,https://google.com,11 +69,bad,https://videosite.com,14 +52,yes,https://google.com,17 +89,fine,https://github.com,36 +6,yes,https://videosite.com,67 +72,excellent,http://mysite.org,68 +0,maybe,https://google.com,24 +95,yes,https://shoponline.com,62 +40,maybe,https://yourblog.net,14 +41,no,https://videosite.com,87 +58,fine,https://yourblog.net,43 +27,no,http://news.com,49 +46,great,http://example.com,1 +91,maybe,http://news.com,58 +34,no,https://github.com,19 +63,no,http://example.com,71 +100,good,https://musicstream.com,94 +81,yes,https://shoponline.com,46 +95,great,http://forum.com,5 +84,yes,https://shoponline.com,45 +72,excellent,https://github.com,73 +40,no,https://videosite.com,6 +6,excellent,http://news.com,10 +62,great,https://musicstream.com,82 +76,terrible,http://forum.com,4 +13,no,http://mysite.org,86 +36,great,http://example.com,83 +67,maybe,https://yourblog.net,43 +22,excellent,http://example.com,35 +18,great,https://google.com,15 +65,terrible,https://github.com,30 +44,fine,http://forum.com,18 +3,excellent,http://forum.com,37 +10,bad,http://forum.com,65 +17,ok,https://shoponline.com,66 +3,maybe,https://shoponline.com,13 +51,maybe,https://yourblog.net,28 +42,bad,http://news.com,23 +70,excellent,https://yourblog.net,91 +94,bad,https://videosite.com,20 +5,terrible,http://example.com,85 +21,excellent,https://yourblog.net,66 +71,fine,https://yourblog.net,59 +33,terrible,http://news.com,16 +36,bad,https://shoponline.com,86 +45,terrible,https://github.com,94 +62,excellent,https://github.com,42 +80,terrible,http://mysite.org,49 +81,bad,http://news.com,41 +68,bad,http://forum.com,54 +80,bad,https://shoponline.com,15 +63,good,https://videosite.com,34 +67,ok,http://example.com,59 +40,terrible,https://google.com,55 +34,bad,https://yourblog.net,49 +58,no,https://yourblog.net,37 +70,terrible,https://yourblog.net,19 +90,great,http://example.com,78 +5,maybe,https://github.com,96 +34,bad,http://example.com,100 +21,ok,http://news.com,32 +0,no,https://musicstream.com,92 +44,yes,https://musicstream.com,27 +71,bad,https://videosite.com,79 +6,terrible,http://mysite.org,46 +72,excellent,https://musicstream.com,50 +20,maybe,https://google.com,37 +56,great,http://mysite.org,22 +35,maybe,https://shoponline.com,18 +40,terrible,http://mysite.org,33 +40,yes,http://forum.com,39 +95,excellent,https://google.com,58 +31,great,https://musicstream.com,40 +45,good,http://forum.com,46 +59,excellent,http://example.com,44 +11,no,https://videosite.com,33 +78,terrible,https://google.com,92 +70,terrible,https://videosite.com,18 +38,excellent,https://videosite.com,97 +21,yes,http://forum.com,22 +30,fine,http://example.com,46 +64,fine,https://github.com,83 +22,no,https://shoponline.com,92 +14,good,http://forum.com,90 +93,maybe,https://google.com,22 +68,no,https://videosite.com,79 +13,maybe,https://videosite.com,47 +98,great,http://news.com,16 +3,great,https://videosite.com,72 +90,good,https://yourblog.net,78 +85,terrible,http://forum.com,92 +1,fine,https://videosite.com,74 +90,yes,http://forum.com,90 +86,yes,http://example.com,31 +40,terrible,https://github.com,84 +71,yes,https://shoponline.com,69 +4,terrible,https://shoponline.com,15 +74,excellent,http://news.com,97 +97,great,https://github.com,79 +67,great,https://videosite.com,76 +94,bad,https://github.com,51 +26,good,https://github.com,33 +52,fine,https://musicstream.com,17 +43,yes,https://github.com,12 +71,excellent,https://shoponline.com,53 +51,no,http://mysite.org,75 +46,excellent,https://shoponline.com,84 +84,excellent,https://videosite.com,79 +33,maybe,http://mysite.org,45 +17,yes,https://yourblog.net,71 +0,maybe,https://shoponline.com,32 +77,no,https://google.com,24 +5,fine,http://news.com,51 +20,great,https://shoponline.com,58 +26,fine,https://google.com,61 +11,terrible,http://forum.com,85 +14,fine,https://github.com,84 +54,ok,https://google.com,60 +7,excellent,https://google.com,47 +53,no,https://shoponline.com,38 +56,good,https://yourblog.net,35 +60,bad,https://shoponline.com,63 +62,great,https://videosite.com,21 +58,maybe,https://yourblog.net,19 +59,good,https://yourblog.net,11 +40,bad,https://github.com,61 +4,yes,http://example.com,81 +70,great,https://yourblog.net,6 +53,great,https://yourblog.net,37 +50,no,http://example.com,72 +97,ok,https://google.com,16 +83,fine,https://yourblog.net,84 +44,maybe,http://forum.com,38 +77,yes,https://github.com,55 +1,no,https://musicstream.com,22 +35,no,https://google.com,99 +88,excellent,https://yourblog.net,45 +62,ok,http://news.com,16 +51,fine,http://news.com,93 +17,fine,https://musicstream.com,44 +44,maybe,https://videosite.com,32 +6,terrible,https://videosite.com,39 +79,terrible,http://example.com,89 +41,great,https://shoponline.com,95 +63,yes,http://forum.com,92 +34,yes,http://example.com,28 +35,excellent,https://musicstream.com,25 +95,bad,https://shoponline.com,22 +47,maybe,http://example.com,50 +5,terrible,https://musicstream.com,6 +37,fine,http://forum.com,92 +94,good,https://github.com,33 +14,no,http://example.com,95 +63,excellent,http://forum.com,36 +24,fine,https://videosite.com,98 +7,fine,https://yourblog.net,71 +48,good,http://mysite.org,34 +32,fine,https://yourblog.net,31 +48,great,http://news.com,14 +35,no,http://example.com,77 +72,fine,https://google.com,37 +55,excellent,https://google.com,46 +63,great,http://mysite.org,60 +1,excellent,https://github.com,44 +37,fine,https://shoponline.com,72 +97,no,https://yourblog.net,88 +76,excellent,http://mysite.org,11 +88,ok,https://github.com,92 +83,no,http://news.com,44 +44,fine,https://videosite.com,64 +66,fine,https://google.com,87 +31,ok,http://mysite.org,64 +20,good,https://yourblog.net,69 +68,fine,http://forum.com,63 +41,terrible,http://example.com,47 +81,maybe,https://shoponline.com,33 +60,maybe,http://mysite.org,59 +26,good,http://news.com,87 +1,excellent,https://google.com,84 +79,ok,http://example.com,56 +72,excellent,http://mysite.org,64 +0,good,https://videosite.com,1 +97,no,http://news.com,91 +34,terrible,http://forum.com,36 diff --git a/be/test/olap/rowset/segment_v2/inverted_index/data/data2.csv b/be/test/olap/rowset/segment_v2/inverted_index/data/data2.csv new file mode 100644 index 00000000000000..0c8a15bafdd575 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/data/data2.csv @@ -0,0 +1,1000 @@ +37,no,http://example.com,34 +59,ok,https://shoponline.com,9 +44,ok,http://news.com,40 +64,no,https://github.com,37 +61,no,https://musicstream.com,42 +68,great,http://mysite.org,49 +23,good,http://example.com,90 +49,fine,https://yourblog.net,23 +34,excellent,https://musicstream.com,69 +23,maybe,https://yourblog.net,15 +19,great,http://example.com,61 +68,excellent,https://github.com,78 +65,yes,https://shoponline.com,67 +84,excellent,https://videosite.com,8 +45,terrible,http://news.com,96 +13,bad,https://videosite.com,100 +8,great,http://mysite.org,66 +15,fine,http://news.com,18 +35,bad,https://videosite.com,27 +42,bad,https://videosite.com,0 +100,fine,http://forum.com,89 +94,yes,https://shoponline.com,28 +26,good,https://google.com,6 +55,yes,https://google.com,0 +66,excellent,https://google.com,30 +78,terrible,https://github.com,57 +31,excellent,http://news.com,61 +86,fine,http://mysite.org,30 +8,maybe,https://shoponline.com,15 +43,yes,http://mysite.org,57 +79,bad,http://forum.com,26 +28,no,http://mysite.org,64 +29,good,http://mysite.org,25 +17,excellent,https://shoponline.com,87 +72,good,http://forum.com,33 +89,excellent,https://google.com,100 +7,bad,https://google.com,22 +33,excellent,https://github.com,23 +42,fine,https://shoponline.com,79 +96,fine,https://github.com,94 +91,maybe,http://news.com,61 +53,good,http://forum.com,12 +96,yes,http://news.com,33 +20,bad,https://shoponline.com,9 +86,fine,https://musicstream.com,48 +76,maybe,https://google.com,38 +33,yes,https://videosite.com,35 +73,ok,https://videosite.com,40 +41,no,http://news.com,96 +15,bad,http://example.com,44 +18,excellent,https://shoponline.com,11 +46,excellent,https://yourblog.net,71 +87,bad,http://news.com,37 +50,yes,http://news.com,94 +80,excellent,https://musicstream.com,91 +95,maybe,https://google.com,45 +48,terrible,https://github.com,58 +91,ok,http://news.com,90 +42,yes,http://mysite.org,45 +27,excellent,https://github.com,50 +11,ok,https://github.com,61 +61,excellent,https://shoponline.com,54 +1,bad,http://mysite.org,20 +9,yes,http://example.com,30 +81,good,http://news.com,47 +24,ok,https://yourblog.net,52 +1,great,https://yourblog.net,34 +78,ok,http://mysite.org,51 +45,fine,http://news.com,46 +46,excellent,http://mysite.org,63 +18,bad,https://musicstream.com,9 +88,fine,https://github.com,65 +45,terrible,https://videosite.com,36 +79,no,http://forum.com,47 +59,great,https://videosite.com,20 +9,good,http://example.com,38 +17,maybe,https://yourblog.net,55 +52,terrible,http://forum.com,99 +32,maybe,https://videosite.com,12 +81,great,http://example.com,10 +27,yes,https://shoponline.com,5 +29,maybe,https://shoponline.com,32 +80,maybe,https://musicstream.com,4 +15,yes,http://forum.com,26 +58,maybe,https://github.com,39 +13,no,https://shoponline.com,13 +76,great,http://example.com,97 +15,excellent,https://yourblog.net,6 +34,good,http://mysite.org,57 +79,no,https://musicstream.com,85 +49,terrible,https://github.com,90 +92,great,https://google.com,60 +41,great,https://shoponline.com,27 +54,good,http://news.com,2 +24,maybe,http://news.com,57 +3,fine,https://musicstream.com,32 +42,bad,https://shoponline.com,48 +67,good,http://mysite.org,67 +86,ok,https://yourblog.net,6 +19,ok,https://google.com,23 +60,great,http://forum.com,42 +20,good,https://musicstream.com,21 +96,no,http://news.com,94 +4,good,https://videosite.com,85 +4,no,https://github.com,87 +28,bad,https://google.com,21 +85,excellent,https://musicstream.com,78 +80,great,http://mysite.org,18 +22,fine,https://videosite.com,19 +76,maybe,https://github.com,5 +55,bad,https://yourblog.net,49 +50,ok,https://shoponline.com,79 +86,terrible,https://google.com,84 +52,excellent,https://yourblog.net,72 +23,ok,https://musicstream.com,30 +15,excellent,https://videosite.com,23 +1,terrible,https://google.com,23 +59,terrible,http://mysite.org,71 +66,fine,https://musicstream.com,49 +37,bad,http://forum.com,18 +35,ok,https://videosite.com,53 +83,bad,https://google.com,90 +54,excellent,http://news.com,7 +52,fine,http://example.com,71 +51,fine,https://musicstream.com,5 +88,good,http://example.com,5 +0,terrible,https://yourblog.net,91 +72,great,https://videosite.com,34 +8,no,http://forum.com,76 +97,bad,https://yourblog.net,20 +40,fine,https://yourblog.net,76 +29,terrible,http://mysite.org,29 +5,yes,https://shoponline.com,6 +66,maybe,https://musicstream.com,97 +65,fine,https://yourblog.net,70 +3,terrible,http://mysite.org,96 +56,excellent,http://example.com,7 +93,yes,https://google.com,33 +11,terrible,https://github.com,87 +10,yes,http://forum.com,9 +28,terrible,http://mysite.org,40 +85,bad,https://google.com,71 +47,maybe,https://yourblog.net,45 +56,maybe,https://videosite.com,73 +30,terrible,http://mysite.org,22 +66,maybe,http://mysite.org,17 +34,good,https://google.com,56 +0,excellent,http://example.com,54 +24,yes,https://github.com,94 +50,good,https://videosite.com,70 +39,good,http://example.com,62 +41,terrible,http://forum.com,50 +26,great,http://example.com,79 +100,yes,https://google.com,30 +37,great,https://shoponline.com,45 +25,terrible,http://forum.com,72 +82,bad,https://yourblog.net,66 +62,ok,http://news.com,5 +67,excellent,https://musicstream.com,65 +26,maybe,http://mysite.org,37 +77,yes,http://forum.com,100 +32,terrible,https://musicstream.com,49 +20,great,https://yourblog.net,5 +45,maybe,https://google.com,30 +76,good,http://mysite.org,16 +79,great,https://github.com,91 +40,good,https://yourblog.net,83 +89,no,https://yourblog.net,92 +45,yes,http://mysite.org,23 +47,great,http://mysite.org,41 +80,terrible,https://musicstream.com,24 +49,good,https://shoponline.com,100 +85,terrible,https://yourblog.net,3 +31,great,https://github.com,53 +85,fine,https://videosite.com,53 +85,terrible,http://mysite.org,100 +42,bad,https://github.com,73 +93,no,http://forum.com,32 +64,excellent,http://example.com,92 +79,terrible,https://videosite.com,83 +83,bad,http://mysite.org,15 +41,yes,https://shoponline.com,73 +85,yes,http://forum.com,63 +43,yes,http://news.com,23 +27,bad,https://musicstream.com,57 +87,yes,https://google.com,82 +22,fine,https://musicstream.com,0 +68,good,https://shoponline.com,92 +79,excellent,https://videosite.com,39 +68,good,http://forum.com,18 +77,yes,https://github.com,13 +38,great,https://google.com,56 +91,maybe,https://yourblog.net,7 +59,bad,http://news.com,69 +24,great,http://forum.com,53 +91,great,http://news.com,12 +59,bad,https://musicstream.com,15 +14,excellent,https://musicstream.com,40 +79,maybe,http://mysite.org,96 +4,yes,http://forum.com,4 +15,bad,http://news.com,34 +87,good,http://news.com,79 +21,bad,https://github.com,21 +62,terrible,https://google.com,34 +54,ok,https://google.com,24 +57,no,https://videosite.com,9 +40,good,https://shoponline.com,60 +97,maybe,https://google.com,38 +99,no,https://yourblog.net,64 +24,no,https://shoponline.com,82 +17,fine,http://example.com,46 +55,no,https://google.com,47 +47,fine,http://forum.com,54 +17,yes,https://google.com,44 +28,ok,http://news.com,46 +57,excellent,https://google.com,79 +70,yes,https://google.com,36 +46,excellent,https://shoponline.com,71 +30,terrible,https://videosite.com,16 +55,excellent,https://shoponline.com,69 +90,maybe,https://videosite.com,73 +57,terrible,https://videosite.com,20 +7,ok,http://news.com,23 +53,yes,http://news.com,73 +16,bad,https://github.com,56 +35,ok,https://musicstream.com,97 +82,excellent,https://videosite.com,66 +22,yes,https://videosite.com,59 +70,no,http://mysite.org,57 +32,yes,https://shoponline.com,39 +78,yes,https://github.com,33 +70,ok,http://news.com,0 +38,fine,https://shoponline.com,58 +93,no,https://shoponline.com,34 +51,excellent,https://musicstream.com,11 +77,bad,http://example.com,52 +78,ok,http://forum.com,65 +30,bad,https://videosite.com,20 +41,ok,https://github.com,9 +68,yes,https://videosite.com,17 +88,terrible,http://news.com,99 +15,terrible,https://github.com,55 +38,ok,http://forum.com,78 +8,maybe,http://mysite.org,67 +77,bad,http://forum.com,46 +98,terrible,http://news.com,93 +42,maybe,https://videosite.com,57 +23,yes,https://musicstream.com,51 +79,great,http://mysite.org,27 +83,fine,http://example.com,8 +4,yes,http://news.com,48 +99,yes,http://forum.com,51 +40,no,https://google.com,59 +5,terrible,http://example.com,80 +33,great,https://yourblog.net,4 +36,maybe,https://videosite.com,36 +66,maybe,https://shoponline.com,30 +46,maybe,https://musicstream.com,30 +36,great,http://news.com,3 +79,no,http://forum.com,31 +77,excellent,https://google.com,30 +66,terrible,https://google.com,19 +12,yes,https://google.com,28 +13,terrible,https://yourblog.net,40 +46,yes,http://example.com,59 +91,terrible,https://videosite.com,84 +99,maybe,https://videosite.com,29 +100,fine,https://yourblog.net,81 +35,no,http://mysite.org,19 +11,no,https://google.com,28 +71,terrible,http://forum.com,98 +91,great,https://musicstream.com,87 +89,bad,https://videosite.com,28 +79,yes,https://musicstream.com,88 +48,fine,https://yourblog.net,4 +26,ok,https://yourblog.net,66 +77,great,https://shoponline.com,8 +80,fine,https://musicstream.com,37 +63,fine,http://forum.com,100 +95,excellent,http://news.com,90 +65,great,http://mysite.org,71 +21,yes,http://mysite.org,76 +84,terrible,https://shoponline.com,100 +38,ok,https://videosite.com,80 +17,maybe,https://shoponline.com,62 +51,bad,http://mysite.org,18 +71,excellent,https://github.com,60 +77,yes,https://musicstream.com,18 +1,bad,https://musicstream.com,55 +48,terrible,https://google.com,80 +15,bad,https://shoponline.com,68 +68,ok,https://videosite.com,7 +54,bad,https://musicstream.com,72 +79,excellent,https://videosite.com,53 +69,maybe,http://mysite.org,52 +66,bad,https://github.com,18 +86,no,https://shoponline.com,27 +30,great,https://google.com,90 +44,maybe,http://forum.com,80 +90,maybe,https://google.com,49 +84,good,https://github.com,18 +51,good,http://forum.com,69 +68,bad,http://mysite.org,17 +50,good,https://github.com,8 +39,great,http://news.com,45 +53,bad,https://shoponline.com,75 +45,yes,https://github.com,59 +70,bad,http://news.com,4 +83,ok,http://forum.com,98 +33,great,https://shoponline.com,48 +83,bad,https://yourblog.net,97 +34,yes,http://example.com,41 +78,yes,https://shoponline.com,70 +49,yes,http://mysite.org,9 +59,maybe,http://mysite.org,88 +75,good,http://example.com,96 +94,fine,http://forum.com,57 +53,good,http://news.com,66 +18,fine,https://musicstream.com,54 +81,yes,http://mysite.org,0 +63,fine,https://musicstream.com,57 +9,good,http://example.com,35 +93,bad,http://news.com,84 +28,no,https://videosite.com,4 +66,ok,http://example.com,42 +12,terrible,https://github.com,20 +95,yes,http://news.com,82 +58,excellent,https://yourblog.net,26 +21,terrible,https://musicstream.com,16 +23,excellent,https://github.com,56 +21,bad,https://github.com,33 +90,excellent,http://forum.com,65 +39,yes,https://google.com,76 +18,good,http://mysite.org,81 +44,good,http://example.com,71 +98,fine,http://news.com,21 +20,ok,http://news.com,62 +25,bad,https://shoponline.com,72 +2,fine,https://yourblog.net,37 +47,terrible,https://videosite.com,3 +18,no,https://yourblog.net,54 +32,bad,http://news.com,58 +22,terrible,https://videosite.com,54 +56,yes,https://shoponline.com,17 +56,no,http://forum.com,96 +5,yes,https://github.com,10 +12,bad,http://example.com,41 +22,no,http://example.com,80 +45,good,https://shoponline.com,35 +42,yes,http://mysite.org,24 +36,ok,https://google.com,80 +28,excellent,http://example.com,4 +5,good,https://shoponline.com,81 +68,great,https://yourblog.net,0 +20,yes,http://example.com,30 +75,good,https://yourblog.net,45 +81,no,http://mysite.org,88 +8,no,https://github.com,78 +41,terrible,https://github.com,59 +10,terrible,https://videosite.com,38 +42,fine,http://news.com,91 +27,terrible,https://shoponline.com,2 +9,terrible,http://mysite.org,30 +57,yes,http://forum.com,29 +67,excellent,https://github.com,92 +67,great,https://videosite.com,21 +88,bad,https://google.com,63 +86,terrible,https://github.com,30 +16,fine,http://news.com,80 +77,good,https://videosite.com,95 +16,ok,http://mysite.org,20 +8,terrible,http://example.com,34 +71,excellent,https://google.com,59 +31,maybe,https://shoponline.com,74 +57,no,http://mysite.org,21 +73,fine,https://shoponline.com,49 +88,great,http://forum.com,61 +89,fine,http://mysite.org,38 +84,fine,https://yourblog.net,24 +50,excellent,https://videosite.com,6 +82,good,https://github.com,96 +72,bad,https://yourblog.net,18 +55,no,http://example.com,22 +100,excellent,http://example.com,94 +50,maybe,http://mysite.org,14 +88,fine,http://forum.com,13 +56,ok,https://musicstream.com,69 +46,excellent,https://musicstream.com,14 +22,yes,https://github.com,57 +19,maybe,https://google.com,19 +68,excellent,http://example.com,11 +23,yes,https://videosite.com,64 +19,bad,https://shoponline.com,72 +91,yes,https://github.com,41 +99,good,http://mysite.org,91 +39,bad,https://github.com,46 +10,great,http://example.com,82 +72,bad,http://example.com,92 +63,bad,https://videosite.com,14 +33,great,http://example.com,37 +18,great,https://github.com,3 +92,ok,http://example.com,43 +37,excellent,http://mysite.org,92 +31,maybe,https://musicstream.com,24 +34,fine,https://yourblog.net,73 +27,good,http://mysite.org,32 +20,excellent,https://google.com,42 +56,ok,http://example.com,67 +80,fine,https://videosite.com,71 +94,great,http://forum.com,33 +89,no,https://videosite.com,19 +69,no,https://github.com,46 +10,excellent,https://yourblog.net,55 +31,no,https://musicstream.com,47 +76,bad,http://mysite.org,39 +77,excellent,https://yourblog.net,44 +28,maybe,https://google.com,14 +63,ok,http://news.com,69 +98,bad,https://musicstream.com,3 +52,maybe,https://google.com,70 +42,fine,http://mysite.org,90 +45,no,http://forum.com,69 +69,great,https://github.com,67 +41,maybe,http://mysite.org,100 +25,terrible,http://forum.com,55 +88,no,https://videosite.com,50 +27,good,http://example.com,45 +18,excellent,https://videosite.com,22 +52,ok,http://mysite.org,48 +25,bad,https://google.com,58 +42,maybe,https://musicstream.com,55 +17,no,http://forum.com,87 +57,maybe,https://shoponline.com,23 +5,terrible,https://musicstream.com,65 +15,maybe,http://news.com,63 +74,good,https://shoponline.com,45 +60,terrible,http://news.com,91 +37,fine,http://news.com,97 +57,good,https://github.com,6 +28,no,https://yourblog.net,15 +3,excellent,https://musicstream.com,60 +63,bad,https://yourblog.net,94 +10,terrible,http://example.com,31 +93,no,https://google.com,3 +97,yes,https://shoponline.com,30 +40,ok,http://mysite.org,51 +52,great,https://google.com,80 +25,maybe,https://yourblog.net,85 +48,no,https://musicstream.com,43 +39,bad,http://mysite.org,34 +71,no,https://shoponline.com,90 +30,great,https://yourblog.net,64 +28,maybe,https://shoponline.com,84 +82,excellent,http://example.com,27 +15,bad,http://mysite.org,84 +5,good,http://mysite.org,18 +32,good,http://example.com,58 +97,yes,https://google.com,86 +35,bad,https://shoponline.com,30 +4,excellent,https://videosite.com,31 +10,terrible,https://videosite.com,54 +28,terrible,https://musicstream.com,43 +45,excellent,https://musicstream.com,87 +57,great,http://mysite.org,67 +15,excellent,http://forum.com,5 +91,maybe,http://forum.com,78 +29,good,https://shoponline.com,89 +50,ok,http://mysite.org,4 +98,yes,http://example.com,41 +20,no,https://videosite.com,68 +7,terrible,http://mysite.org,57 +59,yes,http://example.com,26 +87,good,https://github.com,77 +89,fine,http://forum.com,88 +37,no,https://github.com,19 +52,maybe,https://videosite.com,65 +43,maybe,http://example.com,54 +17,excellent,http://forum.com,67 +0,yes,http://example.com,6 +46,terrible,http://news.com,40 +92,great,http://news.com,54 +7,great,https://github.com,66 +94,terrible,https://videosite.com,9 +55,no,https://github.com,8 +99,ok,https://videosite.com,50 +69,fine,https://videosite.com,79 +56,no,https://google.com,87 +53,yes,https://videosite.com,15 +15,ok,http://example.com,59 +87,excellent,http://forum.com,63 +99,fine,https://shoponline.com,33 +59,maybe,https://google.com,90 +38,fine,https://videosite.com,67 +73,terrible,https://shoponline.com,52 +81,terrible,https://github.com,1 +65,excellent,http://mysite.org,74 +60,ok,https://musicstream.com,35 +40,excellent,https://yourblog.net,0 +64,no,http://news.com,83 +74,good,http://news.com,45 +93,no,https://google.com,80 +72,terrible,http://mysite.org,16 +28,excellent,http://mysite.org,26 +81,maybe,http://forum.com,20 +5,yes,https://shoponline.com,68 +61,terrible,https://yourblog.net,52 +90,ok,http://mysite.org,26 +97,no,http://mysite.org,6 +44,terrible,https://google.com,83 +49,no,https://github.com,64 +81,great,http://forum.com,30 +67,good,https://videosite.com,93 +66,ok,https://google.com,66 +20,fine,https://yourblog.net,45 +53,excellent,https://musicstream.com,20 +27,terrible,https://github.com,36 +90,yes,https://videosite.com,67 +72,ok,https://shoponline.com,41 +100,no,https://github.com,39 +59,yes,http://mysite.org,43 +98,good,https://musicstream.com,99 +96,bad,http://forum.com,17 +71,great,https://shoponline.com,50 +80,great,http://example.com,19 +30,excellent,https://github.com,85 +70,bad,http://news.com,61 +6,bad,https://shoponline.com,57 +51,yes,https://shoponline.com,51 +63,no,http://example.com,37 +24,great,http://forum.com,0 +10,yes,http://mysite.org,52 +38,maybe,http://mysite.org,41 +79,maybe,http://example.com,61 +99,fine,http://example.com,25 +76,ok,https://musicstream.com,73 +79,no,http://news.com,97 +32,yes,https://github.com,64 +29,fine,https://shoponline.com,98 +34,excellent,http://forum.com,74 +19,fine,http://example.com,37 +11,great,http://mysite.org,57 +48,great,http://mysite.org,70 +29,yes,http://forum.com,52 +69,no,https://shoponline.com,45 +39,no,https://musicstream.com,59 +42,fine,http://forum.com,50 +10,yes,http://forum.com,95 +1,bad,https://yourblog.net,95 +93,terrible,https://musicstream.com,24 +96,ok,http://example.com,70 +75,ok,https://google.com,45 +60,terrible,https://videosite.com,96 +77,excellent,http://mysite.org,69 +30,fine,https://yourblog.net,77 +13,maybe,https://shoponline.com,90 +25,good,https://github.com,41 +59,no,https://videosite.com,32 +43,excellent,https://github.com,12 +32,maybe,https://google.com,17 +94,maybe,https://shoponline.com,29 +5,great,http://example.com,94 +48,great,http://example.com,59 +98,terrible,https://yourblog.net,71 +9,excellent,https://github.com,85 +52,maybe,http://example.com,44 +25,excellent,https://musicstream.com,1 +88,bad,http://mysite.org,3 +83,bad,https://google.com,78 +86,fine,https://google.com,76 +8,maybe,https://yourblog.net,61 +95,great,https://shoponline.com,39 +86,terrible,https://github.com,31 +48,bad,https://videosite.com,36 +14,fine,https://google.com,61 +27,no,http://news.com,97 +96,ok,http://news.com,88 +38,no,https://musicstream.com,38 +86,no,https://shoponline.com,48 +90,excellent,https://yourblog.net,72 +68,maybe,http://example.com,0 +100,good,http://news.com,68 +43,good,https://yourblog.net,12 +83,great,http://mysite.org,64 +84,bad,https://yourblog.net,20 +94,yes,https://github.com,15 +33,excellent,https://musicstream.com,53 +65,bad,http://forum.com,9 +25,yes,http://example.com,30 +8,maybe,https://videosite.com,83 +66,maybe,http://example.com,86 +27,terrible,https://yourblog.net,40 +7,good,http://news.com,29 +21,good,https://google.com,14 +35,maybe,https://videosite.com,45 +46,great,https://videosite.com,38 +18,yes,https://github.com,40 +90,good,https://videosite.com,84 +82,great,http://example.com,20 +5,excellent,http://forum.com,100 +30,terrible,http://example.com,63 +60,no,https://google.com,93 +72,maybe,https://google.com,99 +53,maybe,http://forum.com,52 +87,great,http://forum.com,71 +51,bad,https://shoponline.com,71 +22,good,http://forum.com,89 +87,great,http://forum.com,90 +58,excellent,https://github.com,93 +62,no,https://videosite.com,2 +86,maybe,https://shoponline.com,81 +51,excellent,http://news.com,1 +20,no,http://forum.com,72 +51,no,http://news.com,35 +57,good,https://yourblog.net,41 +71,great,http://news.com,33 +51,fine,https://videosite.com,2 +15,yes,https://musicstream.com,14 +74,yes,http://news.com,64 +41,ok,http://mysite.org,21 +58,fine,https://musicstream.com,29 +0,terrible,http://news.com,77 +94,fine,https://github.com,91 +66,maybe,http://news.com,47 +97,no,http://news.com,18 +25,maybe,https://github.com,26 +26,bad,https://yourblog.net,21 +25,great,https://google.com,33 +17,fine,https://google.com,83 +20,good,http://example.com,40 +3,fine,https://shoponline.com,16 +29,yes,http://news.com,23 +71,excellent,https://github.com,30 +61,ok,https://yourblog.net,24 +14,bad,http://mysite.org,73 +37,ok,http://news.com,52 +92,great,http://news.com,40 +70,bad,https://yourblog.net,29 +39,terrible,https://videosite.com,9 +23,ok,http://example.com,38 +52,excellent,https://yourblog.net,46 +56,ok,https://musicstream.com,76 +83,yes,http://forum.com,96 +1,no,http://news.com,27 +72,excellent,http://mysite.org,12 +75,no,http://forum.com,97 +21,ok,https://yourblog.net,87 +41,yes,http://forum.com,44 +89,excellent,http://mysite.org,40 +46,good,https://google.com,39 +75,terrible,https://shoponline.com,88 +51,terrible,https://shoponline.com,33 +15,terrible,http://mysite.org,20 +93,great,https://yourblog.net,92 +45,excellent,https://musicstream.com,7 +23,excellent,https://google.com,81 +54,no,http://example.com,90 +45,great,https://yourblog.net,33 +67,yes,https://github.com,52 +20,good,https://github.com,44 +39,excellent,https://musicstream.com,60 +20,maybe,https://google.com,20 +16,yes,http://forum.com,19 +52,bad,https://yourblog.net,35 +40,fine,https://google.com,86 +43,ok,https://videosite.com,17 +69,terrible,https://shoponline.com,8 +86,terrible,https://github.com,45 +17,no,https://google.com,29 +53,fine,https://shoponline.com,4 +31,great,https://shoponline.com,5 +51,maybe,https://videosite.com,92 +21,bad,https://videosite.com,0 +96,bad,http://example.com,82 +89,bad,http://forum.com,97 +73,terrible,http://mysite.org,17 +58,bad,https://videosite.com,18 +13,excellent,https://musicstream.com,47 +9,good,https://google.com,3 +46,ok,https://musicstream.com,75 +47,excellent,https://yourblog.net,15 +49,good,https://musicstream.com,16 +47,terrible,https://yourblog.net,60 +63,good,https://musicstream.com,19 +56,good,https://videosite.com,97 +88,great,http://news.com,75 +46,terrible,http://mysite.org,61 +51,ok,https://google.com,90 +67,excellent,http://example.com,56 +4,yes,https://musicstream.com,67 +60,excellent,http://news.com,24 +29,no,https://musicstream.com,38 +37,ok,https://musicstream.com,51 +31,yes,http://mysite.org,66 +4,bad,https://google.com,90 +41,good,http://news.com,64 +19,excellent,https://musicstream.com,19 +79,maybe,https://musicstream.com,56 +100,no,http://news.com,97 +64,yes,https://yourblog.net,8 +45,fine,http://news.com,87 +10,terrible,https://yourblog.net,52 +50,excellent,https://musicstream.com,82 +45,great,http://mysite.org,96 +10,no,https://musicstream.com,0 +28,excellent,http://news.com,77 +100,good,https://shoponline.com,72 +3,fine,http://news.com,68 +87,good,https://shoponline.com,93 +16,ok,http://news.com,65 +41,maybe,http://forum.com,99 +81,maybe,http://mysite.org,33 +1,maybe,http://mysite.org,45 +33,great,https://google.com,64 +73,bad,https://google.com,56 +67,terrible,http://example.com,14 +0,no,https://yourblog.net,8 +25,good,https://google.com,65 +4,ok,http://example.com,74 +89,maybe,https://videosite.com,86 +32,great,http://forum.com,27 +58,no,http://forum.com,91 +64,good,https://github.com,45 +32,bad,https://github.com,92 +31,maybe,http://news.com,12 +70,good,https://yourblog.net,77 +25,bad,https://google.com,8 +25,bad,https://musicstream.com,82 +49,good,http://mysite.org,95 +11,excellent,http://forum.com,39 +28,no,https://yourblog.net,62 +48,maybe,http://example.com,12 +12,excellent,https://shoponline.com,44 +19,excellent,https://shoponline.com,22 +77,yes,https://yourblog.net,94 +24,fine,http://news.com,20 +95,great,http://forum.com,51 +21,maybe,http://example.com,58 +66,no,https://google.com,94 +32,great,https://videosite.com,92 +26,good,https://github.com,54 +52,terrible,https://videosite.com,54 +24,good,http://example.com,89 +67,fine,https://musicstream.com,24 +64,bad,http://news.com,79 +40,bad,https://yourblog.net,76 +2,excellent,https://videosite.com,51 +59,no,http://mysite.org,39 +64,ok,https://yourblog.net,82 +56,ok,http://example.com,75 +82,maybe,https://google.com,87 +14,good,http://news.com,24 +23,yes,http://forum.com,16 +45,great,https://shoponline.com,51 +91,fine,https://google.com,18 +60,bad,https://musicstream.com,64 +33,fine,https://shoponline.com,66 +83,fine,http://mysite.org,40 +7,yes,https://github.com,84 +9,no,https://musicstream.com,90 +51,ok,https://google.com,32 +83,bad,http://example.com,63 +2,terrible,https://videosite.com,2 +76,terrible,https://yourblog.net,43 +6,yes,https://videosite.com,26 +98,great,https://shoponline.com,34 +38,ok,http://example.com,14 +90,fine,http://mysite.org,44 +21,fine,http://forum.com,17 +20,yes,http://mysite.org,11 +47,no,https://shoponline.com,19 +69,bad,https://videosite.com,60 +30,excellent,http://forum.com,48 +43,great,https://yourblog.net,35 +94,yes,https://google.com,54 +97,good,http://forum.com,63 +62,excellent,https://videosite.com,44 +42,ok,https://shoponline.com,14 +11,no,http://example.com,80 +80,no,https://videosite.com,60 +2,excellent,https://musicstream.com,64 +49,maybe,https://videosite.com,18 +48,maybe,http://mysite.org,88 +46,bad,https://musicstream.com,65 +86,excellent,https://yourblog.net,90 +75,ok,https://google.com,54 +81,ok,http://mysite.org,64 +59,good,https://github.com,74 +24,good,http://mysite.org,84 +97,excellent,http://example.com,38 +73,excellent,http://mysite.org,98 +78,bad,http://example.com,57 +37,fine,https://videosite.com,84 +89,maybe,http://news.com,12 +89,bad,https://yourblog.net,36 +63,terrible,http://example.com,5 +91,no,https://videosite.com,68 +52,fine,http://forum.com,26 +34,no,https://shoponline.com,51 +38,no,https://google.com,66 +78,ok,https://shoponline.com,59 +14,great,https://shoponline.com,90 +93,good,http://mysite.org,31 +96,ok,https://shoponline.com,93 +73,maybe,https://shoponline.com,9 +5,ok,https://shoponline.com,37 +73,excellent,http://example.com,49 +32,no,https://github.com,39 +61,bad,http://example.com,62 +20,ok,https://musicstream.com,55 +33,yes,https://shoponline.com,2 +31,terrible,https://google.com,58 +1,bad,http://forum.com,78 +62,great,https://yourblog.net,38 +80,great,http://mysite.org,54 +6,terrible,http://mysite.org,49 +45,yes,https://videosite.com,13 +97,fine,https://shoponline.com,71 +18,no,http://mysite.org,91 +94,bad,https://yourblog.net,52 +4,excellent,http://example.com,92 +95,great,http://forum.com,100 +99,no,https://yourblog.net,36 +13,terrible,https://github.com,55 +36,bad,https://yourblog.net,82 +18,fine,https://videosite.com,7 +43,good,https://github.com,60 +83,excellent,https://google.com,14 +0,fine,https://shoponline.com,34 +66,bad,https://videosite.com,14 +66,no,http://news.com,70 +29,good,http://news.com,65 +83,yes,http://news.com,49 +71,terrible,http://forum.com,85 +52,yes,http://example.com,55 +59,no,http://mysite.org,94 +30,no,http://forum.com,38 +98,good,http://example.com,89 +52,ok,https://yourblog.net,71 +100,bad,http://example.com,73 +43,maybe,https://musicstream.com,56 +56,no,https://yourblog.net,33 +30,yes,http://forum.com,71 +19,maybe,https://google.com,15 +33,ok,http://news.com,69 +84,excellent,https://shoponline.com,6 +75,fine,https://musicstream.com,70 +95,great,https://musicstream.com,16 +5,terrible,https://github.com,90 +90,yes,https://videosite.com,10 +58,excellent,https://yourblog.net,0 +4,bad,https://musicstream.com,30 +54,good,http://example.com,20 +56,excellent,https://videosite.com,48 +65,fine,http://example.com,16 +61,bad,http://example.com,69 +87,maybe,http://forum.com,0 +61,bad,https://github.com,16 +0,maybe,https://google.com,60 +88,terrible,http://mysite.org,24 +17,maybe,https://github.com,81 +61,excellent,https://musicstream.com,39 +84,excellent,https://yourblog.net,30 +51,yes,http://news.com,99 +84,excellent,http://news.com,79 +56,yes,https://videosite.com,51 +23,yes,https://github.com,75 +30,no,https://github.com,92 +81,excellent,https://videosite.com,67 +93,ok,http://example.com,13 +82,good,http://forum.com,75 +100,maybe,http://example.com,37 +52,terrible,https://musicstream.com,26 +90,yes,https://videosite.com,8 +7,yes,http://news.com,5 +35,excellent,https://github.com,90 +16,terrible,http://forum.com,82 +7,terrible,https://github.com,45 +18,great,http://example.com,18 +29,maybe,https://yourblog.net,3 +46,good,http://forum.com,83 +1,maybe,http://example.com,27 +31,fine,https://videosite.com,29 +10,bad,https://musicstream.com,86 +68,no,http://example.com,19 +17,great,http://forum.com,93 +17,good,https://videosite.com,54 +83,bad,https://musicstream.com,51 +89,yes,https://yourblog.net,58 +42,fine,https://github.com,19 +16,yes,https://google.com,93 +72,good,https://videosite.com,53 +56,yes,https://google.com,39 +70,maybe,https://musicstream.com,82 +3,fine,https://shoponline.com,25 +71,great,https://videosite.com,100 +8,yes,http://mysite.org,91 +20,yes,https://videosite.com,88 +40,fine,http://example.com,31 +65,good,http://forum.com,3 +23,yes,http://forum.com,69 +32,ok,http://mysite.org,94 +38,terrible,http://example.com,23 +80,good,https://github.com,63 +28,yes,https://videosite.com,11 +92,bad,http://forum.com,83 +67,maybe,https://musicstream.com,70 +98,maybe,http://forum.com,4 +34,maybe,https://videosite.com,68 +61,great,https://google.com,9 +83,fine,https://yourblog.net,100 +35,bad,https://yourblog.net,65 +16,fine,https://videosite.com,5 +90,yes,http://mysite.org,8 +78,terrible,https://shoponline.com,95 +4,yes,https://google.com,47 +4,good,https://musicstream.com,13 +56,terrible,http://example.com,96 +97,no,http://example.com,95 +77,maybe,http://news.com,15 +88,maybe,https://google.com,60 +0,fine,https://shoponline.com,14 +72,no,https://musicstream.com,69 +61,terrible,http://example.com,31 +24,yes,https://github.com,85 +68,great,http://example.com,97 +50,great,https://videosite.com,19 +6,excellent,https://yourblog.net,91 +81,excellent,https://yourblog.net,98 +22,no,http://mysite.org,45 +67,yes,https://yourblog.net,53 +60,ok,https://yourblog.net,20 +44,good,https://yourblog.net,37 +73,ok,https://github.com,4 +54,good,http://example.com,14 +69,good,https://yourblog.net,39 +59,excellent,http://example.com,15 +53,terrible,https://google.com,29 +56,no,https://musicstream.com,46 +76,ok,https://shoponline.com,64 +2,good,https://musicstream.com,74 +51,excellent,http://forum.com,66 +17,terrible,http://news.com,8 +66,maybe,https://shoponline.com,92 +26,fine,http://mysite.org,51 +45,excellent,http://news.com,50 +92,yes,https://videosite.com,93 +34,terrible,https://google.com,50 +40,yes,https://shoponline.com,73 +50,bad,http://forum.com,10 +38,bad,https://google.com,28 +1,terrible,http://mysite.org,97 +7,fine,https://shoponline.com,18 +36,excellent,https://videosite.com,73 +44,no,https://yourblog.net,10 +60,ok,https://videosite.com,45 +59,ok,https://videosite.com,81 +93,yes,http://news.com,75 +73,no,https://github.com,44 +39,yes,https://musicstream.com,71 +46,excellent,https://musicstream.com,25 +0,maybe,http://mysite.org,82 +65,bad,http://mysite.org,27 +57,no,http://example.com,60 +29,maybe,https://google.com,48 +68,maybe,http://example.com,34 +33,excellent,http://forum.com,70 +12,maybe,https://yourblog.net,63 +83,maybe,http://forum.com,43 +25,yes,https://yourblog.net,74 +58,terrible,https://yourblog.net,79 +31,no,http://example.com,72 +21,great,http://mysite.org,74 +5,no,http://forum.com,41 +44,fine,https://github.com,75 +63,great,https://github.com,72 +9,good,https://github.com,93 +96,maybe,https://yourblog.net,83 +18,yes,http://example.com,14 +13,terrible,https://github.com,80 +30,bad,https://yourblog.net,65 +69,yes,http://news.com,30 +19,yes,http://mysite.org,96 +91,fine,http://example.com,46 +68,maybe,https://github.com,37 +2,terrible,https://yourblog.net,41 +33,ok,https://shoponline.com,39 +54,terrible,http://news.com,60 +60,maybe,http://forum.com,89 +76,great,http://mysite.org,37 +66,bad,https://google.com,16 +63,terrible,https://musicstream.com,60 +44,no,https://yourblog.net,61 +66,great,http://example.com,16 +4,good,https://github.com,67 +17,great,https://shoponline.com,100 +84,good,http://mysite.org,56 +29,maybe,https://musicstream.com,32 +19,fine,https://shoponline.com,51 +36,good,http://mysite.org,77 +37,terrible,http://mysite.org,95 diff --git a/be/test/olap/rowset/segment_v2/inverted_index/index_compaction_with_deleted_term.cpp b/be/test/olap/rowset/segment_v2/inverted_index/index_compaction_with_deleted_term.cpp new file mode 100644 index 00000000000000..af2e2de4aea1b2 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/index_compaction_with_deleted_term.cpp @@ -0,0 +1,583 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include + +#include "common/exception.h" +#include "common/status.h" +#include "json2pb/json_to_pb.h" +#include "olap/base_compaction.h" +#include "olap/rowset/beta_rowset.h" +#include "olap/rowset/beta_rowset_writer.h" +#include "olap/rowset/rowset_factory.h" +#include "olap/rowset/segment_v2/inverted_index_compaction.h" +#include "olap/rowset/segment_v2/inverted_index_compound_directory.h" +#include "olap/storage_engine.h" + +namespace doris { + +using namespace doris::vectorized; + +constexpr static uint32_t MAX_PATH_LEN = 1024; +constexpr static std::string_view dest_dir = "/ut_dir/inverted_index_test"; +constexpr static std::string_view tmp_dir = "./ut_dir/tmp"; +static int64_t inc_id = 1000; + +struct DataRow { + int key; + std::string word; + std::string url; + int num; +}; + +static std::vector read_data(const std::string file_name) { + std::ifstream file(file_name); + EXPECT_TRUE(file.is_open()); + + std::string line; + std::vector data; + + while (std::getline(file, line)) { + std::stringstream ss(line); + std::string item; + DataRow row; + EXPECT_TRUE(std::getline(ss, item, ',')); + row.key = std::stoi(item); + EXPECT_TRUE(std::getline(ss, item, ',')); + row.word = item; + EXPECT_TRUE(std::getline(ss, item, ',')); + row.url = item; + EXPECT_TRUE(std::getline(ss, item, ',')); + row.num = std::stoi(item); + data.emplace_back(std::move(row)); + } + + file.close(); + return data; +} + +static void check_terms_stats(lucene::store::Directory* dir) { + lucene::index::IndexReader* r = lucene::index::IndexReader::open(dir); + + printf("Max Docs: %d\n", r->maxDoc()); + printf("Num Docs: %d\n", r->numDocs()); + + int64_t ver = r->getCurrentVersion(dir); + printf("Current Version: %f\n", (float_t)ver); + + lucene::index::TermEnum* te = r->terms(); + int32_t nterms; + for (nterms = 0; te->next(); nterms++) { + /* empty */ + std::string token = + lucene_wcstoutf8string(te->term(false)->text(), te->term(false)->textLength()); + std::string field = lucene_wcstoutf8string(te->term(false)->field(), + lenOfString(te->term(false)->field())); + + printf("Field: %s ", field.c_str()); + printf("Term: %s ", token.c_str()); + printf("Freq: %d\n", te->docFreq()); + if (false) { + lucene::index::TermDocs* td = r->termDocs(te->term()); + while (td->next()) { + printf("DocID: %d ", td->doc()); + printf("TermFreq: %d\n", td->freq()); + } + _CLLDELETE(td); + } + } + printf("Term count: %d\n\n", nterms); + te->close(); + _CLLDELETE(te); + + r->close(); + _CLLDELETE(r); +} +static Status check_idx_file_correctness(lucene::store::Directory* index_reader, + lucene::store::Directory* tmp_index_reader) { + lucene::index::IndexReader* idx_reader = lucene::index::IndexReader::open(index_reader); + lucene::index::IndexReader* tmp_idx_reader = lucene::index::IndexReader::open(tmp_index_reader); + + // compare numDocs + if (idx_reader->numDocs() != tmp_idx_reader->numDocs()) { + return Status::InternalError( + "index compaction correctness check failed, numDocs not equal, idx_numDocs={}, " + "tmp_idx_numDocs={}", + idx_reader->numDocs(), tmp_idx_reader->numDocs()); + } + + lucene::index::TermEnum* term_enum = idx_reader->terms(); + lucene::index::TermEnum* tmp_term_enum = tmp_idx_reader->terms(); + lucene::index::TermDocs* term_docs = nullptr; + lucene::index::TermDocs* tmp_term_docs = nullptr; + + // iterate TermEnum + while (term_enum->next() && tmp_term_enum->next()) { + std::string token = lucene_wcstoutf8string(term_enum->term(false)->text(), + term_enum->term(false)->textLength()); + std::string field = lucene_wcstoutf8string(term_enum->term(false)->field(), + lenOfString(term_enum->term(false)->field())); + std::string tmp_token = lucene_wcstoutf8string(tmp_term_enum->term(false)->text(), + tmp_term_enum->term(false)->textLength()); + std::string tmp_field = + lucene_wcstoutf8string(tmp_term_enum->term(false)->field(), + lenOfString(tmp_term_enum->term(false)->field())); + // compare token and field + if (field != tmp_field) { + return Status::InternalError( + "index compaction correctness check failed, fields not equal, field={}, " + "tmp_field={}", + field, field); + } + if (token != tmp_token) { + return Status::InternalError( + "index compaction correctness check failed, tokens not equal, token={}, " + "tmp_token={}", + token, tmp_token); + } + + // get term's docId and freq + term_docs = idx_reader->termDocs(term_enum->term(false)); + tmp_term_docs = tmp_idx_reader->termDocs(tmp_term_enum->term(false)); + + // compare term's docId and freq + while (term_docs->next() && tmp_term_docs->next()) { + if (term_docs->doc() != tmp_term_docs->doc() || + term_docs->freq() != tmp_term_docs->freq()) { + return Status::InternalError( + "index compaction correctness check failed, docId or freq not equal, " + "docId={}, tmp_docId={}, freq={}, tmp_freq={}", + term_docs->doc(), tmp_term_docs->doc(), term_docs->freq(), + tmp_term_docs->freq()); + } + } + + // check if there are remaining docs + if (term_docs->next() || tmp_term_docs->next()) { + return Status::InternalError( + "index compaction correctness check failed, number of docs not equal for " + "term={}, tmp_term={}", + token, tmp_token); + } + if (term_docs) { + term_docs->close(); + _CLLDELETE(term_docs); + } + if (tmp_term_docs) { + tmp_term_docs->close(); + _CLLDELETE(tmp_term_docs); + } + } + + // check if there are remaining terms + if (term_enum->next() || tmp_term_enum->next()) { + return Status::InternalError( + "index compaction correctness check failed, number of terms not equal"); + } + if (term_enum) { + term_enum->close(); + _CLLDELETE(term_enum); + } + if (tmp_term_enum) { + tmp_term_enum->close(); + _CLLDELETE(tmp_term_enum); + } + if (idx_reader) { + idx_reader->close(); + _CLLDELETE(idx_reader); + } + if (tmp_idx_reader) { + tmp_idx_reader->close(); + _CLLDELETE(tmp_idx_reader); + } + return Status::OK(); +} + +static RowsetSharedPtr do_compaction(std::vector rowsets, + StorageEngine* engine_ref, TabletSharedPtr tablet, + bool is_index_compaction) { + config::inverted_index_compaction_enable = is_index_compaction; + // only base compaction can handle delete predicate + BaseCompaction compaction(tablet); + compaction._input_rowsets = std::move(rowsets); + compaction.build_basic_info(); + + std::vector input_rs_readers; + input_rs_readers.reserve(compaction._input_rowsets.size()); + for (auto& rowset : compaction._input_rowsets) { + RowsetReaderSharedPtr rs_reader; + EXPECT_TRUE(rowset->create_reader(&rs_reader).ok()); + input_rs_readers.push_back(std::move(rs_reader)); + } + + RowsetWriterContext ctx; + EXPECT_TRUE(compaction.construct_output_rowset_writer(ctx, true).ok()); + + if (is_index_compaction) { + EXPECT_TRUE(ctx.skip_inverted_index.size() == 2); + // col v1 + EXPECT_TRUE(ctx.skip_inverted_index.contains(1)); + // col v2 + EXPECT_TRUE(ctx.skip_inverted_index.contains(2)); + } + + Merger::Statistics stats; + stats.rowid_conversion = &compaction._rowid_conversion; + Status st = Merger::vertical_merge_rowsets(tablet, compaction.compaction_type(), + compaction._cur_tablet_schema, input_rs_readers, + compaction._output_rs_writer.get(), 100000, &stats); + EXPECT_TRUE(st.ok()) << st.to_string(); + + st = compaction._output_rs_writer->build(compaction._output_rowset); + EXPECT_TRUE(st.ok()) << st.to_string(); + + EXPECT_TRUE(compaction._output_rowset->num_segments() == 1); + + // do index compaction + + if (stats.rowid_conversion && config::inverted_index_compaction_enable && + !ctx.skip_inverted_index.empty()) { + OlapStopWatch inverted_watch; + + // translation vec + // <> + // the first level vector: index indicates src segment. + // the second level vector: index indicates row id of source segment, + // value indicates row id of destination segment. + // indicates current row not exist. + std::vector>> trans_vec = + stats.rowid_conversion->get_rowid_conversion_map(); + + // source rowset,segment -> index_id + std::map, uint32_t> src_seg_to_id_map = + stats.rowid_conversion->get_src_segment_to_id_map(); + // dest rowset id + RowsetId dest_rowset_id = stats.rowid_conversion->get_dst_rowset_id(); + // dest segment id -> num rows + std::vector dest_segment_num_rows; + auto st = compaction._output_rs_writer->get_segment_num_rows(&dest_segment_num_rows); + EXPECT_TRUE(st.ok()) << st.to_string(); + + auto src_segment_num = src_seg_to_id_map.size(); + auto dest_segment_num = dest_segment_num_rows.size(); + + if (dest_segment_num > 0) { + // src index files + // format: rowsetId_segmentId + std::vector src_index_files(src_segment_num); + for (const auto& m : src_seg_to_id_map) { + std::pair p = m.first; + src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second); + } + + // dest index files + // format: rowsetId_segmentId + std::vector dest_index_files(dest_segment_num); + for (int i = 0; i < dest_segment_num; ++i) { + auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i); + dest_index_files[i] = prefix; + } + + // create index_writer to compaction indexes + auto& fs = compaction._output_rowset->rowset_meta()->fs(); + auto& tablet_path = tablet->tablet_path(); + + // we choose the first destination segment name as the temporary index writer path + // Used to distinguish between different index compaction + auto index_writer_path = tablet_path + "/" + dest_index_files[0]; + Status status = Status::OK(); + std::for_each( + ctx.skip_inverted_index.cbegin(), ctx.skip_inverted_index.cend(), + [&src_segment_num, &dest_segment_num, &index_writer_path, &src_index_files, + &dest_index_files, &fs, &tablet_path, &trans_vec, &dest_segment_num_rows, + &status, &compaction](int32_t column_uniq_id) { + auto error_handler = [&compaction](int64_t index_id, + int64_t column_uniq_id) { + for (auto& rowset : compaction._input_rowsets) { + rowset->set_skip_index_compaction(column_uniq_id); + } + }; + + auto index_id = + compaction._cur_tablet_schema->get_inverted_index(column_uniq_id) + ->index_id(); + + // if index properties are different, index compaction maybe needs to be skipped. + std::optional> first_properties; + for (const auto& rowset : compaction._input_rowsets) { + const auto* tablet_index = + rowset->tablet_schema()->get_inverted_index(column_uniq_id); + const auto& properties = tablet_index->properties(); + if (!first_properties.has_value()) { + first_properties = properties; + } else { + if (properties != first_properties.value()) { + error_handler(index_id, column_uniq_id); + return; + } + } + } + + try { + Status st = compact_column(index_id, src_segment_num, dest_segment_num, + src_index_files, dest_index_files, fs, + index_writer_path, tablet_path, trans_vec, + dest_segment_num_rows); + if (!st.ok()) { + error_handler(index_id, column_uniq_id); + status = Status::Error( + st.msg()); + } + } catch (CLuceneError& e) { + error_handler(index_id, column_uniq_id); + status = Status::Error( + e.what()); + } + }); + + EXPECT_TRUE(status.ok()) << status.to_string(); + + } else { + EXPECT_TRUE(false) << "should not reach skip doing index compaction"; + } + } + + return compaction._output_rowset; +} + +class IndexCompactionDeleteTest : public ::testing::Test { +protected: + void SetUp() override { + // absolute dir + char buffer[MAX_PATH_LEN]; + EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); + _curreent_dir = std::string(buffer); + _absolute_dir = _curreent_dir + std::string(dest_dir); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok()); + + // tmp dir + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok()); + std::vector paths; + paths.emplace_back(std::string(tmp_dir), 1024000000); + + // storage engine + doris::EngineOptions options; + _engine_ref = new StorageEngine(options); + _data_dir = std::make_unique(_absolute_dir); + static_cast(_data_dir->update_capacity()); + ExecEnv::GetInstance()->set_storage_engine(_engine_ref); + + // tablet_schema + TabletSchemaPB schema_pb; + schema_pb.set_keys_type(KeysType::DUP_KEYS); + + construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000, "key_index", 0, + "INT", "key"); + construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, "v1_index", 1, + "STRING", "v1"); + construct_column(schema_pb.add_column(), schema_pb.add_index(), 10002, "v2_index", 2, + "STRING", "v2", true); + construct_column(schema_pb.add_column(), schema_pb.add_index(), 10003, "v3_index", 3, "INT", + "v3"); + + _tablet_schema.reset(new TabletSchema); + _tablet_schema->init_from_pb(schema_pb); + + // tablet + TabletMetaSharedPtr tablet_meta(new TabletMeta()); + tablet_meta->_schema = _tablet_schema; + + _tablet.reset(new Tablet(tablet_meta, _data_dir.get())); + EXPECT_TRUE(_tablet->init().ok()); + } + void TearDown() override { + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok()); + + if (_engine_ref != nullptr) { + _engine_ref->stop(); + delete _engine_ref; + _engine_ref = nullptr; + ExecEnv::GetInstance()->set_storage_engine(nullptr); + } + } + + void init_rs_meta(RowsetMetaSharedPtr& rs_meta, int64_t start, int64_t end) { + std::string json_rowset_meta = R"({ + "rowset_id": 540081, + "tablet_id": 15673, + "partition_id": 10000, + "tablet_schema_hash": 567997577, + "rowset_type": "BETA_ROWSET", + "rowset_state": "VISIBLE", + "empty": false + })"; + RowsetMetaPB rowset_meta_pb; + json2pb::JsonToProtoMessage(json_rowset_meta, &rowset_meta_pb); + rowset_meta_pb.set_start_version(start); + rowset_meta_pb.set_end_version(end); + rs_meta->init_from_pb(rowset_meta_pb); + } + + RowsetSharedPtr create_delete_predicate_rowset(const TabletSchemaSPtr& schema, std::string pred, + int64_t version) { + DeletePredicatePB del_pred; + del_pred.add_sub_predicates(pred); + del_pred.set_version(1); + RowsetMetaSharedPtr rsm(new RowsetMeta()); + init_rs_meta(rsm, version, version); + RowsetId id; + id.init(version); + rsm->set_rowset_id(id); + rsm->set_delete_predicate(std::move(del_pred)); + rsm->set_tablet_schema(schema); + return std::make_shared(schema, _tablet->tablet_path(), rsm); + } + + void construct_column(ColumnPB* column_pb, TabletIndexPB* tablet_index, int64_t index_id, + const std::string& index_name, int32_t col_unique_id, + const std::string& column_type, const std::string& column_name, + bool parser = false) { + column_pb->set_unique_id(col_unique_id); + column_pb->set_name(column_name); + column_pb->set_type(column_type); + column_pb->set_is_key(false); + column_pb->set_is_nullable(true); + tablet_index->set_index_id(index_id); + tablet_index->set_index_name(index_name); + tablet_index->set_index_type(IndexType::INVERTED); + tablet_index->add_col_unique_id(col_unique_id); + if (parser) { + auto* properties = tablet_index->mutable_properties(); + (*properties)[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_UNICODE; + } + } + + RowsetWriterContext rowset_writer_context() { + RowsetWriterContext context; + RowsetId rowset_id; + rowset_id.init(inc_id); + context.rowset_id = rowset_id; + context.rowset_type = BETA_ROWSET; + context.data_dir = _data_dir.get(); + context.rowset_state = VISIBLE; + context.tablet_schema = _tablet_schema; + context.rowset_dir = _tablet->tablet_path(); + context.version = Version(inc_id, inc_id); + context.max_rows_per_segment = 200; + inc_id++; + return context; + } + + IndexCompactionDeleteTest() = default; + ~IndexCompactionDeleteTest() override = default; + +private: + TabletSchemaSPtr _tablet_schema = nullptr; + StorageEngine* _engine_ref = nullptr; + std::unique_ptr _data_dir = nullptr; + TabletSharedPtr _tablet = nullptr; + std::string _absolute_dir; + std::string _curreent_dir; +}; + +TEST_F(IndexCompactionDeleteTest, delete_index_test) { + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok()); + std::string data_file1 = + _curreent_dir + "/be/test/olap/rowset/segment_v2/inverted_index/data/data1.csv"; + std::string data_file2 = + _curreent_dir + "/be/test/olap/rowset/segment_v2/inverted_index/data/data2.csv"; + + std::vector> data; + data.emplace_back(read_data(data_file1)); + data.emplace_back(read_data(data_file2)); + + std::vector rowsets(data.size()); + for (int i = 0; i < data.size(); i++) { + std::unique_ptr rowset_writer; + const auto& res = + RowsetFactory::create_rowset_writer(rowset_writer_context(), false, &rowset_writer); + EXPECT_TRUE(res.ok()) << res.to_string(); + + Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + for (const auto& row : data[i]) { + vectorized::Field key = Int32(row.key); + vectorized::Field v1 = row.word; + vectorized::Field v2 = row.url; + vectorized::Field v3 = Int32(row.num); + columns[0]->insert(key); + columns[1]->insert(v1); + columns[2]->insert(v2); + columns[3]->insert(v3); + } + Status st = rowset_writer->add_block(&block); + EXPECT_TRUE(st.ok()) << st.to_string(); + st = rowset_writer->flush(); + EXPECT_TRUE(st.ok()) << st.to_string(); + EXPECT_TRUE(rowset_writer->build(rowsets[i]).ok()); + EXPECT_TRUE(_tablet->add_rowset(rowsets[i]).ok()); + EXPECT_TRUE(rowsets[i]->num_segments() == 5); + } + + // create delete predicate rowset and add to tablet + auto delete_rowset = create_delete_predicate_rowset(_tablet_schema, "v1='great'", inc_id++); + EXPECT_TRUE(_tablet->add_rowset(delete_rowset).ok()); + EXPECT_TRUE(_tablet->rowset_map().size() == 3); + rowsets.push_back(delete_rowset); + EXPECT_TRUE(rowsets.size() == 3); + + auto output_rowset_index = do_compaction(rowsets, _engine_ref, _tablet, true); + int seg_id = 0; + auto segment_file_name_index = + fmt::format("{}_{}.dat", output_rowset_index->rowset_id().to_string(), seg_id); + std::cout << "segment_file_name: " << segment_file_name_index << std::endl; + auto index_file_name_index = + InvertedIndexDescriptor::get_index_file_name(segment_file_name_index, 10001); + auto inverted_index_file_reader_index = std::make_unique( + DorisCompoundDirectoryFactory::getDirectory(output_rowset_index->_rowset_meta->fs(), + output_rowset_index->tablet_path().c_str()), + index_file_name_index.c_str(), config::inverted_index_read_buffer_size); + + auto output_rowset_normal = do_compaction(rowsets, _engine_ref, _tablet, false); + + auto segment_file_name_normal = + fmt::format("{}_{}.dat", output_rowset_index->rowset_id().to_string(), seg_id); + auto index_file_name_normal = + InvertedIndexDescriptor::get_index_file_name(segment_file_name_normal, 10001); + auto inverted_index_file_reader_normal = std::make_unique( + DorisCompoundDirectoryFactory::getDirectory( + output_rowset_normal->_rowset_meta->fs(), + output_rowset_normal->tablet_path().c_str()), + index_file_name_normal.c_str(), config::inverted_index_read_buffer_size); + + // check index file terms + check_terms_stats(inverted_index_file_reader_index.get()); + check_terms_stats(inverted_index_file_reader_normal.get()); + auto st = check_idx_file_correctness(inverted_index_file_reader_index.get(), + inverted_index_file_reader_normal.get()); + EXPECT_TRUE(st.ok()) << st.to_string(); +} + +} // namespace doris