import io.trino.aws.proxy.server.testing.containers.PySparkContainer;
import io.trino.aws.proxy.server.testing.harness.BuilderFilter;
import io.trino.aws.proxy.server.testing.harness.TrinoAwsProxyTest;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import software.amazon.awssdk.services.s3.S3Client;

@@ -54,6 +55,13 @@ public TestPySparkSql(S3Client s3Client, PySparkContainer pySparkContainer)
        this.pySparkContainer = requireNonNull(pySparkContainer, "pySparkContainer is null");
    }

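+    // runs once before all tests in the class, so every test can rely on the "test" bucket existing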
+    @BeforeAll
+    public void setupBucket()
+    {
+        // create the test bucket
+        s3Client.createBucket(r -> r.bucket("test"));
+    }
+
    @Test
    public void testSql()
            throws Exception
@@ -75,12 +83,29 @@ public void testSql()
                "spark.sql(\"select * from %s.%s\").show()".formatted(DATABASE_NAME, TABLE_NAME)), line -> line.equals("| c| 30|"));
    }

-    public static void createDatabaseAndTable(S3Client s3Client, PySparkContainer container)
+    @Test
+    public void testParquet()
            throws Exception
    {
-        // create the test bucket
-        s3Client.createBucket(r -> r.bucket("test"));
+        // upload a CSV file
+        s3Client.putObject(r -> r.bucket("test").key("test_parquet/file.csv"), Path.of(Resources.getResource("test.csv").toURI()));

+        // read the CSV file and write it as Parquet
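+        // (the snippet below is piped into the container's PySpark REPL via stdin; the matcher
+        // accepts either a fresh ">>> " prompt or a "Write Job ... committed" log line as completion)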
+        clearInputStreamAndClose(inputToContainerStdin(pySparkContainer.containerId(), """
+                df = spark.read.csv("s3a://test/test_parquet/file.csv")
+                df.write.parquet("s3a://test/test_parquet/file.parquet")
+                """), line -> line.equals(">>> ") || line.matches(".*Write Job [\\w-]+ committed.*"));
+
+        // read the Parquet file
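+        // (the expected row comes from the test.csv fixture uploaded above)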
+        clearInputStreamAndClose(inputToContainerStdin(pySparkContainer.containerId(), """
+                parquetDF = spark.read.parquet("s3a://test/test_parquet/file.parquet")
+                parquetDF.show()
+                """), line -> line.equals("| John Galt| 28|"));
+    }
+
+    public static void createDatabaseAndTable(S3Client s3Client, PySparkContainer container)
+            throws Exception
+    {
        // upload a CSV file as a potential table
        s3Client.putObject(r -> r.bucket("test").key("table/file.csv"), Path.of(Resources.getResource("test.csv").toURI()));