-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path04-Spark Provide Schema.py
35 lines (23 loc) · 1.01 KB
/
04-Spark Provide Schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Databricks notebook source
from pyspark.sql import SparkSession
# Obtain the Spark session for this notebook: getOrCreate() hands back an
# already-running session when there is one, otherwise it starts a new one
# under the application name "Spark DataFrames".
spark = (
    SparkSession.builder
    .appName("Spark DataFrames")
    .getOrCreate()
)
# COMMAND ----------
# First pass: load the student CSV and let Spark work out the column types.
#   inferSchema -> column types are derived from the data
#   header      -> the first line supplies the column names
#   delimiter   -> fields are comma-separated
df = (
    spark.read
    .option('inferSchema', 'True')
    .option('header', 'True')
    .option('delimiter', ',')
    .csv('/FileStore/tables/StudentData.csv')
)

# Print a preview of the rows, then the inferred column names and types.
df.show()
df.printSchema()
# COMMAND ----------
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
# Explicit schema for the student CSV, listed in column order.
# Every column is declared nullable.
# NOTE(review): "roll" is declared as a string rather than an integer --
# presumably so it is treated as an identifier; confirm against the data.
schema = StructType([
    StructField(column_name, column_type, nullable=True)
    for column_name, column_type in (
        ("age", IntegerType()),
        ("gender", StringType()),
        ("name", StringType()),
        ("course", StringType()),
        ("roll", StringType()),
        ("marks", IntegerType()),
        ("email", StringType()),
    )
])
# COMMAND ----------
# Second pass: load the same CSV, but take the column types from the
# explicit `schema` instead of asking Spark to infer them. The header option
# is still set so the first line is read as column names, not as a data row.
df = (
    spark.read
    .schema(schema)
    .option('header', 'True')
    .csv('/FileStore/tables/StudentData.csv')
)

# Print a preview of the rows, then the column names and types now in effect.
df.show()
df.printSchema()
# COMMAND ----------