-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathSchema_json_1.py
93 lines (35 loc) · 1.08 KB
/
Schema_json_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python
# coding: utf-8
# In[2]:
from pyspark.sql import SparkSession
# In[3]:
# Start (or reuse) the Spark session that every read below goes through.
spark = SparkSession.builder.appName('basics').getOrCreate()
# In[5]:
# No schema is passed here, so the reader works out column names and types from the JSON records.
df = spark.read.json('/home/tushar/Desktop/spark-and-python-for-big-data-with-pyspark/Python-and-Spark-for-Big-Data-master/Spark_DataFrames/people.json')
# In[7]:
df.show()
# In[29]:
df.printSchema()
# In[30]:
# A bare `df.columns` only displays inside a notebook; print it so the script shows it too.
print(df.columns)
# In[31]:
# The bare `df.describe()` cell was dropped: its result was discarded, and the next cell
# displays the same summary.
# In[32]:
df.describe().show()
# In[34]:
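# From here on we replace the inferred schema of the file with an explicit one.
# StructField describes a single column: its name, its data type, and whether it is nullable.
# StructType groups the StructFields into the complete schema handed to the reader.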
from pyspark.sql.types import StructField,StringType,IntegerType,StructType
#
#
# In[35]:
# Explicit column definitions: one StructField per column as (name, data type, nullable).
data_schemma = [
    StructField('age', IntegerType(), True),
    StructField('name', StringType(), True),
]
# In[38]:
# Wrap the field list in a StructType so it can be passed to the reader as a schema.
finalSchemma = StructType(data_schemma)
# In[39]:
# Read again, this time applying the explicit schema instead of letting the reader infer one.
# NOTE(review): this path looks like a directory, not the people.json file read earlier - confirm that is intended.
df = spark.read.schema(finalSchemma).json('/home/tushar/spark-2.3.0-bin-hadoop2.7/python/Python-and-Spark-for-Big-Data-master/Spark_DataFrames')
# In[40]:
df.printSchema()
# In[ ]: