-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSamples.fsx
150 lines (104 loc) · 4.53 KB
/
Samples.fsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#r @"../../bin/HiveTypeProvider.dll"
open Hive.HiveRuntime
open Microsoft.FSharp.Data.UnitSystems.SI.UnitSymbols
open Microsoft.FSharp.Linq.NullableOperators
//-------------------------------------------------------
// Statically typed view of existing tables
//
// The connection string is marked [<Literal>] because it is handed to the
// type provider below as a static argument.
let [<Literal>] dsn = "Sample Hortonworks Hive DSN; pwd=hadoop"

type Conn = Hive.HiveTypeProvider<dsn, DefaultMetadataTimeout = 1000>

let context = Conn.GetDataContext()

// Print every value published by the RequestSent event, prefixed with "REQUEST: ".
context.DataContext.RequestSent.Add(fun request -> printfn "REQUEST: %s" request)
// Descriptions of the sample_07 rows whose salary is below 20000.
// `?<` (from NullableOperators) compares a Nullable left operand with a plain value.
let query =
    hiveQuery {
        for line in context.sample_07 do
        where (line.salary ?< 20000)
        select line.description
    }

// Run the query, passing an explicit timeout.
query.Run(timeout = 1000<s>)
// Average of salary (converted to float) over the sample_07 rows whose salary
// is below 30000.
let query2 =
    hiveQuery {
        for line in context.sample_07 do
        where (line.salary ?< 30000)
        averageBy (float (line.salary.Value))
    }
//--------------------------------------------------------
// Partially typed view of tables using the underlying runtime support
//
// - All column names and table names are strings
let timeout = 5<s>

// NOTE(review): 1000 * timeout evaluates to 5000<s>; confirm which unit the
// two timeout arguments of HiveDataContext are expected to carry.
let hive =
    Hive.HiveRuntime.HiveDataContext(dsn, 1000 * timeout, 1000 * timeout, true)

// Row shape declared for sample_07: two strings followed by two nullable ints.
type NullableSchema =
    string * string * System.Nullable<int> * System.Nullable<int>

let table : HiveTable<NullableSchema> = hive.GetTable("sample_07")
// Select whole rows whose fourth tuple component (bound as `salary` in the
// lambda) is below 20000, then print the result with the %A formatter.
hiveQuery {
    for row in table do
    where ((fun (_, _, _, salary) -> salary ?< 20000) row)
    select row
}
|> printfn "%A"
// Manually construct a record type as the table schema.
// Field order and types mirror the NullableSchema tuple: two strings, then
// two nullable ints.
type SchemaRecord =
    { Code : string
      Description : string
      Total_Emp : System.Nullable<int>
      Salary : System.Nullable<int> }
// Same table name as before, this time viewed through the SchemaRecord type.
let table2 : HiveTable<SchemaRecord> = hive.GetTable("sample_07")

// Descriptions of the rows whose Salary is below 20000, printed with %A.
hiveQuery {
    for row in table2 do
    where (row.Salary ?< 20000)
    select row.Description
}
|> printfn "%A"
// Print the schema of a random table from the system.
//
// Fetches the table names from the runtime data context, picks one index
// uniformly at random and prints that table's schema with %A. When no tables
// are returned, prints "No tables found" instead. The binding itself is unit;
// it exists for its console output.
let randomSchema =
    let names = hive.GetTableNames()
    match Array.length names with
    | 0 -> printfn "No tables found"
    | count ->
        // Reuse the length already computed by the match rather than
        // measuring the array a second time.
        let random = System.Random()
        names.[random.Next count]
        |> hive.GetTableSchema
        |> printfn "%A"
//Create and delete a table
// Sends a CREATE TABLE statement for a three-column table named mock and then
// a DROP TABLE statement for the same name, both through the runtime data
// context `hive`. The values returned by ExecuteCommand are not bound.
hive.ExecuteCommand("CREATE TABLE mock (id INT, name STRING, age INT)")
hive.ExecuteCommand("DROP TABLE mock")
// Using required annotations
//
// Second provider instantiation over the same DSN, this time with the
// UseRequiredAnnotations static argument switched on.
type Conn' = Hive.HiveTypeProvider<dsn, UseRequiredAnnotations = true>

let context' = Conn'.GetDataContext()

// Print every value published by the RequestSent event, prefixed with "REQUEST: ".
context'.DataContext.RequestSent.Add(fun request -> printfn "REQUEST: %s" request)
// Descriptions of the new_sample rows whose salary is below 20000, read
// through the annotated context.
let query' =
    hiveQuery {
        for line in context'.new_sample do
        where (line.salary ?< 20000)
        select line.description
    }

query'.Run()
//hive.ExecuteCommand("ALTER TABLE sample_08 CHANGE salary salary int COMMENT '(required)'")

// Salaries of the sample_08 rows whose description equals "Actors".
//
// Bound as query2' rather than query2: query2 is already defined earlier in
// this script, and a second top-level binding with the same name is rejected
// as a duplicate definition when the script is compiled as a whole. The prime
// suffix follows the Conn' / context' / query' naming used in this section.
let query2' =
    hiveQuery {
        for line in context'.sample_08 do
        where (line.description = "Actors")
        select line.salary
    }

query2'.Run()
// Other stuff:
//
// NOTE(review): sample_bucket is dropped here and then used as the source of
// the query immediately below - confirm that this order/table name is intended.
context.DataContext.ExecuteCommand("DROP TABLE sample_bucket")

// Uses the newTable query operation with the name "table4" and the
// description column of each sample_bucket row.
let newTable =
    hiveQuery {
        for line in context.sample_bucket do
        newTable "table4" line.description
    }

// Inspect the resulting table object: schema, name, partition names.
newTable.GetSchema()
newTable.TableName
newTable.GetPartitionNames()
// Pipeline over sample_bucket that applies writeDistributedFile twice
// ("file5" before distinct, "file6" after it), each time with the
// (description, salary) pair, and finally selects the code column.
let queryYY =
    hiveQuery {
        for line in context.sample_bucket do
        writeDistributedFile "file5" (line.description, line.salary)
        distinct
        writeDistributedFile "file6" (line.description, line.salary)
        select line.code
    }
// Count the sample_07 rows whose description is longer than 10 characters
// (predicate written with String.Length).
hiveQuery {
    for line in context.sample_07 do
    where (line.description.Length > 10)
    count
}
// Count the sample_07 rows whose description, after replacing "a" with "b",
// equals "bbb" (predicate written with String.Replace).
hiveQuery {
    for line in context.sample_07 do
    where (line.description.Replace("a","b") = "bbb" )
    count
}

// NOTE(review): this query is identical to the one directly above - possibly a
// copy/paste leftover; confirm whether a different sample was intended.
hiveQuery {
    for line in context.sample_07 do
    where (line.description.Replace("a","b") = "bbb" )
    count
}
// Count the sample_07 rows whose total_emp value equals 1
// (predicate reads the nullable column through .Value).
hiveQuery {
    for line in context.sample_07 do
    where (line.total_emp.Value = 1)
    count
}
// Select the sample_07 rows for which total_emp, converted with `double` and
// multiplied by 2.0, equals 2.0; the query value is then passed to `string`.
hiveQuery {
    for line in context.sample_07 do
    where (double line.total_emp.Value * 2.0 = 2.0)
    select line
}
|> string
// Integer counterpart of the previous sample: select the sample_07 rows for
// which total_emp multiplied by 2 equals 2; the query value is then passed to
// `string`.
hiveQuery {
    for line in context.sample_07 do
    where (line.total_emp.Value * 2 = 2)
    select line
}
|> string
// The same pieces as the `newTable` computation expression, written as direct
// calls to the builder methods For, Yield and NewTable.
hiveQuery.NewTable(
    hiveQuery.For(
        context.sample_bucket,
        (fun line -> hiveQuery.Yield line)
    ),
    "table4",
    (fun line -> line.description)
)