-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmrjob
32 lines (21 loc) · 1.53 KB
/
mrjob
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
python MovieRatings.py -r hadoop hdfs:///ratings.data
create table irate(uid int, mid int, rating int, ts int);
create table exrate(uid int, mid int, rating int, ts int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
load data local infile 'ratings.data' into table irate fields terminated by '\t' lines terminated by '\n';
/ratings.data
sqoop import --connect jdbc:mysql://localhost:3306/importedata --username root --password password --table irate --target-dir /newimported --m 1
sqoop export --connect jdbc:mysql://localhost:3306/importedata --table erated --username root --password password --export-dir /ratings.data --fields-terminated-by '\t' --lines-terminated-by '\n'
Employee performance analytics: build a platform to analyze employee performance data and identify key performance indicators using Spark DataFrames.
yum-config-manager --save --setopt=HDP-SOLR-2.6-100.skip_if_unavailable=true
yum install https://repo.ius.io/ius-release-el7.rpm https://dl.fedoraproject.org/pub/epel...
yum install python-pip
pip install pathlib
pip install mrjob==0.7.4
pip install PyYAML==5.4.1
yum install nano
yarn jar /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 10 100
https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html
Running locally:
python MovieRatings.py ratings.data
Running on Hadoop cluster:
python MovieRatings.py -r hadoop --hadoop-streaming-jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-streaming.jar ratings.data