Commit e6439b5

Add files via upload
1 parent 4c4e530 commit e6439b5

25 files changed: +934 −0 lines
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>edu.rmit.cosc2637.s3400652</groupId>
  <artifactId>Assignment</artifactId>
  <name>Assignment</name>
  <version>0.0.1-SNAPSHOT</version>
  <url>http://maven.apache.org</url>
  <build>
    <plugins>
      <plugin>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.2.1</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <!-- the implementation attribute is required for the shade plugin to resolve this transformer -->
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>edu.rmit.cosc2637.s3400652.Assignment.NYCDriver</mainClass>
                </transformer>
              </transformers>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
</project>

Java Source Code/note.txt

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
hadoop fs -copyToLocal /user/s3400652/Assignment-0.0.1-SNAPSHOT.jar /home/hadoop/
hadoop jar Assignment-0.0.1-SNAPSHOT.jar /user/s3400652/green_tripdata_2019-01_sample.csv /user/s3400652/outputNYC3 -Dmapred.map.tasks=2 -Dmapred.reduce.tasks=4
hadoop jar Assignment-0.0.1-SNAPSHOT.jar /user/s3400652/fhv_tripdata_2015-01.csv /user/s3400652/outputNYC55

hadoop jar Assignment-0.0.1-SNAPSHOT.jar arn:aws:s3:::nyc-tlc/trip+data/yellow_tripdata_2019-01.csv /user/s3400652/outputNYC5

arn:aws:s3:::nyc-tlc/trip+data/yellow_tripdata_2019-01.csv

hadoop fs -cat /user/s3400652/outputNYC/part-r-00000 | sort -n -k2 -r | head -n3
hadoop fs -cat /user/s3400652/outputNYC/part-r-0000* | sort -n -k2 -r | head -n5   // concatenate output

+++++++++++++++++++++++++++++

Transfer dataset to HDFS from AWS S3 bucket
hadoop distcp s3a://nyc-tlc/"trip data"/yellow_tripdata_2018-12.csv /user/s3400652/

Copy jar file to master node
hadoop fs -copyToLocal /user/s3400652/Assignment-0.0.1-SNAPSHOT.jar /home/hadoop/

Deploy jar file
hadoop jar Assignment-0.0.1-SNAPSHOT.jar edu.rmit.cosc2637.s3400652.Assignment.NYCDriver
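
Editor's note: the arn:aws:s3::: string in the note is an S3 resource ARN, not a filesystem URI, so Hadoop will not accept it as a job input path. Assuming the cluster's S3A connector is configured (as the distcp command in the note already relies on), the same public file could instead be referenced directly with an s3a:// path; a hedged sketch, reusing the note's output directory:

hadoop jar Assignment-0.0.1-SNAPSHOT.jar "s3a://nyc-tlc/trip data/yellow_tripdata_2019-01.csv" /user/s3400652/outputNYC5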

Java Source Code/pom.xml

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>edu.rmit.cosc2637.s3400652</groupId>
  <artifactId>Assignment</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>Assignment</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>3.2.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.2.0</version>
    </dependency>

    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.2.1</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>edu.rmit.cosc2637.s3400652.Assignment.NYCDriver</mainClass>
                </transformer>
              </transformers>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

</project>
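
Because the ManifestResourceTransformer above writes NYCDriver into the shaded jar's manifest as Main-Class, the jar can be launched without naming the driver class, which is what note.txt already does. A minimal build-and-run sketch (assuming Maven 3 and a JDK on the build machine; input and output paths mirror the note):

mvn clean package
hadoop jar target/Assignment-0.0.1-SNAPSHOT.jar /user/s3400652/green_tripdata_2019-01_sample.csv /user/s3400652/outputNYC3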
Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
/*
 * --------------------------------------------------------------------
 * Developer Name : Udeshika Dissanayake
 * Subject        : COSC2637 Big Data Processing
 * Assignment     : Assignment 1 - Semester 2, 2019
 * Student Number : s3400652
 * Date           : 12/10/2019
 * --------------------------------------------------------------------
 */

package edu.rmit.cosc2637.s3400652.Assignment;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NYCDriver
{
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
    {
        /* // for hardcoding the data and output paths
        Path dataPath = new Path("/user/s3400652/green_tripdata_2019-01_sample.csv");
        Path outputDir = new Path("/user/s3400652/OutputNYC");
        */

        // Define the configuration for the MapReduce driver
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "NYC Taxi Analysis");

        job.setJarByClass(NYCDriver.class);

        //***** Selection of Mapper class [uncomment exactly one of the two lines below] *****
        //job.setMapperClass(NYCMapper.class);     // standard Mapper
        job.setMapperClass(NYCMapper_IMC.class);   // In-Mapper Combiner
        //*************************************************************************************

        //***** Selection of Combiner class [uncomment only if the standard combiner is used] *****
        //job.setCombinerClass(NYCReducer.class);
        //******************************************************************************************

        //***** Selection of Reducer class *****
        job.setReducerClass(NYCReducer.class);
        //**************************************

        job.setOutputKeyClass(Text.class);
        //job.setOutputValueClass(IntWritable.class);    // for PULocation counter
        job.setOutputValueClass(FloatWritable.class);    // for total fare
        job.setMapOutputKeyClass(Text.class);
        //job.setMapOutputValueClass(IntWritable.class); // for PULocation counter
        job.setMapOutputValueClass(FloatWritable.class); // for total fare

        // Input and output paths are taken from the command-line arguments
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
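
The driver above registers NYCMapper_IMC, an in-mapper-combiner variant that is part of this commit but not shown in this section. As an illustration of the pattern it refers to (a sketch with a hypothetical class name, not the committed NYCMapper_IMC), an in-mapper combiner accumulates fares per pick-up location in a HashMap inside the mapper and emits one partial sum per location in cleanup(), reducing the records shuffled to the reducers:

package edu.rmit.cosc2637.s3400652.Assignment;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Illustrative in-mapper combiner (assumed structure, not the committed NYCMapper_IMC):
// fares are summed per pick-up location in memory and emitted once per mapper in cleanup().
public class InMapperCombinerSketch extends Mapper<LongWritable, Text, Text, FloatWritable>
{
    private final Map<String, Float> fareByLocation = new HashMap<String, Float>();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException
    {
        String[] fields = value.toString().split(",");
        // yellow taxi layout: 8th column = pick-up location, 17th column = total fare
        if (fields.length > 16 && fields[16].matches("-?\\d+(\\.\\d+)?"))
        {
            String location = "PULoc_" + fields[7];
            float fare = Float.parseFloat(fields[16]);
            Float sum = fareByLocation.get(location);
            fareByLocation.put(location, sum == null ? fare : sum + fare);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException
    {
        // Emit one partial sum per location instead of one record per input line.
        for (Map.Entry<String, Float> entry : fareByLocation.entrySet())
        {
            context.write(new Text(entry.getKey()), new FloatWritable(entry.getValue()));
        }
    }
}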
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
/*
 * --------------------------------------------------------------------
 * Developer Name : Udeshika Dissanayake
 * Subject        : COSC2637 Big Data Processing
 * Assignment     : Assignment 1 - Semester 2, 2019
 * Student Number : s3400652
 * Date           : 12/10/2019
 * --------------------------------------------------------------------
 */

package edu.rmit.cosc2637.s3400652.Assignment;

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Mapper class

//------ Data pattern of green taxi data
// 2,21/12/2018 15:17,21/12/2018 15:18,N,1,264,264,5,0,3,0.5,0.5,0,0,,0.3,4.3,2,1,
// 6th column = pick-up location; 7th column = drop-off location
// 17th column = total fare; 8th column = number of passengers

//------ Data pattern of yellow taxi data
// 1,01-02-19 0:59,01-02-19 1:07,1,2.1,1,N,48,234,1,9,0.5,0.5,2,0,0.3,12.3,0
// 8th column = pick-up location; 9th column = drop-off location
// 17th column = total fare; 4th column = number of passengers

//public class NYCMapper extends Mapper<LongWritable, Text, Text, IntWritable>  // for number of pick-up locations
public class NYCMapper extends Mapper<LongWritable, Text, Text, FloatWritable>  // for total fare
{
    private final static IntWritable one = new IntWritable(1);
    private FloatWritable Total_fare = new FloatWritable();

    // ---- mapper to count the number of pick-up locations ----
    // out - (PULoc_264, 1)
    /*
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException
    {
        String line = value.toString();
        String[] fields = line.split(",");

        if(fields.length > 6)
        {
            Text PULocation = new Text("PULoc_"+fields[5]);
            context.write(PULocation, one);
        }
    }
    */

    // ---- mapper to get the total fare for each pick-up location ----
    // out - (PULoc_264, 4.3)
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException
    {
        String line = value.toString();
        String[] fields = line.split(",");

        if(fields.length > 16)
        {
            Text PULocation = new Text("PULoc_"+fields[7]);
            if(fields[16].matches("-?\\d+(\\.\\d+)?"))  // emit only when the fare field is numeric
            {
                float f = Float.parseFloat(fields[16]);
                Total_fare.set(f);
                context.write(PULocation, Total_fare);  // write inside the check so a stale fare is never emitted
            }
        }
    }
}
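
The NYCReducer that the driver registers (and can optionally reuse as a combiner) is also in this commit but not shown in this section. A minimal sketch of a sum reducer matching the (Text, FloatWritable) types above (illustrative, with a hypothetical class name, not the committed NYCReducer):

package edu.rmit.cosc2637.s3400652.Assignment;

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative sum reducer (assumed structure, not the committed NYCReducer):
// adds up all fares emitted for one pick-up location key.
public class SumReducerSketch extends Reducer<Text, FloatWritable, Text, FloatWritable>
{
    private final FloatWritable total = new FloatWritable();

    @Override
    public void reduce(Text key, Iterable<FloatWritable> values, Context context)
            throws IOException, InterruptedException
    {
        float sum = 0f;
        for (FloatWritable value : values)
        {
            sum += value.get();
        }
        total.set(sum);
        context.write(key, total);   // e.g. (PULoc_264, 123.4)
    }
}

Because this sum is commutative and associative, the same reduce logic can also serve as the standard combiner the driver optionally registers with setCombinerClass.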
