-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4c4e530
commit e6439b5
Showing
25 changed files
with
934 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<groupId>edu.rmit.cosc2637.s3400652</groupId> | ||
<artifactId>Assignment</artifactId> | ||
<name>Assignment</name> | ||
<version>0.0.1-SNAPSHOT</version> | ||
<url>http://maven.apache.org</url> | ||
<build> | ||
<plugins> | ||
<plugin> | ||
<artifactId>maven-shade-plugin</artifactId> | ||
<version>3.2.1</version> | ||
<executions> | ||
<execution> | ||
<phase>package</phase> | ||
<goals> | ||
<goal>shade</goal> | ||
</goals> | ||
<configuration> | ||
<transformers> | ||
<transformer> | ||
<mainClass>edu.rmit.cosc2637.s3400652.Assignment.NYCDriver</mainClass> | ||
</transformer> | ||
</transformers> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
<dependencies> | ||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<version>3.8.1</version> | ||
<scope>test</scope> | ||
</dependency> | ||
</dependencies> | ||
<properties> | ||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
</properties> | ||
</project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
hadoop fs -copyToLocal /user/s3400652/Assignment-0.0.1-SNAPSHOT.jar /home/hadoop/ | ||
hadoop jar Assignment-0.0.1-SNAPSHOT.jar /user/s3400652/green_tripdata_2019-01_sample.csv /user/s3400652/outputNYC3 -Dmapred.map.tasks=2 -Dmapred.reduce.tasks=4 | ||
hadoop jar Assignment-0.0.1-SNAPSHOT.jar /user/s3400652/fhv_tripdata_2015-01.csv /user/s3400652/outputNYC55 | ||
|
||
hadoop jar Assignment-0.0.1-SNAPSHOT.jar arn:aws:s3:::nyc-tlc/trip+data/yellow_tripdata_2019-01.csv /user/s3400652/outputNYC5 | ||
|
||
|
||
arn:aws:s3:::nyc-tlc/trip+data/yellow_tripdata_2019-01.csv | ||
|
||
hadoop fs -cat /user/s3400652/outputNYC/part-r-00000 | sort -n -k2 -r | head -n3 | ||
hadoop fs -cat /user/s3400652/outputNYC/part-r-0000* | sort -n -k2 -r | head -n5 //concatenate output | ||
|
||
+++++++++++++++++++++++++++++ | ||
|
||
transfer dataset to HDFS from AWS s3 bucket | ||
hadoop distcp s3a://nyc-tlc/"trip data"/yellow_tripdata_2018-12.csv /user/s3400652/ | ||
|
||
copy jar file to master node | ||
hadoop fs -copyToLocal /user/s3400652/Assignment-0.0.1-SNAPSHOT.jar /home/hadoop/ | ||
|
||
Deploy jar file | ||
hadoop jar Assignment-0.0.1-SNAPSHOT.jar edu.rmit.cosc2637.s3400652.Assignment.NYCDriver | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<groupId>edu.rmit.cosc2637.s3400652</groupId> | ||
<artifactId>Assignment</artifactId> | ||
<version>0.0.1-SNAPSHOT</version> | ||
<packaging>jar</packaging> | ||
|
||
<name>Assignment</name> | ||
<url>http://maven.apache.org</url> | ||
|
||
<properties> | ||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
</properties> | ||
|
||
<dependencies> | ||
|
||
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core --> | ||
<dependency> | ||
<groupId>org.apache.hadoop</groupId> | ||
<artifactId>hadoop-mapreduce-client-core</artifactId> | ||
<version>3.2.0</version> | ||
</dependency> | ||
|
||
|
||
|
||
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common --> | ||
<dependency> | ||
<groupId>org.apache.hadoop</groupId> | ||
<artifactId>hadoop-common</artifactId> | ||
<version>3.2.0</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<version>3.8.1</version> | ||
<scope>test</scope> | ||
</dependency> | ||
</dependencies> | ||
|
||
<build> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-shade-plugin</artifactId> | ||
<version>3.2.1</version> | ||
<executions> | ||
<execution> | ||
<phase>package</phase> | ||
<goals> | ||
<goal>shade</goal> | ||
</goals> | ||
<configuration> | ||
<transformers> | ||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> | ||
<mainClass>edu.rmit.cosc2637.s3400652.Assignment.NYCDriver</mainClass> | ||
</transformer> | ||
</transformers> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
|
||
</project> |
73 changes: 73 additions & 0 deletions
73
Java Source Code/src/main/java/edu/rmit/cosc2637/s3400652/Assignment/NYCDriver.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/* | ||
* -------------------------------------------------------------------- | ||
* Developer Name : Udeshika Dissanayake | ||
* Subject : COSC2637 Big Data Processing | ||
* Assignment : Assignment 1 - Semester 2, 2019 | ||
* Student Number : s3400652 | ||
* Date : 12/10/2019 * | ||
*-------------------------------------------------------------------- | ||
*/ | ||
|
||
package edu.rmit.cosc2637.s3400652.Assignment; | ||
|
||
import java.io.IOException; | ||
import java.util.StringTokenizer; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Job; | ||
import org.apache.hadoop.mapreduce.Mapper; | ||
import org.apache.hadoop.mapreduce.Reducer; | ||
import org.apache.hadoop.mapreduce.Mapper.Context; | ||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | ||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | ||
import org.apache.hadoop.io.FloatWritable; | ||
|
||
|
||
|
||
public class NYCDriver | ||
{ | ||
|
||
public static void main( String[] args ) throws IOException, ClassNotFoundException, InterruptedException | ||
{ | ||
|
||
/*//for hardcording data and output paths | ||
Path dataPath = new Path("/user/s3400652/green_tripdata_2019-01_sample.csv"); | ||
Path outputDir = new Path("/user/s3400652/OutputNYC"); | ||
*/ | ||
|
||
//Define configuration File for MapReduce Drive | ||
Configuration conf = new Configuration(); | ||
Job job = Job.getInstance(conf, "NYC Taxi Analysis"); | ||
|
||
job.setJarByClass(NYCDriver.class); | ||
|
||
//*****Selection of Mapper Class. [comment only one line out of below two lines]***** | ||
//job.setMapperClass(NYCMapper.class); //Uncomment this for standard Mapper; Comment the below line | ||
job.setMapperClass(NYCMapper_IMC.class); //Uncomment this for In-Mapper Combiner; comment the above line | ||
//********************************************************************************** | ||
|
||
//*****Selection of Combiner Class. [uncomment only if standard combiner is used] | ||
//job.setCombinerClass(NYCReducer.class); //Uncomment this for standard Combiner | ||
//********************************************************************************** | ||
|
||
//*****Selection of Reducer Class; | ||
job.setReducerClass(NYCReducer.class); | ||
//*************************************** | ||
|
||
job.setOutputKeyClass(Text.class); | ||
//job.setOutputValueClass(IntWritable.class); // for PULocation counter | ||
job.setOutputValueClass(FloatWritable.class); //for Total fare | ||
job.setMapOutputKeyClass(Text.class); | ||
//job.setMapOutputValueClass(IntWritable.class); // for PULocation counter | ||
job.setMapOutputValueClass(FloatWritable.class); //for Total fare | ||
|
||
//setting arguments for input and output paths | ||
FileInputFormat.addInputPath(job, new Path(args[0])); | ||
FileOutputFormat.setOutputPath(job, new Path(args[1])); | ||
|
||
System.exit(job.waitForCompletion(true) ? 0 : 1); | ||
} | ||
} |
84 changes: 84 additions & 0 deletions
84
Java Source Code/src/main/java/edu/rmit/cosc2637/s3400652/Assignment/NYCMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* -------------------------------------------------------------------- | ||
* Developer Name : Udeshika Dissanayake | ||
* Subject : COSC2637 Big Data Processing | ||
* Assignment : Assignment 1 - Semester 2, 2019 | ||
* Student Number : s3400652 | ||
* Date : 12/10/2019 * | ||
*-------------------------------------------------------------------- | ||
*/ | ||
|
||
package edu.rmit.cosc2637.s3400652.Assignment; | ||
|
||
import java.io.IOException; | ||
|
||
import org.apache.hadoop.io.LongWritable; | ||
import org.apache.hadoop.io.FloatWritable; | ||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Mapper; | ||
|
||
//Mapper Class | ||
|
||
//------Data Pattern of Green Taxi data | ||
//2,21/12/2018 15:17,21/12/2018 15:18,N,1,264,264,5,0,3,0.5,0.5,0,0,,0.3,4.3,2,1, | ||
//6th Column = PickUp Location; 7th Column = Drop Off Location | ||
//17th Column = Total fare; 8th Column = Number of Passengers | ||
|
||
//------Data Pattern of Yellow Taxi data | ||
//1 01-02-19 0:59 01-02-19 1:07 1 2.1 1 N 48 234 1 9 0.5 0.5 2 0 0.3 12.3 0 | ||
//8th Column = PickUp Location; 9th Column = Drop Off Location | ||
//17th Column = Total fare; 4th Column = Number of Passengers | ||
|
||
//public class NYCMapper extends Mapper<LongWritable, Text, Text, IntWritable> //for number of Pick Up Locations | ||
public class NYCMapper extends Mapper<LongWritable, Text, Text, FloatWritable> //for total fare | ||
{ | ||
private final static IntWritable one = new IntWritable(1); | ||
private FloatWritable Total_fare = new FloatWritable(); | ||
|
||
// ----mapper to count number of Pick Up Locations----- | ||
// Out - (PULoc_264, 1) | ||
/* | ||
@Override | ||
public void map(LongWritable key, Text value, Context context) | ||
throws IOException, InterruptedException | ||
{ | ||
String line = value.toString(); | ||
String[] fields = line.split(","); | ||
if(fields.length > 6) | ||
{ | ||
Text PULocation = new Text("PULoc_"+fields[5]); | ||
context.write(PULocation, one); | ||
} | ||
} | ||
*/ | ||
|
||
// ----mapper to get total fare for each Pick Up Locations----- | ||
// out - (PULoc_264, 4.3) | ||
@Override | ||
public void map(LongWritable key, Text value, Context context) | ||
throws IOException, InterruptedException | ||
{ | ||
String line = value.toString(); | ||
String[] fields = line.split(","); | ||
|
||
if(fields.length > 16) | ||
{ | ||
Text PULocation = new Text("PULoc_"+fields[7]); | ||
if(fields[16].matches("\\d+.+")) //check whether its numeric | ||
{ | ||
float f = Float.parseFloat(fields[16]); | ||
Total_fare.set(f); | ||
} | ||
context.write(PULocation, Total_fare); | ||
} | ||
|
||
|
||
} | ||
|
||
} | ||
|
||
|
Oops, something went wrong.