diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bef0727
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target/
+.idea
diff --git a/adamic-adar/.mvn/wrapper/MavenWrapperDownloader.java b/adamic-adar/.mvn/wrapper/MavenWrapperDownloader.java
new file mode 100644
index 0000000..e76d1f3
--- /dev/null
+++ b/adamic-adar/.mvn/wrapper/MavenWrapperDownloader.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2007-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.net.*;
+import java.io.*;
+import java.nio.channels.*;
+import java.util.Properties;
+
+public class MavenWrapperDownloader {
+
+ private static final String WRAPPER_VERSION = "0.5.6";
+ /**
+ * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
+ */
+ private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
+ + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";
+
+ /**
+ * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
+ * use instead of the default one.
+ */
+ private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
+ ".mvn/wrapper/maven-wrapper.properties";
+
+ /**
+ * Path where the maven-wrapper.jar will be saved to.
+ */
+ private static final String MAVEN_WRAPPER_JAR_PATH =
+ ".mvn/wrapper/maven-wrapper.jar";
+
+ /**
+ * Name of the property which should be used to override the default download url for the wrapper.
+ */
+ private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";
+
+ public static void main(String args[]) {
+ System.out.println("- Downloader started");
+ File baseDirectory = new File(args[0]);
+ System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());
+
+ // If the maven-wrapper.properties exists, read it and check if it contains a custom
+ // wrapperUrl parameter.
+ File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
+ String url = DEFAULT_DOWNLOAD_URL;
+ if(mavenWrapperPropertyFile.exists()) {
+ FileInputStream mavenWrapperPropertyFileInputStream = null;
+ try {
+ mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile);
+ Properties mavenWrapperProperties = new Properties();
+ mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
+ url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
+ } catch (IOException e) {
+ System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
+ } finally {
+ try {
+ if(mavenWrapperPropertyFileInputStream != null) {
+ mavenWrapperPropertyFileInputStream.close();
+ }
+ } catch (IOException e) {
+ // Ignore ...
+ }
+ }
+ }
+ System.out.println("- Downloading from: " + url);
+
+ File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
+ if(!outputFile.getParentFile().exists()) {
+ if(!outputFile.getParentFile().mkdirs()) {
+ System.out.println(
+ "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
+ }
+ }
+ System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
+ try {
+ downloadFileFromURL(url, outputFile);
+ System.out.println("Done");
+ System.exit(0);
+ } catch (Throwable e) {
+ System.out.println("- Error downloading");
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ private static void downloadFileFromURL(String urlString, File destination) throws Exception {
+ if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
+ String username = System.getenv("MVNW_USERNAME");
+ char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
+ Authenticator.setDefault(new Authenticator() {
+ @Override
+ protected PasswordAuthentication getPasswordAuthentication() {
+ return new PasswordAuthentication(username, password);
+ }
+ });
+ }
+ URL website = new URL(urlString);
+ ReadableByteChannel rbc;
+ rbc = Channels.newChannel(website.openStream());
+ FileOutputStream fos = new FileOutputStream(destination);
+ fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
+ fos.close();
+ rbc.close();
+ }
+
+}
diff --git a/adamic-adar/.mvn/wrapper/maven-wrapper.jar b/adamic-adar/.mvn/wrapper/maven-wrapper.jar
new file mode 100644
index 0000000..2cc7d4a
Binary files /dev/null and b/adamic-adar/.mvn/wrapper/maven-wrapper.jar differ
diff --git a/adamic-adar/.mvn/wrapper/maven-wrapper.properties b/adamic-adar/.mvn/wrapper/maven-wrapper.properties
new file mode 100644
index 0000000..ffdc10e
--- /dev/null
+++ b/adamic-adar/.mvn/wrapper/maven-wrapper.properties
@@ -0,0 +1,2 @@
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.1/apache-maven-3.8.1-bin.zip
+wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
diff --git a/adamic-adar/dependency-reduced-pom.xml b/adamic-adar/dependency-reduced-pom.xml
new file mode 100644
index 0000000..2c71361
--- /dev/null
+++ b/adamic-adar/dependency-reduced-pom.xml
@@ -0,0 +1,341 @@
+
+
+ 4.0.0
+ org.hua
+ adamicadar
+ AdamicAdar
+ 0.1
+ http://maven.apache.org
+
+
+
+ maven-compiler-plugin
+ 2.5.1
+
+ 8
+ 8
+
+
+
+ maven-shade-plugin
+ 2.3
+
+
+ package
+
+ shade
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+
+
+
+
+
+ junit
+ junit
+ 4.8.2
+ test
+
+
+ org.apache.hadoop
+ hadoop-client
+ 3.2.0
+ provided
+
+
+ servlet-api
+ javax.servlet
+
+
+ hadoop-common
+ org.apache.hadoop
+
+
+ hadoop-hdfs-client
+ org.apache.hadoop
+
+
+ hadoop-yarn-api
+ org.apache.hadoop
+
+
+ hadoop-yarn-client
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-core
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-jobclient
+ org.apache.hadoop
+
+
+ hadoop-annotations
+ org.apache.hadoop
+
+
+
+
+ org.apache.spark
+ spark-core_2.13
+ 3.2.0
+ provided
+
+
+ scala-parallel-collections_2.13
+ org.scala-lang.modules
+
+
+ avro
+ org.apache.avro
+
+
+ avro-mapred
+ org.apache.avro
+
+
+ chill_2.13
+ com.twitter
+
+
+ chill-java
+ com.twitter
+
+
+ xbean-asm9-shaded
+ org.apache.xbean
+
+
+ hadoop-client-api
+ org.apache.hadoop
+
+
+ hadoop-client-runtime
+ org.apache.hadoop
+
+
+ spark-launcher_2.13
+ org.apache.spark
+
+
+ spark-kvstore_2.13
+ org.apache.spark
+
+
+ spark-network-common_2.13
+ org.apache.spark
+
+
+ spark-network-shuffle_2.13
+ org.apache.spark
+
+
+ spark-unsafe_2.13
+ org.apache.spark
+
+
+ activation
+ javax.activation
+
+
+ curator-recipes
+ org.apache.curator
+
+
+ zookeeper
+ org.apache.zookeeper
+
+
+ jakarta.servlet-api
+ jakarta.servlet
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-lang3
+ org.apache.commons
+
+
+ commons-math3
+ org.apache.commons
+
+
+ commons-text
+ org.apache.commons
+
+
+ commons-io
+ commons-io
+
+
+ commons-collections
+ commons-collections
+
+
+ slf4j-api
+ org.slf4j
+
+
+ jul-to-slf4j
+ org.slf4j
+
+
+ jcl-over-slf4j
+ org.slf4j
+
+
+ log4j
+ log4j
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+ compress-lzf
+ com.ning
+
+
+ snappy-java
+ org.xerial.snappy
+
+
+ lz4-java
+ org.lz4
+
+
+ zstd-jni
+ com.github.luben
+
+
+ RoaringBitmap
+ org.roaringbitmap
+
+
+ commons-net
+ commons-net
+
+
+ scala-xml_2.13
+ org.scala-lang.modules
+
+
+ scala-library
+ org.scala-lang
+
+
+ scala-reflect
+ org.scala-lang
+
+
+ json4s-jackson_2.13
+ org.json4s
+
+
+ jersey-client
+ org.glassfish.jersey.core
+
+
+ jersey-common
+ org.glassfish.jersey.core
+
+
+ jersey-server
+ org.glassfish.jersey.core
+
+
+ jersey-container-servlet
+ org.glassfish.jersey.containers
+
+
+ jersey-container-servlet-core
+ org.glassfish.jersey.containers
+
+
+ jersey-hk2
+ org.glassfish.jersey.inject
+
+
+ netty-all
+ io.netty
+
+
+ stream
+ com.clearspring.analytics
+
+
+ metrics-core
+ io.dropwizard.metrics
+
+
+ metrics-jvm
+ io.dropwizard.metrics
+
+
+ metrics-json
+ io.dropwizard.metrics
+
+
+ metrics-graphite
+ io.dropwizard.metrics
+
+
+ metrics-jmx
+ io.dropwizard.metrics
+
+
+ jackson-module-scala_2.13
+ com.fasterxml.jackson.module
+
+
+ ivy
+ org.apache.ivy
+
+
+ oro
+ oro
+
+
+ pyrolite
+ net.razorvine
+
+
+ py4j
+ net.sf.py4j
+
+
+ spark-tags_2.13
+ org.apache.spark
+
+
+ commons-crypto
+ org.apache.commons
+
+
+ unused
+ org.spark-project.spark
+
+
+
+
+
+ UTF-8
+
+
+
diff --git a/adamic-adar/mvnw b/adamic-adar/mvnw
new file mode 100755
index 0000000..a16b543
--- /dev/null
+++ b/adamic-adar/mvnw
@@ -0,0 +1,310 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ----------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------------
+# Maven Start Up Batch script
+#
+# Required ENV vars:
+# ------------------
+# JAVA_HOME - location of a JDK home dir
+#
+# Optional ENV vars
+# -----------------
+# M2_HOME - location of maven2's installed home dir
+# MAVEN_OPTS - parameters passed to the Java VM when running Maven
+# e.g. to debug Maven itself, use
+# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
+# MAVEN_SKIP_RC - flag to disable loading of mavenrc files
+# ----------------------------------------------------------------------------
+
+# Source the system-wide and then the per-user mavenrc file (when present),
+# unless MAVEN_SKIP_RC is set to a non-empty value.
+if [ -z "$MAVEN_SKIP_RC" ] ; then
+
+ if [ -f /etc/mavenrc ] ; then
+ . /etc/mavenrc
+ fi
+
+ if [ -f "$HOME/.mavenrc" ] ; then
+ . "$HOME/.mavenrc"
+ fi
+
+fi
+
+# OS specific support. $var _must_ be set to either true or false.
+# (The flags are executed as commands later on, e.g. `if $cygwin ; then`.)
+cygwin=false;
+darwin=false;
+mingw=false
+case "`uname`" in
+ CYGWIN*) cygwin=true ;;
+ MINGW*) mingw=true;;
+ Darwin*) darwin=true
+ # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
+ # See https://developer.apple.com/library/mac/qa/qa1170/_index.html
+ if [ -z "$JAVA_HOME" ]; then
+ if [ -x "/usr/libexec/java_home" ]; then
+ export JAVA_HOME="`/usr/libexec/java_home`"
+ else
+ export JAVA_HOME="/Library/Java/Home"
+ fi
+ fi
+ ;;
+esac
+
+# On Gentoo (detected via /etc/gentoo-release) ask java-config for the JRE home
+# when JAVA_HOME is still unset.
+if [ -z "$JAVA_HOME" ] ; then
+ if [ -r /etc/gentoo-release ] ; then
+ JAVA_HOME=`java-config --jre-home`
+ fi
+fi
+
+# When M2_HOME is not provided, derive it from the location of this script:
+# follow symlinks on $0, then use the parent of the script's directory.
+if [ -z "$M2_HOME" ] ; then
+ ## resolve links - $0 may be a link to maven's home
+ PRG="$0"
+
+ # need this for relative symlinks
+ while [ -h "$PRG" ] ; do
+ # the link target is parsed out of the `ls -ld` output (text after "-> ")
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG="`dirname "$PRG"`/$link"
+ fi
+ done
+
+ # remember the working directory; it is restored with `cd "$saveddir"` below
+ saveddir=`pwd`
+
+ M2_HOME=`dirname "$PRG"`/..
+
+ # make it fully qualified
+ M2_HOME=`cd "$M2_HOME" && pwd`
+
+ cd "$saveddir"
+ # echo Using m2 at $M2_HOME
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+# (each variable is converted only when it is non-empty)
+if $cygwin ; then
+ [ -n "$M2_HOME" ] &&
+ M2_HOME=`cygpath --unix "$M2_HOME"`
+ [ -n "$JAVA_HOME" ] &&
+ JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+ [ -n "$CLASSPATH" ] &&
+ CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# For Mingw, ensure paths are in UNIX format before anything is touched
+# (done by cd-ing into the directory in a subshell and printing pwd;
+# CLASSPATH is not converted in this branch)
+if $mingw ; then
+ [ -n "$M2_HOME" ] &&
+ M2_HOME="`(cd "$M2_HOME"; pwd)`"
+ [ -n "$JAVA_HOME" ] &&
+ JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
+fi
+
+# When JAVA_HOME is still unset, try to derive it from the javac found on PATH:
+# resolve the real location of javac and strip the trailing /bin component.
+if [ -z "$JAVA_HOME" ]; then
+ javaExecutable="`which javac`"
+ # NOTE(review): the comparison with "no" presumably guards against `which`
+ # implementations that print "no javac in ..." on stdout - confirm.
+ if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
+ # readlink(1) is not available as standard on Solaris 10.
+ readLink=`which readlink`
+ if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
+ if $darwin ; then
+ javaHome="`dirname \"$javaExecutable\"`"
+ javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
+ else
+ javaExecutable="`readlink -f \"$javaExecutable\"`"
+ fi
+ javaHome="`dirname \"$javaExecutable\"`"
+ javaHome=`expr "$javaHome" : '\(.*\)/bin'`
+ JAVA_HOME="$javaHome"
+ export JAVA_HOME
+ fi
+ fi
+fi
+
+# Pick the java launcher: an explicit JAVACMD wins, then JAVA_HOME, then PATH.
+if [ -z "$JAVACMD" ] ; then
+ if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ else
+ JAVACMD="`which java`"
+ fi
+fi
+
+# Abort when no executable java launcher was found.
+if [ ! -x "$JAVACMD" ] ; then
+ echo "Error: JAVA_HOME is not defined correctly." >&2
+ echo " We cannot execute $JAVACMD" >&2
+ exit 1
+fi
+
+# A missing JAVA_HOME is only a warning here; the launcher check above already passed.
+if [ -z "$JAVA_HOME" ] ; then
+ echo "Warning: JAVA_HOME environment variable is not set."
+fi
+
+# Not referenced anywhere else in this script.
+CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher
+
+# traverses directory structure from process work directory to filesystem root
+# first directory with .mvn subdirectory is considered project base directory
+find_maven_basedir() {
+
+ if [ -z "$1" ]
+ then
+ echo "Path not specified to find_maven_basedir"
+ return 1
+ fi
+
+ basedir="$1"
+ wdir="$1"
+ while [ "$wdir" != '/' ] ; do
+ if [ -d "$wdir"/.mvn ] ; then
+ basedir=$wdir
+ break
+ fi
+ # workaround for JBEAP-8937 (on Solaris 10/Sparc)
+ if [ -d "${wdir}" ]; then
+ wdir=`cd "$wdir/.."; pwd`
+ fi
+ # end of workaround
+ done
+ echo "${basedir}"
+}
+
+# concatenates all lines of a file into a single space-separated line
+#
+# $1     - file to read; nothing is printed when it is not a regular file
+# stdout - the file content with every run of newlines squeezed into one space
+concat_lines() {
+  if [ -f "$1" ]; then
+    # printf rather than echo: some shells' echo interprets backslash
+    # sequences, which would corrupt values such as Windows paths.
+    printf '%s\n' "$(tr -s '\n' ' ' < "$1")"
+  fi
+}
+
+# Project base directory: nearest ancestor of the working directory containing .mvn
+BASE_DIR=`find_maven_basedir "$(pwd)"`
+if [ -z "$BASE_DIR" ]; then
+  exit 1;
+fi
+
+##########################################################################################
+# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
+# This allows using the maven wrapper in projects that prohibit checking in binary data.
+#
+# Tools are tried in this order: wget, curl, then the bundled MavenWrapperDownloader.java.
+# The download URL is built from MVNW_REPOURL when set (Maven Central otherwise) and is
+# replaced by the wrapperUrl entry of .mvn/wrapper/maven-wrapper.properties when present.
+# MVNW_USERNAME / MVNW_PASSWORD, when both are set, are passed on for authentication.
+##########################################################################################
+if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then
+  if [ "$MVNW_VERBOSE" = true ]; then
+    echo "Found .mvn/wrapper/maven-wrapper.jar"
+  fi
+else
+  if [ "$MVNW_VERBOSE" = true ]; then
+    echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..."
+  fi
+  if [ -n "$MVNW_REPOURL" ]; then
+    jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
+  else
+    jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
+  fi
+  # Only read the properties file when it exists; a missing file is not an error.
+  # `read` is deliberately used without -r so that backslash escapes such as
+  # "https\://" are unescaped, as java.util.Properties would do.
+  if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties" ]; then
+    while IFS="=" read key value; do
+      case "$key" in (wrapperUrl) jarUrl="$value"; break ;;
+      esac
+    done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties"
+  fi
+  if [ "$MVNW_VERBOSE" = true ]; then
+    echo "Downloading from: $jarUrl"
+  fi
+  wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar"
+  if $cygwin; then
+    wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"`
+  fi
+
+  if command -v wget > /dev/null; then
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Found wget ... using wget"
+    fi
+    if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
+      wget "$jarUrl" -O "$wrapperJarPath"
+    else
+      # quoted so that credentials containing spaces or glob characters survive
+      wget --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$jarUrl" -O "$wrapperJarPath"
+    fi
+  elif command -v curl > /dev/null; then
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Found curl ... using curl"
+    fi
+    if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
+      curl -o "$wrapperJarPath" "$jarUrl" -f
+    else
+      # quoted so that credentials containing spaces or glob characters survive
+      curl --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$jarUrl" -f
+    fi
+
+  else
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Falling back to using Java to download"
+    fi
+    javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java"
+    # For Cygwin, switch paths to Windows format before running javac
+    if $cygwin; then
+      javaClass=`cygpath --path --windows "$javaClass"`
+    fi
+    if [ -e "$javaClass" ]; then
+      if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
+        if [ "$MVNW_VERBOSE" = true ]; then
+          echo " - Compiling MavenWrapperDownloader.java ..."
+        fi
+        # Compiling the Java class
+        ("$JAVA_HOME/bin/javac" "$javaClass")
+      fi
+      if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
+        # Running the downloader
+        if [ "$MVNW_VERBOSE" = true ]; then
+          echo " - Running MavenWrapperDownloader.java ..."
+        fi
+        # MAVEN_PROJECTBASEDIR is only assigned after this section, so BASE_DIR is
+        # passed instead (the same directory wget/curl download into). The subshell
+        # changes into BASE_DIR so that the relative classpath also resolves when
+        # mvnw is started from a subdirectory of the project.
+        (cd "$BASE_DIR" && "$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$BASE_DIR")
+      fi
+    fi
+  fi
+fi
+##########################################################################################
+# End of extension
+##########################################################################################
+
+# Base directory used for launching: MAVEN_BASEDIR when set, the detected BASE_DIR otherwise.
+export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
+if [ "$MVNW_VERBOSE" = true ]; then
+  # quoted so that a path with spaces or glob characters is printed verbatim
+  echo "$MAVEN_PROJECTBASEDIR"
+fi
+# Options from .mvn/jvm.config (when the file exists) are placed in front of MAVEN_OPTS.
+MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+  [ -n "$M2_HOME" ] &&
+    M2_HOME=`cygpath --path --windows "$M2_HOME"`
+  [ -n "$JAVA_HOME" ] &&
+    JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+  [ -n "$CLASSPATH" ] &&
+    CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+  [ -n "$MAVEN_PROJECTBASEDIR" ] &&
+    MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
+fi
+
+# Provide a "standardized" way to retrieve the CLI args that will
+# work with both Windows and non-Windows executions.
+MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@"
+export MAVEN_CMD_LINE_ARGS
+
+WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
+
+# $MAVEN_OPTS and $MAVEN_CONFIG stay unquoted: each holds several space-separated
+# arguments that have to be split into individual words.
+exec "$JAVACMD" \
+  $MAVEN_OPTS \
+  -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
+  "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
+  ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"
diff --git a/adamic-adar/pom.xml b/adamic-adar/pom.xml
new file mode 100644
index 0000000..65631d3
--- /dev/null
+++ b/adamic-adar/pom.xml
@@ -0,0 +1,95 @@
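+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.hua</groupId>
+    <artifactId>adamicadar</artifactId>
+    <version>0.1</version>
+    <packaging>jar</packaging>
+
+    <name>AdamicAdar</name>
+    <url>http://maven.apache.org</url>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.8.2</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <version>3.2.0</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>javax.servlet</groupId>
+                    <artifactId>servlet-api</artifactId>
+                </exclusion>
+            </exclusions>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_2.13</artifactId>
+            <version>3.2.0</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <version>11.0.2</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.12.0</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>2.5.1</version>
+                <configuration>
+                    <source>8</source>
+                    <target>8</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>2.3</version>
+                <executions>
+                    <execution>
+
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>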
diff --git a/adamic-adar/src/main/java/org/spark/AdamicAdar.java b/adamic-adar/src/main/java/org/spark/AdamicAdar.java
new file mode 100644
index 0000000..686e712
--- /dev/null
+++ b/adamic-adar/src/main/java/org/spark/AdamicAdar.java
@@ -0,0 +1,139 @@
+package org.spark;
+
+import java.util.*;
+import java.util.regex.Pattern;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import scala.Tuple2;
+
+/**
+ * Spark job that ranks pairs of unconnected nodes of a graph by their
+ * Adamic-Adar score, computed from an edge-list text file.
+ */
+public class AdamicAdar {
+
+    private static final Pattern SPACE = Pattern.compile("[ \\t\\x0B\\f\\r]+");
+
+    /**
+     * Computes the Adamic-Adar score of every pair of nodes that share at least one
+     * neighbor but are not directly connected, and returns the highest-scoring pairs.
+     *
+     * <p>Each input line holds one edge as two whitespace-separated node ids; lines with
+     * fewer than two tokens are skipped. Node ids must be parseable by
+     * {@link Integer#parseInt(String)}, because pairs are normalized to "smaller id first".
+     * The graph is treated as undirected. The score of a pair is the sum of
+     * {@code 1 / log10(degree)} over its common neighbors.
+     *
+     * @param lines                edge list, one edge per line
+     * @param numOfDisplayedScores maximum number of results to return
+     * @return at most {@code numOfDisplayedScores} tuples of (score, "a <-> b"), sorted by
+     *         descending score
+     */
+    public static List<Tuple2<Double, String>> adamicAdar(JavaRDD<String> lines, Integer numOfDisplayedScores) {
+
+        // Create an RDD of edges containing both (a, b) and (b, a). Emitting both
+        // directions and de-duplicating makes the neighbor counts, the self-join and
+        // the subtraction below correct whether the file lists each edge once or twice.
+        JavaPairRDD<String, String> edges = lines.flatMapToPair(line -> {
+            String[] tokens = SPACE.split(line.trim());
+            List<Tuple2<String, String>> bothDirections = new ArrayList<>(2);
+            if (tokens.length >= 2) {
+                bothDirections.add(new Tuple2<>(tokens[0], tokens[1]));
+                bothDirections.add(new Tuple2<>(tokens[1], tokens[0]));
+            }
+            return bothDirections.iterator();
+        }).distinct();
+
+        edges.cache();
+
+        // (node, 1) for every edge end point, summed up to (node, total neighbors)
+        JavaPairRDD<String, Integer> nodesWithNeighborsCount = edges
+                .mapToPair(edge -> new Tuple2<>(edge._1(), 1))
+                .reduceByKey((left, right) -> left + right);
+
+        // Self-join on the shared node: (common-neighbor, (node1, node2)).
+        // The raw join also contains (a, a) pairs and both orientations of every pair,
+        // so only the orientation with node1 < node2 is kept.
+        JavaPairRDD<String, Tuple2<String, String>> filteredJoinedEdges = edges.join(edges)
+                .filter(joined -> Integer.parseInt(joined._2()._1()) < Integer.parseInt(joined._2()._2()));
+
+        // reused twice below
+        filteredJoinedEdges.cache();
+
+        // Distinct candidate pairs minus the pairs that are already connected.
+        JavaPairRDD<String, String> unconnectedEdgesRaw = filteredJoinedEdges
+                .mapToPair(joined -> new Tuple2<>(joined._2()._1(), joined._2()._2()))
+                .distinct()
+                .subtract(edges);
+
+        // Key the unconnected pairs as ("a <-> b", -1) so they can be joined with the
+        // score parameters below; -1 is dummy info.
+        JavaPairRDD<String, Integer> unconnectedEdges = unconnectedEdgesRaw
+                .mapToPair(pair -> new Tuple2<>(pair._1() + " <-> " + pair._2(), -1));
+
+        // Attach the neighbor count of each common neighbor to its pair and group:
+        // ("a <-> b", [n1-total-neighbors, n2-total-neighbors, ...])
+        JavaPairRDD<String, Iterable<Integer>> adamicAdarParameters = filteredJoinedEdges
+                .join(nodesWithNeighborsCount)
+                .mapToPair(s -> new Tuple2<>(s._2()._1()._1() + " <-> " + s._2()._1()._2(), s._2()._2()))
+                .groupByKey();
+
+        // The join keeps only the unconnected pairs; mapValues computes the score.
+        // A common neighbor links two distinct nodes, so its count is at least 2 and
+        // the logarithm is never zero.
+        JavaPairRDD<String, Double> adamicAdarScores = adamicAdarParameters.join(unconnectedEdges).mapValues(v -> {
+            double adamicAdar = 0;
+            for (Integer neighborCount : v._1()) {
+                adamicAdar += (1 / Math.log10(neighborCount));
+            }
+            return adamicAdar;
+        });
+
+        // swap to (score, pair) so that sortByKey orders by score
+        JavaPairRDD<Double, String> reversedTuples = adamicAdarScores
+                .mapToPair(s -> new Tuple2<>(s._2(), s._1()));
+
+        // return the sorted result
+        return reversedTuples.sortByKey(false).take(numOfDisplayedScores);
+    }
+
+    /**
+     * Command-line entry point.
+     *
+     * @param args {@code args[0]} is the edge-list file, {@code args[1]} the number of
+     *             scores to print
+     * @throws Exception if the Spark job fails
+     */
+    public static void main(String[] args) throws Exception {
+
+        if (args.length < 2) {
+            System.err.println("Usage: AdamicAdar <input-file> <num-of-scores>");
+            System.exit(1);
+        }
+
+        // parsed before the context is created so that a bad argument fails fast
+        int numOfDisplayedScores = Integer.parseInt(args[1]);
+
+        SparkConf sparkConf = new SparkConf().setAppName("AdamicAdar");
+        // Run locally by default, but do not override a master given via spark-submit.
+        if (!sparkConf.contains("spark.master")) {
+            sparkConf.setMaster("local[*]");
+        }
+
+        // try-with-resources stops the context even when the job throws
+        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
+
+            //exclude the lines containing comments
+            JavaRDD<String> lines = sc.textFile(args[0]).filter(l -> !l.contains("#"));
+
+            //calculate adamic adar scores for every unconnected edge of two nodes with at least one common neighbor
+            List<Tuple2<Double, String>> aaScores = adamicAdar(lines, numOfDisplayedScores);
+
+            //print the result
+            for (Tuple2<Double, String> score : aaScores) {
+                System.out.println(String.format("%.5f", score._1()) + ", " + score._2());
+            }
+        }
+
+    }
+
+}
diff --git a/common-neighbors/.mvn/wrapper/MavenWrapperDownloader.java b/common-neighbors/.mvn/wrapper/MavenWrapperDownloader.java
new file mode 100644
index 0000000..e76d1f3
--- /dev/null
+++ b/common-neighbors/.mvn/wrapper/MavenWrapperDownloader.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2007-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.net.*;
+import java.io.*;
+import java.nio.channels.*;
+import java.util.Properties;
+
+public class MavenWrapperDownloader {
+
+ private static final String WRAPPER_VERSION = "0.5.6";
+ /**
+ * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
+ */
+ private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
+ + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";
+
+ /**
+ * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
+ * use instead of the default one.
+ */
+ private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
+ ".mvn/wrapper/maven-wrapper.properties";
+
+ /**
+ * Path where the maven-wrapper.jar will be saved to.
+ */
+ private static final String MAVEN_WRAPPER_JAR_PATH =
+ ".mvn/wrapper/maven-wrapper.jar";
+
+ /**
+ * Name of the property which should be used to override the default download url for the wrapper.
+ */
+ private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";
+
+ public static void main(String args[]) {
+ System.out.println("- Downloader started");
+ File baseDirectory = new File(args[0]);
+ System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());
+
+ // If the maven-wrapper.properties exists, read it and check if it contains a custom
+ // wrapperUrl parameter.
+ File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
+ String url = DEFAULT_DOWNLOAD_URL;
+ if(mavenWrapperPropertyFile.exists()) {
+ FileInputStream mavenWrapperPropertyFileInputStream = null;
+ try {
+ mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile);
+ Properties mavenWrapperProperties = new Properties();
+ mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
+ url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
+ } catch (IOException e) {
+ System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
+ } finally {
+ try {
+ if(mavenWrapperPropertyFileInputStream != null) {
+ mavenWrapperPropertyFileInputStream.close();
+ }
+ } catch (IOException e) {
+ // Ignore ...
+ }
+ }
+ }
+ System.out.println("- Downloading from: " + url);
+
+ File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
+ if(!outputFile.getParentFile().exists()) {
+ if(!outputFile.getParentFile().mkdirs()) {
+ System.out.println(
+ "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
+ }
+ }
+ System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
+ try {
+ downloadFileFromURL(url, outputFile);
+ System.out.println("Done");
+ System.exit(0);
+ } catch (Throwable e) {
+ System.out.println("- Error downloading");
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ private static void downloadFileFromURL(String urlString, File destination) throws Exception {
+ if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
+ String username = System.getenv("MVNW_USERNAME");
+ char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
+ Authenticator.setDefault(new Authenticator() {
+ @Override
+ protected PasswordAuthentication getPasswordAuthentication() {
+ return new PasswordAuthentication(username, password);
+ }
+ });
+ }
+ URL website = new URL(urlString);
+ ReadableByteChannel rbc;
+ rbc = Channels.newChannel(website.openStream());
+ FileOutputStream fos = new FileOutputStream(destination);
+ fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
+ fos.close();
+ rbc.close();
+ }
+
+}
diff --git a/common-neighbors/.mvn/wrapper/maven-wrapper.jar b/common-neighbors/.mvn/wrapper/maven-wrapper.jar
new file mode 100644
index 0000000..2cc7d4a
Binary files /dev/null and b/common-neighbors/.mvn/wrapper/maven-wrapper.jar differ
diff --git a/common-neighbors/.mvn/wrapper/maven-wrapper.properties b/common-neighbors/.mvn/wrapper/maven-wrapper.properties
new file mode 100644
index 0000000..ffdc10e
--- /dev/null
+++ b/common-neighbors/.mvn/wrapper/maven-wrapper.properties
@@ -0,0 +1,2 @@
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.1/apache-maven-3.8.1-bin.zip
+wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
diff --git a/common-neighbors/dependency-reduced-pom.xml b/common-neighbors/dependency-reduced-pom.xml
new file mode 100644
index 0000000..b20595e
--- /dev/null
+++ b/common-neighbors/dependency-reduced-pom.xml
@@ -0,0 +1,341 @@
+
+
+ 4.0.0
+ org.hua
+ commonneighbors
+ CommonNeighbors
+ 0.1
+ http://maven.apache.org
+
+
+
+ maven-compiler-plugin
+ 2.5.1
+
+ 8
+ 8
+
+
+
+ maven-shade-plugin
+ 2.3
+
+
+ package
+
+ shade
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+
+
+
+
+
+ junit
+ junit
+ 4.8.2
+ test
+
+
+ org.apache.hadoop
+ hadoop-client
+ 3.2.0
+ provided
+
+
+ servlet-api
+ javax.servlet
+
+
+ hadoop-common
+ org.apache.hadoop
+
+
+ hadoop-hdfs-client
+ org.apache.hadoop
+
+
+ hadoop-yarn-api
+ org.apache.hadoop
+
+
+ hadoop-yarn-client
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-core
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-jobclient
+ org.apache.hadoop
+
+
+ hadoop-annotations
+ org.apache.hadoop
+
+
+
+
+ org.apache.spark
+ spark-core_2.13
+ 3.2.0
+ provided
+
+
+ scala-parallel-collections_2.13
+ org.scala-lang.modules
+
+
+ avro
+ org.apache.avro
+
+
+ avro-mapred
+ org.apache.avro
+
+
+ chill_2.13
+ com.twitter
+
+
+ chill-java
+ com.twitter
+
+
+ xbean-asm9-shaded
+ org.apache.xbean
+
+
+ hadoop-client-api
+ org.apache.hadoop
+
+
+ hadoop-client-runtime
+ org.apache.hadoop
+
+
+ spark-launcher_2.13
+ org.apache.spark
+
+
+ spark-kvstore_2.13
+ org.apache.spark
+
+
+ spark-network-common_2.13
+ org.apache.spark
+
+
+ spark-network-shuffle_2.13
+ org.apache.spark
+
+
+ spark-unsafe_2.13
+ org.apache.spark
+
+
+ activation
+ javax.activation
+
+
+ curator-recipes
+ org.apache.curator
+
+
+ zookeeper
+ org.apache.zookeeper
+
+
+ jakarta.servlet-api
+ jakarta.servlet
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-lang3
+ org.apache.commons
+
+
+ commons-math3
+ org.apache.commons
+
+
+ commons-text
+ org.apache.commons
+
+
+ commons-io
+ commons-io
+
+
+ commons-collections
+ commons-collections
+
+
+ slf4j-api
+ org.slf4j
+
+
+ jul-to-slf4j
+ org.slf4j
+
+
+ jcl-over-slf4j
+ org.slf4j
+
+
+ log4j
+ log4j
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+ compress-lzf
+ com.ning
+
+
+ snappy-java
+ org.xerial.snappy
+
+
+ lz4-java
+ org.lz4
+
+
+ zstd-jni
+ com.github.luben
+
+
+ RoaringBitmap
+ org.roaringbitmap
+
+
+ commons-net
+ commons-net
+
+
+ scala-xml_2.13
+ org.scala-lang.modules
+
+
+ scala-library
+ org.scala-lang
+
+
+ scala-reflect
+ org.scala-lang
+
+
+ json4s-jackson_2.13
+ org.json4s
+
+
+ jersey-client
+ org.glassfish.jersey.core
+
+
+ jersey-common
+ org.glassfish.jersey.core
+
+
+ jersey-server
+ org.glassfish.jersey.core
+
+
+ jersey-container-servlet
+ org.glassfish.jersey.containers
+
+
+ jersey-container-servlet-core
+ org.glassfish.jersey.containers
+
+
+ jersey-hk2
+ org.glassfish.jersey.inject
+
+
+ netty-all
+ io.netty
+
+
+ stream
+ com.clearspring.analytics
+
+
+ metrics-core
+ io.dropwizard.metrics
+
+
+ metrics-jvm
+ io.dropwizard.metrics
+
+
+ metrics-json
+ io.dropwizard.metrics
+
+
+ metrics-graphite
+ io.dropwizard.metrics
+
+
+ metrics-jmx
+ io.dropwizard.metrics
+
+
+ jackson-module-scala_2.13
+ com.fasterxml.jackson.module
+
+
+ ivy
+ org.apache.ivy
+
+
+ oro
+ oro
+
+
+ pyrolite
+ net.razorvine
+
+
+ py4j
+ net.sf.py4j
+
+
+ spark-tags_2.13
+ org.apache.spark
+
+
+ commons-crypto
+ org.apache.commons
+
+
+ unused
+ org.spark-project.spark
+
+
+
+
+
+ UTF-8
+
+
+
diff --git a/common-neighbors/mvnw b/common-neighbors/mvnw
new file mode 100755
index 0000000..a16b543
--- /dev/null
+++ b/common-neighbors/mvnw
@@ -0,0 +1,310 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ----------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------------
+# Maven Start Up Batch script
+#
+# Required ENV vars:
+# ------------------
+# JAVA_HOME - location of a JDK home dir
+#
+# Optional ENV vars
+# -----------------
+# M2_HOME - location of maven2's installed home dir
+# MAVEN_OPTS - parameters passed to the Java VM when running Maven
+# e.g. to debug Maven itself, use
+# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
+# MAVEN_SKIP_RC - flag to disable loading of mavenrc files
+# ----------------------------------------------------------------------------
+
+if [ -z "$MAVEN_SKIP_RC" ] ; then
+
+ if [ -f /etc/mavenrc ] ; then
+ . /etc/mavenrc
+ fi
+
+ if [ -f "$HOME/.mavenrc" ] ; then
+ . "$HOME/.mavenrc"
+ fi
+
+fi
+
+# OS specific support. $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+mingw=false
+case "`uname`" in
+ CYGWIN*) cygwin=true ;;
+ MINGW*) mingw=true;;
+ Darwin*) darwin=true
+ # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
+ # See https://developer.apple.com/library/mac/qa/qa1170/_index.html
+ if [ -z "$JAVA_HOME" ]; then
+ if [ -x "/usr/libexec/java_home" ]; then
+ export JAVA_HOME="`/usr/libexec/java_home`"
+ else
+ export JAVA_HOME="/Library/Java/Home"
+ fi
+ fi
+ ;;
+esac
+
+if [ -z "$JAVA_HOME" ] ; then
+ if [ -r /etc/gentoo-release ] ; then
+ JAVA_HOME=`java-config --jre-home`
+ fi
+fi
+
+if [ -z "$M2_HOME" ] ; then
+ ## resolve links - $0 may be a link to maven's home
+ PRG="$0"
+
+ # need this for relative symlinks
+ while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG="`dirname "$PRG"`/$link"
+ fi
+ done
+
+ saveddir=`pwd`
+
+ M2_HOME=`dirname "$PRG"`/..
+
+ # make it fully qualified
+ M2_HOME=`cd "$M2_HOME" && pwd`
+
+ cd "$saveddir"
+ # echo Using m2 at $M2_HOME
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+ [ -n "$M2_HOME" ] &&
+ M2_HOME=`cygpath --unix "$M2_HOME"`
+ [ -n "$JAVA_HOME" ] &&
+ JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+ [ -n "$CLASSPATH" ] &&
+ CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# For Mingw, ensure paths are in UNIX format before anything is touched
+if $mingw ; then
+ [ -n "$M2_HOME" ] &&
+ M2_HOME="`(cd "$M2_HOME"; pwd)`"
+ [ -n "$JAVA_HOME" ] &&
+ JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
+fi
+
+if [ -z "$JAVA_HOME" ]; then
+ javaExecutable="`which javac`"
+ if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
+ # readlink(1) is not available as standard on Solaris 10.
+ readLink=`which readlink`
+ if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
+ if $darwin ; then
+ javaHome="`dirname \"$javaExecutable\"`"
+ javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
+ else
+ javaExecutable="`readlink -f \"$javaExecutable\"`"
+ fi
+ javaHome="`dirname \"$javaExecutable\"`"
+ javaHome=`expr "$javaHome" : '\(.*\)/bin'`
+ JAVA_HOME="$javaHome"
+ export JAVA_HOME
+ fi
+ fi
+fi
+
+if [ -z "$JAVACMD" ] ; then
+ if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ else
+ JAVACMD="`which java`"
+ fi
+fi
+
+if [ ! -x "$JAVACMD" ] ; then
+ echo "Error: JAVA_HOME is not defined correctly." >&2
+ echo " We cannot execute $JAVACMD" >&2
+ exit 1
+fi
+
+if [ -z "$JAVA_HOME" ] ; then
+ echo "Warning: JAVA_HOME environment variable is not set."
+fi
+
+CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher
+
+# traverses directory structure from process work directory to filesystem root
+# first directory with .mvn subdirectory is considered project base directory
+find_maven_basedir() {
+
+ if [ -z "$1" ]
+ then
+ echo "Path not specified to find_maven_basedir"
+ return 1
+ fi
+
+ basedir="$1"
+ wdir="$1"
+ while [ "$wdir" != '/' ] ; do
+ if [ -d "$wdir"/.mvn ] ; then
+ basedir=$wdir
+ break
+ fi
+ # workaround for JBEAP-8937 (on Solaris 10/Sparc)
+ if [ -d "${wdir}" ]; then
+ wdir=`cd "$wdir/.."; pwd`
+ fi
+ # end of workaround
+ done
+ echo "${basedir}"
+}
+
+# concatenates all lines of a file
+concat_lines() {
+ if [ -f "$1" ]; then
+ echo "$(tr -s '\n' ' ' < "$1")"
+ fi
+}
+
+BASE_DIR=`find_maven_basedir "$(pwd)"`
+if [ -z "$BASE_DIR" ]; then
+ exit 1;
+fi
+
+##########################################################################################
+# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
+# This allows using the maven wrapper in projects that prohibit checking in binary data.
+##########################################################################################
+if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Found .mvn/wrapper/maven-wrapper.jar"
+    fi
+else
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..."
+    fi
+    if [ -n "$MVNW_REPOURL" ]; then
+      jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
+    else
+      jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
+    fi
+    # A wrapperUrl entry in maven-wrapper.properties overrides the URL chosen above.
+    while IFS="=" read -r key value; do
+      case "$key" in (wrapperUrl) jarUrl="$value"; break ;; esac
+    done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties"
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Downloading from: $jarUrl"
+    fi
+    wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar"
+    if $cygwin; then
+      wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"`
+    fi
+    # Credentials are passed quoted so spaces or glob characters in them are not word-split.
+    if command -v wget > /dev/null; then
+        if [ "$MVNW_VERBOSE" = true ]; then
+          echo "Found wget ... using wget"
+        fi
+        if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
+            wget "$jarUrl" -O "$wrapperJarPath"
+        else
+            wget --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$jarUrl" -O "$wrapperJarPath"
+        fi
+    elif command -v curl > /dev/null; then
+        if [ "$MVNW_VERBOSE" = true ]; then
+          echo "Found curl ... using curl"
+        fi
+        if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
+            curl -o "$wrapperJarPath" "$jarUrl" -f
+        else
+            curl --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$jarUrl" -f
+        fi
+    # Neither wget nor curl is available: compile and run the bundled Java downloader.
+    else
+        if [ "$MVNW_VERBOSE" = true ]; then
+          echo "Falling back to using Java to download"
+        fi
+        javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java"
+        # For Cygwin, switch paths to Windows format before running javac
+        if $cygwin; then
+          javaClass=`cygpath --path --windows "$javaClass"`
+        fi
+        if [ -e "$javaClass" ]; then
+            if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
+                if [ "$MVNW_VERBOSE" = true ]; then
+                  echo " - Compiling MavenWrapperDownloader.java ..."
+                fi
+                # Compiling the Java class
+                ("$JAVA_HOME/bin/javac" "$javaClass")
+            fi
+            if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
+                # Run from BASE_DIR: MAVEN_PROJECTBASEDIR is only assigned after this block.
+                if [ "$MVNW_VERBOSE" = true ]; then
+                  echo " - Running MavenWrapperDownloader.java ..."
+                fi
+                (cd "$BASE_DIR" && "$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader .)
+            fi
+        fi
+    fi
+fi
+##########################################################################################
+# End of extension
+##########################################################################################
+
+export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
+if [ "$MVNW_VERBOSE" = true ]; then
+ echo $MAVEN_PROJECTBASEDIR
+fi
+MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+ [ -n "$M2_HOME" ] &&
+ M2_HOME=`cygpath --path --windows "$M2_HOME"`
+ [ -n "$JAVA_HOME" ] &&
+ JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+ [ -n "$CLASSPATH" ] &&
+ CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+ [ -n "$MAVEN_PROJECTBASEDIR" ] &&
+ MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
+fi
+
+# Provide a "standardized" way to retrieve the CLI args that will
+# work with both Windows and non-Windows executions.
+MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@"
+export MAVEN_CMD_LINE_ARGS
+
+WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
+
+exec "$JAVACMD" \
+ $MAVEN_OPTS \
+ -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
+ "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
+ ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"
diff --git a/common-neighbors/pom.xml b/common-neighbors/pom.xml
new file mode 100644
index 0000000..9d79c88
--- /dev/null
+++ b/common-neighbors/pom.xml
@@ -0,0 +1,95 @@
+
+ 4.0.0
+
+ org.hua
+ commonneighbors
+ 0.1
+ jar
+
+ CommonNeighbors
+ http://maven.apache.org
+
+
+ UTF-8
+
+
+
+
+ junit
+ junit
+ 4.8.2
+ test
+
+
+ org.apache.hadoop
+ hadoop-client
+ 3.2.0
+
+
+ javax.servlet
+ servlet-api
+
+
+ provided
+
+
+ org.apache.spark
+ spark-core_2.13
+ 3.2.0
+ provided
+
+
+ com.google.guava
+ guava
+ 11.0.2
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.12.0
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 2.5.1
+
+ 8
+ 8
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 2.3
+
+
+
+ package
+
+ shade
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/common-neighbors/src/main/java/org/spark/CommonNeighbors.java b/common-neighbors/src/main/java/org/spark/CommonNeighbors.java
new file mode 100644
index 0000000..e3d2cda
--- /dev/null
+++ b/common-neighbors/src/main/java/org/spark/CommonNeighbors.java
@@ -0,0 +1,105 @@
+package org.spark;
+
+import java.util.*;
+import java.util.regex.Pattern;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import scala.Tuple2;
+
+/** Spark job that scores pairs of unconnected graph nodes by their number of common neighbors. */
+public class CommonNeighbors {
+
+    private static final Pattern SPACE = Pattern.compile("[ \\t\\x0B\\f\\r]+");
+
+    /**
+     * Computes the common-neighbors score of every pair of unconnected nodes.
+     *
+     * @param lines edge list with one whitespace-separated "a b" pair of integer node ids per
+     *     line; lines containing '#' or holding fewer than two tokens are skipped
+     * @param numOfDisplayedScores number of top-scoring pairs to return
+     * @return (score, "a <-> b") tuples ordered by descending score
+     */
+    public static List<Tuple2<Integer, String>> commonNeighbors(JavaRDD<String> lines, Integer numOfDisplayedScores) {
+
+        //create a duplicate-free RDD of edges containing both (a, b) and (b, a);
+        //the join and the subtract below are only correct on such a symmetric edge set
+        //also ignore comment lines and lines without two tokens
+        JavaPairRDD<String, String> edges = lines.flatMapToPair(s -> {
+            List<Tuple2<String, String>> arrayList = new ArrayList<>();
+            String[] tokens = SPACE.split(s.trim());
+            if (!s.contains("#") && tokens.length >= 2) {
+                arrayList.add(new Tuple2<>(tokens[0], tokens[1]));
+                arrayList.add(new Tuple2<>(tokens[1], tokens[0]));
+            }
+            return arrayList.iterator();
+        }).distinct();
+
+        //cache the result: it is used twice by the join and once by the subtract
+        edges.cache();
+
+        //join the edges with a copy of themselves on the shared neighbor
+        //output is a set of pairs of (common-neighbor, [node1, node2])
+        //this is not yet the desired outcome because the result will also contain:
+        // 1. -> pairs of nodes with the type of (a, a)
+        // 2. -> all the reverse edges of the resulting edges e.g. both (a, b) and (b, a)
+        // 3. -> pairs of already connected nodes (due to existing connections of nodes with common neighbors)
+        JavaPairRDD<String, Tuple2<String, String>> joinedEdges = edges.join(edges);
+
+        //filter the previous result by removing all the (a, a) pairs
+        //remove also the reverse edges by keeping only (a,b) where a < b
+        JavaPairRDD<String, String> tempResults = joinedEdges.flatMapToPair(s -> {
+            List<Tuple2<String, String>> arrayList = new ArrayList<>();
+            if (Integer.parseInt(s._2()._1()) < Integer.parseInt(s._2()._2())) {
+                arrayList.add(s._2());
+            }
+            return arrayList.iterator();
+        });
+
+        //subtract the existing edges of the graph so that only pairs of unconnected nodes remain;
+        //every remaining instance of a pair stands for one common neighbor of that pair,
+        //so map each instance to ("a <-> b", 1) for the reduction below
+        JavaPairRDD<String, Integer> unconnectedEdgeInstances = tempResults.subtract(edges)
+                .mapToPair(s -> new Tuple2<>(s._1() + " <-> " + s._2(), 1));
+
+        //sum the instances: (edge-of-unconnected-nodes, number-of-common-neighbors)
+        JavaPairRDD<String, Integer> scores = unconnectedEdgeInstances.reduceByKey((v1, v2) -> v1 + v2);
+
+        //reverse the resulting tuples for the sortByKey below
+        JavaPairRDD<Integer, String> reversedTuples = scores.mapToPair(s -> new Tuple2<>(s._2(), s._1()));
+
+        //return the top scores of the sorted results
+        //numOfDisplayedScores defines the number of scores to return
+        return reversedTuples.sortByKey(false).take(numOfDisplayedScores);
+    }
+
+    /**
+     * Prints the top common-neighbors scores of an edge-list file.
+     *
+     * @param args {@code args[0]} input path, {@code args[1]} number of scores to print
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 2) {
+            System.err.println("Usage: Arguments must be <input-file> <number-of-scores>");
+            System.exit(1);
+        }
+
+        SparkConf sparkConf = new SparkConf().setAppName("Common Neighbors").setMaster("local[*]");
+        JavaSparkContext sc = new JavaSparkContext(sparkConf);
+        try {
+            JavaRDD<String> lines = sc.textFile(args[0]);
+
+            //calculate common neighbors metric and print the result
+            for (Tuple2<Integer, String> score : commonNeighbors(lines, Integer.parseInt(args[1]))) {
+                System.out.println(score);
+            }
+        } finally {
+            //stop the context even when the job fails
+            sc.stop();
+        }
+    }
+
+}
diff --git a/jaccard-coefficient/.mvn/wrapper/MavenWrapperDownloader.java b/jaccard-coefficient/.mvn/wrapper/MavenWrapperDownloader.java
new file mode 100644
index 0000000..e76d1f3
--- /dev/null
+++ b/jaccard-coefficient/.mvn/wrapper/MavenWrapperDownloader.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2007-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.net.*;
+import java.io.*;
+import java.nio.channels.*;
+import java.util.Properties;
+
+public class MavenWrapperDownloader {
+
+ private static final String WRAPPER_VERSION = "0.5.6";
+ /**
+ * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
+ */
+ private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
+ + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";
+
+ /**
+ * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
+ * use instead of the default one.
+ */
+ private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
+ ".mvn/wrapper/maven-wrapper.properties";
+
+ /**
+ * Path where the maven-wrapper.jar will be saved to.
+ */
+ private static final String MAVEN_WRAPPER_JAR_PATH =
+ ".mvn/wrapper/maven-wrapper.jar";
+
+ /**
+ * Name of the property which should be used to override the default download url for the wrapper.
+ */
+ private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";
+
+ public static void main(String args[]) {
+ System.out.println("- Downloader started");
+ File baseDirectory = new File(args[0]);
+ System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());
+
+ // If the maven-wrapper.properties exists, read it and check if it contains a custom
+ // wrapperUrl parameter.
+ File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
+ String url = DEFAULT_DOWNLOAD_URL;
+ if(mavenWrapperPropertyFile.exists()) {
+ FileInputStream mavenWrapperPropertyFileInputStream = null;
+ try {
+ mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile);
+ Properties mavenWrapperProperties = new Properties();
+ mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
+ url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
+ } catch (IOException e) {
+ System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
+ } finally {
+ try {
+ if(mavenWrapperPropertyFileInputStream != null) {
+ mavenWrapperPropertyFileInputStream.close();
+ }
+ } catch (IOException e) {
+ // Ignore ...
+ }
+ }
+ }
+ System.out.println("- Downloading from: " + url);
+
+ File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
+ if(!outputFile.getParentFile().exists()) {
+ if(!outputFile.getParentFile().mkdirs()) {
+ System.out.println(
+ "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
+ }
+ }
+ System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
+ try {
+ downloadFileFromURL(url, outputFile);
+ System.out.println("Done");
+ System.exit(0);
+ } catch (Throwable e) {
+ System.out.println("- Error downloading");
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ private static void downloadFileFromURL(String urlString, File destination) throws Exception {
+ if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
+ String username = System.getenv("MVNW_USERNAME");
+ char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
+ Authenticator.setDefault(new Authenticator() {
+ @Override
+ protected PasswordAuthentication getPasswordAuthentication() {
+ return new PasswordAuthentication(username, password);
+ }
+ });
+ }
+ URL website = new URL(urlString);
+ ReadableByteChannel rbc;
+ rbc = Channels.newChannel(website.openStream());
+ FileOutputStream fos = new FileOutputStream(destination);
+ fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
+ fos.close();
+ rbc.close();
+ }
+
+}
diff --git a/jaccard-coefficient/.mvn/wrapper/maven-wrapper.jar b/jaccard-coefficient/.mvn/wrapper/maven-wrapper.jar
new file mode 100644
index 0000000..2cc7d4a
Binary files /dev/null and b/jaccard-coefficient/.mvn/wrapper/maven-wrapper.jar differ
diff --git a/jaccard-coefficient/.mvn/wrapper/maven-wrapper.properties b/jaccard-coefficient/.mvn/wrapper/maven-wrapper.properties
new file mode 100644
index 0000000..ffdc10e
--- /dev/null
+++ b/jaccard-coefficient/.mvn/wrapper/maven-wrapper.properties
@@ -0,0 +1,2 @@
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.1/apache-maven-3.8.1-bin.zip
+wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
diff --git a/jaccard-coefficient/dependency-reduced-pom.xml b/jaccard-coefficient/dependency-reduced-pom.xml
new file mode 100644
index 0000000..a9aa905
--- /dev/null
+++ b/jaccard-coefficient/dependency-reduced-pom.xml
@@ -0,0 +1,341 @@
+
+
+ 4.0.0
+ org.hua
+ jaccardcoefficient
+ JaccardCoefficient
+ 0.1
+ http://maven.apache.org
+
+
+
+ maven-compiler-plugin
+ 2.5.1
+
+ 8
+ 8
+
+
+
+ maven-shade-plugin
+ 2.3
+
+
+ package
+
+ shade
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+
+
+
+
+
+ junit
+ junit
+ 4.8.2
+ test
+
+
+ org.apache.hadoop
+ hadoop-client
+ 3.2.0
+ provided
+
+
+ servlet-api
+ javax.servlet
+
+
+ hadoop-common
+ org.apache.hadoop
+
+
+ hadoop-hdfs-client
+ org.apache.hadoop
+
+
+ hadoop-yarn-api
+ org.apache.hadoop
+
+
+ hadoop-yarn-client
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-core
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-jobclient
+ org.apache.hadoop
+
+
+ hadoop-annotations
+ org.apache.hadoop
+
+
+
+
+ org.apache.spark
+ spark-core_2.13
+ 3.2.0
+ provided
+
+
+ scala-parallel-collections_2.13
+ org.scala-lang.modules
+
+
+ avro
+ org.apache.avro
+
+
+ avro-mapred
+ org.apache.avro
+
+
+ chill_2.13
+ com.twitter
+
+
+ chill-java
+ com.twitter
+
+
+ xbean-asm9-shaded
+ org.apache.xbean
+
+
+ hadoop-client-api
+ org.apache.hadoop
+
+
+ hadoop-client-runtime
+ org.apache.hadoop
+
+
+ spark-launcher_2.13
+ org.apache.spark
+
+
+ spark-kvstore_2.13
+ org.apache.spark
+
+
+ spark-network-common_2.13
+ org.apache.spark
+
+
+ spark-network-shuffle_2.13
+ org.apache.spark
+
+
+ spark-unsafe_2.13
+ org.apache.spark
+
+
+ activation
+ javax.activation
+
+
+ curator-recipes
+ org.apache.curator
+
+
+ zookeeper
+ org.apache.zookeeper
+
+
+ jakarta.servlet-api
+ jakarta.servlet
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-lang3
+ org.apache.commons
+
+
+ commons-math3
+ org.apache.commons
+
+
+ commons-text
+ org.apache.commons
+
+
+ commons-io
+ commons-io
+
+
+ commons-collections
+ commons-collections
+
+
+ slf4j-api
+ org.slf4j
+
+
+ jul-to-slf4j
+ org.slf4j
+
+
+ jcl-over-slf4j
+ org.slf4j
+
+
+ log4j
+ log4j
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+ compress-lzf
+ com.ning
+
+
+ snappy-java
+ org.xerial.snappy
+
+
+ lz4-java
+ org.lz4
+
+
+ zstd-jni
+ com.github.luben
+
+
+ RoaringBitmap
+ org.roaringbitmap
+
+
+ commons-net
+ commons-net
+
+
+ scala-xml_2.13
+ org.scala-lang.modules
+
+
+ scala-library
+ org.scala-lang
+
+
+ scala-reflect
+ org.scala-lang
+
+
+ json4s-jackson_2.13
+ org.json4s
+
+
+ jersey-client
+ org.glassfish.jersey.core
+
+
+ jersey-common
+ org.glassfish.jersey.core
+
+
+ jersey-server
+ org.glassfish.jersey.core
+
+
+ jersey-container-servlet
+ org.glassfish.jersey.containers
+
+
+ jersey-container-servlet-core
+ org.glassfish.jersey.containers
+
+
+ jersey-hk2
+ org.glassfish.jersey.inject
+
+
+ netty-all
+ io.netty
+
+
+ stream
+ com.clearspring.analytics
+
+
+ metrics-core
+ io.dropwizard.metrics
+
+
+ metrics-jvm
+ io.dropwizard.metrics
+
+
+ metrics-json
+ io.dropwizard.metrics
+
+
+ metrics-graphite
+ io.dropwizard.metrics
+
+
+ metrics-jmx
+ io.dropwizard.metrics
+
+
+ jackson-module-scala_2.13
+ com.fasterxml.jackson.module
+
+
+ ivy
+ org.apache.ivy
+
+
+ oro
+ oro
+
+
+ pyrolite
+ net.razorvine
+
+
+ py4j
+ net.sf.py4j
+
+
+ spark-tags_2.13
+ org.apache.spark
+
+
+ commons-crypto
+ org.apache.commons
+
+
+ unused
+ org.spark-project.spark
+
+
+
+
+
+ UTF-8
+
+
+
diff --git a/jaccard-coefficient/mvnw b/jaccard-coefficient/mvnw
new file mode 100755
index 0000000..a16b543
--- /dev/null
+++ b/jaccard-coefficient/mvnw
@@ -0,0 +1,310 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ----------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------------
+# Maven Start Up Batch script
+#
+# Required ENV vars:
+# ------------------
+# JAVA_HOME - location of a JDK home dir
+#
+# Optional ENV vars
+# -----------------
+# M2_HOME - location of maven2's installed home dir
+# MAVEN_OPTS - parameters passed to the Java VM when running Maven
+# e.g. to debug Maven itself, use
+# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
+# MAVEN_SKIP_RC - flag to disable loading of mavenrc files
+# ----------------------------------------------------------------------------
+
+if [ -z "$MAVEN_SKIP_RC" ] ; then
+
+ if [ -f /etc/mavenrc ] ; then
+ . /etc/mavenrc
+ fi
+
+ if [ -f "$HOME/.mavenrc" ] ; then
+ . "$HOME/.mavenrc"
+ fi
+
+fi
+
+# OS specific support. $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+mingw=false
+case "`uname`" in
+ CYGWIN*) cygwin=true ;;
+ MINGW*) mingw=true;;
+ Darwin*) darwin=true
+ # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
+ # See https://developer.apple.com/library/mac/qa/qa1170/_index.html
+ if [ -z "$JAVA_HOME" ]; then
+ if [ -x "/usr/libexec/java_home" ]; then
+ export JAVA_HOME="`/usr/libexec/java_home`"
+ else
+ export JAVA_HOME="/Library/Java/Home"
+ fi
+ fi
+ ;;
+esac
+
+if [ -z "$JAVA_HOME" ] ; then
+ if [ -r /etc/gentoo-release ] ; then
+ JAVA_HOME=`java-config --jre-home`
+ fi
+fi
+
+if [ -z "$M2_HOME" ] ; then
+ ## resolve links - $0 may be a link to maven's home
+ PRG="$0"
+
+ # need this for relative symlinks
+ while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG="`dirname "$PRG"`/$link"
+ fi
+ done
+
+ saveddir=`pwd`
+
+ M2_HOME=`dirname "$PRG"`/..
+
+ # make it fully qualified
+ M2_HOME=`cd "$M2_HOME" && pwd`
+
+ cd "$saveddir"
+ # echo Using m2 at $M2_HOME
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+ [ -n "$M2_HOME" ] &&
+ M2_HOME=`cygpath --unix "$M2_HOME"`
+ [ -n "$JAVA_HOME" ] &&
+ JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+ [ -n "$CLASSPATH" ] &&
+ CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# For Mingw, ensure paths are in UNIX format before anything is touched
+if $mingw ; then
+ [ -n "$M2_HOME" ] &&
+ M2_HOME="`(cd "$M2_HOME"; pwd)`"
+ [ -n "$JAVA_HOME" ] &&
+ JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
+fi
+
+if [ -z "$JAVA_HOME" ]; then
+ javaExecutable="`which javac`"
+ if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
+ # readlink(1) is not available as standard on Solaris 10.
+ readLink=`which readlink`
+ if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
+ if $darwin ; then
+ javaHome="`dirname \"$javaExecutable\"`"
+ javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
+ else
+ javaExecutable="`readlink -f \"$javaExecutable\"`"
+ fi
+ javaHome="`dirname \"$javaExecutable\"`"
+ javaHome=`expr "$javaHome" : '\(.*\)/bin'`
+ JAVA_HOME="$javaHome"
+ export JAVA_HOME
+ fi
+ fi
+fi
+
+if [ -z "$JAVACMD" ] ; then
+ if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ else
+ JAVACMD="`which java`"
+ fi
+fi
+
+if [ ! -x "$JAVACMD" ] ; then
+ echo "Error: JAVA_HOME is not defined correctly." >&2
+ echo " We cannot execute $JAVACMD" >&2
+ exit 1
+fi
+
+if [ -z "$JAVA_HOME" ] ; then
+ echo "Warning: JAVA_HOME environment variable is not set."
+fi
+
+CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher
+
+# traverses directory structure from process work directory to filesystem root
+# first directory with .mvn subdirectory is considered project base directory
+find_maven_basedir() {
+
+ if [ -z "$1" ]
+ then
+ echo "Path not specified to find_maven_basedir"
+ return 1
+ fi
+
+ basedir="$1"
+ wdir="$1"
+ while [ "$wdir" != '/' ] ; do
+ if [ -d "$wdir"/.mvn ] ; then
+ basedir=$wdir
+ break
+ fi
+ # workaround for JBEAP-8937 (on Solaris 10/Sparc)
+ if [ -d "${wdir}" ]; then
+ wdir=`cd "$wdir/.."; pwd`
+ fi
+ # end of workaround
+ done
+ echo "${basedir}"
+}
+
+# concatenates all lines of a file
+concat_lines() {
+ if [ -f "$1" ]; then
+ echo "$(tr -s '\n' ' ' < "$1")"
+ fi
+}
+
+BASE_DIR=`find_maven_basedir "$(pwd)"`
+if [ -z "$BASE_DIR" ]; then
+ exit 1;
+fi
+
+##########################################################################################
+# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
+# This allows using the maven wrapper in projects that prohibit checking in binary data.
+##########################################################################################
+if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Found .mvn/wrapper/maven-wrapper.jar"
+    fi
+else
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..."
+    fi
+    if [ -n "$MVNW_REPOURL" ]; then
+      jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
+    else
+      jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
+    fi
+    # A wrapperUrl entry in maven-wrapper.properties overrides the URL chosen above.
+    while IFS="=" read -r key value; do
+      case "$key" in (wrapperUrl) jarUrl="$value"; break ;; esac
+    done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties"
+    if [ "$MVNW_VERBOSE" = true ]; then
+      echo "Downloading from: $jarUrl"
+    fi
+    wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar"
+    if $cygwin; then
+      wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"`
+    fi
+    # Credentials are passed quoted so spaces or glob characters in them are not word-split.
+    if command -v wget > /dev/null; then
+        if [ "$MVNW_VERBOSE" = true ]; then
+          echo "Found wget ... using wget"
+        fi
+        if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
+            wget "$jarUrl" -O "$wrapperJarPath"
+        else
+            wget --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$jarUrl" -O "$wrapperJarPath"
+        fi
+    elif command -v curl > /dev/null; then
+        if [ "$MVNW_VERBOSE" = true ]; then
+          echo "Found curl ... using curl"
+        fi
+        if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
+            curl -o "$wrapperJarPath" "$jarUrl" -f
+        else
+            curl --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$jarUrl" -f
+        fi
+    # Neither wget nor curl is available: compile and run the bundled Java downloader.
+    else
+        if [ "$MVNW_VERBOSE" = true ]; then
+          echo "Falling back to using Java to download"
+        fi
+        javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java"
+        # For Cygwin, switch paths to Windows format before running javac
+        if $cygwin; then
+          javaClass=`cygpath --path --windows "$javaClass"`
+        fi
+        if [ -e "$javaClass" ]; then
+            if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
+                if [ "$MVNW_VERBOSE" = true ]; then
+                  echo " - Compiling MavenWrapperDownloader.java ..."
+                fi
+                # Compiling the Java class
+                ("$JAVA_HOME/bin/javac" "$javaClass")
+            fi
+            if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
+                # Run from BASE_DIR: MAVEN_PROJECTBASEDIR is only assigned after this block.
+                if [ "$MVNW_VERBOSE" = true ]; then
+                  echo " - Running MavenWrapperDownloader.java ..."
+                fi
+                (cd "$BASE_DIR" && "$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader .)
+            fi
+        fi
+    fi
+fi
+##########################################################################################
+# End of extension
+##########################################################################################
+
+export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
+if [ "$MVNW_VERBOSE" = true ]; then
+ echo $MAVEN_PROJECTBASEDIR
+fi
+MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+ [ -n "$M2_HOME" ] &&
+ M2_HOME=`cygpath --path --windows "$M2_HOME"`
+ [ -n "$JAVA_HOME" ] &&
+ JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+ [ -n "$CLASSPATH" ] &&
+ CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+ [ -n "$MAVEN_PROJECTBASEDIR" ] &&
+ MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
+fi
+
+# Provide a "standardized" way to retrieve the CLI args that will
+# work with both Windows and non-Windows executions.
+MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@"
+export MAVEN_CMD_LINE_ARGS
+
+WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
+
+exec "$JAVACMD" \
+ $MAVEN_OPTS \
+ -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
+ "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
+ ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"
diff --git a/jaccard-coefficient/pom.xml b/jaccard-coefficient/pom.xml
new file mode 100644
index 0000000..b4cb97e
--- /dev/null
+++ b/jaccard-coefficient/pom.xml
@@ -0,0 +1,95 @@
+
+ 4.0.0
+
+ org.hua
+ jaccardcoefficient
+ 0.1
+ jar
+
+ JaccardCoefficient
+ http://maven.apache.org
+
+
+ UTF-8
+
+
+
+
+ junit
+ junit
+ 4.8.2
+ test
+
+
+ org.apache.hadoop
+ hadoop-client
+ 3.2.0
+
+
+ javax.servlet
+ servlet-api
+
+
+ provided
+
+
+ org.apache.spark
+ spark-core_2.13
+ 3.2.0
+ provided
+
+
+ com.google.guava
+ guava
+ 11.0.2
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.12.0
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 2.5.1
+
+ 8
+ 8
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 2.3
+
+
+
+ package
+
+ shade
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/jaccard-coefficient/src/main/java/org/spark/JaccardCoefficient.java b/jaccard-coefficient/src/main/java/org/spark/JaccardCoefficient.java
new file mode 100644
index 0000000..4955d6b
--- /dev/null
+++ b/jaccard-coefficient/src/main/java/org/spark/JaccardCoefficient.java
@@ -0,0 +1,158 @@
+package org.spark;
+
+import java.util.*;
+import java.util.regex.Pattern;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import scala.Tuple2;
+
+/**
+ * Spark job that scores pairs of nodes which are not directly connected but share at
+ * least one neighbor, using the Jaccard coefficient
+ * {@code |N(a) ∩ N(b)| / |N(a) ∪ N(b)|}, and prints the highest-scoring pairs.
+ *
+ * <p>Input is a text file with one edge per line: two integer node ids separated by
+ * whitespace. Lines containing {@code #} are treated as comments and ignored.
+ */
+public class JaccardCoefficient {
+
+    /** Separator between the two node ids of an edge line (blank, tab, VT, FF or CR runs). */
+    private static final Pattern SPACE = Pattern.compile("[ \\t\\x0B\\f\\r]+");
+
+    /** Text placed between two node ids when a node pair is flattened into one string key. */
+    private static final String PAIR_SEPARATOR = " <-> ";
+
+    /** Command-line help printed when the arguments are missing or malformed. */
+    private static final String USAGE =
+            "Usage: Arguments must be <input-file> <number-of-displayed-scores>";
+
+    /**
+     * Counts the common neighbors of every pair of nodes that is not already an edge.
+     *
+     * @param graphEdges  the edges of the graph as (source, target) pairs
+     * @param joinedEdges the self-join of {@code graphEdges}: (common-neighbor, (node1, node2))
+     * @return pairs of ("a <-> b", number-of-common-neighbors) with a &lt; b numerically
+     * @throws NumberFormatException (when the RDD is evaluated) if a node id is not an integer
+     */
+    public static JavaPairRDD<String, Integer> findCommonNeighborsScores(
+            JavaPairRDD<String, String> graphEdges,
+            JavaPairRDD<String, Tuple2<String, String>> joinedEdges) {
+
+        // Drop the common-neighbor key and keep only pairs (a, b) with a < b. This removes
+        // the (a, a) pairs as well as the mirrored (b, a) duplicates, while still keeping
+        // one instance of (a, b) per common neighbor.
+        JavaPairRDD<String, String> orderedPairs = joinedEdges
+                .mapToPair(joined -> joined._2())
+                .filter(pair -> Integer.parseInt(pair._1()) < Integer.parseInt(pair._2()));
+
+        // Remove the pairs that are already edges of the graph so that only unconnected
+        // pairs remain. Every remaining instance stands for exactly one common neighbor,
+        // so it is mapped to ("a <-> b", 1) for the reduction below.
+        JavaPairRDD<String, Integer> unconnectedEdgeInstances = orderedPairs
+                .subtract(graphEdges)
+                .mapToPair(pair -> new Tuple2<>(pair._1() + PAIR_SEPARATOR + pair._2(), 1));
+
+        // Final result: (edge-of-unconnected-nodes, number-of-common-neighbors).
+        return unconnectedEdgeInstances.reduceByKey(Integer::sum);
+    }
+
+    /**
+     * Computes the Jaccard coefficient of every unconnected node pair that has at least
+     * one common neighbor and returns the highest scores.
+     *
+     * @param lines                the raw lines of the edge-list file
+     * @param numOfDisplayedScores how many of the top scores to return
+     * @return (jaccard-coefficient, "a <-> b") tuples, sorted by descending coefficient
+     * @throws NumberFormatException (when the RDD is evaluated) if a node id is not an integer
+     */
+    public static List<Tuple2<Double, String>> jaccardCoefficient(JavaRDD<String> lines, Integer numOfDisplayedScores) {
+
+        // One (source, target) pair per well-formed, non-comment line.
+        // NOTE(review): only the direction written in the file is emitted; the input is
+        // presumably expected to list every undirected edge as both (a, b) and (b, a) — confirm.
+        JavaPairRDD<String, String> edges = lines.flatMapToPair(JaccardCoefficient::parseEdge);
+
+        // The edge RDD is reused by several of the transformations below.
+        edges.cache();
+
+        // Join the edges with themselves on the source node. The output is a set of
+        // (common-neighbor, (node1, node2)) pairs, which still contains:
+        //   1. pairs of the form (a, a)
+        //   2. both orientations of every pair, i.e. (a, b) and (b, a)
+        //   3. pairs of nodes that are already connected by an existing edge
+        JavaPairRDD<String, Tuple2<String, String>> joinedEdges = edges.join(edges);
+
+        // (node, 1) per edge, reduced to (node, total-neighbors).
+        JavaPairRDD<String, Integer> nodesWithNeighborsCount = edges
+                .mapToPair(edge -> new Tuple2<>(edge._1(), 1))
+                .reduceByKey(Integer::sum);
+
+        // Turn (common-neighbor, (node1, node2)) into (node1, node2), drop the (a, a)
+        // pairs and keep every remaining pair only once.
+        JavaPairRDD<String, String> candidatePairs = joinedEdges
+                .mapToPair(joined -> joined._2())
+                .filter(pair -> Integer.parseInt(pair._1()) != Integer.parseInt(pair._2()))
+                .distinct();
+
+        // Remove the pairs that are existing edges, then attach the neighbor count of the
+        // first node. Since both orientations are still present, this yields both
+        // (node1, (node2, neighbors-of-node1)) and (node2, (node1, neighbors-of-node2)).
+        JavaPairRDD<String, Tuple2<String, Integer>> tempResultPart2 =
+                candidatePairs.subtract(edges).join(nodesWithNeighborsCount);
+
+        // Key both orientations by the same "a <-> b" string (a < b) and add their counts:
+        // the result is (a <-> b, neighbors-of-a + neighbors-of-b).
+        JavaPairRDD<String, Integer> totalNeighbors = tempResultPart2
+                .mapToPair(s -> new Tuple2<>(pairKey(s._1(), s._2()._1()), s._2()._2()))
+                .reduceByKey(Integer::sum);
+
+        // Number of common neighbors of every unconnected pair.
+        JavaPairRDD<String, Integer> commonNeighbors = findCommonNeighborsScores(edges, joinedEdges);
+
+        // (unconnected-edge, (number-of-common-neighbors, sum-of-the-nodes-neighbors))
+        JavaPairRDD<String, Tuple2<Integer, Integer>> jaccardCoefficientTempResult =
+                commonNeighbors.join(totalNeighbors);
+
+        // |A ∪ B| = |A| + |B| - |A ∩ B|, hence coefficient = common / (sum - common).
+        JavaPairRDD<String, Double> jaccardCoefficientScores = jaccardCoefficientTempResult
+                .mapValues(v -> (double) v._1() / (double) (v._2() - v._1()));
+
+        // Swap to (coefficient, edge) so that the score becomes the sort key.
+        JavaPairRDD<Double, String> reversedTuples = jaccardCoefficientScores
+                .mapToPair(s -> new Tuple2<>(s._2(), s._1()));
+
+        // Highest coefficients first.
+        return reversedTuples.sortByKey(false).take(numOfDisplayedScores);
+    }
+
+    /**
+     * Parses one line of the edge list.
+     *
+     * @param line a raw input line
+     * @return an iterator over the single (source, target) pair of the line, or an empty
+     *         iterator for comment lines (containing {@code #}) and lines with fewer
+     *         than two tokens
+     */
+    private static Iterator<Tuple2<String, String>> parseEdge(String line) {
+        if (line.contains("#")) {
+            return Collections.emptyIterator();
+        }
+        // Trim first so leading whitespace does not produce an empty first token, and
+        // reuse the precompiled pattern instead of recompiling it for every line.
+        String[] tokens = SPACE.split(line.trim());
+        if (tokens.length < 2) {
+            return Collections.emptyIterator();
+        }
+        return Collections.singletonList(new Tuple2<>(tokens[0], tokens[1])).iterator();
+    }
+
+    /** Builds the "a <-> b" key of a node pair with the numerically smaller id first. */
+    private static String pairKey(String node, String other) {
+        if (Integer.parseInt(node) < Integer.parseInt(other)) {
+            return node + PAIR_SEPARATOR + other;
+        }
+        return other + PAIR_SEPARATOR + node;
+    }
+
+    /**
+     * Entry point: {@code args[0]} is the edge-list file, {@code args[1]} the number of
+     * top scores to print. Each result is printed as {@code <coefficient>, <a> <-> <b>}.
+     */
+    public static void main(String[] args) throws Exception {
+
+        if (args.length < 2) {
+            System.err.println(USAGE);
+            System.exit(1);
+        }
+
+        final int numOfDisplayedScores;
+        try {
+            numOfDisplayedScores = Integer.parseInt(args[1]);
+        } catch (NumberFormatException e) {
+            System.err.println("Not a valid number of scores: " + args[1]);
+            System.err.println(USAGE);
+            System.exit(1);
+            return;
+        }
+
+        // Fall back to local mode only when no master was supplied (e.g. by spark-submit).
+        SparkConf sparkConf = new SparkConf()
+                .setAppName("JaccardCoefficient")
+                .setIfMissing("spark.master", "local[*]");
+
+        // try-with-resources stops the context even when the job fails.
+        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
+
+            JavaRDD<String> lines = sc.textFile(args[0]);
+
+            // Top Jaccard coefficient scores for every unconnected pair of nodes with at
+            // least one common neighbor.
+            List<Tuple2<Double, String>> jcScores = jaccardCoefficient(lines, numOfDisplayedScores);
+
+            // print the results
+            for (Tuple2<Double, String> score : jcScores) {
+                System.out.println(String.format("%.5f", score._1()) + ", " + score._2());
+            }
+        }
+
+    }
+
+}