58 changes: 30 additions & 28 deletions LargeVis_run.py
100644 → 100755
@@ -1,28 +1,30 @@
import LargeVis
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-fea', default = 1, type = int, help = 'whether to visualize high-dimensional feature vectors or networks')
parser.add_argument('-input', default = '', help = 'input file')
parser.add_argument('-output', default = '', help = 'output file')
parser.add_argument('-outdim', default = -1, type = int, help = 'output dimensionality')
parser.add_argument('-threads', default = -1, type = int, help = 'number of training threads')
parser.add_argument('-samples', default = -1, type = int, help = 'number of training mini-batches')
parser.add_argument('-prop', default = -1, type = int, help = 'number of propagations')
parser.add_argument('-alpha', default = -1, type = float, help = 'learning rate')
parser.add_argument('-trees', default = -1, type = int, help = 'number of rp-trees')
parser.add_argument('-neg', default = -1, type = int, help = 'number of negative samples')
parser.add_argument('-neigh', default = -1, type = int, help = 'number of neighbors in the NN-graph')
parser.add_argument('-gamma', default = -1, type = float, help = 'weight assigned to negative edges')
parser.add_argument('-perp', default = -1, type = float, help = 'perplexity for the NN-grapn')

args = parser.parse_args()

if args.fea == 1:
LargeVis.loadfile(args.input)
else:
LargeVis.loadgraph(args.input)

Y = LargeVis.run(args.outdim, args.threads, args.samples, args.prop, args.alpha, args.trees, args.neg, args.neigh, args.gamma, args.perp)

LargeVis.save(args.output)
#!/usr/bin/env python

import LargeVis
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--fea', default=1, type=int, help='whether to visualize high-dimensional feature vectors or networks')
parser.add_argument('--input', default='', help='input file', required=True)
parser.add_argument('--output', default='', help='output file', required=True)
parser.add_argument('--outdim', default=-1, type=int, help='output dimensionality')
parser.add_argument('--threads', default=-1, type=int, help='number of training threads')
parser.add_argument('--samples', default=-1, type=int, help='number of training mini-batches')
parser.add_argument('--prop', default=-1, type=int, help='number of propagations')
parser.add_argument('--alpha', default=-1, type=float, help='learning rate')
parser.add_argument('--trees', default=-1, type=int, help='number of rp-trees')
parser.add_argument('--neg', default=-1, type=int, help='number of negative samples')
parser.add_argument('--neigh', default=-1, type=int, help='number of neighbors in the NN-graph')
parser.add_argument('--gamma', default=-1, type=float, help='weight assigned to negative edges')
parser.add_argument('--perp', default=-1, type=float, help='perplexity for the NN-graph')

args = parser.parse_args()

if args.fea == 1:
LargeVis.loadfile(args.input)
else:
LargeVis.loadgraph(args.input)

Y = LargeVis.run(args.outdim, args.threads, args.samples, args.prop, args.alpha, args.trees, args.neg, args.neigh, args.gamma, args.perp)

LargeVis.save(args.output)
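For reference, the rewritten script boils down to the following direct use of the LargeVis extension module. This is a minimal sketch of the feature-vector path (--fea 1), not part of the patch: the file names are placeholders, and -1 is passed wherever the script would forward its "use the library default" sentinel.

# Illustration only: what LargeVis_run.py effectively does for --fea 1.
# "features.txt" and "embedding_2d.txt" are placeholder paths.
import LargeVis

LargeVis.loadfile("features.txt")  # feature-vector input; use loadgraph() for networks
# run(outdim, threads, samples, prop, alpha, trees, neg, neigh, gamma, perp);
# -1 / -1.0 keeps the library default, matching the argparse defaults above.
Y = LargeVis.run(2, 8, -1, -1, -1.0, -1, -1, -1, -1.0, -1.0)
LargeVis.save("embedding_2d.txt")  # write the low-dimensional coordinates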
63 changes: 41 additions & 22 deletions Linux/LargeVis.cpp
@@ -8,7 +8,7 @@ LargeVis::LargeVis()
knn_vec = old_knn_vec = NULL;
annoy_index = NULL;
head = alias = NULL;
neg_table = NULL;
neg_table = NULL;
}

const gsl_rng_type *LargeVis::gsl_T = NULL;
@@ -26,8 +26,8 @@ void LargeVis::clean_model()
vis = prob = NULL;
knn_vec = old_knn_vec = NULL;
annoy_index = NULL;
neg_table = NULL;
alias = NULL;
neg_table = NULL;
alias = NULL;

edge_count_actual = 0;
neg_size = 1e8;
@@ -56,14 +56,23 @@ void LargeVis::load_from_file(char *infile)
printf("\nFile not found!\n");
return;
}
printf("Reading input file %s ......", infile); fflush(stdout);
fscanf(fin, "%lld%lld", &n_vertices, &n_dim);
printf("Reading input file %s ......", infile); fflush(stdout);
if (fscanf(fin, "%lld%lld", &n_vertices, &n_dim) != 2) {
printf("Could not read dimensions\n");
fclose(fin);
exit(1);
}
vec = new real[n_vertices * n_dim];
for (long long i = 0; i < n_vertices; ++i)
{
for (long long j = 0; j < n_dim; ++j)
{
fscanf(fin, "%f", &vec[i * n_dim + j]);
if (fscanf(fin, "%f", &vec[i * n_dim + j]) != 1)
{
fclose(fin);
printf("Could not read line %lld\n", i + 1);
exit(1);
}
}
}
fclose(fin);
@@ -80,7 +89,17 @@ void LargeVis::load_from_data(real *data, long long n_vert, long long n_di)
printf("Total vertices : %lld\tDimension : %lld\n", n_vertices, n_dim);
}

void LargeVis::load_from_graph(char *infile)
bool load_edge_from_graph(FILE *fin, char *w1, char *w2, real *weight, bool use_default_weight) {
if (use_default_weight)
{
(*weight) = 1;
return fscanf(fin, "%s%s", w1, w2) == 2;
}
else
return fscanf(fin, "%s%s%f", w1, w2, weight) == 3;
}

void LargeVis::load_from_graph(char *infile, bool use_default_weight)
{
clean_data();
char *w1 = new char[1000];
@@ -96,7 +115,7 @@ void LargeVis::load_from_graph(char *infile)
return;
}
printf("Reading input file %s ......%c", infile, 13);
while (fscanf(fin, "%s%s%f", w1, w2, &weight) == 3)
while (load_edge_from_graph(fin, w1, w2, &weight, use_default_weight))
{
if (!dict.count(w1)) { dict[w1] = n_vertices++; names.push_back(w1); }
if (!dict.count(w2)) { dict[w2] = n_vertices++; names.push_back(w2); }
@@ -162,7 +181,7 @@ long long LargeVis::get_out_dim()

void LargeVis::normalize()
{
printf("Normalizing ......"); fflush(stdout);
printf("Normalizing ......"); fflush(stdout);
real *mean = new real[n_dim];
for (long long i = 0; i < n_dim; ++i) mean[i] = 0;
for (long long i = 0, ll = 0; i < n_vertices; ++i, ll += n_dim)
@@ -281,7 +300,7 @@ void *LargeVis::annoy_thread_caller(void *arg)

void LargeVis::run_annoy()
{
printf("Running ANNOY ......"); fflush(stdout);
printf("Running ANNOY ......"); fflush(stdout);
annoy_index = new AnnoyIndex<int, real, Euclidean, Kiss64Random>(n_dim);
for (long long i = 0; i < n_vertices; ++i)
annoy_index->add_item(i, &vec[i * n_dim]);
@@ -293,7 +312,7 @@ void LargeVis::run_annoy()
for (int j = 0; j < n_threads; ++j) pthread_create(&pt[j], NULL, LargeVis::annoy_thread_caller, new arg_struct(this, j));
for (int j = 0; j < n_threads; ++j) pthread_join(pt[j], NULL);
delete[] pt;
delete annoy_index; annoy_index = NULL;
delete annoy_index; annoy_index = NULL;
printf(" Done.\n");
}

@@ -348,7 +367,7 @@ void LargeVis::run_propagation()
{
for (int i = 0; i < n_propagations; ++i)
{
printf("Running propagation %d/%d%c", i + 1, n_propagations, 13);
printf("Running propagation %d/%lld%c", i + 1, n_propagations, 13);
fflush(stdout);
old_knn_vec = knn_vec;
knn_vec = new std::vector<int>[n_vertices];
@@ -375,7 +394,7 @@ void LargeVis::compute_similarity_thread(int id)
for (iter = 0; iter < 200; ++iter)
{
H = 0;
sum_weight = FLT_MIN;
sum_weight = FLT_MIN;
for (p = head[x]; p >= 0; p = next[p])
{
sum_weight += tmp = exp(-beta * edge_weight[p]);
@@ -392,8 +411,8 @@ void LargeVis::compute_similarity_thread(int id)
hi_beta = beta;
if (lo_beta < 0) beta /= 2; else beta = (lo_beta + beta) / 2;
}
if(beta > FLT_MAX) beta = FLT_MAX;
}
if(beta > FLT_MAX) beta = FLT_MAX;
}
for (p = head[x], sum_weight = FLT_MIN; p >= 0; p = next[p])
{
sum_weight += edge_weight[p] = exp(-beta * edge_weight[p]);
@@ -440,7 +459,7 @@ void *LargeVis::search_reverse_thread_caller(void *arg)

void LargeVis::compute_similarity()
{
printf("Computing similarities ......"); fflush(stdout);
printf("Computing similarities ......"); fflush(stdout);
n_edge = 0;
head = new long long[n_vertices];
long long i, x, y, p, q;
@@ -458,8 +477,8 @@ void LargeVis::compute_similarity()
head[x] = n_edge++;
}
}
delete[] vec; vec = NULL;
delete[] knn_vec; knn_vec = NULL;
delete[] vec; vec = NULL;
delete[] knn_vec; knn_vec = NULL;
pthread_t *pt = new pthread_t[n_threads];
for (int j = 0; j < n_threads; ++j) pthread_create(&pt[j], NULL, LargeVis::compute_similarity_thread_caller, new arg_struct(this, j));
for (int j = 0; j < n_threads; ++j) pthread_join(pt[j], NULL);
@@ -515,7 +534,7 @@ void LargeVis::test_accuracy()
++hit_case;
}
}
delete heap;
delete heap;
printf("Test knn accuracy : %.2f%%\n", hit_case * 100.0 / (test_case * n_neighbors));
}

@@ -542,7 +561,7 @@ void LargeVis::init_neg_table()
{
long long x, p, i;
neg_size = 1e8;
reverse.clear(); vector<long long> (reverse).swap(reverse);
reverse.clear(); vector<long long> (reverse).swap(reverse);
real sum_weights = 0, dd, *weights = new real[n_vertices];
for (i = 0; i < n_vertices; ++i) weights[i] = 0;
for (x = 0; x < n_vertices; ++x)
@@ -553,8 +572,8 @@ void LargeVis::init_neg_table()
}
sum_weights += weights[x] = pow(weights[x], 0.75);
}
next.clear(); vector<long long> (next).swap(next);
delete[] head; head = NULL;
next.clear(); vector<long long> (next).swap(next);
delete[] head; head = NULL;
neg_table = new int[neg_size];
dd = weights[0];
for (i = x = 0; i < neg_size; ++i)
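The load_from_file changes in this file only add error handling around fscanf; the input format itself is unchanged: a header with the vertex count and dimensionality, followed by n_vertices * n_dim whitespace-separated floats. A rough Python sketch of that layout, for illustration only (read_feature_file is not part of this patch):

def read_feature_file(path):
    # Header tokens: "<n_vertices> <n_dim>", then n_vertices * n_dim floats.
    with open(path) as fin:
        tokens = fin.read().split()
    if len(tokens) < 2:
        raise ValueError("could not read dimensions")  # like the new header check
    n_vertices, n_dim = int(tokens[0]), int(tokens[1])
    values = tokens[2:]
    if len(values) < n_vertices * n_dim:
        raise ValueError("could not read all feature values")  # like the per-value checks
    return n_vertices, n_dim, [float(v) for v in values[:n_vertices * n_dim]]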
2 changes: 1 addition & 1 deletion Linux/LargeVis.h
@@ -67,7 +67,7 @@ class LargeVis{
public:
LargeVis();
void load_from_file(char *infile);
void load_from_graph(char *infile);
void load_from_graph(char *infile, bool use_default_weight = false);
void load_from_data(real *data, long long n_vert, long long n_di);
void save(char *outfile);
void run(long long out_d = -1, long long n_thre = -1, long long n_samp = -1, long long n_prop = -1, real alph = -1, long long n_tree = -1, long long n_nega = -1, long long n_neig = -1, real gamm = -1, real perp = -1);
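The new use_default_weight parameter (defaulting to false, so existing callers are unaffected) lets load_from_graph read a two-column edge list and assign every edge a weight of 1; otherwise the loader still expects the three-column "node1 node2 weight" format. Roughly, the per-edge rule added by load_edge_from_graph looks like this Python sketch (illustration only; the real parser reads whitespace-separated tokens with fscanf):

def parse_edge(line, use_default_weight=False):
    parts = line.split()
    if use_default_weight:
        # two columns, implied weight of 1
        return (parts[0], parts[1], 1.0) if len(parts) >= 2 else None
    # three columns: node1 node2 weight
    return (parts[0], parts[1], float(parts[2])) if len(parts) >= 3 else None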