Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Futhark implementation #30

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ julia.version:
julia --version > $@
java.version:
java -version 2>&1 | head -n 2 | tr "\n" " " > $@
# Record the installed Futhark compiler version in the target file,
# like the other *.version rules do.
futhark.version:
	futhark --version > $@

# ------------------------------------------------
# Get Data
Expand Down Expand Up @@ -255,6 +257,31 @@ rust%/gc.bin: rust%/src/main.rs rust%/Cargo.toml
RUSTFLAGS="-C target-cpu=native" cargo +nightly build --release --manifest-path $(word 2,$^) -Z unstable-options --out-dir $(shell dirname $@) \
&& mv $(basename $@) $@

# Futhark
# Sequential C backend. The C driver (futhark/gc.c) is listed as a
# prerequisite so that editing it triggers a rebuild.
# $< is futhark/libgc.fut; $(@D) is the output directory futhark.c.
futhark.c/gc.bin: futhark/libgc.fut futhark/gc.c
	mkdir -p $(@D)
	futhark c --library $< -o $(@D)/libgc
	gcc -o $(@D)/libgc.o -c $(@D)/libgc.c -O3
	gcc -o $@ futhark/gc.c $(@D)/libgc.o -I$(@D) -lm

# Multicore (pthreads) backend. The C driver (futhark/gc.c) is listed as
# a prerequisite so that editing it triggers a rebuild.
# $< is futhark/libgc.fut; $(@D) is the output directory futhark.multicore.
futhark.multicore/gc.bin: futhark/libgc.fut futhark/gc.c
	mkdir -p $(@D)
	futhark multicore --library $< -o $(@D)/libgc
	gcc -o $(@D)/libgc.o -c $(@D)/libgc.c -O3 -pthread
	gcc -o $@ futhark/gc.c $(@D)/libgc.o -I$(@D) -lm -pthread

# OpenCL backend. The C driver (futhark/gc.c) is listed as a
# prerequisite so that editing it triggers a rebuild.
# $< is futhark/libgc.fut; $(@D) is the output directory futhark.opencl.
futhark.opencl/gc.bin: futhark/libgc.fut futhark/gc.c
	mkdir -p $(@D)
	futhark opencl --library $< -o $(@D)/libgc
	gcc -o $(@D)/libgc.o -c $(@D)/libgc.c -O3
	gcc -o $@ futhark/gc.c $(@D)/libgc.o -I$(@D) -lm -lOpenCL

# CUDA backend. The C driver (futhark/gc.c) is listed as a
# prerequisite so that editing it triggers a rebuild.
# $< is futhark/libgc.fut; $(@D) is the output directory futhark.cuda.
futhark.cuda/gc.bin: futhark/libgc.fut futhark/gc.c
	mkdir -p $(@D)
	futhark cuda --library $< -o $(@D)/libgc
	gcc -o $(@D)/libgc.o -c $(@D)/libgc.c -O3
	gcc -o $@ futhark/gc.c $(@D)/libgc.o -I$(@D) -lm -lcuda -lcudart -lnvrtc

#TODO: Update
#pony/gc: pony/gc.pony
# ponyc
66 changes: 66 additions & 0 deletions futhark/gc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#include "libgc.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/time.h>

// The BUFSIZE can be set to whatever will fit in memory. You will
// get the right result for any nonzero value.
#define BUFSIZE 1048576

/* Print `what` followed by the description of the current errno value,
 * then terminate with a failure status. */
static void die_errno(const char *what) {
  perror(what);
  exit(EXIT_FAILURE);
}

/* If `status` is nonzero, print the Futhark context's error message for
 * the operation named `what` and terminate with a failure status. */
static void check_futhark(struct futhark_context *ctx, int status,
                          const char *what) {
  if (status == 0) {
    return;
  }
  /* The error string is owned by the caller and must be freed. */
  char *msg = futhark_context_get_error(ctx);
  fprintf(stderr, "%s failed: %s\n", what,
          msg != NULL ? msg : "(no error message)");
  free(msg);
  exit(EXIT_FAILURE);
}

/* Compute the GC percentage of "chry_multiplied.fa" by feeding the file
 * to the Futhark library in BUFSIZE-byte chunks and printing the result
 * with ten decimals.
 *
 * Returns 0 on success; exits with EXIT_FAILURE (after printing a
 * diagnostic to stderr) if the file cannot be opened, measured or
 * mapped, or if any Futhark call reports an error. Errors are checked
 * explicitly rather than with assert() so that they are still detected
 * when compiled with NDEBUG. */
int main(void) {
  const char *path = "chry_multiplied.fa";
  FILE *f = fopen(path, "r");
  if (f == NULL) {
    die_errno(path);
  }

  // Create Futhark context.  The cache file is to store compiled GPU
  // kernels and such - all it does is speed up the startup.
  struct futhark_context_config *cfg = futhark_context_config_new();
  if (cfg == NULL) {
    fprintf(stderr, "futhark_context_config_new failed\n");
    return EXIT_FAILURE;
  }
  futhark_context_config_set_cache_file(cfg, "futhark/cache.bin");
  struct futhark_context *ctx = futhark_context_new(cfg);
  if (ctx == NULL) {
    fprintf(stderr, "futhark_context_new failed\n");
    return EXIT_FAILURE;
  }
  // Context creation reports failure through the error string.
  char *ctx_err = futhark_context_get_error(ctx);
  if (ctx_err != NULL) {
    fprintf(stderr, "futhark_context_new failed: %s\n", ctx_err);
    free(ctx_err);
    return EXIT_FAILURE;
  }

  // Find the file size.
  if (fseek(f, 0, SEEK_END) != 0) {
    die_errno("fseek");
  }
  long fsize = ftell(f);
  if (fsize < 0) {
    die_errno("ftell");
  }
  const size_t total = (size_t)fsize;

  // Memory-map the file.  This hopefully allows the creation of the
  // Futhark array to happen with only one copy from disk, instead of
  // two.  A zero-length mapping is rejected by mmap, so an empty file
  // is simply not mapped and the chunk loop below runs zero times.
  const uint8_t *data = NULL;
  if (total > 0) {
    void *mapping = mmap(NULL, total, PROT_READ, MAP_SHARED, fileno(f), 0);
    if (mapping == MAP_FAILED) {
      die_errno("mmap");
    }
    data = mapping;
  }

  fclose(f); // Not needed anymore; the mapping stays valid.

  struct futhark_opaque_summary *state;
  check_futhark(ctx, futhark_entry_init(ctx, &state), "futhark_entry_init");

  // Iterate through the memory-mapped file in BUFSIZE chunks.
  size_t offset = 0;
  size_t remaining = total;
  while (remaining > 0) {
    // Make a Futhark array for the chunk.
    size_t chunksize = remaining > BUFSIZE ? BUFSIZE : remaining;
    struct futhark_u8_1d *str = futhark_new_u8_1d(ctx, data + offset, chunksize);
    if (str == NULL) {
      check_futhark(ctx, 1, "futhark_new_u8_1d");
    }
    offset += chunksize;
    remaining -= chunksize;

    // Process it: fold the chunk into the running summary, then
    // release the previous summary and the chunk array.
    struct futhark_opaque_summary *new_state;
    check_futhark(ctx, futhark_entry_gc_chunk(ctx, &new_state, state, str),
                  "futhark_entry_gc_chunk");
    check_futhark(ctx, futhark_free_opaque_summary(ctx, state),
                  "futhark_free_opaque_summary");
    state = new_state;
    check_futhark(ctx, futhark_free_u8_1d(ctx, str), "futhark_free_u8_1d");
  }

  double res;
  check_futhark(ctx, futhark_entry_summary_res(ctx, &res, state),
                "futhark_entry_summary_res");
  check_futhark(ctx, futhark_context_sync(ctx), "futhark_context_sync");
  printf("%.10f\n", res);

  check_futhark(ctx, futhark_free_opaque_summary(ctx, state),
                "futhark_free_opaque_summary");
  futhark_context_free(ctx);
  futhark_context_config_free(cfg);

  if (data != NULL) {
    munmap((void *)data, total);
  }

  return 0;
}
51 changes: 51 additions & 0 deletions futhark/libgc.fut
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
-- For each contiguous segment of the file, we construct a "summary
-- tuple" with the following components:
--
-- ...
--
-- The idea is that we can then process an entire file in a
-- divide-and-conquer fashion by splitting it into arbitrary
-- contiguous chunks, computing a summary for each, and then combining
-- the summaries, without worrying about splitting on line boundaries.
-- All we need in order to parallelise it is an associative function
-- for combining summaries.

-- A pair of tallies: `gc` is incremented for 'G'/'C' bytes and `total`
-- for every counted base (see `mapop`).
-- NOTE(review): both fields are i32, so sums past 2^31-1 counted bases
-- would overflow - confirm the expected input sizes stay below that.
type count = {gc: i32, total: i32}

-- The `gc` tally as a percentage of the `total` tally.
def pct (c: count) =
  let ratio = f64.i32 c.gc / f64.i32 c.total
  in ratio * 100

-- Field-wise sum of two counts.
def count_add (x: count) (y: count) =
  {gc = x.gc + y.gc, total = x.total + y.total}

-- The all-zero count.
def count0 : count = {gc = 0, total = 0}

-- Summary of a contiguous segment of the input.  `hasnl` is set by a
-- newline byte and `comment` by a '>' byte (see `mapop`); `befnl` and
-- `aftnl` are two separate counts that `redop` merges.
-- NOTE(review): presumably `befnl`/`aftnl` hold the counts before and
-- after the segment's first newline - confirm against the full header
-- comment.
type summary = {
  befnl: count,
  aftnl: count,
  hasnl: bool,
  comment: bool
}

-- The summary with zero counts and both flags cleared; used as the
-- neutral element of the reduction in `gc`.
def summary0 : summary =
  {befnl = count0, aftnl = count0, hasnl = false, comment = false}

-- Summary of a single byte: 'G' and 'C' count towards both tallies,
-- 'A' and 'T' only towards the total, and every other byte counts as
-- nothing.  The count always goes into `befnl`; `aftnl` starts empty.
-- A newline sets `hasnl` and a '>' sets `comment`.
def mapop (b: u8) =
  let is_gc = b == 'G' || b == 'C'
  let is_at = b == 'A' || b == 'T'
  let here : count =
    if is_gc then {gc = 1, total = 1}
    else if is_at then {gc = 0, total = 1}
    else count0
  in {befnl = here,
      aftnl = count0,
      hasnl = b == '\n',
      comment = b == '>'}

-- Combine the summaries of two adjacent segments, `x` followed by `y`,
-- into one summary.  Used as the reduction operator in `gc`, so it is
-- expected to be associative with `summary0` as neutral element.
--
-- Result fields:
--   befnl:   `x.befnl` alone if `x` has a newline, otherwise `x.befnl`
--            plus the joined count below.
--   aftnl:   the joined count plus `y.aftnl` if `x` has a newline,
--            otherwise just `y.aftnl`.
--   hasnl:   set if either side has a newline.
--   comment: set if `y.comment` is set, or if `x.comment` is set and
--            `y` contains no newline.
def redop (x: summary) (y: summary) =
-- The count that straddles the boundary: `x.aftnl` extended with
-- `y.befnl`, except that `y.befnl` is dropped when `x.comment` is set.
let join = if x.comment then x.aftnl else x.aftnl `count_add` y.befnl
in {befnl = if x.hasnl then x.befnl else x.befnl `count_add` join,
aftnl = if x.hasnl then join `count_add` y.aftnl else y.aftnl,
hasnl = x.hasnl || y.hasnl,
comment = y.comment || x.comment && (!y.hasnl)}

-- Summarise a byte string: map every byte to its one-byte summary and
-- reduce the results with `redop`, starting from `summary0`.
def gc (str: []u8) =
  str |> map mapop |> reduce redop summary0

-- Entry point: the initial running summary (the empty summary).
entry init : summary = summary0

-- Entry point: fold the summary of the chunk `str` into the running
-- summary `s`, with `s` on the left-hand side of `redop`.
entry gc_chunk (s: summary) (str: []u8) : summary =
  redop s (gc str)

-- Entry point: the final percentage, computed from the sum of both
-- counts held in the summary.
entry summary_res (s: summary) =
  let whole = count_add s.befnl s.aftnl
  in pct whole