Skip to content

Commit

Permalink
Added an initial test for DataFrame loading.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed May 1, 2024
1 parent 160aaf5 commit c5c3da7
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 11 deletions.
69 changes: 69 additions & 0 deletions .github/workflows/run-tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Continuous integration: an R job builds the on-disk test objects and
# publishes them as an artifact; a Node job then downloads that artifact and
# runs the JavaScript tests against it.
on:
  push:
    branches:
      - master
  # The event name is singular; 'pull_requests' is not a recognised trigger.
  pull_request:

name: Run tests

jobs:
  setup-r:
    runs-on: ubuntu-latest
    container: bioconductor/bioconductor_docker:devel

    steps:
    - name: Checkout repo
      uses: actions/checkout@v4

    - name: Set directories
      run: |
        echo "R_PKG_DIR=${R_HOME}/site-library" >> $GITHUB_ENV

    - name: Restore the package directory
      uses: actions/cache@v3
      with:
        path: ${{ env.R_PKG_DIR }}
        key: check-packages

    - name: Install dependencies
      shell: Rscript {0}
      run: |
        BiocManager::install("alabaster.sce")

    # Run each setup script in its own R process. 'find -exec ... +' would
    # pass every script to a single 'R -f' call, which only executes the first.
    # xargs exits non-zero if any script fails, so the step still fails loudly.
    - name: Build test objects
      run: |
        find tests -name "*.R" -print0 | xargs -0 -n 1 R -f

    - name: Upload test files
      uses: actions/upload-artifact@v3
      with:
        name: test-files
        path: objects

  test-js:
    # The 'test-files' artifact only exists once setup-r has uploaded it;
    # without this dependency the two jobs run in parallel and the download fails.
    needs: setup-r
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4

    - name: Set up Node
      uses: actions/setup-node@v3
      with:
        node-version: 16

    - name: Restore the node modules
      uses: actions/cache@v3
      with:
        path: '**/node_modules'
        key: modules-${{ hashFiles('**/package.json') }}

    - name: Download test files
      uses: actions/download-artifact@v3
      with:
        name: test-files
        path: objects

    # npm spells this option '--include=dev'; '--include-dev' is not a known flag.
    - name: Install packages
      run: npm i --include=dev

    - name: Run tests
      run: npm run test
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ node_modules
*.swp
TEST_*
docs/built
objects/
40 changes: 30 additions & 10 deletions src/readers/DataFrame.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as scran from "scran.js";
import * as bioc from "bioconductor";
import * as utils from "./utils.js";

export async function readDataFrame(path, navigator) {
let colnames;
Expand Down Expand Up @@ -28,7 +29,7 @@ export async function readDataFrame(path, navigator) {
// Try to load nested objects if they're DFs, but don't try too hard.
let nested_path = path + "/other_columns/" + iname;
try {
columns.push(await readDataFrame(nested_path, getter));
columns.push(await readDataFrame(nested_path, navigator));
} catch (e) {
console.warn("failed to extract nested DataFrame at '" + nested_path + "'; " + e.message);
columns.push(null);
Expand All @@ -39,31 +40,50 @@ export async function readDataFrame(path, navigator) {
let current;
let objtype = chandle.children[iname]

if (obtype == "DataSet") {
if (objtype == "DataSet") {
let dhandle = chandle.open(iname, { load: true });
let type = dhandle.readAttribute("type").values[0];

let placeholder = null;
if (dhandle.attributes.indexOf("missing-value-placeholder") >= 0) {
placeholder = dhandle.readAttribute("missing-value-placeholder").values[0];
}

if (type == "integer" || type == "string") {
current = dhandle.values;
if (placeholder !== null) {
current = utils.substitutePlaceholder(current, placeholder);
}

} else if (type == "number") {
current = dhandle.values;
if (!(current instanceof Float64Array) && !(current instanceof Float32Array)) {
current = new Float64Array(current);
}
if (placeholder !== null) {
current = utils.substitutePlaceholder(current, placeholder);
}

} else if (type == "boolean") {
current = new Array(dhandle.values.length);
for (const [i, x] of dhandle.values.entries()) {
current[i] = (x != 0);
if (placeholder !== null) {
for (const [i, x] of dhandle.values.entries()) {
if (x == placeholder) {
current[i] = null;
} else {
current[i] = (x != 0);
}
}
} else {
for (const [i, x] of dhandle.values.entries()) {
current[i] = (x != 0);
}
}

} else {
throw new Error("data frame column has unknown type '" + type + "'");
}

if ("missing-value-placeholder" in dhandle.attributes) {
let placeholder = dhandle.readAttribute("missing-value-placeholder").values[0];
current = utils.substitutePlaceholder(current, placeholder);
}

} else if (objtype == "Group") {
let fhandle = chandle.open(iname);
let type = fhandle.readAttribute("type").values[0];
Expand All @@ -74,7 +94,7 @@ export async function readDataFrame(path, navigator) {
let codes = chandle.values;

let placeholder = -1;
if ("missing-value-placeholder" in chandle.attributes) {
if (chandle.attributes.indexOf("missing-value-placeholder") >= 0) {
placeholder = chandle.readAttribute("missing-value-placeholder").values[0];
}

Expand Down
2 changes: 1 addition & 1 deletion src/readers/utils.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
export function isPlaceholder(x, placeholder) {
if (Number.isNaN(placeholder)) {
return Number.isnan(x);
return Number.isNaN(x);
} else {
return x == placeholder;
}
Expand Down
49 changes: 49 additions & 0 deletions tests/readers/DataFrame.setup.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
library(alabaster.base)
PATH <- "objects"
dir.create(PATH, showWarnings=FALSE)
library(S4Vectors)

# Basic DataFrame: one string, integer, double, logical and factor column,
# saved to '<PATH>/DataFrame-basic'.
{
    n <- length(LETTERS)
    basic <- DataFrame(
        strings = LETTERS,
        integers = seq_len(n),
        numbers = seq_len(n) / 2,
        booleans = rep_len(c(TRUE, FALSE), n),
        factors = factor(letters, levels = rev(letters))
    )

    # Clear any stale output from a previous run before saving.
    out_dir <- file.path(PATH, "DataFrame-basic")
    unlink(out_dir, recursive = TRUE)
    saveObject(basic, out_dir)
}

# Same columns as the basic DataFrame, but with one NA injected per column
# (rows 1 to 5 respectively), saved to '<PATH>/DataFrame-missing'.
{
    strings <- LETTERS
    strings[1] <- NA

    integers <- 1:26
    integers[2] <- NA

    numbers <- 1:26 / 2
    numbers[3] <- NA

    booleans <- rep(c(TRUE, FALSE), length.out = 26)
    booleans[4] <- NA

    factors <- factor(letters, rev(letters))
    factors[5] <- NA

    with_missing <- DataFrame(
        strings = strings,
        integers = integers,
        numbers = numbers,
        booleans = booleans,
        factors = factors
    )

    # Clear any stale output from a previous run before saving.
    out_dir <- file.path(PATH, "DataFrame-missing")
    unlink(out_dir, recursive = TRUE)
    saveObject(with_missing, out_dir)
}

# DataFrame with an integer column 'A' and a nested DataFrame column 'B',
# saved to '<PATH>/DataFrame-nested'.
{
    library(S4Vectors)

    inner <- DataFrame(X = (2:6) / 2, Y = letters[1:5])
    # I() keeps the inner DataFrame as a single column rather than
    # spreading its columns into the outer one.
    nested <- DataFrame(A = 1:5, B = I(inner))

    out_dir <- file.path(PATH, "DataFrame-nested")
    unlink(out_dir, recursive = TRUE)
    saveObject(nested, out_dir)
}
86 changes: 86 additions & 0 deletions tests/readers/DataFrame.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import * as df from "../../src/readers/DataFrame.js";
import { localNavigator } from "../utils.js";
import * as path from "path";
import * as scran from "scran.js";

// Initialize scran.js once before any test in this file runs, and terminate it
// after the last one has finished.
beforeAll(async () => { await scran.initialize({ localFile: true }) });
afterAll(async () => { await scran.terminate() });

const PATH = "objects";

// Loads the 'DataFrame-basic' object and checks, for every column, the
// container type plus the first and last values.
test("basic data frame loading works as expected", async () => {
    const location = path.join(PATH, "DataFrame-basic");
    const loaded = await df.readDataFrame(location, localNavigator);
    expect(loaded.columnNames()).toEqual(["strings", "integers", "numbers", "booleans", "factors" ]);
    expect(loaded.numberOfRows()).toEqual(26);

    // [column name, expected container, first value, last value]
    const expectations = [
        ["strings", Array, "A", "Z"],
        ["integers", Int32Array, 1, 26],
        ["numbers", Float64Array, 0.5, 13],
        ["booleans", Array, true, false],
        ["factors", Array, "a", "z"],
    ];

    for (const [name, container, first, last] of expectations) {
        const values = loaded.column(name);
        expect(values).toBeInstanceOf(container);
        expect(values[0]).toBe(first);
        expect(values[25]).toBe(last);
    }
});

// Loads the 'DataFrame-missing' object and checks that each column is a plain
// Array with null at its missing position and the expected value right after it.
test("data frame loading works with missing values", async () => {
    const location = path.join(PATH, "DataFrame-missing");
    const loaded = await df.readDataFrame(location, localNavigator);
    expect(loaded.columnNames()).toEqual(["strings", "integers", "numbers", "booleans", "factors" ]);
    expect(loaded.numberOfRows()).toEqual(26);

    // [column name, index of the missing entry, value expected at the next index]
    const expectations = [
        ["strings", 0, "B"],
        ["integers", 1, 3],
        ["numbers", 2, 2],
        ["booleans", 3, true],
        ["factors", 4, "f"],
    ];

    for (const [name, missingAt, following] of expectations) {
        const values = loaded.column(name);
        expect(values).toBeInstanceOf(Array);
        expect(values[missingAt]).toBeNull();
        expect(values[missingAt + 1]).toBe(following);
    }
});

// Loads the 'DataFrame-nested' object and checks that column 'B' is itself a
// data frame with the expected column names and row count.
test("data frame loading works with nested DFs", async () => {
    const location = path.join(PATH, "DataFrame-nested");
    const outer = await df.readDataFrame(location, localNavigator);
    expect(outer.columnNames()).toEqual(["A", "B"]);
    expect(outer.numberOfRows()).toEqual(5);

    const plain = outer.column("A");
    expect(plain[0]).toBe(1);
    expect(plain[4]).toBe(5);

    const inner = outer.column("B");
    expect(inner.columnNames()).toEqual(["X", "Y"]);
    expect(inner.numberOfRows()).toEqual(5);
});

14 changes: 14 additions & 0 deletions tests/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { Navigator } from "../src/Navigator.js";
import * as fs from "fs";
import * as path from "path";

/**
 * Synchronously read a file and return its bytes as a new Uint8Array.
 *
 * @param {string} path - Location of the file, passed straight to `fs.readFileSync`.
 * @returns {Uint8Array} A copy of the file's contents.
 */
function get(path) {
    const buffer = fs.readFileSync(path, null);
    return Uint8Array.from(buffer);
}

/**
 * Synchronously list the entries of a directory.
 *
 * @param {string} path - Location of the directory, passed straight to `fs.readdirSync`.
 * @returns {string[]} Names of the entries inside the directory.
 */
function list(path) {
    const entries = fs.readdirSync(path);
    return entries;
}

export const localNavigator = new Navigator(get, list);

0 comments on commit c5c3da7

Please sign in to comment.