diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
new file mode 100644
index 0000000..1110034
--- /dev/null
+++ b/.github/workflows/run-tests.yaml
@@ -0,0 +1,72 @@
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+name: Run tests
+
+jobs:
+  setup-r:
+    runs-on: ubuntu-latest
+    container: bioconductor/bioconductor_docker:devel
+
+    steps:
+    - name: Checkout repo
+      uses: actions/checkout@v4
+
+    - name: Set directories
+      run: |
+        echo "R_PKG_DIR=${R_HOME}/site-library" >> $GITHUB_ENV
+
+    - name: Restore the package directory
+      uses: actions/cache@v3
+      with:
+        path: ${{ env.R_PKG_DIR }}
+        key: check-packages
+
+    - name: Install dependencies
+      shell: Rscript {0}
+      run: |
+        BiocManager::install("alabaster.sce")
+
+    # Run every setup script in its own R process; '-exec ... +' would pass all files to a single 'R -f' call.
+    - name: Build test objects
+      run: |
+        find tests -name "*.R" -exec R -f {} \;
+
+    - name: Upload test files
+      uses: actions/upload-artifact@v3
+      with:
+        name: test-files
+        path: objects
+
+  test-js:
+    runs-on: ubuntu-latest
+    # The 'test-files' artifact is uploaded by setup-r, so that job must finish before this one starts.
+    needs: setup-r
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Node
+      uses: actions/setup-node@v3
+      with:
+        node-version: 16
+
+    - name: Restore the node modules
+      uses: actions/cache@v3
+      with:
+        path: '**/node_modules'
+        key: modules-${{ hashFiles('**/package.json') }}
+
+    - name: Download test files
+      uses: actions/download-artifact@v3
+      with:
+        name: test-files
+        path: objects
+
+    - name: Install packages
+      run: npm i --include=dev
+
+    - name: Run tests
+      run: npm run test
diff --git a/.gitignore b/.gitignore
index d5495db..8a97da4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ node_modules
 *.swp
 TEST_*
 docs/built
+objects/
diff --git a/src/readers/DataFrame.js b/src/readers/DataFrame.js
index ce89a17..38f1d6e 100644
--- a/src/readers/DataFrame.js
+++ b/src/readers/DataFrame.js
@@ -1,5 +1,6 @@
 import * as scran from "scran.js";
 import * as bioc from "bioconductor";
+import * as utils from "./utils.js";
 
 export async function readDataFrame(path, navigator) {
     let colnames;
@@ -28,7 +29,7 @@ export async function readDataFrame(path, navigator) {
             // Try to load nested objects if they're DFs, but don't try too hard.
             let nested_path = path + "/other_columns/" + iname;
             try {
-                columns.push(await readDataFrame(nested_path, getter));
+                columns.push(await readDataFrame(nested_path, navigator));
             } catch (e) {
                 console.warn("failed to extract nested DataFrame at '" + nested_path + "'; " + e.message);
                 columns.push(null);
@@ -39,31 +40,50 @@ export async function readDataFrame(path, navigator) {
 
         let current;
         let objtype = chandle.children[iname]
-        if (obtype == "DataSet") {
+        if (objtype == "DataSet") {
             let dhandle = chandle.open(iname, { load: true });
             let type = dhandle.readAttribute("type").values[0];
 
+            let placeholder = null;
+            if (dhandle.attributes.indexOf("missing-value-placeholder") >= 0) {
+                placeholder = dhandle.readAttribute("missing-value-placeholder").values[0];
+            }
+
             if (type == "integer" || type == "string") {
                 current = dhandle.values;
+                if (placeholder !== null) {
+                    current = utils.substitutePlaceholder(current, placeholder);
+                }
+
             } else if (type == "number") {
                 current = dhandle.values;
                 if (!(current instanceof Float64Array) && !(current instanceof Float32Array)) {
                     current = new Float64Array(current);
                 }
+                if (placeholder !== null) {
+                    current = utils.substitutePlaceholder(current, placeholder);
+                }
+
             } else if (type == "boolean") {
                 current = new Array(dhandle.values.length);
-                for (const [i, x] of dhandle.values.entries()) {
-                    current[i] = (x != 0);
+                if (placeholder !== null) {
+                    for (const [i, x] of dhandle.values.entries()) {
+                        if (x == placeholder) {
+                            current[i] = null;
+                        } else {
+                            current[i] = (x != 0);
+                        }
+                    }
+                } else {
+                    for (const [i, x] of dhandle.values.entries()) {
+                        current[i] = (x != 0);
+                    }
                 }
+
             } else {
                 throw new Error("data frame column has unknown type '" + type + "'");
             }
 
-            if ("missing-value-placeholder" in dhandle.attributes) {
-                let placeholder = dhandle.readAttribute("missing-value-placeholder").values[0];
-                current = utils.substitutePlaceholder(current, placeholder);
-            }
-
         } else if (objtype == "Group") {
             let fhandle = chandle.open(iname);
             let type = fhandle.readAttribute("type").values[0];
@@ -74,7 +94,7 @@ export async function readDataFrame(path, navigator) {
 
                 let codes = chandle.values;
                 let placeholder = -1;
-                if ("missing-value-placeholder" in chandle.attributes) {
+                if (chandle.attributes.indexOf("missing-value-placeholder") >= 0) {
                     placeholder = chandle.readAttribute("missing-value-placeholder").values[0];
                 }
 
diff --git a/src/readers/utils.js b/src/readers/utils.js
index 163060d..c8622bb 100644
--- a/src/readers/utils.js
+++ b/src/readers/utils.js
@@ -1,6 +1,6 @@
 export function isPlaceholder(x, placeholder) {
     if (Number.isNaN(placeholder)) {
-        return Number.isnan(x);
+        return Number.isNaN(x);
     } else {
         return x == placeholder;
     }
diff --git a/tests/readers/DataFrame.setup.R b/tests/readers/DataFrame.setup.R
new file mode 100644
index 0000000..c302e8c
--- /dev/null
+++ b/tests/readers/DataFrame.setup.R
@@ -0,0 +1,49 @@
+library(alabaster.base)
+PATH <- "objects"
+dir.create(PATH, showWarnings=FALSE)
+library(S4Vectors)
+
+{
+    df <- DataFrame(
+        strings = LETTERS,
+        integers = 1:26,
+        numbers = 1:26 / 2,
+        booleans = rep(c(TRUE, FALSE), length.out=26),
+        factors = factor(letters, rev(letters))
+    )
+
+    path <- file.path(PATH, "DataFrame-basic")
+    unlink(path, recursive=TRUE)
+    saveObject(df, path)
+}
+
+{
+    df <- DataFrame(
+        strings = LETTERS,
+        integers = 1:26,
+        numbers = 1:26 / 2,
+        booleans = rep(c(TRUE, FALSE), length.out=26),
+        factors = factor(letters, rev(letters))
+    )
+
+    df$strings[1] <- NA
+    df$integers[2] <- NA
+    df$numbers[3] <- NA
+    df$booleans[4] <- NA
+    df$factors[5] <- NA
+
+    path <- file.path(PATH, "DataFrame-missing")
+    unlink(path, recursive=TRUE)
+    saveObject(df, path)
+}
+
+{
+    library(S4Vectors)
+    df <- DataFrame(
+        A = 1:5,
+        B = I(DataFrame(X = (2:6)/2, Y = letters[1:5]))
+    )
+    path <- file.path(PATH, "DataFrame-nested")
+    unlink(path, recursive=TRUE)
+    saveObject(df, path)
+}
diff --git a/tests/readers/DataFrame.test.js b/tests/readers/DataFrame.test.js
new file mode 100644
index 0000000..9441a81
--- /dev/null
+++ b/tests/readers/DataFrame.test.js
@@ -0,0 +1,86 @@
+import * as df from "../../src/readers/DataFrame.js";
+import { localNavigator } from "../utils.js";
+import * as path from "path";
+import * as scran from "scran.js";
+
+beforeAll(async () => { await scran.initialize({ localFile: true }) });
+afterAll(async () => { await scran.terminate() });
+
+const PATH = "objects";
+
+test("basic data frame loading works as expected", async () => {
+    const basic_df = await df.readDataFrame(path.join(PATH, "DataFrame-basic"), localNavigator);
+    expect(basic_df.columnNames()).toEqual(["strings", "integers", "numbers", "booleans", "factors" ]);
+    expect(basic_df.numberOfRows()).toEqual(26);
+
+    const strcol = basic_df.column("strings");
+    expect(strcol instanceof Array).toBe(true);
+    expect(strcol[0]).toBe("A");
+    expect(strcol[25]).toBe("Z");
+
+    const intcol = basic_df.column("integers");
+    expect(intcol instanceof Int32Array).toBe(true);
+    expect(intcol[0]).toBe(1);
+    expect(intcol[25]).toBe(26);
+
+    const numcol = basic_df.column("numbers");
+    expect(numcol instanceof Float64Array).toBe(true);
+    expect(numcol[0]).toBe(0.5);
+    expect(numcol[25]).toBe(13);
+
+    const boolcol = basic_df.column("booleans");
+    expect(boolcol instanceof Array).toBe(true);
+    expect(boolcol[0]).toBe(true);
+    expect(boolcol[25]).toBe(false);
+
+    const faccol = basic_df.column("factors");
+    expect(faccol instanceof Array).toBe(true);
+    expect(faccol[0]).toBe("a");
+    expect(faccol[25]).toBe("z");
+});
+
+test("data frame loading works with missing values", async () => {
+    const missing_df = await df.readDataFrame(path.join(PATH, "DataFrame-missing"), localNavigator);
+    expect(missing_df.columnNames()).toEqual(["strings", "integers", "numbers", "booleans", "factors" ]);
+    expect(missing_df.numberOfRows()).toEqual(26);
+
+    const strcol = missing_df.column("strings");
+    expect(strcol instanceof Array).toBe(true);
+    expect(strcol[0]).toBeNull();
+    expect(strcol[1]).toBe("B");
+
+    const intcol = missing_df.column("integers");
+    expect(intcol instanceof Array).toBe(true);
+    expect(intcol[1]).toBeNull();
+    expect(intcol[2]).toBe(3);
+
+    const numcol = missing_df.column("numbers");
+    expect(numcol instanceof Array).toBe(true);
+    expect(numcol[2]).toBeNull();
+    expect(numcol[3]).toBe(2);
+
+    const boolcol = missing_df.column("booleans");
+    expect(boolcol instanceof Array).toBe(true);
+    expect(boolcol[3]).toBeNull();
+    expect(boolcol[4]).toBe(true);
+
+    const faccol = missing_df.column("factors");
+    expect(faccol instanceof Array).toBe(true);
+    expect(faccol[4]).toBeNull();
+    expect(faccol[5]).toBe("f");
+});
+
+test("data frame loading works with nested DFs", async () => {
+    const nested_df = await df.readDataFrame(path.join(PATH, "DataFrame-nested"), localNavigator);
+    expect(nested_df.columnNames()).toEqual(["A", "B"]);
+    expect(nested_df.numberOfRows()).toEqual(5);
+
+    const acol = nested_df.column("A");
+    expect(acol[0]).toBe(1);
+    expect(acol[4]).toBe(5);
+
+    const bcol = nested_df.column("B");
+    expect(bcol.columnNames()).toEqual(["X", "Y"]);
+    expect(bcol.numberOfRows()).toEqual(5);
+});
+
diff --git a/tests/utils.js b/tests/utils.js
new file mode 100644
index 0000000..004249d
--- /dev/null
+++ b/tests/utils.js
@@ -0,0 +1,17 @@
+import { Navigator } from "../src/Navigator.js";
+import * as fs from "fs";
+import * as path from "path";
+
+// Synchronously reads the file at `path` and returns its contents as a Uint8Array.
+function get(path) {
+    const contents = fs.readFileSync(path, null);
+    return new Uint8Array(contents);
+}
+
+// Synchronously lists the entry names of the directory at `path`.
+function list(path) {
+    return fs.readdirSync(path);
+}
+
+// Navigator built from the two local-filesystem callbacks above; used by the reader tests.
+export const localNavigator = new Navigator(get, list);