From b44cb6846ad91f5492e12cc86af31ab7060305ec Mon Sep 17 00:00:00 2001 From: jheer Date: Tue, 13 Aug 2024 18:26:21 +0200 Subject: [PATCH 01/16] chore: Update dependencies. --- package-lock.json | 189 +++++++++++++++++++++++++--------------------- package.json | 7 +- 2 files changed, 107 insertions(+), 89 deletions(-) diff --git a/package-lock.json b/package-lock.json index aa16b2d..73cd83e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,20 +16,22 @@ "devDependencies": { "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "eslint": "^9.8.0", - "mocha": "^10.7.0", + "@uwdata/flechette": "~0.0.3", + "eslint": "^9.9.0", + "mocha": "^10.7.3", "rimraf": "^6.0.1", - "rollup": "^4.19.1", + "rollup": "^4.20.0", "rollup-plugin-bundle-size": "^1.0.3", "tape": "^5.8.1", "typescript": "^5.5.4" } }, "node_modules/@75lb/deep-merge": { - "version": "1.1.1", - "license": "MIT", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@75lb/deep-merge/-/deep-merge-1.1.2.tgz", + "integrity": "sha512-08K9ou5VNbheZFxM5tDWoqjA3ImC50DiuuJ2tj1yEPRfkp8lLLg6XAaJ4On+a0yAXor/8ay5gHnAIshRM44Kpw==", "dependencies": { - "lodash.assignwith": "^4.2.0", + "lodash": "^4.17.21", "typical": "^7.1.1" }, "engines": { @@ -106,9 +108,9 @@ } }, "node_modules/@eslint/js": { - "version": "9.8.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.8.0.tgz", - "integrity": "sha512-MfluB7EUfxXtv3i/++oh89uzAr4PDI4nn201hsp+qaXqsjAWzinlZEHEfPgAX4doIlKvPG/i0A9dpKxOLII8yA==", + "version": "9.9.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.9.0.tgz", + "integrity": "sha512-hhetes6ZHP3BlXLxmd8K2SNgkhNSi+UcecbnwWKwpP7kyi/uC75DJ1lOOBO3xrC4jyojtGE3YxKZPHfk4yrgug==", "dev": true, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -386,9 +388,9 @@ } }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.19.1.tgz", - 
"integrity": "sha512-XzqSg714++M+FXhHfXpS1tDnNZNpgxxuGZWlRG/jSj+VEPmZ0yg6jV4E0AL3uyBKxO8mO3xtOsP5mQ+XLfrlww==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.20.0.tgz", + "integrity": "sha512-TSpWzflCc4VGAUJZlPpgAJE1+V60MePDQnBd7PPkpuEmOy8i87aL6tinFGKBFKuEDikYpig72QzdT3QPYIi+oA==", "cpu": [ "arm" ], @@ -399,9 +401,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.19.1.tgz", - "integrity": "sha512-thFUbkHteM20BGShD6P08aungq4irbIZKUNbG70LN8RkO7YztcGPiKTTGZS7Kw+x5h8hOXs0i4OaHwFxlpQN6A==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.20.0.tgz", + "integrity": "sha512-u00Ro/nok7oGzVuh/FMYfNoGqxU5CPWz1mxV85S2w9LxHR8OoMQBuSk+3BKVIDYgkpeOET5yXkx90OYFc+ytpQ==", "cpu": [ "arm64" ], @@ -412,9 +414,9 @@ ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.19.1.tgz", - "integrity": "sha512-8o6eqeFZzVLia2hKPUZk4jdE3zW7LCcZr+MD18tXkgBBid3lssGVAYuox8x6YHoEPDdDa9ixTaStcmx88lio5Q==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.20.0.tgz", + "integrity": "sha512-uFVfvzvsdGtlSLuL0ZlvPJvl6ZmrH4CBwLGEFPe7hUmf7htGAN+aXo43R/V6LATyxlKVC/m6UsLb7jbG+LG39Q==", "cpu": [ "arm64" ], @@ -425,9 +427,9 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.19.1.tgz", - "integrity": "sha512-4T42heKsnbjkn7ovYiAdDVRRWZLU9Kmhdt6HafZxFcUdpjlBlxj4wDrt1yFWLk7G4+E+8p2C9tcmSu0KA6auGA==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.20.0.tgz", + "integrity": 
"sha512-xbrMDdlev53vNXexEa6l0LffojxhqDTBeL+VUxuuIXys4x6xyvbKq5XqTXBCEUA8ty8iEJblHvFaWRJTk/icAQ==", "cpu": [ "x64" ], @@ -438,9 +440,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.19.1.tgz", - "integrity": "sha512-MXg1xp+e5GhZ3Vit1gGEyoC+dyQUBy2JgVQ+3hUrD9wZMkUw/ywgkpK7oZgnB6kPpGrxJ41clkPPnsknuD6M2Q==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.20.0.tgz", + "integrity": "sha512-jMYvxZwGmoHFBTbr12Xc6wOdc2xA5tF5F2q6t7Rcfab68TT0n+r7dgawD4qhPEvasDsVpQi+MgDzj2faOLsZjA==", "cpu": [ "arm" ], @@ -451,9 +453,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.19.1.tgz", - "integrity": "sha512-DZNLwIY4ftPSRVkJEaxYkq7u2zel7aah57HESuNkUnz+3bZHxwkCUkrfS2IWC1sxK6F2QNIR0Qr/YXw7nkF3Pw==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.20.0.tgz", + "integrity": "sha512-1asSTl4HKuIHIB1GcdFHNNZhxAYEdqML/MW4QmPS4G0ivbEcBr1JKlFLKsIRqjSwOBkdItn3/ZDlyvZ/N6KPlw==", "cpu": [ "arm" ], @@ -464,9 +466,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.19.1.tgz", - "integrity": "sha512-C7evongnjyxdngSDRRSQv5GvyfISizgtk9RM+z2biV5kY6S/NF/wta7K+DanmktC5DkuaJQgoKGf7KUDmA7RUw==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.20.0.tgz", + "integrity": "sha512-COBb8Bkx56KldOYJfMf6wKeYJrtJ9vEgBRAOkfw6Ens0tnmzPqvlpjZiLgkhg6cA3DGzCmLmmd319pmHvKWWlQ==", "cpu": [ "arm64" ], @@ -477,9 +479,9 @@ ] }, 
"node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.19.1.tgz", - "integrity": "sha512-89tFWqxfxLLHkAthAcrTs9etAoBFRduNfWdl2xUs/yLV+7XDrJ5yuXMHptNqf1Zw0UCA3cAutkAiAokYCkaPtw==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.20.0.tgz", + "integrity": "sha512-+it+mBSyMslVQa8wSPvBx53fYuZK/oLTu5RJoXogjk6x7Q7sz1GNRsXWjn6SwyJm8E/oMjNVwPhmNdIjwP135Q==", "cpu": [ "arm64" ], @@ -490,9 +492,9 @@ ] }, "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.19.1.tgz", - "integrity": "sha512-PromGeV50sq+YfaisG8W3fd+Cl6mnOOiNv2qKKqKCpiiEke2KiKVyDqG/Mb9GWKbYMHj5a01fq/qlUR28PFhCQ==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.20.0.tgz", + "integrity": "sha512-yAMvqhPfGKsAxHN8I4+jE0CpLWD8cv4z7CK7BMmhjDuz606Q2tFKkWRY8bHR9JQXYcoLfopo5TTqzxgPUjUMfw==", "cpu": [ "ppc64" ], @@ -503,9 +505,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.19.1.tgz", - "integrity": "sha512-/1BmHYh+iz0cNCP0oHCuF8CSiNj0JOGf0jRlSo3L/FAyZyG2rGBuKpkZVH9YF+x58r1jgWxvm1aRg3DHrLDt6A==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.20.0.tgz", + "integrity": "sha512-qmuxFpfmi/2SUkAw95TtNq/w/I7Gpjurx609OOOV7U4vhvUhBcftcmXwl3rqAek+ADBwSjIC4IVNLiszoj3dPA==", "cpu": [ "riscv64" ], @@ -516,9 +518,9 @@ ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.19.1.tgz", 
- "integrity": "sha512-0cYP5rGkQWRZKy9/HtsWVStLXzCF3cCBTRI+qRL8Z+wkYlqN7zrSYm6FuY5Kd5ysS5aH0q5lVgb/WbG4jqXN1Q==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.20.0.tgz", + "integrity": "sha512-I0BtGXddHSHjV1mqTNkgUZLnS3WtsqebAXv11D5BZE/gfw5KoyXSAXVqyJximQXNvNzUo4GKlCK/dIwXlz+jlg==", "cpu": [ "s390x" ], @@ -529,9 +531,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.19.1.tgz", - "integrity": "sha512-XUXeI9eM8rMP8aGvii/aOOiMvTs7xlCosq9xCjcqI9+5hBxtjDpD+7Abm1ZhVIFE1J2h2VIg0t2DX/gjespC2Q==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.20.0.tgz", + "integrity": "sha512-y+eoL2I3iphUg9tN9GB6ku1FA8kOfmF4oUEWhztDJ4KXJy1agk/9+pejOuZkNFhRwHAOxMsBPLbXPd6mJiCwew==", "cpu": [ "x64" ], @@ -542,9 +544,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.19.1.tgz", - "integrity": "sha512-V7cBw/cKXMfEVhpSvVZhC+iGifD6U1zJ4tbibjjN+Xi3blSXaj/rJynAkCFFQfoG6VZrAiP7uGVzL440Q6Me2Q==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.20.0.tgz", + "integrity": "sha512-hM3nhW40kBNYUkZb/r9k2FKK+/MnKglX7UYd4ZUy5DJs8/sMsIbqWK2piZtVGE3kcXVNj3B2IrUYROJMMCikNg==", "cpu": [ "x64" ], @@ -555,9 +557,9 @@ ] }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.19.1.tgz", - "integrity": "sha512-88brja2vldW/76jWATlBqHEoGjJLRnP0WOEKAUbMcXaAZnemNhlAHSyj4jIwMoP2T750LE9lblvD4e2jXleZsA==", + "version": "4.20.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.20.0.tgz", + "integrity": "sha512-psegMvP+Ik/Bg7QRJbv8w8PAytPA7Uo8fpFjXyCRHWm6Nt42L+JtoqH8eDQ5hRP7/XW2UiIriy1Z46jf0Oa1kA==", "cpu": [ "arm64" ], @@ -568,9 +570,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.19.1.tgz", - "integrity": "sha512-LdxxcqRVSXi6k6JUrTah1rHuaupoeuiv38du8Mt4r4IPer3kwlTo+RuvfE8KzZ/tL6BhaPlzJ3835i6CxrFIRQ==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.20.0.tgz", + "integrity": "sha512-GabekH3w4lgAJpVxkk7hUzUf2hICSQO0a/BLFA11/RMxQT92MabKAqyubzDZmMOC/hcJNlc+rrypzNzYl4Dx7A==", "cpu": [ "ia32" ], @@ -581,9 +583,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.19.1.tgz", - "integrity": "sha512-2bIrL28PcK3YCqD9anGxDxamxdiJAxA+l7fWIwM5o8UqNy1t3d1NdAweO2XhA0KTDJ5aH1FsuiT5+7VhtHliXg==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.20.0.tgz", + "integrity": "sha512-aJ1EJSuTdGnM6qbVC4B5DSmozPTqIag9fSzXRNNo+humQLG89XpPgdt16Ia56ORD7s+H8Pmyx44uczDQ0yDzpg==", "cpu": [ "x64" ], @@ -633,6 +635,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@uwdata/flechette": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-0.0.3.tgz", + "integrity": "sha512-eYxNvZaHLZ3r0Nxe5bTbxaOEau4TVYO9u2Ju2k92vcsylAQ30t0QhvwJbfoN+XkN6g63O/xwpRnCVquYOf4M2A==", + "dev": true + }, "node_modules/acorn": { "version": "8.12.1", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.12.1.tgz", @@ -1559,16 +1567,16 @@ } }, "node_modules/eslint": { - "version": "9.8.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.8.0.tgz", - 
"integrity": "sha512-K8qnZ/QJzT2dLKdZJVX6W4XOwBzutMYmt0lqUS+JdXgd+HTYFlonFgkJ8s44d/zMPPCnOOk0kMWCApCPhiOy9A==", + "version": "9.9.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.9.0.tgz", + "integrity": "sha512-JfiKJrbx0506OEerjK2Y1QlldtBxkAlLxT5OEcRF8uaQ86noDe2k31Vw9rnSWv+MXZHj7OOUV/dA0AhdLFcyvA==", "dev": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.11.0", "@eslint/config-array": "^0.17.1", "@eslint/eslintrc": "^3.1.0", - "@eslint/js": "9.8.0", + "@eslint/js": "9.9.0", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.3.0", "@nodelib/fs.walk": "^1.2.8", @@ -1607,6 +1615,14 @@ }, "funding": { "url": "https://eslint.org/donate" + }, + "peerDependencies": { + "jiti": "*" + }, + "peerDependenciesMeta": { + "jiti": { + "optional": true + } } }, "node_modules/eslint-scope": { @@ -2842,9 +2858,10 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/lodash.assignwith": { - "version": "4.2.0", - "license": "MIT" + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" }, "node_modules/lodash.camelcase": { "version": "4.3.0", @@ -2958,9 +2975,9 @@ } }, "node_modules/mocha": { - "version": "10.7.0", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.7.0.tgz", - "integrity": "sha512-v8/rBWr2VO5YkspYINnvu81inSz2y3ODJrhO175/Exzor1RcEZZkizgE2A+w/CAXXoESS8Kys5E62dOHGHzULA==", + "version": "10.7.3", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.7.3.tgz", + "integrity": "sha512-uQWxAu44wwiACGqjbPYmjo7Lg8sFrS3dQe7PP2FQI+woptP4vZXSMcfMyFL/e1yFEeEpV4RtyTpZROOKmxis+A==", "dev": true, "dependencies": { "ansi-colors": "^4.1.3", @@ -3573,9 +3590,9 @@ } }, "node_modules/rollup": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.19.1.tgz", - 
"integrity": "sha512-K5vziVlg7hTpYfFBI+91zHBEMo6jafYXpkMlqZjg7/zhIG9iHqazBf4xz9AVdjS9BruRn280ROqLI7G3OFRIlw==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.20.0.tgz", + "integrity": "sha512-6rbWBChcnSGzIlXeIdNIZTopKYad8ZG8ajhl78lGRLsI2rX8IkaotQhVas2Ma+GPxJav19wrSzvRvuiv0YKzWw==", "dev": true, "dependencies": { "@types/estree": "1.0.5" @@ -3588,22 +3605,22 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.19.1", - "@rollup/rollup-android-arm64": "4.19.1", - "@rollup/rollup-darwin-arm64": "4.19.1", - "@rollup/rollup-darwin-x64": "4.19.1", - "@rollup/rollup-linux-arm-gnueabihf": "4.19.1", - "@rollup/rollup-linux-arm-musleabihf": "4.19.1", - "@rollup/rollup-linux-arm64-gnu": "4.19.1", - "@rollup/rollup-linux-arm64-musl": "4.19.1", - "@rollup/rollup-linux-powerpc64le-gnu": "4.19.1", - "@rollup/rollup-linux-riscv64-gnu": "4.19.1", - "@rollup/rollup-linux-s390x-gnu": "4.19.1", - "@rollup/rollup-linux-x64-gnu": "4.19.1", - "@rollup/rollup-linux-x64-musl": "4.19.1", - "@rollup/rollup-win32-arm64-msvc": "4.19.1", - "@rollup/rollup-win32-ia32-msvc": "4.19.1", - "@rollup/rollup-win32-x64-msvc": "4.19.1", + "@rollup/rollup-android-arm-eabi": "4.20.0", + "@rollup/rollup-android-arm64": "4.20.0", + "@rollup/rollup-darwin-arm64": "4.20.0", + "@rollup/rollup-darwin-x64": "4.20.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.20.0", + "@rollup/rollup-linux-arm-musleabihf": "4.20.0", + "@rollup/rollup-linux-arm64-gnu": "4.20.0", + "@rollup/rollup-linux-arm64-musl": "4.20.0", + "@rollup/rollup-linux-powerpc64le-gnu": "4.20.0", + "@rollup/rollup-linux-riscv64-gnu": "4.20.0", + "@rollup/rollup-linux-s390x-gnu": "4.20.0", + "@rollup/rollup-linux-x64-gnu": "4.20.0", + "@rollup/rollup-linux-x64-musl": "4.20.0", + "@rollup/rollup-win32-arm64-msvc": "4.20.0", + "@rollup/rollup-win32-ia32-msvc": "4.20.0", + "@rollup/rollup-win32-x64-msvc": "4.20.0", "fsevents": "~2.3.2" } }, diff --git a/package.json b/package.json 
index bec7944..13a4e66 100644 --- a/package.json +++ b/package.json @@ -37,6 +37,7 @@ "prepublishOnly": "npm test && npm run build" }, "dependencies": { + "@uwdata/flechette": "~0.0.3", "acorn": "^8.12.1", "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" @@ -44,10 +45,10 @@ "devDependencies": { "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "eslint": "^9.8.0", - "mocha": "^10.7.0", + "eslint": "^9.9.0", + "mocha": "^10.7.3", "rimraf": "^6.0.1", - "rollup": "^4.19.1", + "rollup": "^4.20.0", "rollup-plugin-bundle-size": "^1.0.3", "tape": "^5.8.1", "typescript": "^5.5.4" From 54894a700f7578042f834cc726ec04be29fa5f9a Mon Sep 17 00:00:00 2001 From: jheer Date: Tue, 13 Aug 2024 18:26:42 +0200 Subject: [PATCH 02/16] ci: Update CI node versions. --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 26c65bd..7f45af2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,7 +3,7 @@ name: Test on: push: branches: - - master + - main pull_request: jobs: @@ -12,7 +12,7 @@ jobs: strategy: matrix: - node: [18, 20] + node: [20, 22] name: Node ${{ matrix.node }} From 7c80e72259feb6a58e8eca4331ee89bec32802be Mon Sep 17 00:00:00 2001 From: jheer Date: Tue, 13 Aug 2024 18:27:04 +0200 Subject: [PATCH 03/16] feat!: Switch from arrow-js to flechette.
--- perf/arrow-perf.js | 39 +++-- src/arrow/arrow-column.js | 277 -------------------------------- src/arrow/from-arrow.js | 135 ++++++++++++++-- src/arrow/types.ts | 119 +++++++++----- test/arrow/arrow-column-test.js | 112 ------------- test/arrow/from-arrow-test.js | 90 +++++++++-- test/verbs/reify-test.js | 4 +- 7 files changed, 297 insertions(+), 479 deletions(-) delete mode 100644 src/arrow/arrow-column.js delete mode 100644 test/arrow/arrow-column-test.js diff --git a/perf/arrow-perf.js b/perf/arrow-perf.js index 9f9c2f5..0708b22 100644 --- a/perf/arrow-perf.js +++ b/perf/arrow-perf.js @@ -4,7 +4,7 @@ import { bools, floats, ints, sample, strings } from './data-gen.js'; import { fromArrow, table, toArrow } from '../src/index.js'; import { Bool, Dictionary, Float64, Int32, Table, Uint32, Utf8, - tableToIPC, vectorFromArray + tableFromIPC, tableToIPC, vectorFromArray } from 'apache-arrow'; function process(N, nulls, msg) { @@ -18,39 +18,46 @@ function process(N, nulls, msg) { new Int32() ) }; - const at = new Table(vectors); - const dt = fromArrow(at); + const aa = new Table(vectors); + const buf = tableToIPC(aa, 'stream'); + const ft = fromArrow(buf); // using flechette + const at = fromArrow(aa); // using arrow-js - const arqueroFilterDict = val => time(() => { + const filterDict = (dt, val) => time(() => { dt.filter(`d.k === '${val}'`).numRows(); }); - const arqueroFilterValue = val => time(() => { + const filterValue = (dt, val) => time(() => { dt.filter(`d.v >= ${val}`).numRows(); }); tape(`arrow processing: ${msg}`, t => { - const k = at.getChild('k').get(50); + const k = aa.getChild('k').at(50); console.table([ // eslint-disable-line { - operation: 'init table', - arquero: time(() => fromArrow(at)) + operation: 'init table', + 'arrow-js': time(() => fromArrow(tableFromIPC(buf))), + flechette: time(() => fromArrow(buf)) }, { - operation: 'count dictionary', - arquero: time(() => dt.groupby('k').count()) + operation: 'count dictionary', + 'arrow-js': 
time(() => at.groupby('k').count()), + flechette: time(() => ft.groupby('k').count()) }, { - operation: 'filter dictionary', - arquero: arqueroFilterDict(k) + operation: 'filter dictionary', + 'arrow-js': filterDict(at, k), + flechette: filterDict(ft, k) }, { - operation: 'filter numbers 0', - arquero: arqueroFilterValue(0) + operation: 'filter numbers 0', + 'arrow-js': filterValue(at, 0), + flechette: filterValue(ft, 0) }, { - operation: 'filter numbers 1', - arquero: arqueroFilterValue(1) + operation: 'filter numbers 1', + 'arrow-js': filterValue(at, 1), + flechette: filterValue(ft, 1) } ]); t.end(); diff --git a/src/arrow/arrow-column.js b/src/arrow/arrow-column.js deleted file mode 100644 index a8b43f1..0000000 --- a/src/arrow/arrow-column.js +++ /dev/null @@ -1,277 +0,0 @@ -import sequence from '../op/functions/sequence.js'; -import error from '../util/error.js'; -import isFunction from '../util/is-function.js'; -import repeat from '../util/repeat.js'; -import toString from '../util/to-string.js'; -import unroll from '../util/unroll.js'; - -// Hardwire Arrow type ids to sidestep hard dependency -// https://github.com/apache/arrow/blob/master/js/src/enum.ts -const isDict = ({ typeId }) => typeId === -1; -const isInt = ({ typeId }) => typeId === 2; -const isUtf8 = ({ typeId }) => typeId === 5; -const isDecimal = ({ typeId }) => typeId === 7; -const isDate = ({ typeId }) => typeId === 8; -const isTimestamp = ({ typeId }) => typeId === 10; -const isStruct = ({ typeId }) => typeId === 13; -const isLargeUtf8 = ({ typeId }) => typeId === 20; -const isListType = ({ typeId }) => typeId === 12 || typeId === 16; - -/** - * Create an Arquero column that proxies access to an Arrow column. - * @param {import('apache-arrow').Vector} vector An Apache Arrow column. - * @param {import('./types.js').ArrowColumnOptions} [options] - * Arrow conversion options. - * @return {import('../table/types.js').ColumnType} - * An Arquero-compatible column. 
- */ -export default function arrowColumn(vector, options) { - return isDict(vector.type) - ? dictionaryColumn(vector) - : proxyColumn(vector, options); -} - -/** - * Internal method for Arquero column generation for Apache Arrow data - * @param {import('apache-arrow').Vector} vector An Apache Arrow column. - * @param {import('./types.js').ArrowColumnOptions} [options] - * Arrow conversion options. - * @return {import('../table/types.js').ColumnType} - * An Arquero-compatible column. - */ -function proxyColumn(vector, options = {}) { - const { type, length, numChildren } = vector; - const { - convertDate = true, - convertDecimal = true, - convertTimestamp = true, - convertBigInt = false, - memoize = true - } = options; - - // create a getter method for retrieving values - let get; - if (numChildren) { - // extract lists/structs to JS objects, possibly memoized - get = getNested(vector, options); - if (memoize) get = memoized(length, get); - } else if (memoize && (isUtf8(type) || isLargeUtf8(type))) { - // memoize string extraction - get = memoized(length, row => vector.get(row)); - } else if ((convertDate && isDate(type)) - || (convertTimestamp && isTimestamp(type))) { - // convert to Date type, memoized for object equality - get = memoized(length, row => { - const v = vector.get(row); - return v == null ? null : new Date(vector.get(row)); - }); - } else if (convertDecimal && isDecimal(type)) { - // map decimal to number - const scale = 1 / Math.pow(10, type.scale); - get = row => { - const v = vector.get(row); - return v == null ? null : decimalToNumber(v, scale); - }; - } else if (convertBigInt && isInt(type) && type.bitWidth >= 64) { - // map bigint to number - get = row => { - const v = vector.get(row); - return v == null ? 
null : Number(v); - }; - } else if (!isFunction(vector.at)) { - // backwards compatibility with older arrow versions - // the vector `at` method was added in Arrow v16 - get = row => vector.get(row); - } else { - // use the arrow column directly - return vector; - } - - // return a column proxy object using custom getter - return { - length, - at: get, - [Symbol.iterator]: () => (function* () { - for (let i = 0; i < length; ++i) { - yield get(i); - } - })() - }; -} - -/** - * Memoize expensive getter calls by caching retrieved values. - */ -function memoized(length, get) { - const values = Array(length); - return row => { - const v = values[row]; - return v !== undefined ? v : (values[row] = get(row)); - }; -} - -// generate base values for big integers represented as a Uint32Array -const BASE32 = Array.from( - { length: 8 }, - (_, i) => Math.pow(2, i * 32) -); - -/** - * Convert a fixed point decimal value to a double precision number. - * Note: if the value is sufficiently large the conversion may be lossy! - * @param {Uint32Array & { signed: boolean }} v a fixed point decimal value - * @param {number} scale a scale factor, corresponding to the - * number of fractional decimal digits in the fixed point value - * @return {number} the resulting number - */ -function decimalToNumber(v, scale) { - const n = v.length; - let x = 0; - if (v.signed && (v[n - 1] | 0) < 0) { - for (let i = 0; i < n; ++i) { - x += ~v[i] * BASE32[i]; - } - x = -(x + 1); - } else { - for (let i = 0; i < n; ++i) { - x += v[i] * BASE32[i]; - } - } - return x * scale; -} - -// get an array for a given vector -function arrayFrom(vector, options) { - return vector.numChildren ? repeat(vector.length, getNested(vector, options)) - : vector.nullCount ? [...vector] - : vector.toArray(); -} - -// generate a getter for a nested data type -function getNested(vector, options) { - return isListType(vector.type) ? getList(vector, options) - : isStruct(vector.type) ? 
getStruct(vector, options) - : error(`Unsupported Arrow type: ${toString(vector.VectorName)}`); -} - -// generate a getter for a list data type -function getList(vector, options) { - return vector.nullCount - ? row => vector.isValid(row) - ? arrayFrom(vector.get(row), options) - : null - : row => arrayFrom(vector.get(row), options); -} - -// generate a getter for a struct (object) data type -function getStruct(vector, options) { - // disable memoization for nested columns as we extract JS objects - const opt = { ...options, memoize: false }; - const props = []; - const code = []; - vector.type.children.forEach((field, i) => { - props.push(arrowColumn(vector.getChildAt(i), opt)); - code.push(`${toString(field.name)}:_${i}.at(row)`); - }); - const get = unroll('row', '({' + code + '})', props); - - return vector.nullCount - ? row => vector.isValid(row) ? get(row) : null - : get; -} - -/** - * Create a new Arquero column that proxies access to an - * Apache Arrow dictionary column. - * @param {import('apache-arrow').Vector} vector - * An Apache Arrow dictionary column. - */ -function dictionaryColumn(vector) { - const { data, length, nullCount } = vector; - const dictionary = data[data.length - 1].dictionary; - const size = dictionary.length; - const keys = dictKeys(data || [vector], length, nullCount, size); - const get = memoized(size, - k => k == null || k < 0 || k >= size ? null : dictionary.get(k) - ); - - return { - vector, - length, - at: row => get(keys[row]), - key: row => keys[row], - keyFor(value) { - if (value === null) return nullCount ? size : -1; - for (let i = 0; i < size; ++i) { - if (get(i) === value) return i; - } - return -1; - }, - groups(names) { - const s = size + (nullCount ? 1 : 0); - return { keys, get: [get], names, rows: sequence(0, s), size: s }; - }, - [Symbol.iterator]() { - return vector[Symbol.iterator](); - } - }; -} - -/** - * Generate a dictionary key array. 
- * @param {readonly any[]} chunks Arrow column chunks - * @param {number} length The length of the Arrow column - * @param {number} nulls The count of column null values - * @param {number} size The backing dictionary size - */ -function dictKeys(chunks, length, nulls, size) { - const v = chunks.length > 1 || nulls - ? flatten(chunks, length, chunks[0].type.indices) - : chunks[0].values; - return nulls ? nullKeys(chunks, v, size) : v; -} - -/** - * Flatten Arrow column chunks into a single array. - */ -function flatten(chunks, length, type) { - const array = new type.ArrayType(length); - const n = chunks.length; - for (let i = 0, idx = 0, len; i < n; ++i) { - len = chunks[i].length; - array.set(chunks[i].values.subarray(0, len), idx); - idx += len; - } - return array; -} - -/** - * Encode null values as an additional dictionary key. - * Returns a new key array with null values added. - * TODO: safeguard against integer overflow? - */ -function nullKeys(chunks, keys, key) { - // iterate over null bitmaps, encode null values as key - const n = chunks.length; - for (let i = 0, idx = 0, m, base, bits, byte; i < n; ++i) { - bits = chunks[i].nullBitmap; - m = chunks[i].length >> 3; - if (bits && bits.length) { - for (let j = 0; j <= m; ++j) { - if ((byte = bits[j]) !== 255) { - base = idx + (j << 3); - if ((byte & (1 << 0)) === 0) keys[base + 0] = key; - if ((byte & (1 << 1)) === 0) keys[base + 1] = key; - if ((byte & (1 << 2)) === 0) keys[base + 2] = key; - if ((byte & (1 << 3)) === 0) keys[base + 3] = key; - if ((byte & (1 << 4)) === 0) keys[base + 4] = key; - if ((byte & (1 << 5)) === 0) keys[base + 5] = key; - if ((byte & (1 << 6)) === 0) keys[base + 6] = key; - if ((byte & (1 << 7)) === 0) keys[base + 7] = key; - } - } - } - idx += chunks[i].length; - } - return keys; -} diff --git a/src/arrow/from-arrow.js b/src/arrow/from-arrow.js index cd9a060..b2ac143 100644 --- a/src/arrow/from-arrow.js +++ b/src/arrow/from-arrow.js @@ -1,39 +1,144 @@ -import { 
arrowTableFromIPC } from './arrow-table.js'; -import arrowColumn from './arrow-column.js'; +import { tableFromIPC } from '@uwdata/flechette'; import resolve, { all } from '../helpers/selection.js'; import { columnSet } from '../table/ColumnSet.js'; import { ColumnTable } from '../table/ColumnTable.js'; +import sequence from '../op/functions/sequence.js'; + +/** @type {import('./types.js').ArrowOptions} */ +const USE_DATE = { useDate: true }; /** * Create a new table backed by an Apache Arrow table instance. - * @param {import('./types.js').ArrowInput} arrow + * @param {import('./types.js').ArrowInput} input * An Apache Arrow data table or Arrow IPC byte buffer. * @param {import('./types.js').ArrowOptions} [options] * Options for Arrow import. * @return {ColumnTable} A new table containing the imported values. */ -export default function(arrow, options) { - if (arrow instanceof ArrayBuffer || ArrayBuffer.isView(arrow)) { - arrow = arrowTableFromIPC(arrow); - } +export default function(input, options) { + const { columns = all(), ...extractOptions } = options || USE_DATE; + const arrow = input instanceof ArrayBuffer || input instanceof Uint8Array + ? tableFromIPC(input, extractOptions) + : input; - const { - columns = all(), - ...columnOptions - } = options || {}; + const { fields } = arrow.schema; // resolve column selection - const fields = arrow.schema.fields.map(f => f.name); + const names = fields.map(f => f.name); const sel = resolve({ - columnNames: test => test ? fields.filter(test) : fields.slice(), - columnIndex: name => fields.indexOf(name) + columnNames: test => test ? names.filter(test) : names.slice(), + columnIndex: name => names.indexOf(name) }, columns); // build Arquero columns for backing Arrow columns const cols = columnSet(); sel.forEach((name, key) => { - cols.add(name, arrowColumn(arrow.getChild(key), columnOptions)); + const col = arrow.getChild(key); + const { typeId } = fields.find(f => f.name === key).type; + cols.add(name, typeId === -1 ? 
dictionaryColumn(col) : col); }); return new ColumnTable(cols.data, cols.names); } + +function dictionaryColumn(column) { + const { data, length, nullCount } = column; + const batch = data[data.length - 1]; + // flechette ?? arrow-js + const cache = batch.cache ?? batch.dictionary.toArray(); + const size = cache.length; + const keys = dictKeys(data, length, nullCount, size); + + const get = nullCount + ? (k => k === size ? null : cache[k]) + : (k => cache[k]); + + return { + length, + nullCount, + at: row => get(keys[row]), + key: row => keys[row], + keyFor(value) { + if (value === null) return nullCount ? size : -1; + for (let i = 0; i < size; ++i) { + if (cache[i] === value) return i; + } + return -1; + }, + groups(names) { + const s = size + (nullCount ? 1 : 0); + return { + keys, + get: [get], + names, + rows: sequence(0, s), + size: s + }; + }, + [Symbol.iterator]: () => column[Symbol.iterator](), + toArray: () => column.toArray() + }; +} + +/** + * Generate a dictionary key array. + * @param {readonly any[]} data Arrow column batches + * @param {number} length The length of the Arrow column + * @param {number} nulls The count of column null values + * @param {number} size The backing dictionary size + */ +function dictKeys(data, length, nulls, size) { + const v = data.length > 1 || nulls + ? flatten(data, length) + : data[0].values; + return nulls ? nullKeys(data, v, size) : v; +} + +/** + * Flatten Arrow column chunks into a single array. + */ +function flatten(data, length) { + const type = data[0].values.constructor; + const array = new type(length); + const n = data.length; + for (let i = 0, idx = 0, len; i < n; ++i) { + len = data[i].length; + array.set(data[i].values.subarray(0, len), idx); + idx += len; + } + return array; +} + +/** + * Encode null values as an additional dictionary key. + * Returns a new key array with null values added. + * TODO: safeguard against integer overflow? 
+ */ +function nullKeys(data, keys, key) { + // iterate over null bitmaps, encode null values as key + const n = data.length; + for (let i = 0, idx = 0, byte; i < n; ++i) { + const batch = data[i]; + const { length } = batch; + // flechette ?? arrow-js + const validity = batch.validity ?? batch.nullBitmap; + const m = length >> 3; + if (validity && validity.length) { + for (let j = 0; j <= m; ++j) { + if ((byte = validity[j]) !== 255) { + const base = idx + (j << 3); + if ((byte & (1 << 0)) === 0) keys[base + 0] = key; + if ((byte & (1 << 1)) === 0) keys[base + 1] = key; + if ((byte & (1 << 2)) === 0) keys[base + 2] = key; + if ((byte & (1 << 3)) === 0) keys[base + 3] = key; + if ((byte & (1 << 4)) === 0) keys[base + 4] = key; + if ((byte & (1 << 5)) === 0) keys[base + 5] = key; + if ((byte & (1 << 6)) === 0) keys[base + 6] = key; + if ((byte & (1 << 7)) === 0) keys[base + 7] = key; + } + } + } + idx += length; + } + return keys; +} diff --git a/src/arrow/types.ts b/src/arrow/types.ts index f3cf8f1..e6b03bf 100644 --- a/src/arrow/types.ts +++ b/src/arrow/types.ts @@ -1,55 +1,86 @@ -import { DataType, Table } from 'apache-arrow'; -import type { Select, TypedArray } from '../table/types.js'; +import { ExtractionOptions } from '@uwdata/flechette'; +import { DataType } from 'apache-arrow'; +import type { ColumnType, Select } from '../table/types.js'; /** Arrow input data as bytes or loaded table. */ export type ArrowInput = | ArrayBuffer - | TypedArray - | Table; + | Uint8Array + | ArrowTable; -/** Options for Apache Arrow column conversion. */ -export interface ArrowColumnOptions { - /** - * Flag (default `true`) to convert Arrow date values to JavaScript Date - * objects. If false, defaults to what the Arrow implementation provides, - * typically timestamps as number values. - */ - convertDate?: boolean; - /** - * Flag (default `true`) to convert Arrow fixed point decimal values to - * JavaScript numbers. 
If false, defaults to what the Arrow implementation - * provides, typically byte arrays. The conversion will be lossy if the - * decimal can not be exactly represented as a double-precision floating - * point number. - */ - convertDecimal?: boolean; - /** - * Flag (default `true`) to convert Arrow timestamp values to JavaScript - * Date objects. If false, defaults to what the Arrow implementation - * provides, typically timestamps as number values. - */ - convertTimestamp?: boolean; - /** - * Flag (default `false`) to convert Arrow integers with bit widths of 64 - * bits or higher to JavaScript numbers. If false, defaults to what the - * Arrow implementation provides, typically `BigInt` values. The conversion - * will be lossy if the integer is so large it can not be exactly - * represented as a double-precision floating point number. - */ - convertBigInt?: boolean; - /** - * A hint (default `true`) to enable memoization of expensive conversions. - * If true, memoization is applied for string and nested (list, struct) - * types, caching extracted values to enable faster access. Memoization - * is also applied to converted Date values, in part to ensure exact object - * equality. This hint is ignored for dictionary columns, whose values are - * always memoized. - */ - memoize?: boolean; +export interface ArrowColumn extends ColumnType { + nullCount: number; + toArray(): ColumnType } +export interface ArrowDataType { + typeId: number; +} + +export interface ArrowField { + name: string; + nullable: boolean; + type: ArrowDataType; + metadata?: Map; +} + +export interface ArrowSchema { + version?: number; + fields: ArrowField[]; + metadata?: Map; +} + +export interface ArrowTable { + numRows: number; + numCols: number; + schema: ArrowSchema; + getChild(name: string): ArrowColumn; + getChildAt(index: number): ArrowColumn; +} + +// /** Options for Apache Arrow column conversion. 
*/ +// export interface ArrowColumnOptions { +// /** +// * Flag (default `true`) to convert Arrow date values to JavaScript Date +// * objects. If false, defaults to what the Arrow implementation provides, +// * typically timestamps as number values. +// */ +// convertDate?: boolean; +// /** +// * Flag (default `true`) to convert Arrow fixed point decimal values to +// * JavaScript numbers. If false, defaults to what the Arrow implementation +// * provides, typically byte arrays. The conversion will be lossy if the +// * decimal can not be exactly represented as a double-precision floating +// * point number. +// */ +// convertDecimal?: boolean; +// /** +// * Flag (default `true`) to convert Arrow timestamp values to JavaScript +// * Date objects. If false, defaults to what the Arrow implementation +// * provides, typically timestamps as number values. +// */ +// convertTimestamp?: boolean; +// /** +// * Flag (default `false`) to convert Arrow integers with bit widths of 64 +// * bits or higher to JavaScript numbers. If false, defaults to what the +// * Arrow implementation provides, typically `BigInt` values. The conversion +// * will be lossy if the integer is so large it can not be exactly +// * represented as a double-precision floating point number. +// */ +// convertBigInt?: boolean; +// /** +// * A hint (default `true`) to enable memoization of expensive conversions. +// * If true, memoization is applied for string and nested (list, struct) +// * types, caching extracted values to enable faster access. Memoization +// * is also applied to converted Date values, in part to ensure exact object +// * equality. This hint is ignored for dictionary columns, whose values are +// * always memoized. +// */ +// memoize?: boolean; +// } + /** Options for Apache Arrow import. */ -export interface ArrowOptions extends ArrowColumnOptions { +export interface ArrowOptions extends ExtractionOptions { /** * An ordered set of columns to import. 
The input may consist of column name * strings, column integer indices, objects with current column names as diff --git a/test/arrow/arrow-column-test.js b/test/arrow/arrow-column-test.js deleted file mode 100644 index d982437..0000000 --- a/test/arrow/arrow-column-test.js +++ /dev/null @@ -1,112 +0,0 @@ -import assert from 'node:assert'; -import arrowColumn from '../../src/arrow/arrow-column.js'; -import { - DateDay, DateMillisecond, Int64, tableFromIPC, vectorFromArray -} from 'apache-arrow'; - -describe('arrowColumn', () => { - it('converts date day data', () => { - const date = (y, m = 0, d = 1) => new Date(Date.UTC(y, m, d)); - const values = [ - date(2000, 0, 1), - date(2004, 10, 12), - date(2007, 3, 14), - date(2009, 6, 26), - date(2000, 0, 1), - date(2004, 10, 12), - date(2007, 3, 14), - date(2009, 6, 26), - date(2000, 0, 1), - date(2004, 10, 12) - ]; - const vec = vectorFromArray(values, new DateDay()); - const proxy = arrowColumn(vec); - - assert.deepStrictEqual( - Array.from(proxy), - values, - 'date day converted' - ); - assert.deepStrictEqual( - Array.from(arrowColumn(vec, { convertDate: false })), - values.map(v => +v), - 'date day unconverted' - ); - assert.ok(proxy.at(0) === proxy.at(0), 'data day object equality'); - }); - - it('converts date millisecond data', () => { - const date = (y, m = 0, d = 1) => new Date(Date.UTC(y, m, d)); - const values = [ - date(2000, 0, 1), - date(2004, 10, 12), - date(2007, 3, 14), - date(2009, 6, 26), - date(2000, 0, 1), - date(2004, 10, 12), - date(2007, 3, 14), - date(2009, 6, 26), - date(2000, 0, 1), - date(2004, 10, 12) - ]; - const vec = vectorFromArray(values, new DateMillisecond()); - const proxy = arrowColumn(vec); - - assert.deepStrictEqual( - Array.from(proxy), - values, - 'date millisecond converted' - ); - assert.deepStrictEqual( - Array.from(arrowColumn(vec, { convertDate: false })), - values.map(v => +v), - 'date millisecond unconverted' - ); - assert.ok(proxy.at(0) === proxy.at(0), 'data millisecond 
object equality'); - }); - - it('converts bigint data', () => { - const values = [0n, 1n, 2n, 3n, 10n, 1000n]; - const vec = vectorFromArray(values, new Int64()); - - assert.deepStrictEqual( - Array.from(arrowColumn(vec, { convertBigInt: true })), - values.map(v => Number(v)), - 'bigint converted' - ); - assert.deepStrictEqual( - Array.from(arrowColumn(vec)), - values, - 'bigint unconverted' - ); - }); - - it('converts decimal data', () => { - // encoded externally to sidestep arrow JS lib bugs: - // import pyarrow as pa - // v = pa.array([1, 12, 34], type=pa.decimal128(18, 3)) - // batch = pa.record_batch([v], names=['d']) - // sink = pa.BufferOutputStream() - // with pa.ipc.new_stream(sink, batch.schema) as writer: - // writer.write_batch(batch) - // sink.getvalue().hex() - const hex = 'FFFFFFFF780000001000000000000A000C000600050008000A000000000104000C000000080008000000040008000000040000000100000014000000100014000800060007000C00000010001000000000000107100000001C0000000400000000000000010000006400000008000C0004000800080000001200000003000000FFFFFFFF8800000014000000000000000C0016000600050008000C000C0000000003040018000000300000000000000000000A0018000C00040008000A0000003C00000010000000030000000000000000000000020000000000000000000000000000000000000000000000000000003000000000000000000000000100000003000000000000000000000000000000E8030000000000000000000000000000E02E0000000000000000000000000000D0840000000000000000000000000000FFFFFFFF00000000'; - const bytes = Uint8Array.from(hex.match(/.{1,2}/g).map(s => parseInt(s, 16))); - const vec = tableFromIPC(bytes).getChild('d'); - - assert.deepStrictEqual( - Array.from(arrowColumn(vec, { convertDecimal: true })), - [1, 12, 34], - 'decimal converted' - ); - assert.deepEqual( - Array.from(arrowColumn(vec, { convertDecimal: false })), - [ - Uint32Array.from([1000, 0, 0, 0]), - Uint32Array.from([12000, 0, 0, 0]), - Uint32Array.from([34000, 0, 0, 0 ]) - ], - 'decimal unconverted' - ); - }); -}); diff --git 
a/test/arrow/from-arrow-test.js b/test/arrow/from-arrow-test.js index 63fc054..826b85b 100644 --- a/test/arrow/from-arrow-test.js +++ b/test/arrow/from-arrow-test.js @@ -3,13 +3,28 @@ import { Utf8 } from 'apache-arrow'; import tableEqual from '../table-equal.js'; import fromArrow from '../../src/arrow/from-arrow.js'; import toArrow from '../../src/arrow/to-arrow.js'; +import toArrowIPC from '../../src/arrow/to-arrow-ipc.js'; import { not } from '../../src/helpers/selection.js'; import { table } from '../../src/index-browser.js'; +import { tableFromIPC } from '@uwdata/flechette'; function arrowTable(data, types) { return toArrow(table(data), { types }); } +function arrowIPC(data, types) { + return toArrowIPC(table(data), { types }); +} + +function flechetteTable(data, types) { + return tableFromIPC(arrowIPC(data, types)); +} + +function getType(table, name) { + const f = table.schema.fields.find(f => f.name === name); + return f?.type; +} + describe('fromArrow', () => { it('imports Apache Arrow tables', () => { const data = { @@ -17,7 +32,6 @@ describe('fromArrow', () => { v: ['a', 'b', null, 'd', 'e'] }; const at = arrowTable(data); - tableEqual(fromArrow(at), data, 'arrow data'); }); @@ -62,41 +76,91 @@ describe('fromArrow', () => { tableEqual(s4, { a: data.u, b: data.x }, 'correct columns selected'); }); - it('can read Apache Arrow lists', () => { + it('imports Flechette Arrow tables', () => { + const data = { + u: [1, 2, 3, 4, 5], + v: ['a', 'b', null, 'd', 'e'] + }; + const at = flechetteTable(data); + tableEqual(fromArrow(at), data, 'arrow data'); + }); + + it('can unpack Flechette Arrow tables', () => { + const data = { + u: [1, 2, 3, 4, 5], + v: ['a', 'b', null, 'd', 'e'], + x: ['cc', 'dd', 'cc', 'dd', 'cc'], + y: ['aa', 'aa', null, 'bb', 'bb'] + }; + const at = flechetteTable(data, { v: new Utf8() }); + const dt = fromArrow(at); + + tableEqual(dt, data, 'arrow data'); + assert.ok(dt.column('x').keyFor, 'create dictionary column without nulls'); + 
assert.ok(dt.column('y').keyFor, 'create dictionary column with nulls'); + }); + + it('can select Flechette Arrow columns', () => { + const data = { + u: [1, 2, 3, 4, 5], + v: ['a', 'b', null, 'd', 'e'], + x: ['cc', 'dd', 'cc', 'dd', 'cc'], + y: ['aa', 'aa', null, 'bb', 'bb'] + }; + const at = flechetteTable(data); + + const s1 = fromArrow(at, { columns: 'x' }); + assert.deepEqual(s1.columnNames(), ['x'], 'select by column name'); + tableEqual(s1, { x: data.x }, 'correct columns selected'); + + const s2 = fromArrow(at, { columns: ['u', 'y'] }); + assert.deepEqual(s2.columnNames(), ['u', 'y'], 'select by column names'); + tableEqual(s2, { u: data.u, y: data.y }, 'correct columns selected'); + + const s3 = fromArrow(at, { columns: not('u', 'y') }); + assert.deepEqual(s3.columnNames(), ['v', 'x'], 'select by helper'); + tableEqual(s3, { v: data.v, x: data.x }, 'correct columns selected'); + + const s4 = fromArrow(at, { columns: { u: 'a', x: 'b'} }); + assert.deepEqual(s4.columnNames(), ['a', 'b'], 'select by helper'); + tableEqual(s4, { a: data.u, b: data.x }, 'correct columns selected'); + }); + + it('can read Flechette Arrow lists', () => { const l = [[1, 2, 3], null, [4, 5]]; - const at = arrowTable({ l }); + const at = flechetteTable({ l }); - if (at.getChild('l').type.typeId !== 12) { + if (getType(at, 'l').typeId !== 12) { assert.fail('Arrow column should have List type'); } tableEqual(fromArrow(at), { l }, 'extract Arrow list'); }); - it('can read Apache Arrow fixed-size lists', () => { + it('can read Flechette Arrow fixed-size lists', () => { const l = [[1, 2], null, [4, 5]]; - const at = arrowTable({ l }); + const at = flechetteTable({ l }); - if (at.getChild('l').type.typeId !== 16) { + if (getType(at, 'l').typeId !== 16) { assert.fail('Arrow column should have FixedSizeList type'); } tableEqual(fromArrow(at), { l }, 'extract Arrow list'); }); - it('can read Apache Arrow structs', () => { + it('can read Flechette Arrow structs', () => { const s = [{ foo: 1, 
bar: [2, 3] }, null, { foo: 2, bar: [4] }]; - const at = arrowTable({ s }); + const at = flechetteTable({ s }); - if (at.getChild('s').type.typeId !== 13) { + if (getType(at, 's').typeId !== 13) { assert.fail('Arrow column should have Struct type'); } tableEqual(fromArrow(at), { s }, 'extract Arrow struct'); }); - it('can read nested Apache Arrow structs', () => { + it('can read nested Flechette Arrow structs', () => { const s = [{ foo: 1, bar: { bop: 2 } }, { foo: 2, bar: { bop: 3 } }]; - const at = arrowTable({ s }); + const at = flechetteTable({ s }); - if (at.getChild('s').type.typeId !== 13) { + if (getType(at, 's').typeId !== 13) { assert.fail('Arrow column should have Struct type'); } tableEqual(fromArrow(at), { s }, 'extract nested Arrow struct'); diff --git a/test/verbs/reify-test.js b/test/verbs/reify-test.js index ac11a16..228695e 100644 --- a/test/verbs/reify-test.js +++ b/test/verbs/reify-test.js @@ -1,5 +1,5 @@ import tableEqual from '../table-equal.js'; -import { fromArrow, table, toArrow } from '../../src/index.js'; +import { fromArrow, table, toArrowIPC } from '../../src/index.js'; describe('reify', () => { it('materializes filtered and ordered tables', () => { @@ -26,7 +26,7 @@ describe('reify', () => { { a: 1.7, b: 'd', c: [4], d: new Date(2003, 3, 1, 4) } ]; - const dt = fromArrow(toArrow(data)); + const dt = fromArrow(toArrowIPC(data)); const rt = dt.filter(d => d.b !== 'c').reify(); tableEqual(rt, From ae1b5c00677b95ad7576dd2237d4a336b37775aa Mon Sep 17 00:00:00 2001 From: jheer Date: Tue, 13 Aug 2024 18:49:33 +0200 Subject: [PATCH 04/16] chore: Bump flechette patch version. 
--- package-lock.json | 9 ++++----- package.json | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/package-lock.json b/package-lock.json index 73cd83e..082b773 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "6.0.1", "license": "BSD-3-Clause", "dependencies": { + "@uwdata/flechette": "~0.0.4", "acorn": "^8.12.1", "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" @@ -16,7 +17,6 @@ "devDependencies": { "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "@uwdata/flechette": "~0.0.3", "eslint": "^9.9.0", "mocha": "^10.7.3", "rimraf": "^6.0.1", @@ -636,10 +636,9 @@ "license": "MIT" }, "node_modules/@uwdata/flechette": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-0.0.3.tgz", - "integrity": "sha512-eYxNvZaHLZ3r0Nxe5bTbxaOEau4TVYO9u2Ju2k92vcsylAQ30t0QhvwJbfoN+XkN6g63O/xwpRnCVquYOf4M2A==", - "dev": true + "version": "0.0.4", + "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-0.0.4.tgz", + "integrity": "sha512-Huz+xF4fWVirVJVrzb6f08vPYb4SLRBDmlGejTdL5kW6Xqa3cfeBqSnHUEcxUf1Fnrr7rT7RTtrt6bBxOGiwJQ==" }, "node_modules/acorn": { "version": "8.12.1", diff --git a/package.json b/package.json index 13a4e66..7dc1550 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "prepublishOnly": "npm test && npm run build" }, "dependencies": { - "@uwdata/flechette": "~0.0.3", + "@uwdata/flechette": "~0.0.4", "acorn": "^8.12.1", "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" From d1ccd5c2c6a6a7259b76b57a0b8b7ca7e7235fdb Mon Sep 17 00:00:00 2001 From: jheer Date: Tue, 13 Aug 2024 19:48:19 +0200 Subject: [PATCH 05/16] feat: Update to flechette 0.0.5. 
--- package-lock.json | 8 ++++---- package.json | 2 +- src/arrow/from-arrow.js | 9 ++++----- src/arrow/types.ts | 1 + 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/package-lock.json b/package-lock.json index 082b773..c945626 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "6.0.1", "license": "BSD-3-Clause", "dependencies": { - "@uwdata/flechette": "~0.0.4", + "@uwdata/flechette": "~0.0.5", "acorn": "^8.12.1", "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" @@ -636,9 +636,9 @@ "license": "MIT" }, "node_modules/@uwdata/flechette": { - "version": "0.0.4", - "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-0.0.4.tgz", - "integrity": "sha512-Huz+xF4fWVirVJVrzb6f08vPYb4SLRBDmlGejTdL5kW6Xqa3cfeBqSnHUEcxUf1Fnrr7rT7RTtrt6bBxOGiwJQ==" + "version": "0.0.5", + "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-0.0.5.tgz", + "integrity": "sha512-OZqdOYLGmcLiQvRxzB4S8v7WjjvG9B95G5x4cxNHeI/FV4Y1kBupRvxbYOvH5wUDLbZi3NwFG8jklPqzLYDH/w==" }, "node_modules/acorn": { "version": "8.12.1", diff --git a/package.json b/package.json index 7dc1550..c723ec4 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "prepublishOnly": "npm test && npm run build" }, "dependencies": { - "@uwdata/flechette": "~0.0.4", + "@uwdata/flechette": "~0.0.5", "acorn": "^8.12.1", "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" diff --git a/src/arrow/from-arrow.js b/src/arrow/from-arrow.js index b2ac143..70b4dda 100644 --- a/src/arrow/from-arrow.js +++ b/src/arrow/from-arrow.js @@ -34,17 +34,16 @@ export default function(input, options) { const cols = columnSet(); sel.forEach((name, key) => { const col = arrow.getChild(key); - const { typeId } = fields.find(f => f.name === key).type; - cols.add(name, typeId === -1 ? dictionaryColumn(col) : col); + cols.add(name, col.type.typeId === -1 ? 
dictionary(col) : col); }); return new ColumnTable(cols.data, cols.names); } -function dictionaryColumn(column) { +function dictionary(column) { const { data, length, nullCount } = column; const batch = data[data.length - 1]; - // flechette ?? arrow-js + // support both flechette and arrow-js const cache = batch.cache ?? batch.dictionary.toArray(); const size = cache.length; const keys = dictKeys(data, length, nullCount, size); @@ -120,7 +119,7 @@ function nullKeys(data, keys, key) { for (let i = 0, idx = 0, byte; i < n; ++i) { const batch = data[i]; const { length } = batch; - // flechette ?? arrow-js + // support both flechette and arrow-js const validity = batch.validity ?? batch.nullBitmap; const m = length >> 3; if (validity && validity.length) { diff --git a/src/arrow/types.ts b/src/arrow/types.ts index e6b03bf..11e770d 100644 --- a/src/arrow/types.ts +++ b/src/arrow/types.ts @@ -9,6 +9,7 @@ export type ArrowInput = | ArrowTable; export interface ArrowColumn extends ColumnType { + type: ArrowDataType; nullCount: number; toArray(): ColumnType } From a3d35478554a14604b825d6e3dcd02e71d24dc82 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 26 Aug 2024 16:57:41 +0200 Subject: [PATCH 06/16] chore: Update dependencies. 
--- package-lock.json | 168 +++++++++++++++++++++++----------------------- package.json | 6 +- 2 files changed, 87 insertions(+), 87 deletions(-) diff --git a/package-lock.json b/package-lock.json index c945626..54aa9fe 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "6.0.1", "license": "BSD-3-Clause", "dependencies": { - "@uwdata/flechette": "~0.0.5", + "@uwdata/flechette": "~0.0.8", "acorn": "^8.12.1", "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" @@ -17,10 +17,10 @@ "devDependencies": { "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "eslint": "^9.9.0", + "eslint": "^9.9.1", "mocha": "^10.7.3", "rimraf": "^6.0.1", - "rollup": "^4.20.0", + "rollup": "^4.21.0", "rollup-plugin-bundle-size": "^1.0.3", "tape": "^5.8.1", "typescript": "^5.5.4" @@ -70,9 +70,9 @@ } }, "node_modules/@eslint/config-array": { - "version": "0.17.1", - "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.17.1.tgz", - "integrity": "sha512-BlYOpej8AQ8Ev9xVqroV7a02JK3SkBAaN9GfMMH9W6Ch8FlQlkjGw4Ir7+FgYwfirivAf4t+GtzuAxqfukmISA==", + "version": "0.18.0", + "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.18.0.tgz", + "integrity": "sha512-fTxvnS1sRMu3+JjXwJG0j/i4RT9u4qJ+lqS/yCGap4lH4zZGzQ7tu+xZqQmcMZq5OBZDL4QRxQzRjkWcGt8IVw==", "dev": true, "dependencies": { "@eslint/object-schema": "^2.1.4", @@ -108,9 +108,9 @@ } }, "node_modules/@eslint/js": { - "version": "9.9.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.9.0.tgz", - "integrity": "sha512-hhetes6ZHP3BlXLxmd8K2SNgkhNSi+UcecbnwWKwpP7kyi/uC75DJ1lOOBO3xrC4jyojtGE3YxKZPHfk4yrgug==", + "version": "9.9.1", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.9.1.tgz", + "integrity": "sha512-xIDQRsfg5hNBqHz04H1R3scSVwmI+KUbqjsQKHKQ1DAUSaUjYPReZZmS/5PNiKu1fUvzDd6H7DEDKACSEhu+TQ==", "dev": true, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -388,9 +388,9 @@ } }, 
"node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.20.0.tgz", - "integrity": "sha512-TSpWzflCc4VGAUJZlPpgAJE1+V60MePDQnBd7PPkpuEmOy8i87aL6tinFGKBFKuEDikYpig72QzdT3QPYIi+oA==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.21.0.tgz", + "integrity": "sha512-WTWD8PfoSAJ+qL87lE7votj3syLavxunWhzCnx3XFxFiI/BA/r3X7MUM8dVrH8rb2r4AiO8jJsr3ZjdaftmnfA==", "cpu": [ "arm" ], @@ -401,9 +401,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.20.0.tgz", - "integrity": "sha512-u00Ro/nok7oGzVuh/FMYfNoGqxU5CPWz1mxV85S2w9LxHR8OoMQBuSk+3BKVIDYgkpeOET5yXkx90OYFc+ytpQ==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.21.0.tgz", + "integrity": "sha512-a1sR2zSK1B4eYkiZu17ZUZhmUQcKjk2/j9Me2IDjk1GHW7LB5Z35LEzj9iJch6gtUfsnvZs1ZNyDW2oZSThrkA==", "cpu": [ "arm64" ], @@ -414,9 +414,9 @@ ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.20.0.tgz", - "integrity": "sha512-uFVfvzvsdGtlSLuL0ZlvPJvl6ZmrH4CBwLGEFPe7hUmf7htGAN+aXo43R/V6LATyxlKVC/m6UsLb7jbG+LG39Q==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.21.0.tgz", + "integrity": "sha512-zOnKWLgDld/svhKO5PD9ozmL6roy5OQ5T4ThvdYZLpiOhEGY+dp2NwUmxK0Ld91LrbjrvtNAE0ERBwjqhZTRAA==", "cpu": [ "arm64" ], @@ -427,9 +427,9 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.20.0.tgz", - "integrity": 
"sha512-xbrMDdlev53vNXexEa6l0LffojxhqDTBeL+VUxuuIXys4x6xyvbKq5XqTXBCEUA8ty8iEJblHvFaWRJTk/icAQ==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.21.0.tgz", + "integrity": "sha512-7doS8br0xAkg48SKE2QNtMSFPFUlRdw9+votl27MvT46vo44ATBmdZdGysOevNELmZlfd+NEa0UYOA8f01WSrg==", "cpu": [ "x64" ], @@ -440,9 +440,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.20.0.tgz", - "integrity": "sha512-jMYvxZwGmoHFBTbr12Xc6wOdc2xA5tF5F2q6t7Rcfab68TT0n+r7dgawD4qhPEvasDsVpQi+MgDzj2faOLsZjA==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.21.0.tgz", + "integrity": "sha512-pWJsfQjNWNGsoCq53KjMtwdJDmh/6NubwQcz52aEwLEuvx08bzcy6tOUuawAOncPnxz/3siRtd8hiQ32G1y8VA==", "cpu": [ "arm" ], @@ -453,9 +453,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.20.0.tgz", - "integrity": "sha512-1asSTl4HKuIHIB1GcdFHNNZhxAYEdqML/MW4QmPS4G0ivbEcBr1JKlFLKsIRqjSwOBkdItn3/ZDlyvZ/N6KPlw==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.21.0.tgz", + "integrity": "sha512-efRIANsz3UHZrnZXuEvxS9LoCOWMGD1rweciD6uJQIx2myN3a8Im1FafZBzh7zk1RJ6oKcR16dU3UPldaKd83w==", "cpu": [ "arm" ], @@ -466,9 +466,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.20.0.tgz", - "integrity": "sha512-COBb8Bkx56KldOYJfMf6wKeYJrtJ9vEgBRAOkfw6Ens0tnmzPqvlpjZiLgkhg6cA3DGzCmLmmd319pmHvKWWlQ==", + "version": "4.21.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.21.0.tgz", + "integrity": "sha512-ZrPhydkTVhyeGTW94WJ8pnl1uroqVHM3j3hjdquwAcWnmivjAwOYjTEAuEDeJvGX7xv3Z9GAvrBkEzCgHq9U1w==", "cpu": [ "arm64" ], @@ -479,9 +479,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.20.0.tgz", - "integrity": "sha512-+it+mBSyMslVQa8wSPvBx53fYuZK/oLTu5RJoXogjk6x7Q7sz1GNRsXWjn6SwyJm8E/oMjNVwPhmNdIjwP135Q==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.21.0.tgz", + "integrity": "sha512-cfaupqd+UEFeURmqNP2eEvXqgbSox/LHOyN9/d2pSdV8xTrjdg3NgOFJCtc1vQ/jEke1qD0IejbBfxleBPHnPw==", "cpu": [ "arm64" ], @@ -492,9 +492,9 @@ ] }, "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.20.0.tgz", - "integrity": "sha512-yAMvqhPfGKsAxHN8I4+jE0CpLWD8cv4z7CK7BMmhjDuz606Q2tFKkWRY8bHR9JQXYcoLfopo5TTqzxgPUjUMfw==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.21.0.tgz", + "integrity": "sha512-ZKPan1/RvAhrUylwBXC9t7B2hXdpb/ufeu22pG2psV7RN8roOfGurEghw1ySmX/CmDDHNTDDjY3lo9hRlgtaHg==", "cpu": [ "ppc64" ], @@ -505,9 +505,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.20.0.tgz", - "integrity": "sha512-qmuxFpfmi/2SUkAw95TtNq/w/I7Gpjurx609OOOV7U4vhvUhBcftcmXwl3rqAek+ADBwSjIC4IVNLiszoj3dPA==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.21.0.tgz", + "integrity": 
"sha512-H1eRaCwd5E8eS8leiS+o/NqMdljkcb1d6r2h4fKSsCXQilLKArq6WS7XBLDu80Yz+nMqHVFDquwcVrQmGr28rg==", "cpu": [ "riscv64" ], @@ -518,9 +518,9 @@ ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.20.0.tgz", - "integrity": "sha512-I0BtGXddHSHjV1mqTNkgUZLnS3WtsqebAXv11D5BZE/gfw5KoyXSAXVqyJximQXNvNzUo4GKlCK/dIwXlz+jlg==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.21.0.tgz", + "integrity": "sha512-zJ4hA+3b5tu8u7L58CCSI0A9N1vkfwPhWd/puGXwtZlsB5bTkwDNW/+JCU84+3QYmKpLi+XvHdmrlwUwDA6kqw==", "cpu": [ "s390x" ], @@ -531,9 +531,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.20.0.tgz", - "integrity": "sha512-y+eoL2I3iphUg9tN9GB6ku1FA8kOfmF4oUEWhztDJ4KXJy1agk/9+pejOuZkNFhRwHAOxMsBPLbXPd6mJiCwew==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.21.0.tgz", + "integrity": "sha512-e2hrvElFIh6kW/UNBQK/kzqMNY5mO+67YtEh9OA65RM5IJXYTWiXjX6fjIiPaqOkBthYF1EqgiZ6OXKcQsM0hg==", "cpu": [ "x64" ], @@ -544,9 +544,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.20.0.tgz", - "integrity": "sha512-hM3nhW40kBNYUkZb/r9k2FKK+/MnKglX7UYd4ZUy5DJs8/sMsIbqWK2piZtVGE3kcXVNj3B2IrUYROJMMCikNg==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.21.0.tgz", + "integrity": "sha512-1vvmgDdUSebVGXWX2lIcgRebqfQSff0hMEkLJyakQ9JQUbLDkEaMsPTLOmyccyC6IJ/l3FZuJbmrBw/u0A0uCQ==", "cpu": [ "x64" ], @@ -557,9 +557,9 @@ ] }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.20.0", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.20.0.tgz", - "integrity": "sha512-psegMvP+Ik/Bg7QRJbv8w8PAytPA7Uo8fpFjXyCRHWm6Nt42L+JtoqH8eDQ5hRP7/XW2UiIriy1Z46jf0Oa1kA==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.21.0.tgz", + "integrity": "sha512-s5oFkZ/hFcrlAyBTONFY1TWndfyre1wOMwU+6KCpm/iatybvrRgmZVM+vCFwxmC5ZhdlgfE0N4XorsDpi7/4XQ==", "cpu": [ "arm64" ], @@ -570,9 +570,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.20.0.tgz", - "integrity": "sha512-GabekH3w4lgAJpVxkk7hUzUf2hICSQO0a/BLFA11/RMxQT92MabKAqyubzDZmMOC/hcJNlc+rrypzNzYl4Dx7A==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.21.0.tgz", + "integrity": "sha512-G9+TEqRnAA6nbpqyUqgTiopmnfgnMkR3kMukFBDsiyy23LZvUCpiUwjTRx6ezYCjJODXrh52rBR9oXvm+Fp5wg==", "cpu": [ "ia32" ], @@ -583,9 +583,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.20.0.tgz", - "integrity": "sha512-aJ1EJSuTdGnM6qbVC4B5DSmozPTqIag9fSzXRNNo+humQLG89XpPgdt16Ia56ORD7s+H8Pmyx44uczDQ0yDzpg==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.21.0.tgz", + "integrity": "sha512-2jsCDZwtQvRhejHLfZ1JY6w6kEuEtfF9nzYsZxzSlNVKDX+DpsDJ+Rbjkm74nvg2rdx0gwBS+IMdvwJuq3S9pQ==", "cpu": [ "x64" ], @@ -636,9 +636,9 @@ "license": "MIT" }, "node_modules/@uwdata/flechette": { - "version": "0.0.5", - "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-0.0.5.tgz", - "integrity": "sha512-OZqdOYLGmcLiQvRxzB4S8v7WjjvG9B95G5x4cxNHeI/FV4Y1kBupRvxbYOvH5wUDLbZi3NwFG8jklPqzLYDH/w==" + "version": "0.0.8", + "resolved": 
"https://registry.npmjs.org/@uwdata/flechette/-/flechette-0.0.8.tgz", + "integrity": "sha512-Qcy7NL7TfQNQLUw98ygWV8E5PMXR3T6QForhHo3T1u0zfU0L6rN9FcJAFWyA3bxr8eU+WjVypPtqfBDKj+lBvw==" }, "node_modules/acorn": { "version": "8.12.1", @@ -1566,16 +1566,16 @@ } }, "node_modules/eslint": { - "version": "9.9.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.9.0.tgz", - "integrity": "sha512-JfiKJrbx0506OEerjK2Y1QlldtBxkAlLxT5OEcRF8uaQ86noDe2k31Vw9rnSWv+MXZHj7OOUV/dA0AhdLFcyvA==", + "version": "9.9.1", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.9.1.tgz", + "integrity": "sha512-dHvhrbfr4xFQ9/dq+jcVneZMyRYLjggWjk6RVsIiHsP8Rz6yZ8LvZ//iU4TrZF+SXWG+JkNF2OyiZRvzgRDqMg==", "dev": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.11.0", - "@eslint/config-array": "^0.17.1", + "@eslint/config-array": "^0.18.0", "@eslint/eslintrc": "^3.1.0", - "@eslint/js": "9.9.0", + "@eslint/js": "9.9.1", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.3.0", "@nodelib/fs.walk": "^1.2.8", @@ -3589,9 +3589,9 @@ } }, "node_modules/rollup": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.20.0.tgz", - "integrity": "sha512-6rbWBChcnSGzIlXeIdNIZTopKYad8ZG8ajhl78lGRLsI2rX8IkaotQhVas2Ma+GPxJav19wrSzvRvuiv0YKzWw==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.21.0.tgz", + "integrity": "sha512-vo+S/lfA2lMS7rZ2Qoubi6I5hwZwzXeUIctILZLbHI+laNtvhhOIon2S1JksA5UEDQ7l3vberd0fxK44lTYjbQ==", "dev": true, "dependencies": { "@types/estree": "1.0.5" @@ -3604,22 +3604,22 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.20.0", - "@rollup/rollup-android-arm64": "4.20.0", - "@rollup/rollup-darwin-arm64": "4.20.0", - "@rollup/rollup-darwin-x64": "4.20.0", - "@rollup/rollup-linux-arm-gnueabihf": "4.20.0", - "@rollup/rollup-linux-arm-musleabihf": "4.20.0", - "@rollup/rollup-linux-arm64-gnu": "4.20.0", - 
"@rollup/rollup-linux-arm64-musl": "4.20.0", - "@rollup/rollup-linux-powerpc64le-gnu": "4.20.0", - "@rollup/rollup-linux-riscv64-gnu": "4.20.0", - "@rollup/rollup-linux-s390x-gnu": "4.20.0", - "@rollup/rollup-linux-x64-gnu": "4.20.0", - "@rollup/rollup-linux-x64-musl": "4.20.0", - "@rollup/rollup-win32-arm64-msvc": "4.20.0", - "@rollup/rollup-win32-ia32-msvc": "4.20.0", - "@rollup/rollup-win32-x64-msvc": "4.20.0", + "@rollup/rollup-android-arm-eabi": "4.21.0", + "@rollup/rollup-android-arm64": "4.21.0", + "@rollup/rollup-darwin-arm64": "4.21.0", + "@rollup/rollup-darwin-x64": "4.21.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.21.0", + "@rollup/rollup-linux-arm-musleabihf": "4.21.0", + "@rollup/rollup-linux-arm64-gnu": "4.21.0", + "@rollup/rollup-linux-arm64-musl": "4.21.0", + "@rollup/rollup-linux-powerpc64le-gnu": "4.21.0", + "@rollup/rollup-linux-riscv64-gnu": "4.21.0", + "@rollup/rollup-linux-s390x-gnu": "4.21.0", + "@rollup/rollup-linux-x64-gnu": "4.21.0", + "@rollup/rollup-linux-x64-musl": "4.21.0", + "@rollup/rollup-win32-arm64-msvc": "4.21.0", + "@rollup/rollup-win32-ia32-msvc": "4.21.0", + "@rollup/rollup-win32-x64-msvc": "4.21.0", "fsevents": "~2.3.2" } }, diff --git a/package.json b/package.json index c723ec4..b97e4da 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "prepublishOnly": "npm test && npm run build" }, "dependencies": { - "@uwdata/flechette": "~0.0.5", + "@uwdata/flechette": "~0.0.8", "acorn": "^8.12.1", "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" @@ -45,10 +45,10 @@ "devDependencies": { "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "eslint": "^9.9.0", + "eslint": "^9.9.1", "mocha": "^10.7.3", "rimraf": "^6.0.1", - "rollup": "^4.20.0", + "rollup": "^4.21.0", "rollup-plugin-bundle-size": "^1.0.3", "tape": "^5.8.1", "typescript": "^5.5.4" From c76bf66c18d8a0968c4d82aa1330061f0ee1607f Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 26 Aug 2024 16:58:00 +0200 Subject: [PATCH 07/16] feat: 
Update types, remove dead code. --- src/arrow/arrow-table.js | 12 +--------- src/arrow/from-arrow.js | 7 ++---- src/arrow/types.ts | 49 +++++++--------------------------------- src/table/types.ts | 7 ++++-- 4 files changed, 16 insertions(+), 59 deletions(-) diff --git a/src/arrow/arrow-table.js b/src/arrow/arrow-table.js index de93d64..82c8beb 100644 --- a/src/arrow/arrow-table.js +++ b/src/arrow/arrow-table.js @@ -1,4 +1,4 @@ -import { Table, tableFromIPC, tableToIPC } from 'apache-arrow'; +import { Table, tableToIPC } from 'apache-arrow'; import error from '../util/error.js'; const fail = (cause) => error( @@ -17,16 +17,6 @@ export function arrowTable(...args) { } } -export function arrowTableFromIPC(bytes) { - // trap access to provide a helpful message - // when Apache Arrow has not been imported - try { - return tableFromIPC(bytes); - } catch (err) { - fail(err); - } -} - export function arrowTableToIPC(table, format) { // trap access to provide a helpful message // when Apache Arrow has not been imported diff --git a/src/arrow/from-arrow.js b/src/arrow/from-arrow.js index 70b4dda..23d0921 100644 --- a/src/arrow/from-arrow.js +++ b/src/arrow/from-arrow.js @@ -4,9 +4,6 @@ import { columnSet } from '../table/ColumnSet.js'; import { ColumnTable } from '../table/ColumnTable.js'; import sequence from '../op/functions/sequence.js'; -/** @type {import('./types.js').ArrowOptions} */ -const USE_DATE = { useDate: true }; - /** * Create a new table backed by an Apache Arrow table instance. * @param {import('./types.js').ArrowInput} input @@ -16,9 +13,9 @@ const USE_DATE = { useDate: true }; * @return {ColumnTable} A new table containing the imported values. */ export default function(input, options) { - const { columns = all(), ...extractOptions } = options || USE_DATE; + const { columns = all(), ...rest } = options || {}; const arrow = input instanceof ArrayBuffer || input instanceof Uint8Array - ? tableFromIPC(input, extractOptions) + ? 
tableFromIPC(input, { useDate: true, ...rest }) : input; const { fields } = arrow.schema; diff --git a/src/arrow/types.ts b/src/arrow/types.ts index 11e770d..1f64c76 100644 --- a/src/arrow/types.ts +++ b/src/arrow/types.ts @@ -8,16 +8,19 @@ export type ArrowInput = | Uint8Array | ArrowTable; +/** A column in an Apache Arrow table. */ export interface ArrowColumn extends ColumnType { type: ArrowDataType; nullCount: number; toArray(): ColumnType } +/** Minimal interface for an Arrow data type. */ export interface ArrowDataType { typeId: number; } +/** A field definition with an Arrow schema. */ export interface ArrowField { name: string; nullable: boolean; @@ -25,12 +28,17 @@ export interface ArrowField { metadata?: Map; } +/** An Apache Arrow table schema. */ export interface ArrowSchema { version?: number; fields: ArrowField[]; metadata?: Map; } +/** + * Interface for an Apache Arrow table. + * Compatible with both Flechette and Arrow-JS table instances. + */ export interface ArrowTable { numRows: number; numCols: number; @@ -39,47 +47,6 @@ export interface ArrowTable { getChildAt(index: number): ArrowColumn; } -// /** Options for Apache Arrow column conversion. */ -// export interface ArrowColumnOptions { -// /** -// * Flag (default `true`) to convert Arrow date values to JavaScript Date -// * objects. If false, defaults to what the Arrow implementation provides, -// * typically timestamps as number values. -// */ -// convertDate?: boolean; -// /** -// * Flag (default `true`) to convert Arrow fixed point decimal values to -// * JavaScript numbers. If false, defaults to what the Arrow implementation -// * provides, typically byte arrays. The conversion will be lossy if the -// * decimal can not be exactly represented as a double-precision floating -// * point number. -// */ -// convertDecimal?: boolean; -// /** -// * Flag (default `true`) to convert Arrow timestamp values to JavaScript -// * Date objects. 
If false, defaults to what the Arrow implementation -// * provides, typically timestamps as number values. -// */ -// convertTimestamp?: boolean; -// /** -// * Flag (default `false`) to convert Arrow integers with bit widths of 64 -// * bits or higher to JavaScript numbers. If false, defaults to what the -// * Arrow implementation provides, typically `BigInt` values. The conversion -// * will be lossy if the integer is so large it can not be exactly -// * represented as a double-precision floating point number. -// */ -// convertBigInt?: boolean; -// /** -// * A hint (default `true`) to enable memoization of expensive conversions. -// * If true, memoization is applied for string and nested (list, struct) -// * types, caching extracted values to enable faster access. Memoization -// * is also applied to converted Date values, in part to ensure exact object -// * equality. This hint is ignored for dictionary columns, whose values are -// * always memoized. -// */ -// memoize?: boolean; -// } - /** Options for Apache Arrow import. */ export interface ArrowOptions extends ExtractionOptions { /** diff --git a/src/table/types.ts b/src/table/types.ts index 17dfba7..8c889fe 100644 --- a/src/table/types.ts +++ b/src/table/types.ts @@ -4,11 +4,14 @@ import { BitSet } from './BitSet.js'; /** A table column value. */ export type DataValue = any; -/** Interface for table columns. */ +/** + * Interface for table columns. + * Compatible with arrays, typed arrays, and Arrow columns. + */ export interface ColumnType { /** The number of rows in the column. */ length: number; - /** Retrieve the values at the given row index. */ + /** Retrieve the value at the given row index. */ at(row: number): T; /** Return a column value iterator. */ [Symbol.iterator]() : Iterator; From 5049551b74173403b3eb3f22291a431d392028f8 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 26 Aug 2024 16:58:14 +0200 Subject: [PATCH 08/16] docs: Update fromArrow docs. 
--- docs/api/index.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/docs/api/index.md b/docs/api/index.md index 3482cc2..66969c4 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -91,20 +91,18 @@ aq.from(new Map([ ['d', 4], ['e', 5], ['f', 6] ])
# aq.fromArrow(arrowTable[, options]) · [Source](https://github.com/uwdata/arquero/blob/master/src/format/from-arrow.js) -Create a new table backed by an [Apache Arrow](https://arrow.apache.org/docs/js/) table instance. The input *arrowTable* can either be an instantiated Arrow table instance or a byte array in the Arrow IPC format. +Create a new table backed by [Apache Arrow](https://arrow.apache.org/) binary data. The input *arrowTable* can be a byte array in the Arrow IPC format or an instantiated [Flechette](https://github.com/uwdata/flechette) or [Apache Arrow JS](https://arrow.apache.org/docs/js/) table instance. Byte array inputs are decoded using [Flechette](https://github.com/uwdata/flechette). -For most data types, Arquero uses binary-encoded Arrow columns as-is with zero data copying. For columns containing string, list (array), or struct values, Arquero additionally memoizes value lookups to amortize access costs. For dictionary columns, Arquero unpacks columns with `null` entries or containing multiple record batches to optimize query performance. +For many data types, Arquero uses binary-encoded Arrow columns as-is with zero data copying. For dictionary columns, Arquero unpacks columns with `null` entries or containing multiple record batches to optimize query performance. This method performs parsing only. To both load and parse an Arrow file, use [loadArrow](#loadArrow). -* *arrowTable*: An [Apache Arrow](https://arrow.apache.org/docs/js/) data table or a byte array (e.g., [ArrayBuffer](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/ArrayBuffer) or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array)) in the Arrow IPC format. 
+* *arrowTable*: A byte array (e.g., [ArrayBuffer](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/ArrayBuffer) or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array)) in the Arrow IPC format or a [Flechette](https://github.com/uwdata/flechette) or [Apache Arrow JS](https://arrow.apache.org/docs/js/) table instance. * *options*: An Arrow import options object: * *columns*: An ordered set of columns to import. The input may consist of: column name strings, column integer indices, objects with current column names as keys and new column names as values (for renaming), or a selection helper function such as [all](#all), [not](#not), or [range](#range)). - * *convertDate*: Boolean flag (default `true`) to convert Arrow date values to JavaScript Date objects. If false, defaults to what the Arrow implementation provides, typically timestamps as number values. - * *convertDecimal*: Boolean flag (default `true`) to convert Arrow fixed point decimal values to JavaScript numbers. If false, defaults to what the Arrow implementation provides, typically byte arrays. The conversion will be lossy if the decimal can not be exactly represented as a double-precision floating point number. - *convertTimestamp*: Boolean flag (default `true`) to convert Arrow timestamp values to JavaScript Date objects. If false, defaults to what the Arrow implementation provides, typically timestamps as number values. - *convertBigInt*: Boolean flag (default `false`) to convert Arrow integers with bit widths of 64 bits or higher to JavaScript numbers. If false, defaults to what the Arrow implementation provides, typically `BigInt` values. The conversion will be lossy if the integer is so large it can not be exactly represented as a double-precision floating point number. - *memoize*: Boolean hint (default `true`) to enable memoization of expensive conversions. 
If true, memoization is applied for string and nested (list, struct) types, caching extracted values to enable faster access. Memoization is also applied to converted Date values, in part to ensure exact object equality. This hint is ignored for dictionary columns, whose values are always memoized. + * *useDate*: Boolean flag (default `true`) to convert Arrow date and timestamp values to JavaScript Date objects. Otherwise, numeric timestamps are used. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useBigInt*: Boolean flag (default `false`) to represent Arrow 64-bit integers as JavaScript `BigInt` values. For Flechette tables, the default is to coerce 64-bit integers to JavaScript numbers. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useMap*: Boolean flag (default `false`) to represent Arrow Map data as JavaScript `Map` values. For Flechette tables, the default is to produce an array of `[key, value]` arrays. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. *Examples* From 80fea0193015026f6e73d99c89302e7585524842 Mon Sep 17 00:00:00 2001 From: jheer Date: Thu, 12 Sep 2024 17:37:42 -0700 Subject: [PATCH 09/16] feat!: Drop Apache Arrow JS, use Flechette for building/encoding Arrow data. 
--- docs/api/index.md | 114 +------ docs/api/table.md | 30 +- docs/index.md | 7 +- package-lock.json | 343 ++++++++++++---------- package.json | 10 +- perf/arrow-perf.js | 59 ++-- rollup.config.js | 13 +- src/api.js | 6 +- src/arrow/arrow-table.js | 28 -- src/arrow/builder/array-builder.js | 9 - src/arrow/builder/bool-builder.js | 11 - src/arrow/builder/date-day-builder.js | 9 - src/arrow/builder/date-millis-builder.js | 9 - src/arrow/builder/default-builder.js | 12 - src/arrow/builder/dictionary-builder.js | 36 --- src/arrow/builder/index.js | 40 --- src/arrow/builder/resolve-type.js | 97 ------ src/arrow/builder/utf8-builder.js | 21 -- src/arrow/builder/util.js | 33 --- src/arrow/builder/valid-builder.js | 25 -- src/arrow/encode/data-from-objects.js | 16 - src/arrow/encode/data-from-table.js | 69 ----- src/arrow/encode/data-from.js | 21 -- src/arrow/encode/profiler.js | 138 --------- src/arrow/encode/scan.js | 29 -- src/arrow/to-arrow.js | 59 ---- src/{arrow => format}/from-arrow.js | 0 src/format/load-file.js | 4 +- src/format/load-url.js | 4 +- src/{arrow => format}/to-arrow-ipc.js | 9 +- src/format/to-arrow.js | 47 +++ src/{arrow => format}/types.ts | 23 +- src/format/util.js | 10 +- src/table/ColumnTable.js | 12 +- src/table/Table.js | 3 +- test/arrow/data-from-test.js | 153 ---------- test/arrow/profiler-test.js | 83 ------ test/{arrow => format}/from-arrow-test.js | 31 +- test/format/to-arrow-ipc-test.js | 64 ++++ test/{arrow => format}/to-arrow-test.js | 209 ++++++++----- test/verbs/reify-test.js | 6 +- 41 files changed, 539 insertions(+), 1363 deletions(-) delete mode 100644 src/arrow/arrow-table.js delete mode 100644 src/arrow/builder/array-builder.js delete mode 100644 src/arrow/builder/bool-builder.js delete mode 100644 src/arrow/builder/date-day-builder.js delete mode 100644 src/arrow/builder/date-millis-builder.js delete mode 100644 src/arrow/builder/default-builder.js delete mode 100644 src/arrow/builder/dictionary-builder.js delete mode 100644 
src/arrow/builder/index.js delete mode 100644 src/arrow/builder/resolve-type.js delete mode 100644 src/arrow/builder/utf8-builder.js delete mode 100644 src/arrow/builder/util.js delete mode 100644 src/arrow/builder/valid-builder.js delete mode 100644 src/arrow/encode/data-from-objects.js delete mode 100644 src/arrow/encode/data-from-table.js delete mode 100644 src/arrow/encode/data-from.js delete mode 100644 src/arrow/encode/profiler.js delete mode 100644 src/arrow/encode/scan.js delete mode 100644 src/arrow/to-arrow.js rename src/{arrow => format}/from-arrow.js (100%) rename src/{arrow => format}/to-arrow-ipc.js (61%) create mode 100644 src/format/to-arrow.js rename src/{arrow => format}/types.ts (81%) delete mode 100644 test/arrow/data-from-test.js delete mode 100644 test/arrow/profiler-test.js rename test/{arrow => format}/from-arrow-test.js (87%) create mode 100644 test/format/to-arrow-ipc-test.js rename test/{arrow => format}/to-arrow-test.js (56%) diff --git a/docs/api/index.md b/docs/api/index.md index 66969c4..e4f6803 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -9,8 +9,6 @@ title: Arquero API Reference * [table](#table), [from](#from), [fromArrow](#fromArrow), [fromCSV](#fromCSV), [fromFixed](#fromFixed), [fromJSON](#fromJSON) * [Table Input](#input) * [load](#load), [loadArrow](#loadArrow), [loadCSV](#loadCSV), [loadFixed](#loadFixed), [loadJSON](#loadJSON) -* [Table Output](#output) - * [toArrow](#toArrow), [toArrowIPC](#toArrowIPC) * [Expression Helpers](#expression-helpers) * [op](#op), [agg](#agg), [escape](#escape) * [bin](#bin), [desc](#desc), [frac](#frac), [rolling](#rolling), [seed](#seed) @@ -99,10 +97,12 @@ This method performs parsing only. 
To both load and parse an Arrow file, use [lo * *arrowTable*: A byte array (e.g., [ArrayBuffer](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/ArrayBuffer) or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array)) in the Arrow IPC format or a [Flechette](https://github.com/uwdata/flechette) or [Apache Arrow JS](https://arrow.apache.org/docs/js/) table instance. * *options*: An Arrow import options object: - * *columns*: An ordered set of columns to import. The input may consist of: column name strings, column integer indices, objects with current column names as keys and new column names as values (for renaming), or a selection helper function such as [all](#all), [not](#not), or [range](#range)). + * *columns*: An ordered set of columns to import. The input may consist of: column name strings, column integer indices, objects with current column names as keys and new column names as values (for renaming), or a selection helper function such as [all](#all), [not](#not), or [range](#range). + * *useBigInt*: Boolean flag (default `false`) to extract 64-bit integer types as JavaScript `BigInt` values. For Flechette tables, the default is to coerce 64-bit integers to JavaScript numbers and raise an error if the number is out of range. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. * *useDate*: Boolean flag (default `true`) to convert Arrow date and timestamp values to JavaScript Date objects. Otherwise, numeric timestamps are used. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. - * *useBigInt*: Boolean flag (default `false`) to represent Arrow 64-bit integers as JavaScript `BigInt` values. For Flechette tables, the default is to coerce 64-bit integers to JavaScript numbers. 
This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useDecimalBigInt*: Boolean flag (default `false`) to extract Arrow decimal-type data as BigInt values, where fractional digits are scaled to integers. Otherwise, decimals are (sometimes lossily) converted to floating-point numbers (default). This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. * *useMap*: Boolean flag (default `false`) to represent Arrow Map data as JavaScript `Map` values. For Flechette tables, the default is to produce an array of `[key, value]` arrays. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useProxy*: Boolean flag (default `false`) to extract Arrow Struct values and table row objects using zero-copy proxy objects that extract data from underlying Arrow batches. The proxy objects can improve performance and reduce memory usage, but do not support property enumeration (`Object.keys`, `Object.values`, `Object.entries`) or spreading (`{ ...object }`). This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. *Examples* @@ -313,7 +313,7 @@ const dt = await aq.load('data/table.json', { as: 'json', using: aq.from })
# aq.loadArrow(url[, options]) · [Source](https://github.com/uwdata/arquero/blob/master/src/format/load-file.js) -Load a file in the [Apache Arrow](https://arrow.apache.org/docs/js/) IPC format from a *url* and return a Promise for a table. +Load a file in the [Apache Arrow](https://arrow.apache.org/overview/) IPC binary format from a *url* and return a Promise for a table. This method performs both loading and parsing, and is equivalent to `aq.load(url, { as: 'arrayBuffer', using: aq.fromArrow })`. To instead create an Arquero table for an Apache Arrow dataset that has already been loaded, use [fromArrow](#fromArrow). @@ -404,110 +404,6 @@ const dt = await aq.loadJSON('data/table.json', { autoType: false })
-## Table Output - -Methods for writing data to an output format. Most output methods are available as [table methods](table#output), in addition to the top level namespace. - -
# -aq.toArrow(data[, options]) · [Source](https://github.com/uwdata/arquero/blob/master/src/arrow/to-arrow.js) - -Create an [Apache Arrow](https://arrow.apache.org/docs/js/) table for the input *data*. The input data can be either an [Arquero table](#table) or an array of standard JavaScript objects. This method will throw an error if type inference fails or if the generated columns have differing lengths. For Arquero tables, this method can instead be invoked as [table.toArrow()](table#toArrow). - -* *data*: An input dataset to convert to Arrow format. If array-valued, the data should consist of an array of objects where each entry represents a row and named properties represent columns. Otherwise, the input data should be an [Arquero table](#table). -* *options*: Options for Arrow encoding. - * *columns*: Ordered list of column names to include. If function-valued, the function should accept the input *data* as a single argument and return an array of column name strings. - * *limit*: The maximum number of rows to include (default `Infinity`). - * *offset*: The row offset indicating how many initial rows to skip (default `0`). - * *types*: An optional object indicating the [Arrow data type](https://arrow.apache.org/docs/js/enums/Arrow_dom.Type.html) to use for named columns. If specified, the input should be an object with column names for keys and Arrow data types for values. If a column's data type is not explicitly provided, type inference will be performed. - - Type values can either be instantiated Arrow [DataType](https://arrow.apache.org/docs/js/classes/Arrow_dom.DataType.html) instances (for example, `new Float64()`,`new DateMilliseconds()`, *etc.*) or type enum codes (`Type.Float64`, `Type.Date`, `Type.Dictionary`). 
High-level types map to specific data type instances as follows: - - * `Type.Date` → `new DateMilliseconds()` - * `Type.Dictionary` → `new Dictionary(new Utf8(), new Int32())` - * `Type.Float` → `new Float64()` - * `Type.Int` → `new Int32()` - * `Type.Interval` → `new IntervalYearMonth()` - * `Type.Time` → `new TimeMillisecond()` - - Types that require additional parameters (including `List`, `Struct`, and `Timestamp`) can not be specified using type codes. Instead, use data type constructors from apache-arrow, such as `new List(new Int32())`. - -*Examples* - -Encode Arrow data from an input Arquero table: - -```js -import { table, toArrow } from 'arquero'; -import { Type } from 'apache-arrow'; - -// create Arquero table -const dt = table({ - x: [1, 2, 3, 4, 5], - y: [3.4, 1.6, 5.4, 7.1, 2.9] -}); - -// encode as an Arrow table (infer data types) -// here, infers Uint8 for 'x' and Float64 for 'y' -// equivalent to dt.toArrow() -const at1 = toArrow(dt); - -// encode into Arrow table (set explicit data types) -// equivalent to dt.toArrow({ types: { ... } }) -const at2 = toArrow(dt, { - types: { - x: Type.Uint16, - y: Type.Float32 - } -}); - -// serialize Arrow table to a transferable byte array -const bytes = at1.serialize(); -``` - -Encode Arrow data from an input object array: - -```js -import { toArrow } from 'arquero'; - -// encode object array as an Arrow table (infer data types) -const at = toArrow([ - { x: 1, y: 3.4 }, - { x: 2, y: 1.6 }, - { x: 3, y: 5.4 }, - { x: 4, y: 7.1 }, - { x: 5, y: 2.9 } -]); -``` - -
# -table.toArrowBuffer(data[, options]) · [Source](https://github.com/uwdata/arquero/blob/master/src/arrow/to-arrow-ipc.js) - -Format input data in the binary [Apache Arrow](https://arrow.apache.org/docs/js/) IPC format. The input data can be either an [Arquero table](#table) or an array of standard JavaScript objects. This method will throw an error if type inference fails or if the generated columns have differing lengths. For Arquero tables, this method can instead be invoked as [table.toArrowIPC()](table#toArrowIPC). - -The resulting binary data may be saved to disk or passed between processes or tools. For example, when using [Web Workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers), the output of this method can be passed directly between threads (no data copy) as a [Transferable](https://developer.mozilla.org/en-US/docs/Web/API/Transferable) object. Additionally, Arrow binary data can be loaded in other language environments such as [Python](https://arrow.apache.org/docs/python/) or [R](https://arrow.apache.org/docs/r/). - -This method will throw an error if type inference fails or if the generated columns have differing lengths. - -* *options*: Options for Arrow encoding, same as [toArrow](#toArrow) but with an additional *format* option. - * *format*: The Arrow IPC byte format to use. One of `'stream'` (default) or `'file'`. - -*Examples* - -Encode Arrow data from an input Arquero table: - -```js -import { table, toArrowIPC } from 'arquero'; - -const dt = table({ - x: [1, 2, 3, 4, 5], - y: [3.4, 1.6, 5.4, 7.1, 2.9] -}); - -// encode table as a transferable Arrow byte buffer -// here, infers Uint8 for 'x' and Float64 for 'y' -const bytes = toArrowIPC(dt); -``` - -
## Expression Helpers diff --git a/docs/api/table.md b/docs/api/table.md index 182f2b8..4d6f77d 100644 --- a/docs/api/table.md +++ b/docs/api/table.md @@ -573,32 +573,26 @@ aq.table({ a: [1, 2, 3], b: [4, 5, 6] }).toMarkdown()
# table.toArrow([options]) · [Source](https://github.com/uwdata/arquero/blob/master/src/arrow/encode/index.js) -Format this table as an [Apache Arrow](https://arrow.apache.org/docs/js/) table instance. This method will throw an error if type inference fails or if the generated columns have differing lengths. +Format this table as an [Apache Arrow](https://arrow.apache.org/overview/) table instance using [Flechette](https://idl.uw.edu/flechette/). This method will throw an error if type inference fails or if the generated columns have differing lengths. * *options*: Options for Arrow encoding. * *columns*: Ordered list of column names to include. If function-valued, the function should accept this table as a single argument and return an array of column name strings. * *limit*: The maximum number of rows to include (default `Infinity`). * *offset*: The row offset indicating how many initial rows to skip (default `0`). - * *types*: An optional object indicating the [Arrow data type](https://arrow.apache.org/docs/js/enums/Arrow_dom.Type.html) to use for named columns. If specified, the input should be an object with column names for keys and Arrow data types for values. If a column's data type is not explicitly provided, type inference will be performed. - - Type values can either be instantiated Arrow [DataType](https://arrow.apache.org/docs/js/classes/Arrow_dom.DataType.html) instances (for example, `new Float64()`,`new DateMilliseconds()`, *etc.*) or type enum codes (`Type.Float64`, `Type.Date`, `Type.Dictionary`). 
High-level types map to specific data type instances as follows: - - * `Type.Date` → `new DateMilliseconds()` - * `Type.Dictionary` → `new Dictionary(new Utf8(), new Int32())` - * `Type.Float` → `new Float64()` - * `Type.Int` → `new Int32()` - * `Type.Interval` → `new IntervalYearMonth()` - * `Type.Time` → `new TimeMillisecond()` - - Types that require additional parameters (including `List`, `Struct`, and `Timestamp`) can not be specified using type codes. Instead, use data type constructors from apache-arrow, such as `new List(new Int32())`. + * *types*: An optional object indicating the [Arrow data type](https://idl.uw.edu/flechette/api/data-types) to use for named columns. If specified, the input should be an object with column names for keys and Arrow data types for values. Type values must be instantiated Flechette [DataType](https://idl.uw.edu/flechette/api/data-types) instances (for example, `float64()`,`dateDay()`, `list(int32())` *etc.*). If a column's data type is not explicitly provided, type inference will be performed. + * *useBigInt*: Boolean flag (default `false`) to extract 64-bit integer types as JavaScript `BigInt` values. For Flechette tables, the default is to coerce 64-bit integers to JavaScript numbers and raise an error if the number is out of range. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useDecimalBigInt*: Boolean flag (default `false`) to extract Arrow decimal-type data as BigInt values, where fractional digits are scaled to integers. Otherwise, decimals are (sometimes lossily) converted to floating-point numbers (default). This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useDate*: Boolean flag (default `true`) to convert Arrow date and timestamp values to JavaScript Date objects. Otherwise, numeric timestamps are used. 
This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useMap*: Boolean flag (default `false`) to represent Arrow Map data as JavaScript `Map` values. For Flechette tables, the default is to produce an array of `[key, value]` arrays. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useProxy*: Boolean flag (default `false`) to extract Arrow Struct values and table row objects using zero-copy proxy objects that extract data from underlying Arrow batches. The proxy objects can improve performance and reduce memory usage, but do not support property enumeration (`Object.keys`, `Object.values`, `Object.entries`) or spreading (`{ ...object }`). This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. *Examples* Encode Arrow data from an input Arquero table: ```js +import { float32, uint16 } from '@uwdata/flechette'; import { table } from 'arquero'; -import { Type } from 'apache-arrow'; // create Arquero table const dt = table({ @@ -613,8 +607,8 @@ const at1 = dt.toArrow(); // encode into Arrow table (set explicit data types) const at2 = dt.toArrow({ types: { - x: Type.Uint16, - y: Type.Float32 + x: uint16(), + y: float32() } }); ``` @@ -622,12 +616,12 @@ const at2 = dt.toArrow({
# table.toArrowBuffer([options]) · [Source](https://github.com/uwdata/arquero/blob/master/src/arrow/encode/index.js) -Format this table as binary data in the [Apache Arrow](https://arrow.apache.org/docs/js/) IPC format. The binary data may be saved to disk or passed between processes or tools. For example, when using [Web Workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers), the output of this method can be passed directly between threads (no data copy) as a [Transferable](https://developer.mozilla.org/en-US/docs/Web/API/Transferable) object. Additionally, Arrow binary data can be loaded in other language environments such as [Python](https://arrow.apache.org/docs/python/) or [R](https://arrow.apache.org/docs/r/). +Format this table as binary data in the [Apache Arrow](https://arrow.apache.org/overview/) IPC format using [Flechette](https://idl.uw.edu/flechette/). The binary data may be saved to disk or passed between processes or tools. For example, when using [Web Workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers), the output of this method can be passed directly between threads (no data copy) as a [Transferable](https://developer.mozilla.org/en-US/docs/Web/API/Transferable) object. Additionally, Arrow binary data can be loaded in other language environments such as [Python](https://arrow.apache.org/docs/python/) or [R](https://arrow.apache.org/docs/r/). This method will throw an error if type inference fails or if the generated columns have differing lengths. * *options*: Options for Arrow encoding, same as [toArrow](#toArrow) but with an additional *format* option. - * *format*: The Arrow IPC byte format to use. One of `'stream'` (default) or `'file'`. + * *format*: The Arrow IPC byte format to use. One of `'stream'` (default) or `'file'`. 
For more details on these formats, see the [Apache Arrow format documentation](https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format). *Examples* diff --git a/docs/index.md b/docs/index.md index e07d314..e4b7d75 100644 --- a/docs/index.md +++ b/docs/index.md @@ -80,12 +80,7 @@ To use in the browser, you can load Arquero from a content delivery network: ``` -Arquero will be imported into the `aq` global object. The default browser bundle does not include the [Apache Arrow](https://arrow.apache.org/) library. To perform Arrow encoding using [toArrow()](api/#toArrow) or binary file loading using [loadArrow()](api/#loadArrow), import Apache Arrow first: - -```html - - -``` +Arquero will be imported into the `aq` global object. The default browser bundle also includes the [Flechette](https://idl.uw.edu/flechette/) library for processing [Apache Arrow](https://arrow.apache.org/overview/) data. Alternatively, you can build and import `arquero.min.js` from the `dist` directory, or build your own application bundle. When building custom application bundles for the browser, the module bundler should draw from the `browser` property of Arquero's `package.json` file. For example, if using [rollup](https://rollupjs.org/), pass the `browser: true` option to the [node-resolve](https://github.com/rollup/plugins/tree/master/packages/node-resolve) plugin. 
diff --git a/package-lock.json b/package-lock.json index 54aa9fe..028b73e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,33 +9,21 @@ "version": "6.0.1", "license": "BSD-3-Clause", "dependencies": { - "@uwdata/flechette": "~0.0.8", + "@uwdata/flechette": "1.0.0-beta.2", "acorn": "^8.12.1", - "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" }, "devDependencies": { "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "eslint": "^9.9.1", + "apache-arrow": "17.0.0", + "eslint": "^9.10.0", "mocha": "^10.7.3", "rimraf": "^6.0.1", - "rollup": "^4.21.0", + "rollup": "^4.21.3", "rollup-plugin-bundle-size": "^1.0.3", "tape": "^5.8.1", - "typescript": "^5.5.4" - } - }, - "node_modules/@75lb/deep-merge": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@75lb/deep-merge/-/deep-merge-1.1.2.tgz", - "integrity": "sha512-08K9ou5VNbheZFxM5tDWoqjA3ImC50DiuuJ2tj1yEPRfkp8lLLg6XAaJ4On+a0yAXor/8ay5gHnAIshRM44Kpw==", - "dependencies": { - "lodash": "^4.17.21", - "typical": "^7.1.1" - }, - "engines": { - "node": ">=12.17" + "typescript": "^5.6.2" } }, "node_modules/@aashutoshrathi/word-wrap": { @@ -108,9 +96,9 @@ } }, "node_modules/@eslint/js": { - "version": "9.9.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.9.1.tgz", - "integrity": "sha512-xIDQRsfg5hNBqHz04H1R3scSVwmI+KUbqjsQKHKQ1DAUSaUjYPReZZmS/5PNiKu1fUvzDd6H7DEDKACSEhu+TQ==", + "version": "9.10.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.10.0.tgz", + "integrity": "sha512-fuXtbiP5GWIn8Fz+LWoOMVf/Jxm+aajZYkhi6CuEm4SxymFM+eUWzbO9qXT+L0iCkL5+KGYMCSGxo686H19S1g==", "dev": true, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -125,6 +113,18 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, + "node_modules/@eslint/plugin-kit": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.1.0.tgz", + "integrity": 
"sha512-autAXT203ixhqei9xt+qkYOvY8l6LAFIdT2UXc/RPNeUVfqRF1BV94GTJyVPFKT8nFM6MyVJhjLj9E8JWvf5zQ==", + "dev": true, + "dependencies": { + "levn": "^0.4.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, "node_modules/@humanwhocodes/module-importer": { "version": "1.0.1", "dev": true, @@ -388,9 +388,9 @@ } }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.21.0.tgz", - "integrity": "sha512-WTWD8PfoSAJ+qL87lE7votj3syLavxunWhzCnx3XFxFiI/BA/r3X7MUM8dVrH8rb2r4AiO8jJsr3ZjdaftmnfA==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.21.3.tgz", + "integrity": "sha512-MmKSfaB9GX+zXl6E8z4koOr/xU63AMVleLEa64v7R0QF/ZloMs5vcD1sHgM64GXXS1csaJutG+ddtzcueI/BLg==", "cpu": [ "arm" ], @@ -401,9 +401,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.21.0.tgz", - "integrity": "sha512-a1sR2zSK1B4eYkiZu17ZUZhmUQcKjk2/j9Me2IDjk1GHW7LB5Z35LEzj9iJch6gtUfsnvZs1ZNyDW2oZSThrkA==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.21.3.tgz", + "integrity": "sha512-zrt8ecH07PE3sB4jPOggweBjJMzI1JG5xI2DIsUbkA+7K+Gkjys6eV7i9pOenNSDJH3eOr/jLb/PzqtmdwDq5g==", "cpu": [ "arm64" ], @@ -414,9 +414,9 @@ ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.21.0.tgz", - "integrity": "sha512-zOnKWLgDld/svhKO5PD9ozmL6roy5OQ5T4ThvdYZLpiOhEGY+dp2NwUmxK0Ld91LrbjrvtNAE0ERBwjqhZTRAA==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.21.3.tgz", + "integrity": 
"sha512-P0UxIOrKNBFTQaXTxOH4RxuEBVCgEA5UTNV6Yz7z9QHnUJ7eLX9reOd/NYMO3+XZO2cco19mXTxDMXxit4R/eQ==", "cpu": [ "arm64" ], @@ -427,9 +427,9 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.21.0.tgz", - "integrity": "sha512-7doS8br0xAkg48SKE2QNtMSFPFUlRdw9+votl27MvT46vo44ATBmdZdGysOevNELmZlfd+NEa0UYOA8f01WSrg==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.21.3.tgz", + "integrity": "sha512-L1M0vKGO5ASKntqtsFEjTq/fD91vAqnzeaF6sfNAy55aD+Hi2pBI5DKwCO+UNDQHWsDViJLqshxOahXyLSh3EA==", "cpu": [ "x64" ], @@ -440,9 +440,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.21.0.tgz", - "integrity": "sha512-pWJsfQjNWNGsoCq53KjMtwdJDmh/6NubwQcz52aEwLEuvx08bzcy6tOUuawAOncPnxz/3siRtd8hiQ32G1y8VA==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.21.3.tgz", + "integrity": "sha512-btVgIsCjuYFKUjopPoWiDqmoUXQDiW2A4C3Mtmp5vACm7/GnyuprqIDPNczeyR5W8rTXEbkmrJux7cJmD99D2g==", "cpu": [ "arm" ], @@ -453,9 +453,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.21.0.tgz", - "integrity": "sha512-efRIANsz3UHZrnZXuEvxS9LoCOWMGD1rweciD6uJQIx2myN3a8Im1FafZBzh7zk1RJ6oKcR16dU3UPldaKd83w==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.21.3.tgz", + "integrity": "sha512-zmjbSphplZlau6ZTkxd3+NMtE4UKVy7U4aVFMmHcgO5CUbw17ZP6QCgyxhzGaU/wFFdTfiojjbLG3/0p9HhAqA==", "cpu": [ "arm" ], @@ -466,9 +466,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.21.0", 
- "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.21.0.tgz", - "integrity": "sha512-ZrPhydkTVhyeGTW94WJ8pnl1uroqVHM3j3hjdquwAcWnmivjAwOYjTEAuEDeJvGX7xv3Z9GAvrBkEzCgHq9U1w==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.21.3.tgz", + "integrity": "sha512-nSZfcZtAnQPRZmUkUQwZq2OjQciR6tEoJaZVFvLHsj0MF6QhNMg0fQ6mUOsiCUpTqxTx0/O6gX0V/nYc7LrgPw==", "cpu": [ "arm64" ], @@ -479,9 +479,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.21.0.tgz", - "integrity": "sha512-cfaupqd+UEFeURmqNP2eEvXqgbSox/LHOyN9/d2pSdV8xTrjdg3NgOFJCtc1vQ/jEke1qD0IejbBfxleBPHnPw==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.21.3.tgz", + "integrity": "sha512-MnvSPGO8KJXIMGlQDYfvYS3IosFN2rKsvxRpPO2l2cum+Z3exiExLwVU+GExL96pn8IP+GdH8Tz70EpBhO0sIQ==", "cpu": [ "arm64" ], @@ -492,9 +492,9 @@ ] }, "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.21.0.tgz", - "integrity": "sha512-ZKPan1/RvAhrUylwBXC9t7B2hXdpb/ufeu22pG2psV7RN8roOfGurEghw1ySmX/CmDDHNTDDjY3lo9hRlgtaHg==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.21.3.tgz", + "integrity": "sha512-+W+p/9QNDr2vE2AXU0qIy0qQE75E8RTwTwgqS2G5CRQ11vzq0tbnfBd6brWhS9bCRjAjepJe2fvvkvS3dno+iw==", "cpu": [ "ppc64" ], @@ -505,9 +505,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.21.0.tgz", - "integrity": 
"sha512-H1eRaCwd5E8eS8leiS+o/NqMdljkcb1d6r2h4fKSsCXQilLKArq6WS7XBLDu80Yz+nMqHVFDquwcVrQmGr28rg==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.21.3.tgz", + "integrity": "sha512-yXH6K6KfqGXaxHrtr+Uoy+JpNlUlI46BKVyonGiaD74ravdnF9BUNC+vV+SIuB96hUMGShhKV693rF9QDfO6nQ==", "cpu": [ "riscv64" ], @@ -518,9 +518,9 @@ ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.21.0.tgz", - "integrity": "sha512-zJ4hA+3b5tu8u7L58CCSI0A9N1vkfwPhWd/puGXwtZlsB5bTkwDNW/+JCU84+3QYmKpLi+XvHdmrlwUwDA6kqw==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.21.3.tgz", + "integrity": "sha512-R8cwY9wcnApN/KDYWTH4gV/ypvy9yZUHlbJvfaiXSB48JO3KpwSpjOGqO4jnGkLDSk1hgjYkTbTt6Q7uvPf8eg==", "cpu": [ "s390x" ], @@ -531,9 +531,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.21.0.tgz", - "integrity": "sha512-e2hrvElFIh6kW/UNBQK/kzqMNY5mO+67YtEh9OA65RM5IJXYTWiXjX6fjIiPaqOkBthYF1EqgiZ6OXKcQsM0hg==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.21.3.tgz", + "integrity": "sha512-kZPbX/NOPh0vhS5sI+dR8L1bU2cSO9FgxwM8r7wHzGydzfSjLRCFAT87GR5U9scj2rhzN3JPYVC7NoBbl4FZ0g==", "cpu": [ "x64" ], @@ -544,9 +544,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.21.0.tgz", - "integrity": "sha512-1vvmgDdUSebVGXWX2lIcgRebqfQSff0hMEkLJyakQ9JQUbLDkEaMsPTLOmyccyC6IJ/l3FZuJbmrBw/u0A0uCQ==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.21.3.tgz", + 
"integrity": "sha512-S0Yq+xA1VEH66uiMNhijsWAafffydd2X5b77eLHfRmfLsRSpbiAWiRHV6DEpz6aOToPsgid7TI9rGd6zB1rhbg==", "cpu": [ "x64" ], @@ -557,9 +557,9 @@ ] }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.21.0.tgz", - "integrity": "sha512-s5oFkZ/hFcrlAyBTONFY1TWndfyre1wOMwU+6KCpm/iatybvrRgmZVM+vCFwxmC5ZhdlgfE0N4XorsDpi7/4XQ==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.21.3.tgz", + "integrity": "sha512-9isNzeL34yquCPyerog+IMCNxKR8XYmGd0tHSV+OVx0TmE0aJOo9uw4fZfUuk2qxobP5sug6vNdZR6u7Mw7Q+Q==", "cpu": [ "arm64" ], @@ -570,9 +570,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.21.0.tgz", - "integrity": "sha512-G9+TEqRnAA6nbpqyUqgTiopmnfgnMkR3kMukFBDsiyy23LZvUCpiUwjTRx6ezYCjJODXrh52rBR9oXvm+Fp5wg==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.21.3.tgz", + "integrity": "sha512-nMIdKnfZfzn1Vsk+RuOvl43ONTZXoAPUUxgcU0tXooqg4YrAqzfKzVenqqk2g5efWh46/D28cKFrOzDSW28gTA==", "cpu": [ "ia32" ], @@ -583,9 +583,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.21.0.tgz", - "integrity": "sha512-2jsCDZwtQvRhejHLfZ1JY6w6kEuEtfF9nzYsZxzSlNVKDX+DpsDJ+Rbjkm74nvg2rdx0gwBS+IMdvwJuq3S9pQ==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.21.3.tgz", + "integrity": "sha512-fOvu7PCQjAj4eWDEuD8Xz5gpzFqXzGlxHZozHP4b9Jxv9APtdxL6STqztDzMLuRXEc4UpXGGhx029Xgm91QBeA==", "cpu": [ "x64" ], @@ -596,10 +596,10 @@ ] }, "node_modules/@swc/helpers": { - "version": "0.5.11", - "resolved": 
"https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.11.tgz", - "integrity": "sha512-YNlnKRWF2sVojTpIyzwou9XoTNbzbzONwRhOoniEioF1AtaitTvVZblaQRrAzChWQ1bLYyYSWzM18y4WwgzJ+A==", - "license": "Apache-2.0", + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.13.tgz", + "integrity": "sha512-UoKGxQ3r5kYI9dALKJapMmuK+1zWM/H17Z1+iwnNmzcJRnfFuevZs375TA5rW31pu4BS4NoSy1fRsexDXfWn5w==", + "dev": true, "dependencies": { "tslib": "^2.4.0" } @@ -607,13 +607,14 @@ "node_modules/@types/command-line-args": { "version": "5.2.3", "resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.3.tgz", - "integrity": "sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw==" + "integrity": "sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw==", + "dev": true }, "node_modules/@types/command-line-usage": { "version": "5.0.4", "resolved": "https://registry.npmjs.org/@types/command-line-usage/-/command-line-usage-5.0.4.tgz", "integrity": "sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==", - "license": "MIT" + "dev": true }, "node_modules/@types/estree": { "version": "1.0.5", @@ -622,12 +623,12 @@ "dev": true }, "node_modules/@types/node": { - "version": "20.14.7", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.7.tgz", - "integrity": "sha512-uTr2m2IbJJucF3KUxgnGOZvYbN0QgkGyWxG6973HCpMYFy2KfcgYuIwkJQMQkt1VbBMlvWRbpshFTLxnxCZjKQ==", - "license": "MIT", + "version": "20.16.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.5.tgz", + "integrity": "sha512-VwYCweNo3ERajwy0IUlqqcyZ8/A7Zwa9ZP3MnENWcB11AejO+tLy3pu850goUW2FC/IJMdZUfKpX/yxL1gymCA==", + "dev": true, "dependencies": { - "undici-types": "~5.26.4" + "undici-types": "~6.19.2" } }, "node_modules/@types/resolve": { @@ -636,9 +637,9 @@ "license": "MIT" }, "node_modules/@uwdata/flechette": { - "version": "0.0.8", 
- "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-0.0.8.tgz", - "integrity": "sha512-Qcy7NL7TfQNQLUw98ygWV8E5PMXR3T6QForhHo3T1u0zfU0L6rN9FcJAFWyA3bxr8eU+WjVypPtqfBDKj+lBvw==" + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-1.0.0-beta.2.tgz", + "integrity": "sha512-7RdHjuPZ2HqWUHUdeG0YElrZC55NS8ftQCByuyxYpjFqidbsmx8ntekc6k1uT3vVVUMcyo7pycEhfLKEEkdbsQ==" }, "node_modules/acorn": { "version": "8.12.1", @@ -699,6 +700,7 @@ }, "node_modules/ansi-styles": { "version": "4.3.0", + "dev": true, "license": "MIT", "dependencies": { "color-convert": "^2.0.1" @@ -728,6 +730,7 @@ "version": "17.0.0", "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-17.0.0.tgz", "integrity": "sha512-X0p7auzdnGuhYMVKYINdQssS4EcKec9TCXyez/qtJt32DrIMGbzqiaMiQ0X6fQlQpw8Fl0Qygcv4dfRAr5Gu9Q==", + "dev": true, "dependencies": { "@swc/helpers": "^0.5.11", "@types/command-line-args": "^5.2.3", @@ -752,7 +755,9 @@ }, "node_modules/array-back": { "version": "3.1.0", - "license": "MIT", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-3.1.0.tgz", + "integrity": "sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q==", + "dev": true, "engines": { "node": ">=6" } @@ -956,7 +961,9 @@ }, "node_modules/chalk-template": { "version": "0.4.0", - "license": "MIT", + "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz", + "integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==", + "dev": true, "dependencies": { "chalk": "^4.1.2" }, @@ -969,7 +976,9 @@ }, "node_modules/chalk-template/node_modules/chalk": { "version": "4.1.2", - "license": "MIT", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, "dependencies": { "ansi-styles": "^4.1.0", 
"supports-color": "^7.1.0" @@ -983,7 +992,9 @@ }, "node_modules/chalk-template/node_modules/supports-color": { "version": "7.2.0", - "license": "MIT", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, "dependencies": { "has-flag": "^4.0.0" }, @@ -1095,6 +1106,7 @@ }, "node_modules/color-convert": { "version": "2.0.1", + "dev": true, "license": "MIT", "dependencies": { "color-name": "~1.1.4" @@ -1105,11 +1117,14 @@ }, "node_modules/color-name": { "version": "1.1.4", + "dev": true, "license": "MIT" }, "node_modules/command-line-args": { "version": "5.2.1", - "license": "MIT", + "resolved": "https://registry.npmjs.org/command-line-args/-/command-line-args-5.2.1.tgz", + "integrity": "sha512-H4UfQhZyakIjC74I9d34fGYDwk3XpSr17QhEd0Q3I9Xq1CETHo4Hcuo87WyWHpAF1aSLjLRf5lD9ZGX2qStUvg==", + "dev": true, "dependencies": { "array-back": "^3.1.0", "find-replace": "^3.0.0", @@ -1120,20 +1135,15 @@ "node": ">=4.0.0" } }, - "node_modules/command-line-args/node_modules/typical": { - "version": "4.0.0", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, "node_modules/command-line-usage": { - "version": "7.0.1", - "license": "MIT", + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.3.tgz", + "integrity": "sha512-PqMLy5+YGwhMh1wS04mVG44oqDsgyLRSKJBdOo1bnYhMKBW65gZF1dRp2OZRhiTjgUHljy99qkO7bsctLaw35Q==", + "dev": true, "dependencies": { "array-back": "^6.2.2", "chalk-template": "^0.4.0", - "table-layout": "^3.0.0", + "table-layout": "^4.1.0", "typical": "^7.1.1" }, "engines": { @@ -1142,7 +1152,18 @@ }, "node_modules/command-line-usage/node_modules/array-back": { "version": "6.2.2", - "license": "MIT", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", + "integrity": 
"sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", + "dev": true, + "engines": { + "node": ">=12.17" + } + }, + "node_modules/command-line-usage/node_modules/typical": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", + "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==", + "dev": true, "engines": { "node": ">=12.17" } @@ -1566,16 +1587,17 @@ } }, "node_modules/eslint": { - "version": "9.9.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.9.1.tgz", - "integrity": "sha512-dHvhrbfr4xFQ9/dq+jcVneZMyRYLjggWjk6RVsIiHsP8Rz6yZ8LvZ//iU4TrZF+SXWG+JkNF2OyiZRvzgRDqMg==", + "version": "9.10.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.10.0.tgz", + "integrity": "sha512-Y4D0IgtBZfOcOUAIQTSXBKoNGfY0REGqHJG6+Q81vNippW5YlKjHFj4soMxamKK1NXHUWuBZTLdU3Km+L/pcHw==", "dev": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.11.0", "@eslint/config-array": "^0.18.0", "@eslint/eslintrc": "^3.1.0", - "@eslint/js": "9.9.1", + "@eslint/js": "9.10.0", + "@eslint/plugin-kit": "^0.1.0", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.3.0", "@nodelib/fs.walk": "^1.2.8", @@ -1598,7 +1620,6 @@ "is-glob": "^4.0.0", "is-path-inside": "^3.0.3", "json-stable-stringify-without-jsonify": "^1.0.1", - "levn": "^0.4.1", "lodash.merge": "^4.6.2", "minimatch": "^3.1.2", "natural-compare": "^1.4.0", @@ -1869,7 +1890,9 @@ }, "node_modules/find-replace": { "version": "3.0.0", - "license": "MIT", + "resolved": "https://registry.npmjs.org/find-replace/-/find-replace-3.0.0.tgz", + "integrity": "sha512-6Tb2myMioCAgv5kfvP5/PkZZ/ntTpVK39fHY7WkWBgvbeE+VHd/tZuZ4mrC+bxh4cfOZeYKVPaJIZtZXV7GNCQ==", + "dev": true, "dependencies": { "array-back": "^3.0.1" }, @@ -1920,7 +1943,7 @@ "version": "24.3.25", "resolved": 
"https://registry.npmjs.org/flatbuffers/-/flatbuffers-24.3.25.tgz", "integrity": "sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ==", - "license": "Apache-2.0" + "dev": true }, "node_modules/flatted": { "version": "3.3.1", @@ -2207,6 +2230,7 @@ }, "node_modules/has-flag": { "version": "4.0.0", + "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -2798,6 +2822,9 @@ }, "node_modules/json-bignum": { "version": "0.0.3", + "resolved": "https://registry.npmjs.org/json-bignum/-/json-bignum-0.0.3.tgz", + "integrity": "sha512-2WHyXj3OfHSgNyuzDbSxI1w2jgw5gkWSWhS7Qg4bWXx1nLk3jnbwfUeS0PSba3IzpTUWdHxBieELUzXRjQB2zg==", + "dev": true, "engines": { "node": ">=0.8" } @@ -2857,14 +2884,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" - }, "node_modules/lodash.camelcase": { "version": "4.3.0", - "license": "MIT" + "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", + "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==", + "dev": true }, "node_modules/lodash.merge": { "version": "4.6.2", @@ -3589,9 +3613,9 @@ } }, "node_modules/rollup": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.21.0.tgz", - "integrity": "sha512-vo+S/lfA2lMS7rZ2Qoubi6I5hwZwzXeUIctILZLbHI+laNtvhhOIon2S1JksA5UEDQ7l3vberd0fxK44lTYjbQ==", + "version": "4.21.3", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.21.3.tgz", + "integrity": "sha512-7sqRtBNnEbcBtMeRVc6VRsJMmpI+JU1z9VTvW8D4gXIYQFz0aLcsE6rRkyghZkLfEgUZgVvOG7A5CVz/VW5GIA==", "dev": true, "dependencies": { "@types/estree": "1.0.5" @@ -3604,22 +3628,22 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - 
"@rollup/rollup-android-arm-eabi": "4.21.0", - "@rollup/rollup-android-arm64": "4.21.0", - "@rollup/rollup-darwin-arm64": "4.21.0", - "@rollup/rollup-darwin-x64": "4.21.0", - "@rollup/rollup-linux-arm-gnueabihf": "4.21.0", - "@rollup/rollup-linux-arm-musleabihf": "4.21.0", - "@rollup/rollup-linux-arm64-gnu": "4.21.0", - "@rollup/rollup-linux-arm64-musl": "4.21.0", - "@rollup/rollup-linux-powerpc64le-gnu": "4.21.0", - "@rollup/rollup-linux-riscv64-gnu": "4.21.0", - "@rollup/rollup-linux-s390x-gnu": "4.21.0", - "@rollup/rollup-linux-x64-gnu": "4.21.0", - "@rollup/rollup-linux-x64-musl": "4.21.0", - "@rollup/rollup-win32-arm64-msvc": "4.21.0", - "@rollup/rollup-win32-ia32-msvc": "4.21.0", - "@rollup/rollup-win32-x64-msvc": "4.21.0", + "@rollup/rollup-android-arm-eabi": "4.21.3", + "@rollup/rollup-android-arm64": "4.21.3", + "@rollup/rollup-darwin-arm64": "4.21.3", + "@rollup/rollup-darwin-x64": "4.21.3", + "@rollup/rollup-linux-arm-gnueabihf": "4.21.3", + "@rollup/rollup-linux-arm-musleabihf": "4.21.3", + "@rollup/rollup-linux-arm64-gnu": "4.21.3", + "@rollup/rollup-linux-arm64-musl": "4.21.3", + "@rollup/rollup-linux-powerpc64le-gnu": "4.21.3", + "@rollup/rollup-linux-riscv64-gnu": "4.21.3", + "@rollup/rollup-linux-s390x-gnu": "4.21.3", + "@rollup/rollup-linux-x64-gnu": "4.21.3", + "@rollup/rollup-linux-x64-musl": "4.21.3", + "@rollup/rollup-win32-arm64-msvc": "4.21.3", + "@rollup/rollup-win32-ia32-msvc": "4.21.3", + "@rollup/rollup-win32-x64-msvc": "4.21.3", "fsevents": "~2.3.2" } }, @@ -3840,13 +3864,6 @@ "node": ">= 0.4" } }, - "node_modules/stream-read-all": { - "version": "3.0.1", - "license": "MIT", - "engines": { - "node": ">=10" - } - }, "node_modules/string-width": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", @@ -4045,27 +4062,23 @@ } }, "node_modules/table-layout": { - "version": "3.0.2", - "license": "MIT", + "version": "4.1.1", + "resolved": 
"https://registry.npmjs.org/table-layout/-/table-layout-4.1.1.tgz", + "integrity": "sha512-iK5/YhZxq5GO5z8wb0bY1317uDF3Zjpha0QFFLA8/trAoiLbQD0HUbMesEaxyzUgDxi2QlcbM8IvqOlEjgoXBA==", + "dev": true, "dependencies": { - "@75lb/deep-merge": "^1.1.1", "array-back": "^6.2.2", - "command-line-args": "^5.2.1", - "command-line-usage": "^7.0.0", - "stream-read-all": "^3.0.1", - "typical": "^7.1.1", "wordwrapjs": "^5.1.0" }, - "bin": { - "table-layout": "bin/cli.js" - }, "engines": { "node": ">=12.17" } }, "node_modules/table-layout/node_modules/array-back": { "version": "6.2.2", - "license": "MIT", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", + "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", + "dev": true, "engines": { "node": ">=12.17" } @@ -4161,10 +4174,10 @@ } }, "node_modules/tslib": { - "version": "2.6.3", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz", - "integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ==", - "license": "0BSD" + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.7.0.tgz", + "integrity": "sha512-gLXCKdN1/j47AiHiOkJN69hJmcbGTHI0ImLmbYLHykhgeN0jVGola9yVjFgzCUklsZQMW55o+dW7IXv3RCXDzA==", + "dev": true }, "node_modules/type-check": { "version": "0.4.0", @@ -4255,9 +4268,9 @@ } }, "node_modules/typescript": { - "version": "5.5.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.4.tgz", - "integrity": "sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==", + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz", + "integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==", "dev": true, "bin": { "tsc": "bin/tsc", @@ -4268,10 +4281,12 @@ } }, "node_modules/typical": { - "version": "7.1.1", - "license": 
"MIT", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/typical/-/typical-4.0.0.tgz", + "integrity": "sha512-VAH4IvQ7BDFYglMd7BPRDfLgxZZX4O4TFcRDA6EN5X7erNJJq+McIEp8np9aVtxrCJ6qx4GTYVfOWNjcqwZgRw==", + "dev": true, "engines": { - "node": ">=12.17" + "node": ">=8" } }, "node_modules/unbox-primitive": { @@ -4291,10 +4306,10 @@ } }, "node_modules/undici-types": { - "version": "5.26.5", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "license": "MIT" + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "dev": true }, "node_modules/uri-js": { "version": "4.4.1", @@ -4387,7 +4402,9 @@ }, "node_modules/wordwrapjs": { "version": "5.1.0", - "license": "MIT", + "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz", + "integrity": "sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg==", + "dev": true, "engines": { "node": ">=12.17" } diff --git a/package.json b/package.json index b97e4da..19d325b 100644 --- a/package.json +++ b/package.json @@ -37,20 +37,20 @@ "prepublishOnly": "npm test && npm run build" }, "dependencies": { - "@uwdata/flechette": "~0.0.8", + "@uwdata/flechette": "1.0.0-beta.2", "acorn": "^8.12.1", - "apache-arrow": "^17.0.0", "node-fetch": "^3.3.2" }, "devDependencies": { "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "eslint": "^9.9.1", + "apache-arrow": "17.0.0", + "eslint": "^9.10.0", "mocha": "^10.7.3", "rimraf": "^6.0.1", - "rollup": "^4.21.0", + "rollup": "^4.21.3", "rollup-plugin-bundle-size": "^1.0.3", "tape": "^5.8.1", - "typescript": "^5.5.4" + "typescript": "^5.6.2" } } diff --git a/perf/arrow-perf.js b/perf/arrow-perf.js 
index 0708b22..c5d2ff0 100644 --- a/perf/arrow-perf.js +++ b/perf/arrow-perf.js @@ -1,27 +1,24 @@ import tape from 'tape'; import { time } from './time.js'; import { bools, floats, ints, sample, strings } from './data-gen.js'; -import { fromArrow, table, toArrow } from '../src/index.js'; -import { - Bool, Dictionary, Float64, Int32, Table, Uint32, Utf8, - tableFromIPC, tableToIPC, vectorFromArray -} from 'apache-arrow'; +import { fromArrow, table, toArrowIPC } from '../src/index.js'; +import { bool, columnFromArray, dictionary, float64, int32, tableFromColumns, tableToIPC, uint32, utf8 } from '@uwdata/flechette'; +import { tableFromIPC } from 'apache-arrow'; function process(N, nulls, msg) { - const vectors = { - k: vectorFromArray( - sample(N, strings(100), nulls), - new Dictionary(new Utf8(), new Int32()) - ), - v: vectorFromArray( - ints(N, -10000, 10000, nulls), - new Int32() - ) - }; - const aa = new Table(vectors); - const buf = tableToIPC(aa, 'stream'); - const ft = fromArrow(buf); // using flechette - const at = fromArrow(aa); // using arrow-js + const aa = tableFromColumns({ + k: columnFromArray( + sample(N, strings(100), nulls), + dictionary(utf8(), int32()) + ), + v: columnFromArray( + ints(N, -10000, 10000, nulls), + int32() + ) + }); + const buf = tableToIPC(aa); + const at = fromArrow(tableFromIPC(buf)); // using arrow-js + const ft = fromArrow(aa); // using flechette const filterDict = (dt, val) => time(() => { dt.filter(`d.k === '${val}'`).numRows(); @@ -67,11 +64,11 @@ function process(N, nulls, msg) { function serialize(N, nulls, msg) { tape(`arrow serialization: ${msg}`, t => { console.table([ // eslint-disable-line - encode('boolean', new Bool(), bools(N, nulls)), - encode('integer', new Int32(), ints(N, -10000, 10000, nulls)), - encode('float', new Float64(), floats(N, -10000, 10000, nulls)), + encode('boolean', bool(), bools(N, nulls)), + encode('integer', int32(), ints(N, -10000, 10000, nulls)), + encode('float', float64(), floats(N, -10000, 
10000, nulls)), encode('dictionary', - new Dictionary(new Utf8(), new Uint32(), 0), + dictionary(utf8(), uint32()), sample(N, strings(100), nulls) ) ]); @@ -83,17 +80,21 @@ function encode(name, type, values) { const dt = table({ values }); // measure encoding times - const qt = time(() => tableToIPC(toArrow(dt, { types: { values: type } }))); + const qt = time(() => toArrowIPC(dt, { types: { values: type } })); const at = time( - () => tableToIPC(new Table({ values: vectorFromArray(values, type) })) + () => tableToIPC( + tableFromColumns({ + values: columnFromArray(values, type) + }) + ) ); const jt = time(() => JSON.stringify(values)); // measure serialized byte size - const ab = tableToIPC(new Table({ - values: vectorFromArray(values, type) + const ab = tableToIPC(tableFromColumns({ + values: columnFromArray(values, type) })).length; - const qb = tableToIPC(toArrow(dt, { types: { values: type }})).length; + const qb = toArrowIPC(dt, { types: { values: type }}).length; const jb = (new TextEncoder().encode(JSON.stringify(values))).length; // check that arrow and arquero produce the same result @@ -104,7 +105,7 @@ function encode(name, type, values) { return { 'data type': name, - 'arrow-js': at, + 'flechette': at, 'arquero': qt, 'json': jt, 'size-arrow': ab, diff --git a/rollup.config.js b/rollup.config.js index 45d59c8..ce0e61b 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -3,23 +3,27 @@ import { nodeResolve } from '@rollup/plugin-node-resolve'; import terser from '@rollup/plugin-terser'; const name = 'aq'; -const external = [ 'apache-arrow' ]; -const globals = { 'apache-arrow': 'Arrow' }; const plugins = [ bundleSize(), nodeResolve({ modulesOnly: true }) ]; +function onwarn(warning) { + if (warning.code !== 'CIRCULAR_DEPENDENCY') { + // eslint-disable-next-line + console.error(`(!) 
${warning.message}`); + } +} + export default [ { input: 'src/index-browser.js', - external, plugins, + onwarn, output: [ { file: 'dist/arquero.js', format: 'umd', - globals, name }, { @@ -27,7 +31,6 @@ export default [ format: 'umd', sourcemap: true, plugins: [ terser({ ecma: 2018 }) ], - globals, name } ] diff --git a/src/api.js b/src/api.js index 9960a0b..ec25761 100644 --- a/src/api.js +++ b/src/api.js @@ -8,12 +8,12 @@ export { default as walk_ast } from './expression/ast/walk.js'; // public API export { seed } from './util/random.js'; -export { default as fromArrow } from './arrow/from-arrow.js'; +export { default as fromArrow } from './format/from-arrow.js'; export { default as fromCSV } from './format/from-csv.js'; export { default as fromFixed } from './format/from-fixed.js'; export { default as fromJSON } from './format/from-json.js'; -export { default as toArrow } from './arrow/to-arrow.js'; -export { default as toArrowIPC } from './arrow/to-arrow-ipc.js'; +export { default as toArrow } from './format/to-arrow.js'; +export { default as toArrowIPC } from './format/to-arrow-ipc.js'; export { default as toCSV } from './format/to-csv.js'; export { default as toHTML } from './format/to-html.js'; export { default as toJSON } from './format/to-json.js'; diff --git a/src/arrow/arrow-table.js b/src/arrow/arrow-table.js deleted file mode 100644 index 82c8beb..0000000 --- a/src/arrow/arrow-table.js +++ /dev/null @@ -1,28 +0,0 @@ -import { Table, tableToIPC } from 'apache-arrow'; -import error from '../util/error.js'; - -const fail = (cause) => error( - 'Apache Arrow not imported, ' + - 'see https://github.com/uwdata/arquero#usage', - cause -); - -export function arrowTable(...args) { - // trap access to provide a helpful message - // when Apache Arrow has not been imported - try { - return new Table(...args); - } catch (err) { - fail(err); - } -} - -export function arrowTableToIPC(table, format) { - // trap access to provide a helpful message - // when Apache Arrow 
has not been imported - try { - return tableToIPC(table, format); - } catch (err) { - fail(err); - } -} diff --git a/src/arrow/builder/array-builder.js b/src/arrow/builder/array-builder.js deleted file mode 100644 index c9de3a4..0000000 --- a/src/arrow/builder/array-builder.js +++ /dev/null @@ -1,9 +0,0 @@ -import { array } from './util.js'; - -export default function(type, length) { - const data = array(type.ArrayType, length); - return { - set(value, index) { data[index] = value; }, - data: () => ({ type, length, buffers: [null, data] }) - }; -} diff --git a/src/arrow/builder/bool-builder.js b/src/arrow/builder/bool-builder.js deleted file mode 100644 index a327ff3..0000000 --- a/src/arrow/builder/bool-builder.js +++ /dev/null @@ -1,11 +0,0 @@ -import { array } from './util.js'; - -export default function(type, length) { - const data = array(type.ArrayType, length / 8); - return { - set(value, index) { - if (value) data[index >> 3] |= (1 << (index % 8)); - }, - data: () => ({ type, length, buffers: [null, data] }) - }; -} diff --git a/src/arrow/builder/date-day-builder.js b/src/arrow/builder/date-day-builder.js deleted file mode 100644 index d7bb012..0000000 --- a/src/arrow/builder/date-day-builder.js +++ /dev/null @@ -1,9 +0,0 @@ -import { array } from './util.js'; - -export default function(type, length) { - const data = array(type.ArrayType, length); - return { - set(value, index) { data[index] = (value / 86400000) | 0; }, - data: () => ({ type, length, buffers: [null, data] }) - }; -} diff --git a/src/arrow/builder/date-millis-builder.js b/src/arrow/builder/date-millis-builder.js deleted file mode 100644 index 1bcdac1..0000000 --- a/src/arrow/builder/date-millis-builder.js +++ /dev/null @@ -1,9 +0,0 @@ -import { array } from './util.js'; - -export default function(type, length) { - const data = array(type.ArrayType, length); - return { - set(value, index) { data[index] = BigInt(value); }, - data: () => ({ type, length, buffers: [null, data] }) - }; -} diff 
--git a/src/arrow/builder/default-builder.js b/src/arrow/builder/default-builder.js deleted file mode 100644 index d0301ec..0000000 --- a/src/arrow/builder/default-builder.js +++ /dev/null @@ -1,12 +0,0 @@ -import { makeBuilder } from 'apache-arrow'; - -export default function(type) { - const b = makeBuilder({ - type, - nullValues: [null, undefined] - }); - return { - set(value, index) { b.set(index, value); }, - data: () => b.finish().flush() - }; -} diff --git a/src/arrow/builder/dictionary-builder.js b/src/arrow/builder/dictionary-builder.js deleted file mode 100644 index c301a7a..0000000 --- a/src/arrow/builder/dictionary-builder.js +++ /dev/null @@ -1,36 +0,0 @@ -import utf8Builder from './utf8-builder.js'; -import { array, arrowVector } from './util.js'; - -export default function(type, length) { - const values = []; - const data = array(type.indices.ArrayType, length); - const keys = Object.create(null); - - let next = -1; - let strlen = 0; - - return { - set(value, index) { - const v = String(value); - let k = keys[v]; - if (k === undefined) { - strlen += v.length; - keys[v] = k = ++next; - values.push(v); - } - data[index] = k; - }, - data: () => ({ - type, - length, - buffers: [null, data], - dict: dictionary(type.dictionary, values, strlen) - }) - }; -} - -function dictionary(type, values, strlen) { - const b = utf8Builder(type, values.length, strlen); - values.forEach(b.set); - return arrowVector(b.data()); -} diff --git a/src/arrow/builder/index.js b/src/arrow/builder/index.js deleted file mode 100644 index 2e9dde7..0000000 --- a/src/arrow/builder/index.js +++ /dev/null @@ -1,40 +0,0 @@ -import { Type } from 'apache-arrow'; -import arrayBuilder from './array-builder.js'; -import boolBuilder from './bool-builder.js'; -import dateDayBuilder from './date-day-builder.js'; -import dateMillisBuilder from './date-millis-builder.js'; -import defaultBuilder from './default-builder.js'; -import dictionaryBuilder from './dictionary-builder.js'; -import 
validBuilder from './valid-builder.js'; - -export default function(type, nrows, nullable = true) { - let method; - - switch (type.typeId) { - case Type.Int: - method = type.bitWidth < 64 ? arrayBuilder : null; - break; - case Type.Float: - method = type.precision > 0 ? arrayBuilder : null; - break; - case Type.Dictionary: - // check sub-types against builder assumptions - // if check fails, fallback to default builder - method = ( - type.dictionary.typeId === Type.Utf8 && - type.indices.typeId === Type.Int && - type.indices.bitWidth < 64 - ) ? dictionaryBuilder : null; - break; - case Type.Bool: - method = boolBuilder; - break; - case Type.Date: - method = type.unit ? dateMillisBuilder : dateDayBuilder; - break; - } - - return method == null ? defaultBuilder(type) - : nullable ? validBuilder(method(type, nrows), nrows) - : method(type, nrows); -} diff --git a/src/arrow/builder/resolve-type.js b/src/arrow/builder/resolve-type.js deleted file mode 100644 index 14eb341..0000000 --- a/src/arrow/builder/resolve-type.js +++ /dev/null @@ -1,97 +0,0 @@ -import { - Binary, - Bool, - DataType, - DateDay, - DateMillisecond, - Dictionary, - Float16, - Float32, - Float64, - Int16, - Int32, - Int64, - Int8, - IntervalDayTime, - IntervalYearMonth, - Null, - TimeMicrosecond, - TimeMillisecond, - TimeNanosecond, - TimeSecond, - Type, - Uint16, - Uint32, - Uint64, - Uint8, - Utf8 -} from 'apache-arrow'; -import error from '../../util/error.js'; -import toString from '../../util/to-string.js'; - -export default function(type) { - if (type instanceof DataType || type == null) { - return type; - } - - switch (type) { - case Type.Binary: - return new Binary(); - case Type.Bool: - return new Bool(); - case Type.DateDay: - return new DateDay(); - case Type.DateMillisecond: - case Type.Date: - return new DateMillisecond(); - case Type.Dictionary: - return new Dictionary(new Utf8(), new Int32()); - case Type.Float16: - return new Float16(); - case Type.Float32: - return new Float32(); - 
case Type.Float64: - case Type.Float: - return new Float64(); - case Type.Int8: - return new Int8(); - case Type.Int16: - return new Int16(); - case Type.Int32: - case Type.Int: - return new Int32(); - case Type.Int64: - return new Int64(); - case Type.IntervalDayTime: - return new IntervalDayTime(); - case Type.Interval: - case Type.IntervalYearMonth: - return new IntervalYearMonth(); - case Type.Null: - return new Null(); - case Type.TimeMicrosecond: - return new TimeMicrosecond(); - case Type.TimeMillisecond: - case Type.Time: - return new TimeMillisecond(); - case Type.TimeNanosecond: - return new TimeNanosecond(); - case Type.TimeSecond: - return new TimeSecond(); - case Type.Uint8: - return new Uint8(); - case Type.Uint16: - return new Uint16(); - case Type.Uint32: - return new Uint32(); - case Type.Uint64: - return new Uint64(); - case Type.Utf8: - return new Utf8(); - default: - error( - `Unsupported type code: ${toString(type)}. ` + - 'Use a data type constructor instead?' - ); - } -} diff --git a/src/arrow/builder/utf8-builder.js b/src/arrow/builder/utf8-builder.js deleted file mode 100644 index b8fb35d..0000000 --- a/src/arrow/builder/utf8-builder.js +++ /dev/null @@ -1,21 +0,0 @@ -import { array, ceil64Bytes, writeUtf8 } from './util.js'; - -export default function(type, length, strlen) { - const offset = array(Int32Array, length + 1); - const buf = array(Uint8Array, 3 * strlen); - - let idx = 0; - - return { - set(value, index) { - idx += writeUtf8(buf, idx, value); - offset[index + 1] = idx; - }, - data: () => { - // slice utf buffer if over-allocated - const dlen = ceil64Bytes(idx); - const data = buf.length > dlen ? 
buf.subarray(0, dlen) : buf; - return { type, length, buffers: [offset, data] }; - } - }; -} diff --git a/src/arrow/builder/util.js b/src/arrow/builder/util.js deleted file mode 100644 index c3cc25f..0000000 --- a/src/arrow/builder/util.js +++ /dev/null @@ -1,33 +0,0 @@ -import { Data, Vector } from 'apache-arrow'; - -export function ceil64Bytes(length, bpe = 1) { - return ((((length * bpe) + 63) & ~63) || 64) / bpe; -} - -export function array(Type, length, bpe = Type.BYTES_PER_ELEMENT) { - return new Type(ceil64Bytes(length, bpe)); -} - -export function arrowData(d) { - return d instanceof Data - ? d - : new Data(d.type, 0, d.length, d.nulls, d.buffers, null, d.dict); -} - -export function arrowVector(data) { - return new Vector([arrowData(data)]); -} - -export const encoder = new TextEncoder(); - -export function encode(data, idx, str) { - const bytes = encoder.encode(str); - data.set(bytes, idx); - return bytes.length; -} - -export function encodeInto(data, idx, str) { - return encoder.encodeInto(str, data.subarray(idx)).written; -} - -export const writeUtf8 = encoder.encodeInto ? 
encodeInto : encode; diff --git a/src/arrow/builder/valid-builder.js b/src/arrow/builder/valid-builder.js deleted file mode 100644 index ee21dae..0000000 --- a/src/arrow/builder/valid-builder.js +++ /dev/null @@ -1,25 +0,0 @@ -import { array } from './util.js'; - -export default function(builder, length) { - const valid = array(Uint8Array, length / 8); - let nulls = 0; - - return { - set(value, index) { - if (value == null) { - ++nulls; - } else { - builder.set(value, index); - valid[index >> 3] |= (1 << (index % 8)); - } - }, - data: () => { - const d = builder.data(); - if (nulls) { - d.nulls = nulls; - d.buffers[2] = valid; - } - return d; - } - }; -} diff --git a/src/arrow/encode/data-from-objects.js b/src/arrow/encode/data-from-objects.js deleted file mode 100644 index af91330..0000000 --- a/src/arrow/encode/data-from-objects.js +++ /dev/null @@ -1,16 +0,0 @@ -import { dataFromScan } from './data-from.js'; -import { profile } from './profiler.js'; -import resolveType from '../builder/resolve-type.js'; - -export default function(data, name, nrows, scan, type, nullable = true) { - type = resolveType(type); - - // perform type inference if needed - if (!type) { - const p = profile(scan, name); - nullable = p.nulls > 0; - type = p.type(); - } - - return dataFromScan(nrows, scan, name, type, nullable); -} diff --git a/src/arrow/encode/data-from-table.js b/src/arrow/encode/data-from-table.js deleted file mode 100644 index 85fe155..0000000 --- a/src/arrow/encode/data-from-table.js +++ /dev/null @@ -1,69 +0,0 @@ -import { - Float32, Float64, - Int16, Int32, Int64, Int8, - Uint16, Uint32, Uint64, Uint8, Vector -} from 'apache-arrow'; -import { dataFromArray, dataFromScan } from './data-from.js'; -import { profile } from './profiler.js'; -import resolveType from '../builder/resolve-type.js'; -import isTypedArray from '../../util/is-typed-array.js'; - -export default function(table, name, nrows, scan, type, nullable = true) { - type = resolveType(type); - const column = 
table.column(name); - const reified = !(table.isFiltered() || table.isOrdered()); - - // use existing arrow data if types match - const vec = arrowVector(column); - if (vec && reified && typeCompatible(vec.type, type)) { - return vec; - } - - // if backing data is a typed array, leverage that - const data = column.data; - if (isTypedArray(data)) { - const dtype = typeFromArray(data); - if (reified && dtype && typeCompatible(dtype, type)) { - return dataFromArray(data, dtype); - } else { - type = type || dtype; - nullable = false; - } - } - - // perform type inference if needed - if (!type) { - const p = profile(scan, column); - nullable = p.nulls > 0; - type = p.type(); - } - - return dataFromScan(nrows, scan, column, type, nullable); -} - -function arrowVector(value) { - return value instanceof Vector ? value - : value.vector instanceof Vector ? value.vector - : null; -} - -function typeFromArray(data) { - const types = { - Float32Array: Float32, - Float64Array: Float64, - Int8Array: Int8, - Int16Array: Int16, - Int32Array: Int32, - Uint8Array: Uint8, - Uint16Array: Uint16, - Uint32Array: Uint32, - BigInt64Array: Int64, - BigUint64Array: Uint64 - }; - const Type = types[data.constructor.name]; - return Type ? new Type() : null; -} - -function typeCompatible(a, b) { - return !a || !b ? 
true : a.compareTo(b); -} diff --git a/src/arrow/encode/data-from.js b/src/arrow/encode/data-from.js deleted file mode 100644 index 8b0a0ec..0000000 --- a/src/arrow/encode/data-from.js +++ /dev/null @@ -1,21 +0,0 @@ -import builder from '../builder/index.js'; -import { arrowData, ceil64Bytes } from '../builder/util.js'; - -export function dataFromArray(array, type) { - const length = array.length; - const size = ceil64Bytes(length, array.BYTES_PER_ELEMENT); - - let data = array; - if (length !== size) { - data = new array.constructor(size); - data.set(array); - } - - return arrowData({ type, length, buffers: [null, data] }); -} - -export function dataFromScan(nrows, scan, column, type, nullable = true) { - const b = builder(type, nrows, nullable); - scan(column, b.set); - return arrowData(b.data()); -} diff --git a/src/arrow/encode/profiler.js b/src/arrow/encode/profiler.js deleted file mode 100644 index 16d1b7f..0000000 --- a/src/arrow/encode/profiler.js +++ /dev/null @@ -1,138 +0,0 @@ -import { Field, FixedSizeList, List, Struct, Type } from 'apache-arrow'; -import resolveType from '../builder/resolve-type.js'; -import error from '../../util/error.js'; -import isArrayType from '../../util/is-array-type.js'; -import isDate from '../../util/is-date.js'; -import isExactUTCDate from '../../util/is-exact-utc-date.js'; - -export function profile(scan, column) { - const p = profiler(); - scan(column, p.add); - return p; -} - -export function profiler() { - const p = { - count: 0, - nulls: 0, - bools: 0, - nums: 0, - ints: 0, - bigints: 0, - min: Infinity, - max: -Infinity, - digits: 0, - dates: 0, - utcdays: 0, - strings: 0, - strlen: 0, - arrays: 0, - minlen: Infinity, - maxlen: 0, - structs: 0, - - add(value) { - ++p.count; - if (value == null) { - ++p.nulls; - return; - } - - const type = typeof value; - if (type === 'string') { - ++p.strings; - } else if (type === 'number') { - ++p.nums; - if (value < p.min) p.min = value; - if (value > p.max) p.max = value; - if 
(Number.isInteger(value)) ++p.ints; - } else if (type === 'boolean') { - ++p.bools; - } else if (type === 'object') { - if (isDate(value)) { - ++p.dates; - if (isExactUTCDate(value)) { - ++p.utcdays; - } - } else if (isArrayType(value)) { - ++p.arrays; - if (value.length < p.minlen) p.minlen = value.length; - if (value.length > p.maxlen) p.maxlen = value.length; - const ap = p.array_prof || (p.array_prof = profiler()); - value.forEach(ap.add); - } else { - ++p.structs; - const sp = p.struct_prof || (p.struct_prof = {}); - for (const key in value) { - const fp = sp[key] || (sp[key] = profiler()); - fp.add(value[key]); - } - } - } else if (type === 'bigint') { - ++p.bigints; - if (value < p.min) p.min = value; - if (value > p.max) p.max = value; - } - }, - type() { - return resolveType(infer(p)); - } - }; - - return p; -} - -function infer(p) { - const valid = p.count - p.nulls; - - if (valid === 0) { - return Type.Null; - } - else if (p.ints === valid) { - const v = Math.max(Math.abs(p.min) - 1, p.max); - return p.min < 0 - ? v >= 2 ** 31 ? Type.Float64 - : v < (1 << 7) ? Type.Int8 : v < (1 << 15) ? Type.Int16 : Type.Int32 - : v >= 2 ** 32 ? Type.Float64 - : v < (1 << 8) ? Type.Uint8 : v < (1 << 16) ? Type.Uint16 : Type.Uint32; - } - else if (p.nums === valid) { - return Type.Float64; - } - else if (p.bigints === valid) { - // @ts-ignore - const v = -p.min > p.max ? -p.min - 1n : p.max; - return p.min < 0 - ? v < 2 ** 63 ? Type.Int64 - : error(`BigInt exceeds 64 bits: ${v}`) - : p.max < 2 ** 64 ? Type.Uint64 - : error(`BigInt exceeds 64 bits: ${p.max}`); - } - else if (p.bools === valid) { - return Type.Bool; - } - else if (p.utcdays === valid) { - return Type.DateDay; - } - else if (p.dates === valid) { - return Type.DateMillisecond; - } - else if (p.arrays === valid) { - const type = Field.new('value', p.array_prof.type(), true); - return p.minlen === p.maxlen - ? 
new FixedSizeList(p.minlen, type) - : new List(type); - } - else if (p.structs === valid) { - const sp = p.struct_prof; - return new Struct( - Object.keys(sp).map(name => Field.new(name, sp[name].type(), true)) - ); - } - else if (p.strings > 0) { - return Type.Dictionary; - } - else { - error('Type inference failure'); - } -} diff --git a/src/arrow/encode/scan.js b/src/arrow/encode/scan.js deleted file mode 100644 index b12a588..0000000 --- a/src/arrow/encode/scan.js +++ /dev/null @@ -1,29 +0,0 @@ -import isArrayType from '../../util/is-array-type.js'; - -export function scanArray(data, limit, offset) { - const n = Math.min(data.length, offset + limit); - return (name, visit) => { - for (let i = offset; i < n; ++i) { - visit(data[i][name], i); - } - }; -} - -export function scanTable(table, limit, offset) { - const scanAll = offset === 0 && table.numRows() <= limit - && !table.isFiltered() && !table.isOrdered(); - - return (column, visit) => { - const isArray = isArrayType(column); - let i = -1; - scanAll && isArray - ? column.forEach(visit) - : table.scan( - // optimize column value access - isArray - ? row => visit(column[row], ++i) - : row => visit(column.at(row), ++i), - true, limit, offset - ); - }; -} diff --git a/src/arrow/to-arrow.js b/src/arrow/to-arrow.js deleted file mode 100644 index e4adb13..0000000 --- a/src/arrow/to-arrow.js +++ /dev/null @@ -1,59 +0,0 @@ -import { arrowTable } from './arrow-table.js'; -import dataFromObjects from './encode/data-from-objects.js'; -import dataFromTable from './encode/data-from-table.js'; -import { scanArray, scanTable } from './encode/scan.js'; -import error from '../util/error.js'; -import isArray from '../util/is-array.js'; -import isFunction from '../util/is-function.js'; - -/** - * Create an Apache Arrow table for an input dataset. - * @param {object[]|import('../table/Table.js').Table} data An input dataset - * to convert to Arrow format. 
If array-valued, the data should consist of an - * array of objects where each entry represents a row and named properties - * represent columns. Otherwise, the input data should be an Arquero table. - * @param {import('./types.js').ArrowFormatOptions} [options] - * Encoding options, including column data types. - * @return {import('apache-arrow').Table} An Apache Arrow Table instance. - */ -export default function(data, options = {}) { - const { types = {} } = options; - const { dataFrom, names, nrows, scan } = init(data, options); - const cols = {}; - names.forEach(name => { - const col = dataFrom(data, name, nrows, scan, types[name]); - if (col.length !== nrows) { - error('Column length mismatch'); - } - cols[name] = col; - }); - return arrowTable(cols); -} - -function init(data, options) { - const { columns, limit = Infinity, offset = 0 } = options; - const names = isFunction(columns) ? columns(data) - : isArray(columns) ? columns - : null; - if (isArray(data)) { - return { - dataFrom: dataFromObjects, - names: names || Object.keys(data[0]), - nrows: Math.min(limit, data.length - offset), - scan: scanArray(data, limit, offset) - }; - } else if (isTable(data)) { - return { - dataFrom: dataFromTable, - names: names || data.columnNames(), - nrows: Math.min(limit, data.numRows() - offset), - scan: scanTable(data, limit, offset) - }; - } else { - error('Unsupported input data type'); - } -} - -function isTable(data) { - return data && isFunction(data.reify); -} diff --git a/src/arrow/from-arrow.js b/src/format/from-arrow.js similarity index 100% rename from src/arrow/from-arrow.js rename to src/format/from-arrow.js diff --git a/src/format/load-file.js b/src/format/load-file.js index db5975a..17f5555 100644 --- a/src/format/load-file.js +++ b/src/format/load-file.js @@ -3,7 +3,7 @@ import { readFile } from 'fs'; import fromCSV from './from-csv.js'; import fromFixed from './from-fixed.js'; import fromJSON from './from-json.js'; -import fromArrow from 
'../arrow/from-arrow.js'; +import fromArrow from './from-arrow.js'; import { from } from '../table/index.js'; import isArray from '../util/is-array.js'; @@ -67,7 +67,7 @@ function loadFile(file, options, parse) { /** * Load an Arrow file from a URL and return a Promise for an Arquero table. * @param {string} path The URL or file path to load. - * @param {LoadOptions & import('../arrow/types.js').ArrowOptions} [options] + * @param {LoadOptions & import('../format/types.js').ArrowOptions} [options] * Arrow format options. * @return {Promise} A Promise for an Arquero table. * @example aq.loadArrow('data/table.arrow') diff --git a/src/format/load-url.js b/src/format/load-url.js index aded0fd..c1e12a9 100644 --- a/src/format/load-url.js +++ b/src/format/load-url.js @@ -1,4 +1,4 @@ -import fromArrow from '../arrow/from-arrow.js'; +import fromArrow from './from-arrow.js'; import fromCSV from './from-csv.js'; import fromFixed from './from-fixed.js'; import fromJSON from './from-json.js'; @@ -43,7 +43,7 @@ export function load(url, options = {}) { /** * Load an Arrow file from a URL and return a Promise for an Arquero table. * @param {string} url The URL to load. - * @param {LoadOptions & import('../arrow/types.js').ArrowOptions} [options] + * @param {LoadOptions & import('./types.js').ArrowOptions} [options] * Arrow format options. * @return {Promise} A Promise for an Arquero table. * @example aq.loadArrow('data/table.arrow') diff --git a/src/arrow/to-arrow-ipc.js b/src/format/to-arrow-ipc.js similarity index 61% rename from src/arrow/to-arrow-ipc.js rename to src/format/to-arrow-ipc.js index 9c079ec..e01abfa 100644 --- a/src/arrow/to-arrow-ipc.js +++ b/src/format/to-arrow-ipc.js @@ -1,9 +1,9 @@ -import { arrowTableToIPC } from './arrow-table.js'; +import { tableToIPC } from '@uwdata/flechette'; import toArrow from './to-arrow.js'; /** * Format a table as binary data in the Apache Arrow IPC format. 
- * @param {object[]|import('../table/Table.js').Table} data The table data + * @param {import('../table/Table.js').Table} data The table data * @param {import('./types.js').ArrowIPCFormatOptions} [options] * The Arrow IPC formatting options. Set the *format* option to `'stream'` * or `'file'` to specify the IPC format. @@ -11,8 +11,5 @@ import toArrow from './to-arrow.js'; */ export default function(data, options = {}) { const { format = 'stream', ...toArrowOptions } = options; - if (!['stream', 'file'].includes(format)) { - throw Error('Unrecognised Arrow IPC output format'); - } - return arrowTableToIPC(toArrow(data, toArrowOptions), format); + return tableToIPC(toArrow(data, toArrowOptions), { format }); } diff --git a/src/format/to-arrow.js b/src/format/to-arrow.js new file mode 100644 index 0000000..cc9baff --- /dev/null +++ b/src/format/to-arrow.js @@ -0,0 +1,47 @@ +import { columnFromArray, columnFromValues, tableFromColumns } from '@uwdata/flechette'; +import { columns as select } from './util.js'; +import isArrayType from '../util/is-array-type.js'; + +/** + * Create an Apache Arrow table for an input table. + * @param {import('../table/Table.js').Table} table + * An input Arquero table to convert to Arrow format. + * @param {import('./types.js').ArrowFormatOptions} [options] + * Encoding options, including column data types. + * @return {import('@uwdata/flechette').Table} An Arrow Table instance. + */ +export default function(table, options = {}) { + const { columns, limit = Infinity, offset = 0, types = {}, ...opt } = options; + const names = select(table, columns); + const length = table.size; + const data = table.data(); + + // make a full table scan with no indirection?
+ const fullScan = offset === 0 + && table.numRows() <= limit + && !table.isFiltered() + && !table.isOrdered(); + + return tableFromColumns(names.map(name => { + const values = data[name]; + const type = types[name]; + const isArray = isArrayType(values); + let col; + if (fullScan && isArray) { + // use faster path, take advantage of any typed arrays + col = columnFromArray(values, type, opt); + } else { + // use table scan method to visit column values + const get = isArray + ? row => values[row] + : row => values.at(row); + col = columnFromValues( + length, + visit => table.scan(row => visit(get(row)), true, limit, offset), + type, + opt + ); + } + return [name, col]; + })); +} diff --git a/src/arrow/types.ts b/src/format/types.ts similarity index 81% rename from src/arrow/types.ts rename to src/format/types.ts index 1f64c76..8142c2a 100644 --- a/src/arrow/types.ts +++ b/src/format/types.ts @@ -1,6 +1,6 @@ -import { ExtractionOptions } from '@uwdata/flechette'; -import { DataType } from 'apache-arrow'; +import { ExtractionOptions, TableBuilderOptions } from '@uwdata/flechette'; import type { ColumnType, Select } from '../table/types.js'; +import { ColumnSelectOptions } from './util.js'; /** Arrow input data as bytes or loaded table. */ export type ArrowInput = @@ -59,26 +59,19 @@ export interface ArrowOptions extends ExtractionOptions { } /** Options for Arrow encoding. */ -export interface ArrowFormatOptions { - /** The maximum number of rows to include (default `Infinity`). */ - limit?: number; - /** - * The row offset (default `0`) indicating how many initial rows to skip. - */ - offset?: number; +export interface ArrowFormatOptions extends TableBuilderOptions { /** * Ordered list of column names to include. If function-valued, the * function should accept a dataset as input and return an array of * column name strings. If unspecified all columns are included.
*/ - columns?: string[] | ((data: any) => string[]); + columns?: ColumnSelectOptions; + /** The maximum number of rows to include (default `Infinity`). */ + limit?: number; /** - * The Arrow data types to use. If specified, the input should be an - * object with column names for keys and Arrow data types for values. - * If a column type is not explicitly provided, type inference will be - * performed to guess an appropriate type. + * The row offset (default `0`) indicating how many initial rows to skip. */ - types?: Record; + offset?: number; } /** Options for Arrow IPC encoding. */ diff --git a/src/format/util.js b/src/format/util.js index 082fa59..c1105b4 100644 --- a/src/format/util.js +++ b/src/format/util.js @@ -26,9 +26,15 @@ import isFunction from '../util/is-function.js'; * @typedef {Object.} ColumnAlignOptions */ +/** + * Return a potentially filtered list of column names. + * @param {import('../table/Table.js').Table} table A data table. + * @param {ColumnSelectOptions} names The column names to select. + * @returns {string[]} The selected column names. + */ export function columns(table, names) { - return isFunction(names) - ? names(table) + // @ts-ignore + return isFunction(names) ? 
names(table) : names || table.columnNames(); } diff --git a/src/table/ColumnTable.js b/src/table/ColumnTable.js index 4332065..a903039 100644 --- a/src/table/ColumnTable.js +++ b/src/table/ColumnTable.js @@ -31,8 +31,8 @@ import { unroll } from '../verbs/index.js'; import { count } from '../op/op-api.js'; -import toArrow from '../arrow/to-arrow.js'; -import toArrowIPC from '../arrow/to-arrow-ipc.js'; +import toArrow from '../format/to-arrow.js'; +import toArrowIPC from '../format/to-arrow-ipc.js'; import toCSV from '../format/to-csv.js'; import toHTML from '../format/to-html.js'; import toJSON from '../format/to-json.js'; @@ -785,10 +785,10 @@ export class ColumnTable extends Table { // -- Table Output Formats ------------------------------------------------ /** - * Format this table as an Apache Arrow table. - * @param {import('../arrow/types.js').ArrowFormatOptions} [options] + * Format this table as a Flechette Arrow table. + * @param {import('../format/types.js').ArrowFormatOptions} [options] * The Arrow formatting options. - * @return {import('apache-arrow').Table} An Apache Arrow table. + * @return {import('@uwdata/flechette').Table} A Flechette Arrow table. */ toArrow(options) { return toArrow(this, options); @@ -796,7 +796,7 @@ export class ColumnTable extends Table { /** * Format this table as binary data in the Apache Arrow IPC format. - * @param {import('../arrow/types.js').ArrowIPCFormatOptions} [options] + * @param {import('../format/types.js').ArrowIPCFormatOptions} [options] * The Arrow IPC formatting options. * @return {Uint8Array} A new Uint8Array of Arrow-encoded binary data. */ diff --git a/src/table/Table.js b/src/table/Table.js index 734f8e1..ce79b7a 100644 --- a/src/table/Table.js +++ b/src/table/Table.js @@ -602,8 +602,7 @@ export class Table { * @param {boolean} [order=false] Indicates if the table should be * scanned in the order determined by *orderby*. This * argument has no effect if the table is unordered.
- * @property {number} [limit=Infinity] The maximum number of - * objects to create. + * @property {number} [limit=Infinity] The maximum number of rows to scan. * @property {number} [offset=0] The row offset indicating how many * initial rows to skip. */ diff --git a/test/arrow/data-from-test.js b/test/arrow/data-from-test.js deleted file mode 100644 index a320cd9..0000000 --- a/test/arrow/data-from-test.js +++ /dev/null @@ -1,153 +0,0 @@ -import assert from 'node:assert'; -import { - Bool, DateDay, DateMillisecond, Dictionary, Field, FixedSizeList, - Float32, Float64, Int16, Int32, Int64, Int8, List, Struct, Table, - Uint16, Uint32, Uint64, Uint8, Utf8, tableToIPC, vectorFromArray -} from 'apache-arrow'; -import { dataFromScan } from '../../src/arrow/encode/data-from.js'; -import { scanTable } from '../../src/arrow/encode/scan.js'; -import { table } from '../../src/index.js'; - -function dataFromTable(table, column, type, nullable) { - const nrows = table.numRows(); - const scan = scanTable(table, Infinity, 0); - return dataFromScan(nrows, scan, column, type, nullable); -} - -function integerTest(type) { - const values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); -} - -function floatTest(type) { - const values = [0, NaN, 1/3, Math.PI, 7, Infinity, -Infinity]; - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); -} - -function bigintTest(type) { - const values = [0n, 1n, 10n, 100n, 1000n, 10n ** 10n]; - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); -} - -function dateTest(type) { - const date = (y, m = 0, d = 1) => new Date(Date.UTC(y, m, d)); - const values = [ - date(2000, 0, 1), - date(2004, 10, 12), - date(2007, 3, 14), - date(2009, 6, 26), - date(2000, 0, 1), - date(2004, 10, 12), - date(2007, 3, 14), - date(2009, 6, 26), - date(2000, 0, 1), - date(2004, 10,
12) - ]; - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); -} - -function valueTest(type, values, msg) { - const dt = table({ values }); - const u = dataFromTable(dt, dt.column('values'), type); - const v = vectorFromArray(values, type); - const tu = new Table({ values: u }); - const tv = new Table({ values: v }); - - assert.equal( - tableToIPC(tu).join(' '), - tableToIPC(tv).join(' '), - 'serialized data matches' + msg - ); -} - -describe('dataFrom', () => { - it('encodes dictionary data', () => { - const type = new Dictionary(new Utf8(), new Uint32(), 0); - const values = ['a', 'b', 'FOO', 'b', 'a']; - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); - }); - - it('encodes boolean data', () => { - const type = new Bool(); - const values = [true, false, false, true, false]; - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); - }); - - it('encodes date millis data', () => { - dateTest(new DateMillisecond()); - }); - - it('encodes date day data', () => { - dateTest(new DateDay()); - }); - - it('encodes int8 data', () => { - integerTest(new Int8()); - }); - - it('encodes int16 data', () => { - integerTest(new Int16()); - }); - - it('encodes int32 data', () => { - integerTest(new Int32()); - }); - - it('encodes int64 data', () => { - bigintTest(new Int64()); - }); - - it('encodes uint8 data', () => { - integerTest(new Uint8()); - }); - - it('encodes uint16 data', () => { - integerTest(new Uint16()); - }); - - it('encodes uint32 data', () => { - integerTest(new Uint32()); - }); - - it('encodes uint64 data', () => { - bigintTest(new Uint64()); - }); - - it('encodes float32 data', () => { - floatTest(new Float32()); - }); - - it('encodes float64 data', () => { - floatTest(new Float64()); - }); - - it('encodes list data', () => { - const field = Field.new({ name: 'value', type: new Int32() }); - const type = 
new List(field); - const values = [[1, 2], [3], [4, 5, 6], [7]]; - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); - }); - - it('encodes fixed size list data', () => { - const field = Field.new({ name: 'value', type: new Int32() }); - const type = new FixedSizeList(1, field); - const values = [[1], [2], [3], [4], [5], [6]]; - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); - }); - - it('encodes struct data', () => { - const key = Field.new({ name: 'key', type: new Int32() }); - const type = new Struct([key]); - const values = [1, 2, 3, null, 5, 6].map(key => ({ key })); - valueTest(type, values, ', without nulls'); - valueTest(type, [null, ...values, null], ', with nulls'); - }); -}); diff --git a/test/arrow/profiler-test.js b/test/arrow/profiler-test.js deleted file mode 100644 index 900e0ed..0000000 --- a/test/arrow/profiler-test.js +++ /dev/null @@ -1,83 +0,0 @@ -import assert from 'node:assert'; -import { - Float64, Int16, Int32, Int64, Int8, - Uint16, Uint32, Uint64, Uint8, util -} from 'apache-arrow'; -import { profiler } from '../../src/arrow/encode/profiler.js'; - -function profile(array) { - const p = profiler(); - array.forEach(value => p.add(value)); - return p; -} - -function typeCompare(a, b) { - return util.compareTypes(a, b); -} - -describe('profiler', () => { - it('infers integer types', () => { - const types = { - uint8: new Uint8(), - uint16: new Uint16(), - uint32: new Uint32(), - int8: new Int8(), - int16: new Int16(), - int32: new Int32() - }; - - const dt = { - uint8: [0, 1 << 7, 1 << 8 - 1], - uint16: [0, 1 << 15, 1 << 16 - 1], - uint32: [0, 2 ** 31 - 1, 2 ** 32 - 1], - int8: [-(1 << 7), 0, (1 << 7) - 1], - int16: [-(1 << 15), 0, (1 << 15) - 1], - int32: [(1 << 31), 0, 2 ** 31 - 1] - }; - - Object.keys(dt).forEach(name => { - const type = profile(dt[name]).type(); - assert.ok(typeCompare(types[name], type), `${name} type`); - }); - 
- const float = new Float64(); - assert.ok( - typeCompare(float, profile([0, 1, 2 ** 32]).type()), - 'overflow to float64 type' - ); - assert.ok( - typeCompare(float, profile([(1 << 31), 0, 2 ** 32 - 1]).type()), - 'overflow to float64 type' - ); - assert.ok( - typeCompare(float, profile([(1 << 31) - 1, 0, 1]).type()), - 'underflow to float64 type' - ); - }); - - it('infers bigint types', () => { - const types = { - int64: new Int64(), - uint64: new Uint64() - }; - - const dt = { - int64: [-(2n ** 63n), 0n, (2n ** 63n) - 1n], - uint64: [0n, 1n, 2n ** 64n - 1n] - }; - - Object.keys(dt).forEach(name => { - const type = profile(dt[name]).type(); - assert.ok(typeCompare(types[name], type), `${name} type`); - }); - - assert.throws( - () => profile([0n, 1n, 2n ** 64n]).type(), - 'throws on overflow' - ); - assert.throws( - () => profile([-(2n ** 63n), 0n, 2n ** 63n]).type(), - 'throws on underflow' - ); - }); -}); diff --git a/test/arrow/from-arrow-test.js b/test/format/from-arrow-test.js similarity index 87% rename from test/arrow/from-arrow-test.js rename to test/format/from-arrow-test.js index 826b85b..6a22da5 100644 --- a/test/arrow/from-arrow-test.js +++ b/test/format/from-arrow-test.js @@ -1,23 +1,18 @@ import assert from 'node:assert'; -import { Utf8 } from 'apache-arrow'; +import { tableFromArrays } from 'apache-arrow'; import tableEqual from '../table-equal.js'; -import fromArrow from '../../src/arrow/from-arrow.js'; -import toArrow from '../../src/arrow/to-arrow.js'; -import toArrowIPC from '../../src/arrow/to-arrow-ipc.js'; +import fromArrow from '../../src/format/from-arrow.js'; +import toArrow from '../../src/format/to-arrow.js'; import { not } from '../../src/helpers/selection.js'; import { table } from '../../src/index-browser.js'; -import { tableFromIPC } from '@uwdata/flechette'; +import { Type, utf8 } from '@uwdata/flechette'; -function arrowTable(data, types) { - return toArrow(table(data), { types }); -} - -function arrowIPC(data, types) { - return 
toArrowIPC(table(data), { types }); +function arrowTable(data) { + return tableFromArrays(data); } function flechetteTable(data, types) { - return tableFromIPC(arrowIPC(data, types)); + return toArrow(table(data), { types }); } function getType(table, name) { @@ -42,7 +37,7 @@ describe('fromArrow', () => { x: ['cc', 'dd', 'cc', 'dd', 'cc'], y: ['aa', 'aa', null, 'bb', 'bb'] }; - const at = arrowTable(data, { v: new Utf8() }); + const at = arrowTable(data); const dt = fromArrow(at); tableEqual(dt, data, 'arrow data'); @@ -92,7 +87,7 @@ describe('fromArrow', () => { x: ['cc', 'dd', 'cc', 'dd', 'cc'], y: ['aa', 'aa', null, 'bb', 'bb'] }; - const at = flechetteTable(data, { v: new Utf8() }); + const at = flechetteTable(data, { v: utf8() }); const dt = fromArrow(at); tableEqual(dt, data, 'arrow data'); @@ -130,7 +125,7 @@ describe('fromArrow', () => { const l = [[1, 2, 3], null, [4, 5]]; const at = flechetteTable({ l }); - if (getType(at, 'l').typeId !== 12) { + if (getType(at, 'l').typeId !== Type.List) { assert.fail('Arrow column should have List type'); } tableEqual(fromArrow(at), { l }, 'extract Arrow list'); @@ -140,7 +135,7 @@ describe('fromArrow', () => { const l = [[1, 2], null, [4, 5]]; const at = flechetteTable({ l }); - if (getType(at, 'l').typeId !== 16) { + if (getType(at, 'l').typeId !== Type.FixedSizeList) { assert.fail('Arrow column should have FixedSizeList type'); } tableEqual(fromArrow(at), { l }, 'extract Arrow list'); @@ -150,7 +145,7 @@ describe('fromArrow', () => { const s = [{ foo: 1, bar: [2, 3] }, null, { foo: 2, bar: [4] }]; const at = flechetteTable({ s }); - if (getType(at, 's').typeId !== 13) { + if (getType(at, 's').typeId !== Type.Struct) { assert.fail('Arrow column should have Struct type'); } tableEqual(fromArrow(at), { s }, 'extract Arrow struct'); @@ -160,7 +155,7 @@ describe('fromArrow', () => { const s = [{ foo: 1, bar: { bop: 2 } }, { foo: 2, bar: { bop: 3 } }]; const at = flechetteTable({ s }); - if (getType(at, 's').typeId !== 
13) { + if (getType(at, 's').typeId !== Type.Struct) { assert.fail('Arrow column should have Struct type'); } tableEqual(fromArrow(at), { s }, 'extract nested Arrow struct'); diff --git a/test/format/to-arrow-ipc-test.js b/test/format/to-arrow-ipc-test.js new file mode 100644 index 0000000..a90ff02 --- /dev/null +++ b/test/format/to-arrow-ipc-test.js @@ -0,0 +1,64 @@ +import assert from 'node:assert'; +import { table, toArrowIPC } from '../../src/index.js'; + +describe('toArrowIPC', () => { + it('generates the correct output for file option', () => { + const dt = table({ + w: ['a', 'b', 'a'], + x: [1, 2, 3], + y: [1.6181, 2.7182, 3.1415], + z: [true, true, false] + }); + + const buffer = toArrowIPC(dt, { format: 'file' }); + + assert.deepEqual( + buffer.slice(0, 8), + new Uint8Array([65, 82, 82, 79, 87, 49, 0, 0]) + ); + }); + + it('generates the correct output for stream option', () => { + const dt = table({ + w: ['a', 'b', 'a'], + x: [1, 2, 3], + y: [1.6181, 2.7182, 3.1415], + z: [true, true, false] + }); + + const buffer = toArrowIPC(dt, { format: 'stream' }); + + assert.deepEqual( + buffer.slice(0, 4), + new Uint8Array([255, 255, 255, 255]) + ); + }); + + it('defaults to using stream option', () => { + const dt = table({ + w: ['a', 'b', 'a'], + x: [1, 2, 3], + y: [1.6181, 2.7182, 3.1415], + z: [true, true, false] + }); + + const buffer = toArrowIPC(dt); + + assert.deepEqual( + buffer.slice(0, 4), + new Uint8Array([255, 255, 255, 255]) + ); + }); + + it('throws an error if the format is not stream or file', () => { + assert.throws(() => { + const dt = table({ + w: ['a', 'b', 'a'], + x: [1, 2, 3], + y: [1.6181, 2.7182, 3.1415], + z: [true, true, false] + }); + toArrowIPC(dt, { format: 'nonsense' }); + }, 'Unrecognized output format'); + }); +}); diff --git a/test/arrow/to-arrow-test.js b/test/format/to-arrow-test.js similarity index 56% rename from test/arrow/to-arrow-test.js rename to test/format/to-arrow-test.js index 1858f4d..13cf059 100644 --- 
a/test/arrow/to-arrow-test.js +++ b/test/format/to-arrow-test.js @@ -1,10 +1,13 @@ import assert from 'node:assert'; import { readFileSync } from 'node:fs'; import { - Int8, Type, tableFromIPC, tableToIPC, vectorFromArray -} from 'apache-arrow'; + Type, bool, columnFromArray, dateDay, dateMillisecond, dictionary, + fixedSizeList, float32, float64, int16, int32, int64, int8, list, struct, + tableFromColumns, tableFromIPC, tableToIPC, uint16, uint32, uint64, uint8, + utf8 +} from '@uwdata/flechette'; import { - fromArrow, fromCSV, fromJSON, table, toArrow, toArrowIPC, toJSON + fromArrow, fromCSV, fromJSON, table, toArrow, toJSON } from '../../src/index.js'; function date(year, month=0, date=1, hours=0, minutes=0, seconds=0, ms=0) { @@ -15,8 +18,8 @@ function utc(year, month=0, date=1, hours=0, minutes=0, seconds=0, ms=0) { return new Date(Date.UTC(year, month, date, hours, minutes, seconds, ms)); } -function Int8Vector(data) { - return vectorFromArray(data, new Int8); +function Int8Column(data) { + return columnFromArray(data, int8()); } function isArrayType(value) { @@ -54,6 +57,61 @@ function compareColumns(name, aqt, art) { return err; } +function columnFromTable(table, name, type) { + const at = toArrow(table, { types: { [name]: type } }); + return at.getChild(name); +} + +function integerTest(type) { + const values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); +} + +function floatTest(type) { + const values = [0, NaN, 1/3, Math.PI, 7, Infinity, -Infinity]; + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); +} + +function bigintTest(type) { + const values = [0n, 1n, 10n, 100n, 1000n, 10n ** 10n]; + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); +} + +function dateTest(type) { + const date = (y, m = 0, d = 1) => new Date(Date.UTC(y, m, d)); + const values = [ + 
date(2000, 0, 1), + date(2004, 10, 12), + date(2007, 3, 14), + date(2009, 6, 26), + date(2000, 0, 1), + date(2004, 10, 12), + date(2007, 3, 14), + date(2009, 6, 26), + date(2000, 0, 1), + date(2004, 10, 12) + ]; + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); +} + +function valueTest(type, values, msg) { + const dt = table({ values }); + const u = columnFromTable(dt, 'values', type); + const v = columnFromArray(values, type); + const tu = tableFromColumns({ values: u }); + const tv = tableFromColumns({ values: v }); + + assert.equal( + tableToIPC(tu).join(' '), + tableToIPC(tv).join(' '), + 'serialized data matches' + msg + ); +} + describe('toArrow', () => { it('produces Arrow data for an input table', () => { const dt = table({ @@ -65,7 +123,7 @@ describe('toArrow', () => { d: [date(2000,0,1), date(2000,1,2), null, date(2010,6,9), date(2018,0,1), date(2020,10,3)], u: [utc(2000,0,1), utc(2000,1,2), null, utc(2010,6,9), utc(2018,0,1), utc(2020,10,3)], e: [null, null, null, null, null, null], - v: Int8Vector([10, 9, 8, 7, 6, 5]), + v: Int8Column([10, 9, 8, 7, 6, 5]), a: [[1, null, 3], [4, 5], null, [6, 7], [8, 9], []], l: [[1], [2], [3], [4], [5], [6]], o: [1, 2, 3, null, 5, 6].map(v => v ? 
{ key: v } : null) @@ -78,11 +136,6 @@ describe('toArrow', () => { 'arquero and arrow tables match' ); - assert.equal( - compareTables(dt, toArrow(dt.objects())), 0, - 'object array and arrow tables match' - ); - const buffer = tableToIPC(at); const bt = tableFromIPC(buffer); @@ -107,11 +160,6 @@ describe('toArrow', () => { 'arquero and arrow tables match' ); - assert.equal( - compareTables(st, toArrow(st.objects())), 0, - 'object array and arrow tables match' - ); - const buffer = tableToIPC(at); assert.equal( @@ -125,12 +173,10 @@ describe('toArrow', () => { ); }); - it('handles ambiguously typed data', async () => { - const at = toArrow(table({ x: [1, 2, 3, 'foo'] })); - assert.deepEqual( - [...at.getChild('x')], - ['1', '2', '3', 'foo'], - 'fallback to string type if a string is observed' + it('throws on ambiguously typed data', async () => { + assert.throws( + () => toArrow(table({ x: [1, 2, 3, 'foo'] })), + 'fail on mixed types' ); assert.throws( @@ -207,7 +253,7 @@ describe('toArrow', () => { }); const at = toArrow(dt, { - types: { w: Type.Utf8, x: Type.Int32, y: Type.Float32 } + types: { w: utf8(), x: int32(), y: float32() } }); const types = ['w', 'x', 'y', 'z'].map(name => at.getChild(name).type); @@ -220,66 +266,87 @@ describe('toArrow', () => { assert.equal(types[1].bitWidth, 32, 'int32'); assert.equal(types[2].precision, 1, 'float32'); }); -}); -describe('toArrowIPC', () => { - it('generates the correct output for file option', () => { - const dt = table({ - w: ['a', 'b', 'a'], - x: [1, 2, 3], - y: [1.6181, 2.7182, 3.1415], - z: [true, true, false] - }); + it('encodes dictionary data', () => { + const type = dictionary(utf8(), int32()); + const values = ['a', 'b', 'FOO', 'b', 'a']; + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); + }); - const buffer = toArrowIPC(dt, { format: 'file' }); + it('encodes boolean data', () => { + const type = bool(); + const values = [true, false, false, true, 
false]; + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); + }); - assert.deepEqual( - buffer.slice(0, 8), - new Uint8Array([65, 82, 82, 79, 87, 49, 0, 0]) - ); + it('encodes date millis data', () => { + dateTest(dateMillisecond()); }); - it('generates the correct output for stream option', () => { - const dt = table({ - w: ['a', 'b', 'a'], - x: [1, 2, 3], - y: [1.6181, 2.7182, 3.1415], - z: [true, true, false] - }); + it('encodes date day data', () => { + dateTest(dateDay()); + }); - const buffer = toArrowIPC(dt, { format: 'stream' }); + it('encodes int8 data', () => { + integerTest(int8()); + }); - assert.deepEqual( - buffer.slice(0, 8), - new Uint8Array([255, 255, 255, 255, 88, 1, 0, 0]) - ); + it('encodes int16 data', () => { + integerTest(int16()); }); - it('defaults to using stream option', () => { - const dt = table({ - w: ['a', 'b', 'a'], - x: [1, 2, 3], - y: [1.6181, 2.7182, 3.1415], - z: [true, true, false] - }); + it('encodes int32 data', () => { + integerTest(int32()); + }); - const buffer = toArrowIPC(dt); + it('encodes int64 data', () => { + bigintTest(int64()); + }); - assert.deepEqual( - buffer.slice(0, 8), - new Uint8Array([255, 255, 255, 255, 88, 1, 0, 0]) - ); + it('encodes uint8 data', () => { + integerTest(uint8()); + }); + + it('encodes uint16 data', () => { + integerTest(uint16()); + }); + + it('encodes uint32 data', () => { + integerTest(uint32()); + }); + + it('encodes uint64 data', () => { + bigintTest(uint64()); + }); + + it('encodes float32 data', () => { + floatTest(float32()); + }); + + it('encodes float64 data', () => { + floatTest(float64()); + }); + + it('encodes list data', () => { + const type = list(int32()); + const values = [[1, 2], [3], [4, 5, 6], [7]]; + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); + }); + + it('encodes fixed size list data', () => { + const type = fixedSizeList(int32(), 1); + const values = [[1], 
[2], [3], [4], [5], [6]]; + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); }); - it('throws an error if the format is not stream or file', () => { - assert.throws(() => { - const dt = table({ - w: ['a', 'b', 'a'], - x: [1, 2, 3], - y: [1.6181, 2.7182, 3.1415], - z: [true, true, false] - }); - toArrowIPC(dt, { format: 'nonsense' }); - }, 'Unrecognized output format'); + it('encodes struct data', () => { + const type = struct({ key: int32() }); + const values = [1, 2, 3, null, 5, 6].map(key => ({ key })); + valueTest(type, values, ', without nulls'); + valueTest(type, [null, ...values, null], ', with nulls'); }); }); diff --git a/test/verbs/reify-test.js b/test/verbs/reify-test.js index 228695e..5976942 100644 --- a/test/verbs/reify-test.js +++ b/test/verbs/reify-test.js @@ -1,5 +1,5 @@ import tableEqual from '../table-equal.js'; -import { fromArrow, table, toArrowIPC } from '../../src/index.js'; +import { from, fromArrow, table, toArrowIPC } from '../../src/index.js'; describe('reify', () => { it('materializes filtered and ordered tables', () => { @@ -19,12 +19,12 @@ describe('reify', () => { }); it('preserves binary data', () => { - const data = [ + const data = from([ { a: 1.0, b: 'a', c: [1], d: new Date(2000, 0, 1, 1) }, { a: 1.3, b: 'b', c: [2], d: new Date(2001, 1, 1, 2) }, { a: 1.5, b: 'c', c: [3], d: new Date(2002, 2, 1, 3) }, { a: 1.7, b: 'd', c: [4], d: new Date(2003, 3, 1, 4) } - ]; + ]); const dt = fromArrow(toArrowIPC(data)); const rt = dt.filter(d => d.b !== 'c').reify(); From 0e6d6e506228b36a9f150430a7db58205252c831 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 16 Sep 2024 08:26:32 -0700 Subject: [PATCH 10/16] test: Modify non-existent URLs. 
--- test/format/load-file-url-test.js | 2 +- test/format/load-url-test.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/format/load-file-url-test.js b/test/format/load-file-url-test.js index 5a8bb42..283dfa2 100644 --- a/test/format/load-file-url-test.js +++ b/test/format/load-file-url-test.js @@ -28,7 +28,7 @@ describe('load file url', () => { it('fails on non-existent path', async () => { try { - await load('https://foo.bar.baz/does.not.exist'); + await load('https://foo.bar.test/does.not.exist'); assert.fail('did not fail'); } catch (err) { // eslint-disable-line no-unused-vars assert.ok(true, 'failed appropriately'); diff --git a/test/format/load-url-test.js b/test/format/load-url-test.js index 21edc25..ca54dc3 100644 --- a/test/format/load-url-test.js +++ b/test/format/load-url-test.js @@ -32,7 +32,7 @@ describe('load url', () => { it('fails on non-existent path', async () => { try { - await load('https://foo.bar.baz/does.not.exist'); + await load('https://foo.bar.test/does.not.exist'); assert.fail('did not fail'); } catch (err) { // eslint-disable-line no-unused-vars assert.ok(true, 'failed appropriately'); From 76763498f330d522ca38858272cf99b0daa5e627 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 16 Sep 2024 08:27:08 -0700 Subject: [PATCH 11/16] chore: Bump flechette to v1.0.0. 
--- package-lock.json | 37 ++++++++++++++++++------------------- package.json | 4 ++-- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/package-lock.json b/package-lock.json index 028b73e..2ad9fbe 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "6.0.1", "license": "BSD-3-Clause", "dependencies": { - "@uwdata/flechette": "1.0.0-beta.2", + "@uwdata/flechette": "^1.0.0", "acorn": "^8.12.1", "node-fetch": "^3.3.2" }, @@ -22,7 +22,7 @@ "rimraf": "^6.0.1", "rollup": "^4.21.3", "rollup-plugin-bundle-size": "^1.0.3", - "tape": "^5.8.1", + "tape": "^5.9.0", "typescript": "^5.6.2" } }, @@ -637,9 +637,9 @@ "license": "MIT" }, "node_modules/@uwdata/flechette": { - "version": "1.0.0-beta.2", - "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-1.0.0-beta.2.tgz", - "integrity": "sha512-7RdHjuPZ2HqWUHUdeG0YElrZC55NS8ftQCByuyxYpjFqidbsmx8ntekc6k1uT3vVVUMcyo7pycEhfLKEEkdbsQ==" + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-1.0.0.tgz", + "integrity": "sha512-REQiIMDCjMo6KnSVl+GK8jccnJ4Z5CSzeHeyeuxVBTXgGzAPUxem7et084lyy6Hs5lC0LK0wxPEBWivNNXtusQ==" }, "node_modules/acorn": { "version": "8.12.1", @@ -3113,18 +3113,18 @@ } }, "node_modules/mock-property": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/mock-property/-/mock-property-1.0.3.tgz", - "integrity": "sha512-2emPTb1reeLLYwHxyVx993iYyCHEiRRO+y8NFXFPL5kl5q14sgTK76cXyEKkeKCHeRw35SfdkUJ10Q1KfHuiIQ==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/mock-property/-/mock-property-1.1.0.tgz", + "integrity": "sha512-1/JjbLoGwv87xVsutkX0XJc0M0W4kb40cZl/K41xtTViBOD9JuFPKfyMNTrLJ/ivYAd0aPqu/vduamXO0emTFQ==", "dev": true, - "license": "MIT", "dependencies": { - "define-data-property": "^1.1.1", + "define-data-property": "^1.1.4", "functions-have-names": "^1.2.3", "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.0", - "hasown": "^2.0.0", - "isarray": "^2.0.5" + "has-property-descriptors": 
"^1.0.2", + "hasown": "^2.0.2", + "isarray": "^2.0.5", + "object-inspect": "^1.13.2" }, "engines": { "node": ">= 0.4" @@ -4084,11 +4084,10 @@ } }, "node_modules/tape": { - "version": "5.8.1", - "resolved": "https://registry.npmjs.org/tape/-/tape-5.8.1.tgz", - "integrity": "sha512-pUzADXBVYm5Jkneh9hfXnirADrzQrDA3vddKbPOc/ZLORj4dFQ6GR1KdGWX0/NvOLDcYkVgeMdw78Uf6BzO3KA==", + "version": "5.9.0", + "resolved": "https://registry.npmjs.org/tape/-/tape-5.9.0.tgz", + "integrity": "sha512-czbGgxSVwRlbB3Ly/aqQrNwrDAzKHDW/kVXegp4hSFmR2c8qqm3hCgZbUy1+3QAQFGhPDG7J56UsV1uNilBFCA==", "dev": true, - "license": "MIT", "dependencies": { "@ljharb/resumer": "^0.1.3", "@ljharb/through": "^2.3.13", @@ -4105,8 +4104,8 @@ "inherits": "^2.0.4", "is-regex": "^1.1.4", "minimist": "^1.2.8", - "mock-property": "^1.0.3", - "object-inspect": "^1.13.1", + "mock-property": "^1.1.0", + "object-inspect": "^1.13.2", "object-is": "^1.1.6", "object-keys": "^1.1.1", "object.assign": "^4.1.5", diff --git a/package.json b/package.json index 19d325b..89389f9 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "prepublishOnly": "npm test && npm run build" }, "dependencies": { - "@uwdata/flechette": "1.0.0-beta.2", + "@uwdata/flechette": "^1.0.0", "acorn": "^8.12.1", "node-fetch": "^3.3.2" }, @@ -50,7 +50,7 @@ "rimraf": "^6.0.1", "rollup": "^4.21.3", "rollup-plugin-bundle-size": "^1.0.3", - "tape": "^5.8.1", + "tape": "^5.9.0", "typescript": "^5.6.2" } } From c743889b0a06be68b60abea65ddf0ab8df676586 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 16 Sep 2024 10:27:24 -0700 Subject: [PATCH 12/16] chore: Update dependencies. 
--- package-lock.json | 8 ++++---- package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index 2ad9fbe..f754d74 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "6.0.1", "license": "BSD-3-Clause", "dependencies": { - "@uwdata/flechette": "^1.0.0", + "@uwdata/flechette": "^1.0.1", "acorn": "^8.12.1", "node-fetch": "^3.3.2" }, @@ -637,9 +637,9 @@ "license": "MIT" }, "node_modules/@uwdata/flechette": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-1.0.0.tgz", - "integrity": "sha512-REQiIMDCjMo6KnSVl+GK8jccnJ4Z5CSzeHeyeuxVBTXgGzAPUxem7et084lyy6Hs5lC0LK0wxPEBWivNNXtusQ==" + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-1.0.1.tgz", + "integrity": "sha512-SWpFXBJ7A4FydeYLLX+SWgr+/VgmZozL5FzOUPSrOcVbo/nSw8ZcyiMIMNiJzkv1Sd30DTE8KHWTnn6rLLQ11w==" }, "node_modules/acorn": { "version": "8.12.1", diff --git a/package.json b/package.json index 89389f9..fb26d7b 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "prepublishOnly": "npm test && npm run build" }, "dependencies": { - "@uwdata/flechette": "^1.0.0", + "@uwdata/flechette": "^1.0.1", "acorn": "^8.12.1", "node-fetch": "^3.3.2" }, From d7aa8fd5da95708c90a1b5bedf5cafc87b085eb8 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 16 Sep 2024 10:31:30 -0700 Subject: [PATCH 13/16] feat: Use toArray in toArrow when feasible. 
--- src/format/from-arrow.js | 2 +- src/format/to-arrow.js | 9 ++--- src/format/types.ts | 2 +- src/table/types.ts | 2 ++ test/format/to-arrow-test.js | 66 +++++++++++++++++++++++++++++++++++- 5 files changed, 74 insertions(+), 7 deletions(-) diff --git a/src/format/from-arrow.js b/src/format/from-arrow.js index 23d0921..5898100 100644 --- a/src/format/from-arrow.js +++ b/src/format/from-arrow.js @@ -30,7 +30,7 @@ export default function(input, options) { // build Arquero columns for backing Arrow columns const cols = columnSet(); sel.forEach((name, key) => { - const col = arrow.getChild(key); + const col = /** @type {import('./types.js').ArrowColumn} */ (arrow.getChild(key)); cols.add(name, col.type.typeId === -1 ? dictionary(col) : col); }); diff --git a/src/format/to-arrow.js b/src/format/to-arrow.js index cc9baff..6be25be 100644 --- a/src/format/to-arrow.js +++ b/src/format/to-arrow.js @@ -1,6 +1,7 @@ import { columnFromArray, columnFromValues, tableFromColumns } from '@uwdata/flechette'; import { columns as select } from './util.js'; import isArrayType from '../util/is-array-type.js'; +import isFunction from '../util/is-function.js'; /** * Create an Apache Arrow table for an input table. @@ -11,7 +12,7 @@ import isArrayType from '../util/is-array-type.js'; * @return {import('@uwdata/flechette').Table} An Arrow Table instance. 
*/ export default function(table, options = {}) { - const { columns, limit, offset, types = {}, ...opt } = options; + const { columns, limit = Infinity, offset = 0, types = {}, ...opt } = options; const names = select(table, columns); const length = table.size; const data = table.data(); @@ -27,9 +28,9 @@ export default function(table, options = {}) { const type = types[name]; const isArray = isArrayType(values); let col; - if (fullScan && isArray) { - // use faster path, take advantange of any typed arrays - col = columnFromArray(values, type, opt); + if (fullScan && (isArray || isFunction(values.toArray))) { + // @ts-ignore - use faster path, takes advantange of typed arrays + col = columnFromArray(isArray ? values : values.toArray(), type, opt); } else { // use table scan method to visit column values const get = isArray diff --git a/src/format/types.ts b/src/format/types.ts index 8142c2a..5b9d0c2 100644 --- a/src/format/types.ts +++ b/src/format/types.ts @@ -12,7 +12,7 @@ export type ArrowInput = export interface ArrowColumn extends ColumnType { type: ArrowDataType; nullCount: number; - toArray(): ColumnType + toArray(): ColumnType; } /** Minimal interface for an Arrow data type. */ diff --git a/src/table/types.ts b/src/table/types.ts index 8c889fe..bee007a 100644 --- a/src/table/types.ts +++ b/src/table/types.ts @@ -15,6 +15,8 @@ export interface ColumnType { at(row: number): T; /** Return a column value iterator. */ [Symbol.iterator]() : Iterator; + /** Optional toArray method. */ + toArray?() : ColumnType; } /** A named collection of columns. 
*/ diff --git a/test/format/to-arrow-test.js b/test/format/to-arrow-test.js index 13cf059..d2b7c0f 100644 --- a/test/format/to-arrow-test.js +++ b/test/format/to-arrow-test.js @@ -116,7 +116,7 @@ describe('toArrow', () => { it('produces Arrow data for an input table', () => { const dt = table({ i: [1, 2, 3, undefined, 4, 5], - f: Float32Array.from([1.2, 2.3, 3.0, 3.4, null, 4.5]), + f: Float32Array.from([1.2, 2.3, 3.0, 3.4, -1.3, 4.5]), n: [4.5, 4.4, 3.4, 3.0, 2.3, 1.2], b: [true, true, false, true, null, false], s: ['foo', null, 'bar', 'baz', 'baz', 'bar'], @@ -173,6 +173,70 @@ describe('toArrow', () => { ); }); + it('produces Arrow data from mixed inputs', () => { + const dt0 = table({ + i: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + f: Float32Array.from([1.2, 2.3, 3.0, 3.4, 4.5, 5.4, 6.5, 7.6, 8.7, 9.2]) + }); + + // create an arrow table with multiple record batches + // then derive a new table + const at0 = toArrow(dt0, { maxBatchRows: 4 }); + const dt = fromArrow(at0).derive({ sum: d => d.i + d.f }); + const at = toArrow(dt); + + assert.equal( + compareTables(dt, at), 0, + 'arquero and arrow tables match' + ); + + const buffer = tableToIPC(at); + const bt = tableFromIPC(buffer); + + assert.equal( + compareTables(dt, bt), 0, + 'arquero and serialized arrow tables match' + ); + + assert.equal( + compareTables(fromArrow(bt), at), 0, + 'serialized arquero and arrow tables match' + ); + }); + + it('produces Arrow data from filtered mixed inputs', () => { + const dt0 = table({ + i: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + f: Float32Array.from([1.2, 2.3, 3.0, 3.4, 4.5, 5.4, 6.5, 7.6, 8.7, 9.2]) + }); + + // create an arrow table with multiple record batches + // then derive a new table + const at0 = toArrow(dt0, { maxBatchRows: 4 }); + const dt = fromArrow(at0) + .derive({ sum: d => d.i + d.f }) + .filter(d => d.i % 2 === 0); + const at = toArrow(dt); + + assert.equal( + compareTables(dt, at), 0, + 'arquero and arrow tables match' + ); + + const buffer = tableToIPC(at); + const bt 
= tableFromIPC(buffer); + + assert.equal( + compareTables(dt, bt), 0, + 'arquero and serialized arrow tables match' + ); + + assert.equal( + compareTables(fromArrow(bt), at), 0, + 'serialized arquero and arrow tables match' + ); + }); + it('throws on ambiguously typed data', async () => { assert.throws( () => toArrow(table({ x: [1, 2, 3, 'foo'] })), From e70e8db8855688bfd689000ef13efbdd4190cbd4 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 16 Sep 2024 15:42:53 -0700 Subject: [PATCH 14/16] feat: Use updated flechette columnFromValues. --- package-lock.json | 8 ++++---- package.json | 2 +- src/format/to-arrow.js | 2 -- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/package-lock.json b/package-lock.json index f754d74..7c46b5e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "6.0.1", "license": "BSD-3-Clause", "dependencies": { - "@uwdata/flechette": "^1.0.1", + "@uwdata/flechette": "^1.1.0", "acorn": "^8.12.1", "node-fetch": "^3.3.2" }, @@ -637,9 +637,9 @@ "license": "MIT" }, "node_modules/@uwdata/flechette": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-1.0.1.tgz", - "integrity": "sha512-SWpFXBJ7A4FydeYLLX+SWgr+/VgmZozL5FzOUPSrOcVbo/nSw8ZcyiMIMNiJzkv1Sd30DTE8KHWTnn6rLLQ11w==" + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@uwdata/flechette/-/flechette-1.1.0.tgz", + "integrity": "sha512-UG25ytXRsElGDSIsvCuEUUkX/XImdKe7jnZF2Y5Q9uYYQH6WO60F7yLJ/sNRBJqHdn4A0QhTAOuOJevDZZcF8Q==" }, "node_modules/acorn": { "version": "8.12.1", diff --git a/package.json b/package.json index fb26d7b..a227fc1 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "prepublishOnly": "npm test && npm run build" }, "dependencies": { - "@uwdata/flechette": "^1.0.1", + "@uwdata/flechette": "^1.1.0", "acorn": "^8.12.1", "node-fetch": "^3.3.2" }, diff --git a/src/format/to-arrow.js b/src/format/to-arrow.js index 6be25be..c7ae3f8 100644 --- a/src/format/to-arrow.js +++ 
b/src/format/to-arrow.js @@ -14,7 +14,6 @@ import isFunction from '../util/is-function.js'; export default function(table, options = {}) { const { columns, limit = Infinity, offset = 0, types = {}, ...opt } = options; const names = select(table, columns); - const length = table.size; const data = table.data(); // make a full table scan with no indirection? @@ -37,7 +36,6 @@ export default function(table, options = {}) { ? row => values[row] : row => values.at(row); col = columnFromValues( - length, visit => table.scan(row => visit(get(row)), true, limit, offset), type, opt From 10ffbcb7b30e9b61ddfd316db012f838dd592e45 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 16 Sep 2024 19:08:22 -0700 Subject: [PATCH 15/16] build: Update rollup config. --- rollup.config.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/rollup.config.js b/rollup.config.js index ce0e61b..568a86c 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -8,18 +8,10 @@ const plugins = [ nodeResolve({ modulesOnly: true }) ]; -function onwarn(warning) { - if (warning.code !== 'CIRCULAR_DEPENDENCY') { - // eslint-disable-next-line - console.error(`(!) ${warning.message}`); - } -} - export default [ { input: 'src/index-browser.js', plugins, - onwarn, output: [ { file: 'dist/arquero.js', From 3d98790a73725f9fe4c7531becc99c7e255e8263 Mon Sep 17 00:00:00 2001 From: jheer Date: Mon, 16 Sep 2024 19:08:34 -0700 Subject: [PATCH 16/16] docs: Update docs and jsdoc. --- docs/api/table.md | 2 +- src/table/ColumnTable.js | 2 +- src/table/Table.js | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api/table.md b/docs/api/table.md index 4d6f77d..79187cd 100644 --- a/docs/api/table.md +++ b/docs/api/table.md @@ -581,8 +581,8 @@ Format this table as an [Apache Arrow](https://arrow.apache.org/overview/) table * *offset*: The row offset indicating how many initial rows to skip (default `0`). 
* *types*: An optional object indicating the [Arrow data type](https://idl.uw.edu/flechette/api/data-types) to use for named columns. If specified, the input should be an object with column names for keys and Arrow data types for values. Type values must be instantiated Flechette [DataType](https://idl.uw.edu/flechette/api/data-types) instances (for example, `float64()`,`dateDay()`, `list(int32())` *etc.*). If a column's data type is not explicitly provided, type inference will be performed. * *useBigInt*: Boolean flag (default `false`) to extract 64-bit integer types as JavaScript `BigInt` values. For Flechette tables, the default is to coerce 64-bit integers to JavaScript numbers and raise an error if the number is out of range. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. - * *useDecimalBigInt*: Boolean flag (default `false`) to extract Arrow decimal-type data as BigInt values, where fractional digits are scaled to integers. Otherwise, decimals are (sometimes lossily) converted to floating-point numbers (default). This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. * *useDate*: Boolean flag (default `true`) to convert Arrow date and timestamp values to JavaScript Date objects. Otherwise, numeric timestamps are used. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. + * *useDecimalBigInt*: Boolean flag (default `false`) to extract Arrow decimal-type data as BigInt values, where fractional digits are scaled to integers. Otherwise, decimals are (sometimes lossily) converted to floating-point numbers (default). This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. * *useMap*: Boolean flag (default `false`) to represent Arrow Map data as JavaScript `Map` values. 
For Flechette tables, the default is to produce an array of `[key, value]` arrays. This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. * *useProxy*: Boolean flag (default `false`) to extract Arrow Struct values and table row objects using zero-copy proxy objects that extract data from underlying Arrow batches. The proxy objects can improve performance and reduce memory usage, but do not support property enumeration (`Object.keys`, `Object.values`, `Object.entries`) or spreading (`{ ...object }`). This option is only applied when parsing IPC binary data, otherwise the settings of the provided table instance are used. diff --git a/src/table/ColumnTable.js b/src/table/ColumnTable.js index a903039..1f4be3e 100644 --- a/src/table/ColumnTable.js +++ b/src/table/ColumnTable.js @@ -785,7 +785,7 @@ export class ColumnTable extends Table { // -- Table Output Formats ------------------------------------------------ /** - * Format this table as an Flechette Arrow table. + * Format this table as a Flechette Arrow table. * @param {import('../format/types.js').ArrowFormatOptions} [options] * The Arrow formatting options. * @return {import('@uwdata/flechette').Table} A Flechette Arrow table. diff --git a/src/table/Table.js b/src/table/Table.js index ce79b7a..606d0d2 100644 --- a/src/table/Table.js +++ b/src/table/Table.js @@ -602,7 +602,7 @@ export class Table { * @param {boolean} [order=false] Indicates if the table should be * scanned in the order determined by *orderby*. This * argument has no effect if the table is unordered. - * @property {number} [limit=Infinity] The maximum number of row to scan. + * @property {number} [limit=Infinity] The maximum number of rows to scan. * @property {number} [offset=0] The row offset indicating how many * initial rows to skip. */