Skip to content

Commit

Permalink
Add batch and split-to-files commands.
Browse files Browse the repository at this point in the history
  • Loading branch information
ashleydavis committed Oct 6, 2023
1 parent a74e719 commit 9aa10b3
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 26 deletions.
8 changes: 8 additions & 0 deletions cli/batch
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env node

'use strict';

// Launcher for the "batch" command: loads the compiled command module and
// hands its entry point and documentation to the shared command runner.
const run = require("../build/lib/command").run;
const batchCommand = require('../build/cli/batch.js');

run(batchCommand.main, batchCommand.documentation);
8 changes: 8 additions & 0 deletions cli/split-to-files
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env node

'use strict';

// Launcher for the "split-to-files" command: loads the compiled command module
// and hands its entry point and documentation to the shared command runner.
const run = require("../build/lib/command").run;
const splitToFilesCommand = require('../build/cli/split-to-files.js');

run(splitToFilesCommand.main, splitToFilesCommand.documentation);
58 changes: 58 additions & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ map --help

# Commands

- [batch](#batch)
- [concat](#concat)
- [distinct](#distinct)
- [filter](#filter)
Expand All @@ -41,6 +42,63 @@ map --help
- [to-yaml](#to-yaml)
- [transform](#transform)

# batch

Outputs the input dataset as an array of arrays, each sub-array containing the specified number of records.

## Syntax

```bash
batch <batch-size> [<input-file>] [<output-file>]
```

## Inputs

Input can be one of the following:

- JSON file
- CSV file
- YAML file
- JSON formatted array on standard input.

## Outputs

Output can be one of the following:

- JSON file
- YAML file
- JSON formatted data on standard output.

## Arguments

- **batch-size** - Specifies the size for each batch.
- **input-file** - Can be an input file name (json, csv or yaml) or a hyphen to indicate reading JSON data from standard input. Can be omitted if there are no further arguments.
- **output-file** - The name of a file (json, csv or yaml) to output the resulting dataset to. Omitting this causes JSON output to be written to standard output.



## Examples

### Reads JSON data from standard input, batches the records into groups of 5 and writes to standard output

```bash
command-that-produces-json | batch 5
```
### Reads data from a file, batches the records into groups of 5 and writes to standard output

```bash
batch 5 input-file.csv
```
### Reads data from a file, batches the records into groups of 5 and writes output to another file

```bash
batch 5 input-file.csv output-file.json
```
### Reads JSON data from standard input, batches the records into groups of 5 and writes output to another file

```bash
command-that-produces-json | batch 5 - output-file.json
```
# concat

Creates an output dataset by concatenating multiple input datasets. Works like `array.concat` in JavaScript.
Expand Down
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "datakit",
"version": "1.0.18",
"version": "1.0.19",
"description": "Simple JavaScript toolkit for data transform across JSON, CSV and YAML.",
"main": "build/index.js",
"types": "build/index.d.ts",
Expand Down Expand Up @@ -47,6 +47,8 @@
"test-err-2": "ts-node ./cli/map --file ./src/test/code/map-test-with-error.js < ./src/test/data/starwars/characters.json"
},
"bin": {
"batch": "./cli/batch",
"split-to-files": "./cli/split-to-files",
"distinct": "./cli/distinct",
"filter": "./cli/filter",
"format-table": "./cli/format-table",
Expand Down
71 changes: 71 additions & 0 deletions src/cli/batch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { inputData, outputData } from "../lib/io";
import { invokeUserFn, loadUserFn } from "./lib/user-fn";
import { verifyInputArray } from "../lib/verify";
import { standardCmdInputs, standardCmdOutputs, standardInputFileHelp, standardOutputFileHelp } from "./lib/help";

//
// Create batches from an array.
//
//
// Splits an array into consecutive batches.
//
// Each batch holds up to `batchSize` elements, in their original order; the
// final batch holds whatever is left over. An empty input produces no batches.
//
// Throws if `batchSize` is not a positive integer. Zero or a negative size
// would otherwise never advance the loop index past the end of the array.
//
function createBatches(array: any[], batchSize: number): any[] {

    if (!Number.isInteger(batchSize) || batchSize <= 0) {
        throw new Error(`Expected batch size to be a positive integer, instead got "${batchSize}".`);
    }

    const batches: any[][] = [];

    for (let i = 0; i < array.length; i += batchSize) {
        // slice clamps the end index, so the last batch may be shorter.
        batches.push(array.slice(i, i + batchSize));
    }

    return batches;
}

//
// Entry point for the batch command.
//
// The first argument is the batch size; it is removed from argv and the
// remaining arguments are passed on to inputData and outputData.
// Throws if the batch size argument is missing or is not a positive integer.
//
export async function main(argv: string[]): Promise<void> {

    if (argv.length < 1) {
        throw new Error(`Expected <number> argument for batch size.`);
    }

    const batchSizeArg = argv.shift()!;

    // Always parse as decimal, and reject values that are not usable batch
    // sizes before any input is read.
    const batchSize = parseInt(batchSizeArg, 10);
    if (Number.isNaN(batchSize) || batchSize <= 0) {
        throw new Error(`Expected batch size to be a positive integer, instead got "${batchSizeArg}".`);
    }

    const data = await inputData(argv);
    verifyInputArray(data, "batch");

    const output = createBatches(data, batchSize);
    await outputData(argv, output);
}

//
// Help/documentation record for the batch command.
//
// Examples write to a JSON file so that they agree with the outputs listed
// below, which do not include CSV.
//
export const documentation = {
    name: "batch",
    desc: "Outputs the input dataset as an array of arrays, each sub-array containing the specified number of records.",
    syntax: "batch <batch-size> [<input-file>] [<output-file>]",
    inputs: standardCmdInputs,
    outputs: [
        "JSON file",
        "YAML file",
        "JSON formatted data on standard output.",
    ],
    args: [
        {
            name: "batch-size",
            desc: "Specifies the size for each batch.",
        },
        standardInputFileHelp,
        standardOutputFileHelp,
    ],
    examples: [
        {
            name: "Reads JSON data from standard input, batches the records into groups of 5 and writes to standard output",
            cmd: 'command-that-produces-json | batch 5',
        },
        {
            name: "Reads data from a file, batches the records into groups of 5 and writes to standard output",
            cmd: 'batch 5 input-file.csv',
        },
        {
            name: "Reads data from a file, batches the records into groups of 5 and writes output to another file",
            cmd: 'batch 5 input-file.csv output-file.json'
        },
        {
            name: "Reads JSON data from standard input, batches the records into groups of 5 and writes output to another file",
            cmd: 'command-that-produces-json | batch 5 - output-file.json'
        },
    ],
};
55 changes: 55 additions & 0 deletions src/cli/split-to-files.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { inputData, outputData, outputDataToFile } from "../lib/io";
import { verifyInputArray } from "../lib/verify";
import { Flags, pullOptions } from "./lib/args";
import { standardCmdInputs, standardInputFileHelp } from "./lib/help";

//
// Entry point for the split-to-files command.
//
// The first argument is the output file name prefix (required) and the second,
// when present, is the output file suffix (defaults to "json"). Both are
// removed from argv before the remaining arguments are passed to inputData.
//
// One file is written per record, named `<prefix>-<n>.<suffix>` with n
// starting at 1. Throws if the prefix argument is missing.
//
export async function main(argv: string[], flags: Flags): Promise<void> {

    if (argv.length < 1) {
        throw new Error(`Expected <output-file-prefix> argument for output file name prefix.`);
    }

    const outputFileNamePrefix = argv.shift()!;
    let outputFileNameSuffix = "json";

    if (argv.length > 0) {
        outputFileNameSuffix = argv.shift()!;
    }

    const data = await inputData(argv);
    verifyInputArray(data, "split-to-files");

    const isCsvOutput = outputFileNameSuffix.toLowerCase() === "csv";

    for (let i = 0; i < data.length; i++) {
        const record = data[i];

        // CSV output must be an array of records, so a single record is
        // wrapped in a one-element array; a record that is already an array
        // is passed through unchanged.
        const fileData = isCsvOutput && !Array.isArray(record) ? [record] : record;

        await outputDataToFile(fileData, `${outputFileNamePrefix}-${i+1}.${outputFileNameSuffix}`);
    }
}

//
// Help/documentation record for the split-to-files command.
//
// The output file prefix is shown as a required argument because the command
// throws when it is missing. The command itself inserts the "-" between the
// prefix and the record number, so the example prefix has no trailing hyphen.
//
export const documentation = {
    name: "split-to-files",
    desc: "Writes a separate file for each record in the dataset. This command does not write to standard output.",
    syntax: "split-to-files <output-file-prefix> [<output-file-suffix>] [<input-file>]",
    inputs: standardCmdInputs,
    outputs: [
        "JSON files",
        "CSV files",
        "YAML files",
    ],
    args: [
        {
            name: "output-file-prefix",
            desc: "Specifies the prefix for output files. Each file created is numbered sequentially, starting with 1.",
        },
        {
            name: "output-file-suffix",
            desc: "Specifies the suffix for output files. Either 'csv', 'json', 'yaml' or 'yml'. Defaults to 'json'.",
        },
        standardInputFileHelp,
    ],
    examples: [
        {
            name: "Reads JSON data from standard input and writes each record to a set of JSON files my-output-1.json, my-output-2.json and so on. Replace 'json' with 'csv' or 'yaml' to write CSV or YAML files instead.",
            cmd: 'command-that-produces-json | split-to-files my-output json',
        },
    ],
};
58 changes: 33 additions & 25 deletions src/lib/io.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,38 @@ export function outputDataToStdout(data: any, expectedFormat?: DataFormat, confi
writeStdout(formattedData);
}

//
// Writes data to a named file.
//
// The format is `expectedFormat` when one is given, otherwise it is worked out
// from the file name by determineFormat. CSV output requires the data to be an
// array and throws otherwise. Any format other than json, csv or yaml throws.
//
export async function outputDataToFile(data: any, fileName: string, expectedFormat?: DataFormat, config?: ICsvOutputConfig) {

    // Fall back to the format implied by the file name when none was requested.
    const format = expectedFormat !== undefined ? expectedFormat : determineFormat(fileName);

    //
    // Write the data using the writer that matches the format.
    //
    switch (format) {
        case "json":
            await writeJson(fileName, data);
            return;

        case "csv":
            if (!isArray(data)) {
                throw new Error(`To write csv file ${fileName}, expect the data to be an array of records. Instead got "${typeof data}".`);
            }
            await writeCsv(fileName, data, config);
            return;

        case "yaml":
            return await writeYaml(fileName, data);

        default:
            throw new Error(`Unexpected data format: ${format}`);
    }
}

//
// Outputs data as JSON to a file or to standard output.
//
Expand All @@ -203,31 +235,7 @@ export async function outputData(argv: string[], data: any, expectedFormat?: Dat
outputDataToStdout(data, expectedFormat, config);
}
else {
if (expectedFormat === undefined) {
//
// Choose data format from the file extension.
//
expectedFormat = determineFormat(fileName);
}

//
// Load data from the requested file.
//
if (expectedFormat === "json") {
await writeJson(fileName, data);
}
else if (expectedFormat === "csv") {
if (!isArray(data)) {
throw new Error(`To write csv file ${fileName}, expect the data to be an array of records. Instead got "${typeof data}".`);
}
await writeCsv(fileName, data, config);
}
else if (expectedFormat === "yaml") {
return await writeYaml(fileName, data);
}
else {
throw new Error(`Unexpected data format: ${expectedFormat}`);
}
await outputDataToFile(data, fileName, expectedFormat, config);
}
}
}
2 changes: 2 additions & 0 deletions src/scripts/create-cli-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import * as fs from 'fs';
import handlebars from 'handlebars';
import { IDocumentation } from '../lib/command';

import { documentation as batch } from "../cli/batch";
import { documentation as concat } from "../cli/concat";
import { documentation as distinct } from "../cli/distinct";
import { documentation as filter } from "../cli/filter";
Expand All @@ -38,6 +39,7 @@ import { documentation as toYaml } from "../cli/to-yaml";
import { documentation as transform } from "../cli/transform";

const docs: IDocumentation[] = [
batch,
concat,
distinct,
filter,
Expand Down

0 comments on commit 9aa10b3

Please sign in to comment.