diff --git a/.gitignore b/.gitignore index 3c3629e..f06235c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules +dist diff --git a/README.MD b/README.MD index c9cf587..adae119 100644 --- a/README.MD +++ b/README.MD @@ -1,3 +1,6 @@ +[![code style: prettier](https://img.shields.io/badge/code_style-prettier-ff69b4.svg?style=flat-square)](https://github.com/prettier/prettier) + + This is a tool which converts the Google Keep archive to markdown documents with YAML front matter. The Google Keep archive can be obtained from the Google Takeout page. https://www.google.com/settings/takeout/custom/keep diff --git a/main.js b/main.js deleted file mode 100755 index 4cd31cc..0000000 --- a/main.js +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env node - -var fs = require('fs'); -var parser = require('./parser.js'); -var serializer = require('./serializer.js'); - -// TODO: parse argv and get directory file list -// parse output -// - -// Take notes and then convert it into markdown -// -// .content -// -> This needs to be converted to markdown -// -> listitem -// - -if (process.argv.length != 4) { - console.log("Usage main.js inputDir outputDir"); - process.exit(1); -} - -var inputDir = process.argv[2]; -var outputDir = process.argv[3]; - -var convert = function(filePath, outputDir) { - var data = fs.readFileSync(filePath); - var note = parser(data); - var output = serializer.serialize(note); - - output.forEach(d => { - console.log(filePath, d[0]); - fs.writeFileSync(outputDir + '/' + d[0], d[1]); - }); -} - -var files = fs.readdirSync(inputDir); -files = files.filter(t => t.endsWith('.html')); -files.forEach(filePath => convert(inputDir + '/' + filePath, outputDir)) diff --git a/package.json b/package.json index 20b4a1f..fde3384 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "description": "This is a tool which converts the Google Keep archive to markdown documents with YAML front matter. The Google Keep archive can be obtained from the Google Takeout page.", "main": "index.js", "scripts": { - "test": "./node_modules/.bin/mocha -u tdd ./" + "test": "./node_modules/.bin/mocha -u tdd ./dist/*.test.js" }, "author": "", "dependencies": { @@ -16,6 +16,9 @@ "uuid": "^3.0.1" }, "devDependencies": { - "chai": "^3.5.0" + "@types/mocha": "^5.2.5", + "@types/node": "^10.9.4", + "chai": "^3.5.0", + "typescript": "^3.0.3" } } diff --git a/parser.js b/parser.js deleted file mode 100644 index 11a8dba..0000000 --- a/parser.js +++ /dev/null @@ -1,70 +0,0 @@ -var cheerio = require('cheerio'); -var toMarkdown = require('to-markdown'); -var moment = require('moment'); - - -function getImages(node) { - var images = []; - if (node instanceof Array) { - node.forEach(child => { - images = images.concat(getImages(child)); - }); - return images; - } - - if (node.name == 'img') { - var img = node.attribs.src; - images.push(img); - return images; - } - - if (!node.children) - return []; - - node.children.forEach(child => { - images = images.concat(getImages(child)); - }); - return images; -} - -var converter = { - filter: 'div', - /* - filter: function(node) { - return node.className.indexOf('listitem') != -1; - }, - */ - replacement: function(innerHTML, node) { - return innerHTML + ' '; - } -} - -function parse(data) { - var $ = cheerio.load(data); - - var note = {}; - note.content = $(".content").html(); - note.content = toMarkdown(note.content, {converters: [converter]}).trim(); - - // FIXME: What about timezone? - note.date = $(".heading").text().trim(); - note.date = moment(note.date).toISOString(); - - note.title = $(".title").text().trim(); - note.archived = $.contains(".archived"); - - note.tags = $("span.label").toArray(); - note.tags = note.tags.map(function(elem) { - if (!elem.children) { - return null; - } - return elem.children[0].data; - }); - - var attachments = $("div.attachments").toArray(); - note.attachments = getImages(attachments); - - return note; -} - -module.exports = parse; diff --git a/parser.test.js b/parser.test.js deleted file mode 100644 index 11f5ad4..0000000 --- a/parser.test.js +++ /dev/null @@ -1,24 +0,0 @@ -var assert = require('chai').assert; -var parse = require('./parser'); - -suite('Parser', function() { - test("Should parse some basic info", function() { - var data = ` -
- 21 Jun 2016, 22:39:47
-
Ll
-
Hearts of darkness
Water ship down
The Dubliners

-
Reading ListAnother Tag
-
- `; - - console.log(parse); - var note = parse(data); - assert.deepEqual(note.title, "Ll"); - // FIXME: Is the extra space really required? - assert.deepEqual(note.content, "Hearts of darkness \nWater ship down \nThe Dubliners"); - assert.deepEqual(note.tags, ['Reading List', 'Another Tag']); - assert.notOk(note.archived); - assert.deepEqual(note.date, "2016-06-21T20:39:47.000Z"); - }); -}); diff --git a/serializer.js b/serializer.js deleted file mode 100644 index ae3558b..0000000 --- a/serializer.js +++ /dev/null @@ -1,79 +0,0 @@ -var fs = require('fs'); -var uuidV4 = require('uuid/v4'); - -/** - * Takes a note and serializes it into [(fileName, content)] - */ -var serialize = function(note) { - // FIXME: Serialize the attachments! - var out = note.attachments.map(generateAttachment); - var mainOutput = generateOutputFile(note); - out.forEach(a => { - var fileName = a[0]; - mainOutput += '\n![](./' + fileName + ')\n'; - }); - out.push([generateFilename(note), mainOutput]); - return out; -}; - - -var generateYamlFrontMatter = function(note) { - var lines = ['---']; - for (var key in note) { - if (!note.hasOwnProperty(key)) - continue - - if (key == 'content' || key == 'attachments') - continue - - var val = note[key]; - if (val instanceof Array) { - if (val.length == 0) - continue - val = '[' + val.join(', ') + ']'; - } else { - val = '' + val; // convert to string - } - if (val.trim().length == 0 || val == 'false') - continue - - lines.push(key + ': ' + val); - } - lines.push('---') - - return lines.join("\n"); -} - -function generateOutputFile(note) { - return generateYamlFrontMatter(note) + '\n' + note.content; -} - -function generateFilename(note) { - function sanitizeString(str) { - var newStr = ""; - var re = /[A-Za-z0-9- ]/; - for (var key in str) { - var char = str[key]; - if (char.match(re)) - if (char == ' ') - newStr += '-' - else - newStr += char - } - return newStr; - } - return sanitizeString(note.title || note.date || uuidV4()) + '.md'; -} - -function generateAttachment(a) { - var regex = /^data:.+\/(.+);base64,(.*)$/; - var matches = a.substr(0, 100).match(regex); - var ext = matches[1]; - var data = a.substr(a.indexOf('base64') + 7) - var buffer = new Buffer(data, 'base64'); - return [uuidV4() + '.' + ext, buffer]; -} - -module.exports = { - serialize: serialize -}; diff --git a/src/main.ts b/src/main.ts new file mode 100755 index 0000000..6578b91 --- /dev/null +++ b/src/main.ts @@ -0,0 +1,39 @@ +#!/usr/bin/env node + +var fs = require("fs"); +var parser = require("./parser.js"); +var serializer = require("./serializer.js"); + +// TODO: parse argv and get directory file list +// parse output +// + +// Take notes and then convert it into markdown +// +// .content +// -> This needs to be converted to markdown +// -> listitem +// + +if (process.argv.length != 4) { + console.log("Usage main.js inputDir outputDir"); + process.exit(1); +} + +var inputDir = process.argv[2]; +var outputDir = process.argv[3]; + +var convert = function(filePath, outputDir) { + var data = fs.readFileSync(filePath); + var note = parser(data); + var output = serializer.serialize(note); + + output.forEach(d => { + console.log(filePath, d[0]); + fs.writeFileSync(outputDir + "/" + d[0], d[1]); + }); +}; + +var files = fs.readdirSync(inputDir); +files = files.filter(t => t.endsWith(".html")); +files.forEach(filePath => convert(inputDir + "/" + filePath, outputDir)); diff --git a/src/parser.test.ts b/src/parser.test.ts new file mode 100644 index 0000000..d5aab3e --- /dev/null +++ b/src/parser.test.ts @@ -0,0 +1,27 @@ +var assert = require("chai").assert; +import { parse } from "./parser"; + +suite("Parser", function() { + test("Should parse some basic info", function() { + var data = ` +
+ 21 Jun 2016, 22:39:47
+
Ll
+
Hearts of darkness
Water ship down
The Dubliners

+
Reading ListAnother Tag
+
+ `; + + console.log(parse); + var note = parse(data); + assert.deepEqual(note.title, "Ll"); + // FIXME: Is the extra space really required? + assert.deepEqual( + note.content, + "Hearts of darkness \nWater ship down \nThe Dubliners" + ); + assert.deepEqual(note.tags, ["Reading List", "Another Tag"]); + assert.notOk(note.archived); + assert.deepEqual(note.date, "2016-06-21T20:39:47.000Z"); + }); +}); diff --git a/src/parser.ts b/src/parser.ts new file mode 100644 index 0000000..e0debfe --- /dev/null +++ b/src/parser.ts @@ -0,0 +1,78 @@ +var cheerio = require("cheerio"); +var toMarkdown = require("to-markdown"); +var moment = require("moment"); + +function getImages(node) { + var images = []; + if (node instanceof Array) { + node.forEach(child => { + images = images.concat(getImages(child)); + }); + return images; + } + + if (node.name == "img") { + var img = node.attribs.src; + images.push(img); + return images; + } + + if (!node.children) return []; + + node.children.forEach(child => { + images = images.concat(getImages(child)); + }); + return images; +} + +var converter = { + filter: "div", + /* + filter: function(node) { + return node.className.indexOf('listitem') != -1; + }, + */ + replacement: function(innerHTML, node) { + return innerHTML + " "; + } +}; + +export interface Note { + content: string; + title: string; + date: string; + archived: boolean; + tags: string[]; + attachments: string[]; +} + +export function parse(data) { + var $ = cheerio.load(data); + + var note = {} as Note; + note.content = $(".content").html(); + note.content = toMarkdown(note.content, { converters: [converter] }).trim(); + + // FIXME: What about timezone? + note.date = $(".heading") + .text() + .trim(); + note.date = moment(note.date).toISOString(); + + note.title = $(".title") + .text() + .trim(); + note.archived = $.contains(".archived"); + + note.tags = $("span.label").toArray().map(function(elem) { + if (!elem.children) { + return null; + } + return elem.children[0].data; + }); + + var attachments = $("div.attachments").toArray(); + note.attachments = getImages(attachments); + + return note; +} \ No newline at end of file diff --git a/src/serializer.ts b/src/serializer.ts new file mode 100644 index 0000000..d28f81a --- /dev/null +++ b/src/serializer.ts @@ -0,0 +1,69 @@ +var fs = require("fs"); +var uuidV4 = require("uuid/v4"); + +/** + * Takes a note and serializes it into [(fileName, content)] + */ + +var serialize = function(note) { + // FIXME: Serialize the attachments! + var out = note.attachments.map(generateAttachment); + var mainOutput = generateOutputFile(note); + out.forEach(a => { + var fileName = a[0]; + mainOutput += "\n![](./" + fileName + ")\n"; + }); + out.push([generateFilename(note), mainOutput]); + return out; +}; + +var generateYamlFrontMatter = function(note) { + var lines = ["---"]; + for (var key in note) { + if (!note.hasOwnProperty(key)) continue; + + if (key == "content" || key == "attachments") continue; + + var val = note[key]; + if (val instanceof Array) { + if (val.length == 0) continue; + val = "[" + val.join(", ") + "]"; + } else { + val = "" + val; // convert to string + } + if (val.trim().length == 0 || val == "false") continue; + + lines.push(key + ": " + val); + } + lines.push("---"); + + return lines.join("\n"); +}; + +function generateOutputFile(note) { + return generateYamlFrontMatter(note) + "\n" + note.content; +} + +function generateFilename(note) { + function sanitizeString(str) { + var newStr = ""; + var re = /[A-Za-z0-9- ]/; + for (var key in str) { + var char = str[key]; + if (char.match(re)) + if (char == " ") newStr += "-"; + else newStr += char; + } + return newStr; + } + return sanitizeString(note.title || note.date || uuidV4()) + ".md"; +} + +function generateAttachment(a) { + var regex = /^data:.+\/(.+);base64,(.*)$/; + var matches = a.substr(0, 100).match(regex); + var ext = matches[1]; + var data = a.substr(a.indexOf("base64") + 7); + var buffer = new Buffer(data, "base64"); + return [uuidV4() + "." + ext, buffer]; +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..25b0d31 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,8 @@ +{ + "compilerOptions": { + "outDir": "./dist", + "allowJs": true, + "target": "es5" + }, + "include": ["./src/**/*"] +}