Skip to content

chore: refactor the generateBlogData function (again) (#7607) #7618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 177 additions & 0 deletions apps/site/next-data/generators/__tests__/blogData.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import { normalize } from 'node:path';
import { Readable } from 'node:stream';

import generateBlogData from '@/next-data/generators/blogData.mjs';

// In-memory fixtures describing the markdown files exposed by the mocks below.
// Each entry has the shape `{ path, frontMatterContent }`, where
// `frontMatterContent` is an array of raw frontmatter lines.
// Tests reassign this variable before calling the generator.
let files = [];

// Replaces `createReadStream` with a fake that streams only the frontmatter of
// the matching fixture, wrapped between two `---` separator lines.
jest.mock('node:fs', () => ({
  createReadStream: jest.fn(filename => {
    const file = files.find(f => filename.endsWith(normalize(f.path)));

    // Fail with a descriptive message instead of an opaque TypeError when the
    // requested file has no corresponding fixture
    if (!file) {
      throw new Error(`No fixture registered for file "${filename}"`);
    }

    const readable = new Readable();
    readable.push(`---\n`);
    file.frontMatterContent.forEach(line => readable.push(`${line}\n`));
    readable.push(`---\n`);
    // Signals the end of the stream
    readable.push(null);
    // The fake stream gets a no-op `close` so that callers can invoke it
    readable.close = () => {};
    return readable;
  }),
}));

// Stubs the helpers module so that the markdown file lookup resolves to the
// paths of the in-memory fixtures instead of touching the file system
jest.mock('../../../next.helpers.mjs', () => {
  const getMarkdownFiles = async () => files.map(({ path }) => path);

  return { getMarkdownFiles };
});

/**
 * Tests for the blog data generator.
 *
 * Every test sets the shared `files` fixtures (consumed by the mocks declared
 * above) and then inspects the `categories` and `posts` returned by
 * `generateBlogData`.
 */
describe('generateBlogData', () => {
  it('should return zero posts and only the default "all" category if no md file is found', async () => {
    files = [];

    const blogData = await generateBlogData();

    expect(blogData.categories).toStrictEqual(['all']);
    expect(blogData.posts).toStrictEqual([]);
  });

  it('should collect the data from a single md file if only one is found', async () => {
    files = [
      {
        path: 'pages/en/blog/post1.md',
        frontMatterContent: [
          `date: '2020-01-01T00:00:00.000Z'`,
          `title: POST 1`,
          `author: author`,
        ],
      },
    ];

    const blogData = await generateBlogData();

    expect(blogData.posts).toHaveLength(1);
    const post = blogData.posts[0];
    expect(post.title).toEqual('POST 1');
    expect(post.date).toEqual(new Date('2020-01-01T00:00:00.000Z'));
    expect(post.author).toEqual('author');
  });

  it('should collect the data from multiple md files', async () => {
    files = [
      {
        path: 'pages/en/blog/post1.md',
        frontMatterContent: [
          `date: '2020-01-01T00:00:00.000Z'`,
          `title: POST 1`,
          `author: author-a`,
        ],
      },
      {
        path: 'pages/en/blog/post2.md',
        frontMatterContent: [
          `date: '2020-01-02T00:00:00.000Z'`,
          `title: POST 2`,
          `author: author-b`,
        ],
      },
      {
        path: 'pages/en/blog/post3.md',
        frontMatterContent: [
          // no date specified (the date defaults to the current date)
          `title: POST 3`,
          `author: author-c`,
        ],
      },
    ];

    // The generation is bracketed by two timestamps so that the defaulted date
    // can be checked against a time window; comparing two dates truncated to
    // the second would fail whenever the run crosses a second boundary
    const timeBeforeGeneration = Date.now();
    const blogData = await generateBlogData();
    const timeAfterGeneration = Date.now();

    expect(blogData.posts).toHaveLength(3);
    expect(blogData.posts[0].title).toEqual('POST 1');
    expect(blogData.posts[0].date).toEqual(
      new Date('2020-01-01T00:00:00.000Z')
    );
    expect(blogData.posts[0].author).toEqual('author-a');
    expect(blogData.posts[1].title).toEqual('POST 2');
    expect(blogData.posts[1].date).toEqual(
      new Date('2020-01-02T00:00:00.000Z')
    );
    expect(blogData.posts[1].author).toEqual('author-b');
    expect(blogData.posts[2].title).toEqual('POST 3');
    // `getTime` is used (instead of `setMilliseconds`) to avoid mutating the post
    const defaultedTime = blogData.posts[2].date.getTime();
    expect(defaultedTime).toBeGreaterThanOrEqual(timeBeforeGeneration);
    expect(defaultedTime).toBeLessThanOrEqual(timeAfterGeneration);
    expect(blogData.posts[2].author).toEqual('author-c');
  });

  it('should generate categories based on the categories of md files and their years', async () => {
    files = [
      {
        path: 'pages/en/blog/post1.md',
        frontMatterContent: [
          "date: '2020-01-01T00:00:00.000Z'",
          'category: category-a',
        ],
      },
      {
        path: 'pages/en/blog/sub-dir/post2.md',
        frontMatterContent: [
          "date: '2020-01-02T00:00:00.000Z'",
          'category: category-b',
        ],
      },
      {
        path: 'pages/en/blog/post3.md',
        frontMatterContent: [
          "date: '2021-03-13T00:00:00.000Z'",
          // no category specified (it should be "uncategorized")
        ],
      },
      {
        path: 'pages/en/blog/post4.md',
        frontMatterContent: [
          // no date specified (the date defaults to the current date)
          'category: category-b',
        ],
      },
    ];

    const blogData = await generateBlogData();

    // A copy is sorted so that the returned array is left untouched
    expect([...blogData.categories].sort()).toStrictEqual([
      'all',
      'category-a',
      'category-b',
      'uncategorized',
      'year-2020',
      'year-2021',
      `year-${new Date().getUTCFullYear()}`,
    ]);
  });

  it('should generate slugs based on the md filenames and categories', async () => {
    files = [
      {
        path: 'pages/en/blog/post1.md',
        frontMatterContent: ['category: category-a'],
      },
      {
        path: 'pages/en/blog/post2.md',
        frontMatterContent: ['category: category-b'],
      },
      {
        path: 'pages/en/blog/post3.md',
        frontMatterContent: [
          // no category specified
        ],
      },
    ];

    const blogData = await generateBlogData();

    const slugs = blogData.posts.map(p => p.slug);

    expect(slugs.sort()).toStrictEqual([
      '/blog/category-a/post1',
      '/blog/category-b/post2',
      '/blog/uncategorized/post3',
    ]);
  });
});
107 changes: 52 additions & 55 deletions apps/site/next-data/generators/blogData.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@ import { getMarkdownFiles } from '../../next.helpers.mjs';
// gets the current blog path based on local module path
const blogPath = join(process.cwd(), 'pages/en/blog');

/**
* This contains the metadata of all available blog categories
*/
const blogCategories = new Set(['all']);

/**
* This method parses the source (raw) Markdown content into Frontmatter
* and returns basic information for blog posts
Expand All @@ -39,12 +34,6 @@ const getFrontMatter = (filename, source) => {
// all = (all blog posts), publish year and the actual blog category
const categories = [category, `year-${publishYear}`, 'all'];

// we add the year to the categories set
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we removing these categories? 👀

Copy link
Member Author

@dario-piotrowicz dario-piotrowicz Apr 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nono, these are still being added anyways to the blogCategories array, I'm just doing it from the categories array here: https://github.com/nodejs/nodejs.org/pull/7618/files/7004629e8ff0dfcdc7a57109d5132fe34ef301c3#diff-5ddcf76b865830c001627fef7abd06107b3eafeb210d21737305db2416a279beR95-R98

blogCategories.add(`year-${publishYear}`);

// we add the category to the categories set
blogCategories.add(category);

// this is the url used for the blog post it based on the category and filename
const slug = `/blog/${category}/${basename(filename, extname(filename))}`;

Expand All @@ -63,50 +52,58 @@ const generateBlogData = async () => {
'**/index.md',
]);

return new Promise(resolve => {
const posts = [];
const rawFrontmatter = [];

filenames.forEach(filename => {
// We create a stream for reading a file instead of reading the files
const _stream = createReadStream(join(blogPath, filename));

// We create a readline interface to read the file line-by-line
const _readLine = readline.createInterface({ input: _stream });

// Creates an array of the metadata based on the filename
// This prevents concurrency issues since the for-loop is synchronous
// and these event listeners are not
rawFrontmatter[filename] = [0, ''];

// We read line by line
_readLine.on('line', line => {
rawFrontmatter[filename][1] += `${line}\n`;

// We observe the frontmatter separators
if (line === '---') {
rawFrontmatter[filename][0] += 1;
}

// Once we have two separators we close the readLine and the stream
if (rawFrontmatter[filename][0] === 2) {
_readLine.close();
_stream.close();
}
});

// Then we parse gray-matter on the frontmatter
// This allows us to only read the frontmatter part of each file
// and optimise the read-process as we have thousands of markdown files
_readLine.on('close', () => {
posts.push(getFrontMatter(filename, rawFrontmatter[filename][1]));

if (posts.length === filenames.length) {
resolve({ categories: [...blogCategories], posts });
}
});
});
});
/**
* This contains the metadata of all available blog categories
*/
const blogCategories = new Set(['all']);

const posts = await Promise.all(
filenames.map(
filename =>
new Promise(resolve => {
// We create a stream for reading a file instead of reading the files
const _stream = createReadStream(join(blogPath, filename));

// We create a readline interface to read the file line-by-line
const _readLine = readline.createInterface({ input: _stream });

let rawFrontmatter = '';
let frontmatterSeparatorsEncountered = 0;

// We read line by line
_readLine.on('line', line => {
rawFrontmatter += `${line}\n`;

// We observe the frontmatter separators
if (line === '---') {
frontmatterSeparatorsEncountered++;
}

// Once we have two separators we close the readLine and the stream
if (frontmatterSeparatorsEncountered === 2) {
_readLine.close();
_stream.close();
}
});

// Then we parse gray-matter on the frontmatter
// This allows us to only read the frontmatter part of each file
// and optimise the read-process as we have thousands of markdown files
_readLine.on('close', () => {
const frontMatterData = getFrontMatter(filename, rawFrontmatter);

frontMatterData.categories.forEach(category => {
// we add the category to the categories set
blogCategories.add(category);
});

resolve(frontMatterData);
});
})
)
);

return { categories: [...blogCategories], posts };
};

export default generateBlogData;
Loading