Skip to content
This repository was archived by the owner on Mar 1, 2025. It is now read-only.

feat: implement emoji generation #4

Merged
merged 22 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
7f4278a
feat: implement emoji and variation generation commands with error ha…
luxass Feb 15, 2025
4d9672f
feat: enhance emoji data handling with version extraction and improve…
luxass Feb 16, 2025
c0717dd
feat: add shortcode generator
luxass Feb 16, 2025
ed009eb
feat: rename groups to metadata in MojiAdapter and update related com…
luxass Feb 16, 2025
eea2c37
feat: enhance emoji metadata handling with version extraction and imp…
luxass Feb 16, 2025
7eb8130
feat: add unicode version to adapter context
luxass Feb 16, 2025
b8dadc5
chore: lint
luxass Feb 16, 2025
071afb2
refactor: remove errors and merge into base adapter
luxass Feb 16, 2025
c49edcf
feat: add unicodeNames function to fetch and parse Unicode names for …
luxass Feb 16, 2025
b314647
refactor: migrate to a single generate command
luxass Feb 16, 2025
aa607a1
feat: add consola for improved logging throughout the application
luxass Feb 16, 2025
b25d8e7
feat: implement shortcodes functionality and update emoji data structure
luxass Feb 16, 2025
4c21308
feat: restructure emoji handling to include emojiData and flatten emo…
luxass Feb 16, 2025
49bf2cf
chore: dump
luxass Feb 16, 2025
3ce15b5
feat: update emoji version handling to correctly map unicode versions…
luxass Feb 16, 2025
fdf063f
chore: move lockfile out of utils
luxass Feb 20, 2025
5e9706e
chore: update test path
luxass Feb 20, 2025
7c5b445
feat: add README files for Unicode Emoji and Character Database draft…
luxass Feb 20, 2025
cae0e52
refactor: rename extractVersion to extractVersionFromReadme for clarity
luxass Feb 20, 2025
d6453b5
chore: lint
luxass Feb 20, 2025
8730e21
feat: add vitest-fetch-mock for enhanced testing and setup configuration
luxass Feb 20, 2025
9fcf29e
chore: lint
luxass Feb 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
},
"dependencies": {
"cac": "^6.7.14",
"consola": "^3.4.0",
"farver": "^0.4.0",
"fs-extra": "^11.3.0",
"semver": "^7.7.1",
Expand Down
3 changes: 3 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

138 changes: 123 additions & 15 deletions src/adapter/base.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,40 @@
import type { EmojiGroup } from "../types";
import { defineMojiAdapter } from "../adapter";
import { slugify } from "../utils";
import type { EmojiGroup, EmojiMetadata, EmojiShortcode, ShortcodeProvider } from "../types";
import consola from "consola";
import { red, yellow } from "farver/fast";
import { defineMojiAdapter, MojisNotImplemented } from "../adapter";
import { extractEmojiVersion, extractUnicodeVersion, slugify } from "../utils";
import { fetchCache } from "../utils/cache";

function notImplemented(adapterFn: string) {
return async () => {
throw new Error(`the adapter function ${adapterFn} is not implemented`);
throw new MojisNotImplemented(`the adapter function ${red(adapterFn)} is not implemented`);
};
}

export default defineMojiAdapter({
name: "base",
description: "base adapter",
range: "*",
groups: async ({ version, force }) => {
if (version === "1.0" || version === "2.0" || version === "3.0") {
console.warn(`version ${version} does not have group data`);
return [];
metadata: async (ctx) => {
if (ctx.emojiVersion === "1.0" || ctx.emojiVersion === "2.0" || ctx.emojiVersion === "3.0") {
consola.warn(`skipping metadata for emoji version ${yellow(ctx.emojiVersion)}, as it's not supported.`);
return {
groups: [],
emojiMetadata: {},
};
}

const groups = await fetchCache(`https://unicode.org/Public/emoji/${version}/emoji-test.txt`, {
cacheKey: `v${version}/metadata.json`,
return fetchCache(`https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-test.txt`, {
cacheKey: `v${ctx.emojiVersion}/metadata.json`,
parser(data) {
const lines = data.split("\n");
let currentGroup: EmojiGroup | undefined;

const groups: EmojiGroup[] = [];

// [group-subgroup][hexcode] = metadata
const emojiMetadata: Record<string, Record<string, EmojiMetadata>> = {};

for (const line of lines) {
if (line.trim() === "") {
continue;
Expand All @@ -44,23 +52,123 @@ export default defineMojiAdapter({
currentGroup = group;

groups.push(group);

continue;
} else if (line.startsWith("# subgroup:")) {
const subgroupName = line.slice(11).trim();

if (currentGroup == null) {
throw new Error(`subgroup ${subgroupName} without group`);
}

currentGroup.subgroups.push(subgroupName);
currentGroup.subgroups.push(slugify(subgroupName));

continue;
} else if (line.startsWith("#")) {
continue;
}

const [baseHexcode, trailingLine] = line.split(";");

if (baseHexcode == null || trailingLine == null) {
throw new Error(`invalid line: ${line}`);
}

const [baseQualifier, comment] = trailingLine.split("#");

if (baseQualifier == null || comment == null) {
throw new Error(`invalid line: ${line}`);
}

const hexcode = baseHexcode.trim().replace(/\s+/g, "-");
const qualifier = baseQualifier.trim();

const emojiVersion = extractEmojiVersion(comment.trim());
const [emoji, trimmedComment] = comment.trim().split(` E${emojiVersion} `);

const groupName = currentGroup?.slug ?? "unknown";
const subgroupName = currentGroup?.subgroups[currentGroup.subgroups.length - 1] ?? "unknown";

const metadataGroup = `${groupName}-${subgroupName}`;

if (emojiMetadata[metadataGroup] == null) {
emojiMetadata[metadataGroup] = {};
}

emojiMetadata[metadataGroup][hexcode] = {
group: groupName,
subgroup: subgroupName,
qualifier,
emojiVersion: emojiVersion || null,
unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion),
description: trimmedComment || "",
emoji: emoji || null,
hexcodes: hexcode.split("-"),
};
}

return groups;
return {
groups,
emojiMetadata,
};
},
bypassCache: force,
bypassCache: ctx.force,
});

return groups;
},
sequences: notImplemented("sequences"),
emojis: notImplemented("emojis"),
variations: notImplemented("variations"),
/**
 * Downloads `UnicodeData.txt` through `fetchCache` and turns it into a lookup
 * keyed by the first `;`-separated column, with the second column as value.
 *
 * Blank lines and lines starting with `#` are ignored; a line without a
 * second column aborts parsing with an error.
 */
unicodeNames: async (ctx) => {
  // NOTE(review): the UCD directory is derived from ctx.emojiVersion rather
  // than ctx.unicodeVersion — presumably fine when both share a major version,
  // but confirm for releases where the two version numbers differ.
  const url = `https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/UnicodeData.txt`;

  return fetchCache(url, {
    cacheKey: `v${ctx.emojiVersion}/unicode-names.json`,
    parser(data) {
      const names: Record<string, string> = {};

      for (const rawLine of data.split("\n")) {
        const isBlank = rawLine.trim() === "";
        if (isBlank || rawLine.startsWith("#")) {
          continue;
        }

        const columns = rawLine.split(";").map((column) => column.trim());
        const codePoint = columns[0];
        const characterName = columns[1];

        if (codePoint == null || characterName == null) {
          throw new Error(`invalid line: ${rawLine}`);
        }

        names[codePoint] = characterName;
      }

      return names;
    },
    bypassCache: ctx.force,
  });
},
/**
 * Collects shortcodes for every provider listed in `ctx.providers`.
 *
 * The adapter's own `emojis` function is resolved first and its result is
 * passed on to the provider-specific generator. Only the `"github"` provider
 * is handled in this block.
 *
 * @throws {Error} when `ctx.providers` is empty.
 * @throws {MojisNotImplemented} when the adapter has no `emojis` function.
 */
async shortcodes(ctx) {
  const { providers } = ctx;

  if (providers.length === 0) {
    throw new Error("no shortcode providers specified");
  }

  if (this.emojis == null) {
    throw new MojisNotImplemented("emojis");
  }

  const emojis = await this.emojis(ctx);
  const result: Partial<Record<ShortcodeProvider, EmojiShortcode[]>> = {};

  if (providers.includes("github")) {
    // imported inside the branch so the module is only loaded when requested
    const { generateGitHubShortcodes } = await import("../shortcode/github");

    result.github = await generateGitHubShortcodes({
      emojis,
      force: ctx.force,
      version: ctx.emojiVersion,
    });
  }

  return result;
},
});
34 changes: 25 additions & 9 deletions src/adapter/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { EmojiGroup, EmojiSequence, EmojiVariation } from "../types";
import type { Emoji, EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types";

Check failure on line 1 in src/adapter/index.ts

View workflow job for this annotation

GitHub Actions / build

'EmojiData' is defined but never used
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Remove unused import.

The EmojiData type is imported but never used, as flagged by static analysis.

-import type { Emoji, EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types";
+import type { Emoji, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types";
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
import type { Emoji, EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types";
import type { Emoji, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types";
🧰 Tools
🪛 GitHub Check: build

[failure] 1-1:
'EmojiData' is defined but never used

🪛 ESLint

[error] 1-1: 'EmojiData' is defined but never used.

(unused-imports/no-unused-imports)

🪛 GitHub Actions: CI

[error] 1-1: 'EmojiData' is defined but never used

import semver from "semver";

export interface MojiAdapter {
Expand All @@ -22,11 +22,6 @@
*/
extend?: string;

/**
* A function to generate the emoji groups for the specified version.
*/
groups?: GroupFn;

/**
* A function to generate the emoji sequences for the specified version
*/
Expand All @@ -41,17 +36,31 @@
* A function to generate emoji variations for the specified version.
*/
variations?: EmojiVariationFn;

shortcodes?: ShortcodeFn;

metadata?: MetadataFn;

unicodeNames?: UnicodeNamesFn;
}

export interface BaseAdapterContext {
version: string;
emojiVersion: string;
unicodeVersion: string;
force: boolean;
}

export type GroupFn = (ctx: BaseAdapterContext) => Promise<EmojiGroup[]>;
export type UnicodeNamesFn = (ctx: BaseAdapterContext) => Promise<Record<string, string>>;
export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{ zwj: EmojiSequence[]; sequences: EmojiSequence[] }>;
export type EmojiFn = (ctx: BaseAdapterContext) => Promise<any>;
export type EmojiFn = (ctx: BaseAdapterContext) => Promise<Record<string, Emoji>>;
export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise<EmojiVariation[]>;
export type ShortcodeFn = (ctx: BaseAdapterContext & {
providers: ShortcodeProvider[];
}) => Promise<Partial<Record<ShortcodeProvider, EmojiShortcode[]>>>;
export type MetadataFn = (ctx: BaseAdapterContext) => Promise<{
groups: EmojiGroup[];
emojiMetadata: Record<string, Record<string, EmojiMetadata>>;
}>;

export const ADAPTERS = new Map<string, MojiAdapter>();

Expand Down Expand Up @@ -82,3 +91,10 @@

return adapter;
}

/**
 * Error type thrown when an adapter function has no implementation.
 *
 * Instances carry `name === "MojisNotImplemented"` so they can be told apart
 * from plain `Error`s by name as well as by `instanceof`.
 */
export class MojisNotImplemented extends Error {
  override name = "MojisNotImplemented";

  constructor(message: string) {
    super(message);
  }
}
95 changes: 86 additions & 9 deletions src/adapter/v16.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type { EmojiSequence, EmojiVariation } from "../types";
import type { Emoji, EmojiData, EmojiSequence, EmojiShortcode, EmojiVariation, Property, ShortcodeProvider } from "../types";

Check failure on line 1 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'EmojiShortcode' is defined but never used

Check failure on line 1 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'ShortcodeProvider' is defined but never used
import { defineMojiAdapter } from "../adapter";
import { FEMALE_SIGN, MALE_SIGN } from "../constants";
import { extractEmojiVersion, extractUnicodeVersion } from "../utils";
import { fetchCache } from "../utils/cache";
import { expandHexRange } from "../utils/hexcode";

Expand All @@ -12,12 +13,12 @@
sequences: async (ctx) => {
const [sequences, zwj] = await Promise.all([
{
cacheKey: `v${ctx.version}/sequences.json`,
url: `https://unicode.org/Public/emoji/${ctx.version}/emoji-sequences.txt`,
cacheKey: `v${ctx.emojiVersion}/sequences.json`,
url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`,
},
{
cacheKey: `v${ctx.version}/zwj-sequences.json`,
url: `https://unicode.org/Public/emoji/${ctx.version}/emoji-zwj-sequences.txt`,
cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`,
url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`,
},
].map(async ({ cacheKey, url }) => {
return await fetchCache(url, {
Expand All @@ -28,7 +29,7 @@
const sequences: EmojiSequence[] = [];

for (let line of lines) {
// skip empty line & comments
// skip empty line & comments
if (line.trim() === "" || line.startsWith("#")) {
continue;
}
Expand Down Expand Up @@ -68,11 +69,87 @@
zwj: zwj || [],
};
},
async emojis({ version, force }) {
async emojis(ctx) {
const unicodeNames = await this.unicodeNames!(ctx);
const { sequences, zwj } = await this.sequences!(ctx);

Check failure on line 74 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'sequences' is assigned a value but never used. Allowed unused vars must match /^_/u

Check failure on line 74 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'zwj' is assigned a value but never used. Allowed unused vars must match /^_/u
const metadata = await this.metadata!(ctx);

Check failure on line 75 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'metadata' is assigned a value but never used. Allowed unused vars must match /^_/u
const variations = await this.variations!(ctx);

Check failure on line 76 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'variations' is assigned a value but never used. Allowed unused vars must match /^_/u

const emojis: Record<string, Emoji> = {};

Check failure on line 78 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'emojis' is assigned a value but never used. Allowed unused vars must match /^_/u

const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, {
cacheKey: `v${ctx.emojiVersion}/emoji-data.json`,
parser(data) {
const lines = data.split("\n");

const emojiData: Record<string, EmojiData> = {};

for (const line of lines) {
// skip empty line & comments
if (line.trim() === "" || line.startsWith("#")) {
continue;
}

const lineCommentIndex = line.indexOf("#");
const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : "";

let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4);

if (hex == null || property == null) {
throw new Error(`invalid line: ${line}`);
}

// remove line comment from property
const propertyCommentIndex = property.indexOf("#");
if (propertyCommentIndex !== -1) {
property = property.slice(0, propertyCommentIndex).trim();
}

if (property === "Extended_Pictographic") {
continue;
}

const expandedHex = expandHexRange(hex);
const emojiVersion = extractEmojiVersion(lineComment);

const emoji: EmojiData = {
description: lineComment,
hexcode: "",
gender: null,
properties: [(property as Property) || "Emoji"],
unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion),
emojiVersion,
name: unicodeNames[hex] || "",
};

for (const hex of expandedHex) {
if (emojiData[hex] != null) {
emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])];
} else {
emojiData[hex] = {
...emoji,
hexcode: hex.replace(/\s+/g, "-"),
};
}
}
}

return emojiData;
},
bypassCache: ctx.force,
});

// join names, metadata, variations, sequences, zwj

for (const [hex, data] of Object.entries(emojiData)) {

Check failure on line 144 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'hex' is assigned a value but never used. Allowed unused vars must match /^_/u

Check failure on line 144 in src/adapter/v16.ts

View workflow job for this annotation

GitHub Actions / build

'data' is assigned a value but never used. Allowed unused vars must match /^_/u

}

return {};
},
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Complete the implementation of the emojis function.

The function has several issues:

  1. The empty block at lines 144-146 needs to be implemented.
  2. Several variables are fetched but never used: sequences, zwj, metadata, variations, emojis, hex, data.

Please complete the implementation by using the fetched data to construct and return the emoji objects.

🧰 Tools
🪛 GitHub Check: build

[failure] 74-74:
'sequences' is assigned a value but never used. Allowed unused vars must match /^_/u


[failure] 74-74:
'zwj' is assigned a value but never used. Allowed unused vars must match /^_/u


[failure] 75-75:
'metadata' is assigned a value but never used. Allowed unused vars must match /^_/u


[failure] 76-76:
'variations' is assigned a value but never used. Allowed unused vars must match /^_/u


[failure] 78-78:
'emojis' is assigned a value but never used. Allowed unused vars must match /^_/u


[failure] 144-144:
'hex' is assigned a value but never used. Allowed unused vars must match /^_/u


[failure] 144-144:
'data' is assigned a value but never used. Allowed unused vars must match /^_/u

🪛 ESLint

[error] 74-74: 'sequences' is assigned a value but never used. Allowed unused vars must match /^_/u.

(unused-imports/no-unused-vars)


[error] 74-74: 'zwj' is assigned a value but never used. Allowed unused vars must match /^_/u.

(unused-imports/no-unused-vars)


[error] 75-75: 'metadata' is assigned a value but never used. Allowed unused vars must match /^_/u.

(unused-imports/no-unused-vars)


[error] 76-76: 'variations' is assigned a value but never used. Allowed unused vars must match /^_/u.

(unused-imports/no-unused-vars)


[error] 78-78: 'emojis' is assigned a value but never used. Allowed unused vars must match /^_/u.

(unused-imports/no-unused-vars)


[error] 144-144: 'hex' is assigned a value but never used. Allowed unused vars must match /^_/u.

(unused-imports/no-unused-vars)


[error] 144-144: 'data' is assigned a value but never used. Allowed unused vars must match /^_/u.

(unused-imports/no-unused-vars)


[error] 144-146: Empty block statement.

(no-empty)

variations: async (ctx) => {
return fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-variation-sequences.txt`, {
cacheKey: `v${ctx.version}/variations.json`,
return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, {
cacheKey: `v${ctx.emojiVersion}/variations.json`,
parser(data) {
const lines = data.split("\n");

Expand Down
Loading