Skip to content
This repository was archived by the owner on Mar 1, 2025. It is now read-only.

feat: implement emoji generation #4

Merged
merged 22 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
7f4278a
feat: implement emoji and variation generation commands with error ha…
luxass Feb 15, 2025
4d9672f
feat: enhance emoji data handling with version extraction and improve…
luxass Feb 16, 2025
c0717dd
feat: add shortcode generator
luxass Feb 16, 2025
ed009eb
feat: rename groups to metadata in MojiAdapter and update related com…
luxass Feb 16, 2025
eea2c37
feat: enhance emoji metadata handling with version extraction and imp…
luxass Feb 16, 2025
7eb8130
feat: add unicode version to adapter context
luxass Feb 16, 2025
b8dadc5
chore: lint
luxass Feb 16, 2025
071afb2
refactor: remove errors and merge into base adapter
luxass Feb 16, 2025
c49edcf
feat: add unicodeNames function to fetch and parse Unicode names for …
luxass Feb 16, 2025
b314647
refactor: migrate to a single generate command
luxass Feb 16, 2025
aa607a1
feat: add consola for improved logging throughout the application
luxass Feb 16, 2025
b25d8e7
feat: implement shortcodes functionality and update emoji data structure
luxass Feb 16, 2025
4c21308
feat: restructure emoji handling to include emojiData and flatten emo…
luxass Feb 16, 2025
49bf2cf
chore: dump
luxass Feb 16, 2025
3ce15b5
feat: update emoji version handling to correctly map unicode versions…
luxass Feb 16, 2025
fdf063f
chore: move lockfile out of utils
luxass Feb 20, 2025
5e9706e
chore: update test path
luxass Feb 20, 2025
7c5b445
feat: add README files for Unicode Emoji and Character Database draft…
luxass Feb 20, 2025
cae0e52
refactor: rename extractVersion to extractVersionFromReadme for clarity
luxass Feb 20, 2025
d6453b5
chore: lint
luxass Feb 20, 2025
8730e21
feat: add vitest-fetch-mock for enhanced testing and setup configuration
luxass Feb 20, 2025
9fcf29e
chore: lint
luxass Feb 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 71 additions & 14 deletions src/adapter/base.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,40 @@
import type { EmojiGroup } from "../types";
import type { EmojiGroup, EmojiMetadata } from "../types";
import { red } from "farver/fast";
import { defineMojiAdapter } from "../adapter";
import { slugify } from "../utils";
import { extractEmojiVersion, extractUnicodeVersion, slugify } from "../utils";
import { fetchCache } from "../utils/cache";
import { MojisNotImplemented } from "../utils/errors";

/**
 * Builds a placeholder adapter function that always fails, used for adapter
 * entries that have no real implementation (e.g. `sequences`, `emojis`,
 * `variations`, `shortcodes` on the base adapter).
 *
 * @param adapterFn - name of the adapter function; interpolated into the error message
 * @returns an async function that throws as soon as it is called
 */
function notImplemented(adapterFn: string) {
return async () => {
// NOTE(review): two consecutive throws are present here, which looks like the
// removed and the added side of the diff rendered together. As written, only
// the first throw can execute; the `MojisNotImplemented` throw (with the
// function name wrapped in `red(...)`) is unreachable.
throw new Error(`the adapter function ${adapterFn} is not implemented`);
throw new MojisNotImplemented(`the adapter function ${red(adapterFn)} is not implemented`);
};
}

export default defineMojiAdapter({
name: "base",
description: "base adapter",
range: "*",
groups: async ({ version, force }) => {
if (version === "1.0" || version === "2.0" || version === "3.0") {
console.warn(`version ${version} does not have group data`);
return [];
metadata: async (ctx) => {
if (ctx.version === "1.0" || ctx.version === "2.0" || ctx.version === "3.0") {
console.warn(`version ${ctx.version} does not have group data`);
return {
groups: [],
emojiMetadata: {},
};
}

const groups = await fetchCache(`https://unicode.org/Public/emoji/${version}/emoji-test.txt`, {
cacheKey: `v${version}/metadata.json`,
return fetchCache(`https://unicode.org/Public/emoji/${ctx.version}/emoji-test.txt`, {
cacheKey: `v${ctx.version}/metadata.json`,
parser(data) {
const lines = data.split("\n");
let currentGroup: EmojiGroup | undefined;

const groups: EmojiGroup[] = [];

// [group-subgroup][hexcode] = metadata
const emojiMetadata: Record<string, Record<string, EmojiMetadata>> = {};

for (const line of lines) {
if (line.trim() === "") {
continue;
Expand All @@ -44,23 +52,72 @@ export default defineMojiAdapter({
currentGroup = group;

groups.push(group);

continue;
} else if (line.startsWith("# subgroup:")) {
const subgroupName = line.slice(11).trim();

if (currentGroup == null) {
throw new Error(`subgroup ${subgroupName} without group`);
}

currentGroup.subgroups.push(subgroupName);
currentGroup.subgroups.push(slugify(subgroupName));

continue;
} else if (line.startsWith("#")) {
continue;
}

const [baseHexcode, trailingLine] = line.split(";");

if (baseHexcode == null || trailingLine == null) {
throw new Error(`invalid line: ${line}`);
}

const [baseQualifier, comment] = trailingLine.split("#");

if (baseQualifier == null || comment == null) {
throw new Error(`invalid line: ${line}`);
}

const hexcode = baseHexcode.trim().replace(/\s+/g, "-");
const qualifier = baseQualifier.trim();

const emojiVersion = extractEmojiVersion(comment.trim());
const [emoji, trimmedComment] = comment.trim().split(` E${emojiVersion} `);

const groupName = currentGroup?.slug ?? "unknown";
const subgroupName = currentGroup?.subgroups[currentGroup.subgroups.length - 1] ?? "unknown";

const metadataGroup = `${groupName}-${subgroupName}`;

if (emojiMetadata[metadataGroup] == null) {
emojiMetadata[metadataGroup] = {};
}

emojiMetadata[metadataGroup][hexcode] = {
group: groupName,
subgroup: subgroupName,
qualifier,
emojiVersion: emojiVersion || null,
// TODO: use correct unicode version
unicodeVersion: extractUnicodeVersion(emojiVersion, "16.0"),
description: trimmedComment || "",
emoji: emoji || null,
hexcodes: hexcode.split("-"),
};
}

return groups;
return {
groups,
emojiMetadata,
};
},
bypassCache: force,
bypassCache: ctx.force,
});

return groups;
},
sequences: notImplemented("sequences"),
emojis: notImplemented("emojis"),
variations: notImplemented("variations"),
shortcodes: notImplemented("shortcodes"),
});
19 changes: 12 additions & 7 deletions src/adapter/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { EmojiGroup, EmojiSequence, EmojiVariation } from "../types";
import type { EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation } from "../types";
import semver from "semver";

export interface MojiAdapter {
Expand All @@ -22,11 +22,6 @@ export interface MojiAdapter {
*/
extend?: string;

/**
* A function to generate the emoji groups for the specified version.
*/
groups?: GroupFn;

/**
* A function to generate the emoji sequences for the specified version
*/
Expand All @@ -41,17 +36,27 @@ export interface MojiAdapter {
* A function to generate emoji variations for the specified version.
*/
variations?: EmojiVariationFn;

shortcodes?: ShortcodeFn;

metadata?: MetadataFn;
}

/**
 * Context object handed to the adapter functions declared below
 * (`SequenceFn`, `EmojiFn`, `EmojiVariationFn`, `MetadataFn`, and — extended
 * with `providers` — `ShortcodeFn`).
 */
export interface BaseAdapterContext {
/** Version string the adapter is run for; the adapters interpolate it into unicode.org URLs and cache keys. */
version: string;
/** Passed through by the adapters as `bypassCache` to `fetchCache`. */
force: boolean;
}

/**
 * Resolves to the list of emoji groups for the given context.
 * NOTE(review): the `groups` member was dropped from `MojiAdapter` in this change and
 * `MetadataFn` now carries the groups — confirm whether this alias is still needed.
 */
export type GroupFn = (ctx: BaseAdapterContext) => Promise<EmojiGroup[]>;
/** Resolves to two separate lists of sequences: `zwj` and `sequences`. */
export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{ zwj: EmojiSequence[]; sequences: EmojiSequence[] }>;
/** Resolves to the emoji data for the given context; the result shape is currently untyped (`any`). */
export type EmojiFn = (ctx: BaseAdapterContext) => Promise<any>;
/** Resolves to the list of emoji variations for the given context. */
export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise<EmojiVariation[]>;
/**
 * Resolves to the list of shortcodes. Its context is the base context
 * extended with a `providers` string list.
 */
export type ShortcodeFn = (ctx: BaseAdapterContext & {
providers: string[];
}) => Promise<EmojiShortcode[]>;
/**
 * Resolves to the emoji groups together with per-emoji metadata.
 * `emojiMetadata` is a two-level record; the base adapter fills it as
 * `[group-subgroup][hexcode] = metadata`.
 */
export type MetadataFn = (ctx: BaseAdapterContext) => Promise<{
groups: EmojiGroup[];
emojiMetadata: Record<string, Record<string, EmojiMetadata>>;
}>;

export const ADAPTERS = new Map<string, MojiAdapter>();

Expand Down
70 changes: 67 additions & 3 deletions src/adapter/v16.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type { EmojiSequence, EmojiVariation } from "../types";
import type { EmojiData, EmojiSequence, EmojiVariation, Property } from "../types";
import { defineMojiAdapter } from "../adapter";
import { FEMALE_SIGN, MALE_SIGN } from "../constants";
import { extractEmojiVersion, extractUnicodeVersion } from "../utils";
import { fetchCache } from "../utils/cache";
import { expandHexRange } from "../utils/hexcode";

Expand Down Expand Up @@ -28,7 +29,7 @@ export default defineMojiAdapter({
const sequences: EmojiSequence[] = [];

for (let line of lines) {
// skip empty line & comments
// skip empty line & comments
if (line.trim() === "" || line.startsWith("#")) {
continue;
}
Expand Down Expand Up @@ -68,7 +69,70 @@ export default defineMojiAdapter({
zwj: zwj || [],
};
},
async emojis({ version, force }) {
/**
 * Loads `emoji-data.txt` for `ctx.version` through `fetchCache` and parses it
 * into a record of `EmojiData` keyed by each hex value produced by
 * `expandHexRange`.
 *
 * Note that the URL appends `.0` to `ctx.version`, while the cache key uses
 * `ctx.version` unchanged.
 *
 * @param ctx - adapter context; `ctx.version` selects the file and `ctx.force` is passed as `bypassCache`
 * @returns whatever `fetchCache` resolves to — presumably the parser's record; confirm against `fetchCache`
 * @throws Error from the parser when a data line has no `;`-separated property column
 */
async emojis(ctx) {
const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-data.txt`, {
cacheKey: `v${ctx.version}/emoji-data.json`,
parser(data) {
const lines = data.split("\n");

// keyed by the hex strings returned from expandHexRange
const emojiData: Record<string, EmojiData> = {};

for (const line of lines) {
// skip empty line & comments
// NOTE(review): the comment check is on the untrimmed line, so a `#` preceded by
// whitespace is not skipped here and would hit the "invalid line" error below.
if (line.trim() === "" || line.startsWith("#")) {
continue;
}

// everything after the first `#` on the line is kept as the line comment
const lineCommentIndex = line.indexOf("#");
const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : "";

// only the first two `;`-separated columns are destructured; the slice(0, 4)
// upper bound has no further effect on them
let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4);

if (hex == null || property == null) {
throw new Error(`invalid line: ${line}`);
}

// remove line comment from property
const propertyCommentIndex = property.indexOf("#");
if (propertyCommentIndex !== -1) {
property = property.slice(0, propertyCommentIndex).trim();
}

// Extended_Pictographic rows are left out of the result entirely
if (property === "Extended_Pictographic") {
continue;
}

const expandedHex = expandHexRange(hex);
const emojiVersion = extractEmojiVersion(lineComment);

// template entry shared by every hex value expanded from this line
const emoji: EmojiData = {
description: lineComment,
// placeholder; the real hexcode is filled in per expanded value below
hexcode: "",
gender: null,
// falls back to "Emoji" when the property column is an empty string
properties: [(property as Property) || "Emoji"],
// TODO: use correct unicode version
unicodeVersion: extractUnicodeVersion(emojiVersion, "16.0"),
emojiVersion,
};

// this `hex` shadows the column value destructured above
for (const hex of expandedHex) {
if (emojiData[hex] != null) {
// already seen on an earlier line: only merge in the property (deduplicated via Set);
// description and versions stay as first recorded
emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])];
} else {
emojiData[hex] = {
...emoji,
// whitespace runs in the hex value become "-"
hexcode: hex.replace(/\s+/g, "-"),
};
}
}
}

return emojiData;
},
bypassCache: ctx.force,
});

return emojiData;
},
variations: async (ctx) => {
return fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-variation-sequences.txt`, {
Expand Down
Loading