diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1800114 --- /dev/null +++ b/.gitignore @@ -0,0 +1,174 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc \ No newline at end of file diff --git a/koiwriter/output.svg b/koiwriter/output.svg deleted file mode 100644 index 9cdda28..0000000 --- a/koiwriter/output.svg +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/koiwriter/ripples/images/`.svg b/koiwriter/ripples/images/`.svg deleted file mode 100644 index 04931fc..0000000 --- a/koiwriter/ripples/images/`.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/a.svg b/koiwriter/ripples/images/a.svg deleted file mode 100644 index a67e482..0000000 --- a/koiwriter/ripples/images/a.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/ae.svg b/koiwriter/ripples/images/ae.svg deleted file mode 100644 index ba2ff3a..0000000 --- a/koiwriter/ripples/images/ae.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/ai.svg b/koiwriter/ripples/images/ai.svg deleted file mode 100644 index acb3a25..0000000 --- a/koiwriter/ripples/images/ai.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/au.svg b/koiwriter/ripples/images/au.svg deleted file mode 100644 index 82d1039..0000000 --- a/koiwriter/ripples/images/au.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/b.svg b/koiwriter/ripples/images/b.svg deleted file mode 100644 index 34cf0ee..0000000 --- a/koiwriter/ripples/images/b.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/base.svg b/koiwriter/ripples/images/base.svg deleted file mode 100644 index 90a46be..0000000 --- a/koiwriter/ripples/images/base.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/c.svg b/koiwriter/ripples/images/c.svg deleted file mode 100644 index 29d22e4..0000000 --- a/koiwriter/ripples/images/c.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/ch.svg b/koiwriter/ripples/images/ch.svg deleted file mode 100644 index 
d1a4457..0000000 --- a/koiwriter/ripples/images/ch.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/d.svg b/koiwriter/ripples/images/d.svg deleted file mode 100644 index c2e9236..0000000 --- a/koiwriter/ripples/images/d.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/e.svg b/koiwriter/ripples/images/e.svg deleted file mode 100644 index 63d8709..0000000 --- a/koiwriter/ripples/images/e.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/eu.svg b/koiwriter/ripples/images/eu.svg deleted file mode 100644 index 9ce42ba..0000000 --- a/koiwriter/ripples/images/eu.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/f.svg b/koiwriter/ripples/images/f.svg deleted file mode 100644 index e569ba0..0000000 --- a/koiwriter/ripples/images/f.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/g.svg b/koiwriter/ripples/images/g.svg deleted file mode 100644 index f3b56f5..0000000 --- a/koiwriter/ripples/images/g.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/h.svg b/koiwriter/ripples/images/h.svg deleted file mode 100644 index 67e0624..0000000 --- a/koiwriter/ripples/images/h.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/i.svg b/koiwriter/ripples/images/i.svg deleted file mode 100644 index ba92fbe..0000000 --- a/koiwriter/ripples/images/i.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/ii.svg b/koiwriter/ripples/images/ii.svg deleted file mode 100644 index e02d02d..0000000 --- a/koiwriter/ripples/images/ii.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/j.svg b/koiwriter/ripples/images/j.svg deleted file mode 100644 index 88bf779..0000000 --- a/koiwriter/ripples/images/j.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/k.svg b/koiwriter/ripples/images/k.svg deleted file mode 100644 index 1680799..0000000 --- a/koiwriter/ripples/images/k.svg +++ /dev/null @@ 
-1 +0,0 @@ - diff --git a/koiwriter/ripples/images/kh.svg b/koiwriter/ripples/images/kh.svg deleted file mode 100644 index 0a814d5..0000000 --- a/koiwriter/ripples/images/kh.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/l.svg b/koiwriter/ripples/images/l.svg deleted file mode 100644 index da1a6d8..0000000 --- a/koiwriter/ripples/images/l.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/m.svg b/koiwriter/ripples/images/m.svg deleted file mode 100644 index f64cdac..0000000 --- a/koiwriter/ripples/images/m.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/n.svg b/koiwriter/ripples/images/n.svg deleted file mode 100644 index 1bc8898..0000000 --- a/koiwriter/ripples/images/n.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/o.svg b/koiwriter/ripples/images/o.svg deleted file mode 100644 index 9f3446f..0000000 --- a/koiwriter/ripples/images/o.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/oi.svg b/koiwriter/ripples/images/oi.svg deleted file mode 100644 index ba4ec52..0000000 --- a/koiwriter/ripples/images/oi.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/p.svg b/koiwriter/ripples/images/p.svg deleted file mode 100644 index ade9821..0000000 --- a/koiwriter/ripples/images/p.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/ph.svg b/koiwriter/ripples/images/ph.svg deleted file mode 100644 index c6f1367..0000000 --- a/koiwriter/ripples/images/ph.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/q.svg b/koiwriter/ripples/images/q.svg deleted file mode 100644 index c4569e4..0000000 --- a/koiwriter/ripples/images/q.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/r.svg b/koiwriter/ripples/images/r.svg deleted file mode 100644 index 5ffcd2c..0000000 --- a/koiwriter/ripples/images/r.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/s.svg 
b/koiwriter/ripples/images/s.svg deleted file mode 100644 index 030e978..0000000 --- a/koiwriter/ripples/images/s.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/sh.svg b/koiwriter/ripples/images/sh.svg deleted file mode 100644 index af07714..0000000 --- a/koiwriter/ripples/images/sh.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/t.svg b/koiwriter/ripples/images/t.svg deleted file mode 100644 index 86dd1f6..0000000 --- a/koiwriter/ripples/images/t.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/th.svg b/koiwriter/ripples/images/th.svg deleted file mode 100644 index 7394b1d..0000000 --- a/koiwriter/ripples/images/th.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/ts.svg b/koiwriter/ripples/images/ts.svg deleted file mode 100644 index 53a077d..0000000 --- a/koiwriter/ripples/images/ts.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/u.svg b/koiwriter/ripples/images/u.svg deleted file mode 100644 index e766c98..0000000 --- a/koiwriter/ripples/images/u.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/v.svg b/koiwriter/ripples/images/v.svg deleted file mode 100644 index 507ea17..0000000 --- a/koiwriter/ripples/images/v.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/vh.svg b/koiwriter/ripples/images/vh.svg deleted file mode 100644 index 256357a..0000000 --- a/koiwriter/ripples/images/vh.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/w.svg b/koiwriter/ripples/images/w.svg deleted file mode 100644 index f24c304..0000000 --- a/koiwriter/ripples/images/w.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/x.svg b/koiwriter/ripples/images/x.svg deleted file mode 100644 index 2046080..0000000 --- a/koiwriter/ripples/images/x.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/y.svg b/koiwriter/ripples/images/y.svg deleted file mode 100644 index 
3688cda..0000000 --- a/koiwriter/ripples/images/y.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/koiwriter/ripples/images/z.svg b/koiwriter/ripples/images/z.svg deleted file mode 100644 index 2a931b9..0000000 --- a/koiwriter/ripples/images/z.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9f83c89 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +pandas>=2.2.3 +openpyxl>=3.1.5 \ No newline at end of file diff --git a/koiwriter/koilang.xlsx b/src/koilang.xlsx similarity index 100% rename from koiwriter/koilang.xlsx rename to src/koilang.xlsx diff --git a/koiwriter/koiwriter.ipynb b/src/koiwriter.ipynb similarity index 59% rename from koiwriter/koiwriter.ipynb rename to src/koiwriter.ipynb index 8bbe1cb..c43edce 100644 --- a/koiwriter/koiwriter.ipynb +++ b/src/koiwriter.ipynb @@ -17,14 +17,17 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import json\n", "import pandas as pd\n", "from pathlib import Path\n", - "from typing import List, Union" + "from typing import List, Union\n", + "from utils.tokenization import rules_for_tokens\n", + "from utils.orientation import choose_orientation\n", + "from utils.drawing import create_drawing_XML" ] }, { @@ -36,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -191,7 +194,7 @@ "[2526 rows x 5 columns]" ] }, - "execution_count": 25, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -210,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -220,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -254,10 +257,10 @@ "\n", "Chosen translation:\n", " Word Pronunciation POS \\\n", - "1 aesiivh esɪβ v \n", + "4 li'enak liʔɛnɑk n \n", "\n", " 
Meaning Verb Class \n", - "1 sink; drown; go below; slip under; under(groun... 3 \n" + "4 lizard (lit. sunbathing shield (shield often r... animal \n" ] } ], @@ -315,91 +318,16 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "def rules_for_tokens(word: str) -> List[str]:\n", - " \"\"\"All the words in Koilang only have at most two characters to represent a ripple, \n", - " and it's either a vowel or \"h\", so separating a word into tokens that can map to\n", - " the ripples is very straightforward. If the language evolves with more complex rules,\n", - " I recommend switching to using a parser like PEST or something similar.\n", - "\n", - " Args:\n", - " word (str): Word to tokenize\n", - "\n", - " Returns:\n", - " List[str]: List of tokens as strings\n", - " \"\"\"\n", - " def both_chars_go_together(char_before: str, char_now: str) -> bool:\n", - " # CONSONANTS\n", - " # Ending in 'h'\n", - " if char_now == 'h' and char_before in ['c', 'k', 'p', 's', 't', 'v', ]:\n", - " return True\n", - " # Ending in 's'\n", - " if char_now == 's' and char_before in ['t']:\n", - " return True\n", - " \n", - " # VOWELS\n", - " # Ending in 'e'\n", - " if char_now == 'e' and char_before in ['a']:\n", - " return True\n", - " \n", - " # Ending in 'i'\n", - " if char_now == 'i' and char_before in ['a', 'i', 'o']:\n", - " return True\n", - " \n", - " # Ending in 'e'\n", - " if char_now == 'u' and char_before in ['a', 'e']:\n", - " return True\n", - "\n", - " # None of the previous conditions where met\n", - " return False\n", - " word = word.strip().lower()\n", - " \n", - " tokens = []\n", - " \n", - " # Start at second char\n", - " i = 1\n", - " word_length = len(word)\n", - " last_char_index = word_length - 1\n", - " while i < word_length:\n", - " \n", - " char_before = word[i-1]\n", - " char_now = word[i]\n", - " \n", - " # Add both chars as one token or just add char_before\n", - " if both_chars_go_together(char_before, 
char_now):\n", - " tokens.append(f'{char_before}{char_now}')\n", - " \n", - " if i + 1 == last_char_index: # If only one char left after char_now\n", - " tokens.append(word[i+1]) # Just add that last char\n", - " break # End tokenizing\n", - " else:\n", - " i += 1 # Skip char_now becoming char_before in the next iteration\n", - " \n", - " else:\n", - " tokens.append(char_before) # Add char normally\n", - " if i == last_char_index: # If char_now is the last char in the word\n", - " tokens.append(char_now)\n", - " \n", - " i += 1\n", - " \n", - " return tokens" - ] - }, - { - "cell_type": "code", - "execution_count": 29, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['ae', 's', 'ii', 'vh']" + "['l', 'i', '`', 'e', 'n', 'a', 'k']" ] }, - "execution_count": 29, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -407,9 +335,6 @@ "source": [ "tokens = rules_for_tokens(chosen_translation[\"Word\"].values[0])\n", "# tokens = rules_for_tokens(\"Tsevhu\")\n", - "for i, t in enumerate(tokens):\n", - " if t == \"'\":\n", - " tokens[i] = \"`\"\n", "tokens" ] }, @@ -422,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -433,100 +358,34 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Get how to translate the orientations\n", + "with open(Path(\"ripples/translate_orientation.json\")) as f:\n", + " translate_orientation = json.loads(f.read())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[0, 1, 2, 3]" + "[0, 3, 0, 1, 0, 2, 2]" ] }, - "execution_count": 31, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "translate_orientation = {\n", - " \"0\": {\n", - " \"N\": \"N\",\n", - " \"NE\": \"NE\",\n", - " \"E\": \"E\",\n", - " \"SE\": 
\"SE\",\n", - " \"S\": \"S\",\n", - " \"SW\": \"SW\",\n", - " \"W\": \"W\",\n", - " \"NW\": \"NW\"\n", - " },\n", - " \"1\": {\n", - " \"N\": \"E\",\n", - " \"NE\": \"SE\",\n", - " \"E\": \"S\",\n", - " \"SE\": \"SW\",\n", - " \"S\": \"W\",\n", - " \"SW\": \"NW\",\n", - " \"W\": \"N\",\n", - " \"NW\": \"NE\"\n", - " },\n", - " \"2\": {\n", - " \"N\": \"S\",\n", - " \"NE\": \"SW\",\n", - " \"E\": \"W\",\n", - " \"SE\": \"NW\",\n", - " \"S\": \"N\",\n", - " \"SW\": \"NE\",\n", - " \"W\": \"E\",\n", - " \"NW\": \"SE\"\n", - " },\n", - " \"3\": {\n", - " \"N\": \"W\",\n", - " \"NE\": \"NW\",\n", - " \"E\": \"N\",\n", - " \"SE\": \"NE\",\n", - " \"S\": \"E\",\n", - " \"SW\": \"SE\",\n", - " \"W\": \"S\",\n", - " \"NW\": \"SW\"\n", - " },\n", - "}\n", - "\n", "# Choose orientations based on densities\n", - "orientations = [] \n", - "previous_dense_above_dir = None\n", - "previous_quarters = None\n", - "\n", - "for i, token in enumerate(tokens):\n", - " if i == 0:\n", - " # Only need to record dense above direction as is. The first token is never rotated.\n", - " previous_dense_above_dir: List[str] = density_data[token]['dense_above_dir']\n", - " previous_quarters: int = density_data[token]['quarters']\n", - " orientations.append(0)\n", - " continue\n", - " \n", - " # Have a temporary orientation which starts where the previous ripple ended\n", - " temp_orientation = (orientations[i-1] + previous_quarters) % 4\n", - " \n", - " # CHOOSE ORIENTATION BASE ON DENSITY\n", - " # Where it is dense below, it must not coincide with where the previous one is dense above\n", - " dense_below_dir: List[str] = density_data[token]['dense_below_dir']\n", - " chosen_orientation = 0\n", - " for orientation in range(4):\n", - " temp_dense_below_dir = [translate_orientation[str((orientation + temp_orientation) % 4)][d] for d in dense_below_dir]\n", - " \n", - " # Check intersection. 
Assumes orientation choices have been taken into consideration\n", - " common_dirs = set(temp_dense_below_dir).intersection(set(previous_dense_above_dir))\n", - " if not common_dirs:\n", - " # No clash, valid orientation found\n", - " chosen_orientation = (orientation + temp_orientation) % 4\n", - " break\n", - " \n", - " orientations.append(chosen_orientation)\n", - " \n", - " # Keep information to help next token orientation\n", - " previous_dense_above_dir = [translate_orientation[str(chosen_orientation)][d] for d in density_data[token]['dense_above_dir']]\n", - " previous_quarters: int = density_data[token]['quarters']\n", - "\n", + "orientations = choose_orientation(tokens, density_data, translate_orientation)\n", "orientations\n", " \n", " " @@ -541,38 +400,12 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from xml.dom.minidom import parse\n", - "\n", - "# Create the output file\n", - "with open(Path(\"output.svg\"), 'w') as output_f:\n", - " view_box_val = 500\n", - " view_box_val2 = 500\n", - " output_f.write(f'\\n')\n", - " \n", - " # Obtain the SVG files for the tokens and add it to the image\n", - " num_tokens = len(tokens)\n", - " for i, token in enumerate(tokens):\n", - " doc = parse(str(Path(f\"ripples/images/{token}.svg\")))\n", - " scale_value = 0.65 ** (num_tokens - (i + 1))\n", - " \n", - " output_f.write(f'\\t\\n')\n", - " for child_elem in doc.getElementsByTagName(\"path\"):\n", - " output_f.write(f'\\t\\t{child_elem.toprettyxml()}\\n')\n", - " output_f.write(f'\\t\\n')\n", - " \n", - " output_f.write('\\n')" + "create_drawing_XML(tokens, orientations, path_to_ripple_images=Path('./ripples/images'), scale=0.8, stroke_width=6).write('output.svg')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -591,7 +424,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", - "version": "3.11.3" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/src/output.svg b/src/output.svg new file mode 100644 index 0000000..73eb263 --- /dev/null +++ b/src/output.svg @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/koiwriter/ripples/density_data.json b/src/ripples/density_data.json similarity index 100% rename from koiwriter/ripples/density_data.json rename to src/ripples/density_data.json diff --git a/src/ripples/images/`.svg b/src/ripples/images/`.svg new file mode 100644 index 0000000..e0320a4 --- /dev/null +++ b/src/ripples/images/`.svg @@ -0,0 +1,10 @@ + + + + \ No newline at end of file diff --git a/src/ripples/images/a.svg b/src/ripples/images/a.svg new file mode 100644 index 0000000..9ed7ec3 --- /dev/null +++ b/src/ripples/images/a.svg @@ -0,0 +1,21 @@ + + + + + + + + \ No newline at end of file diff --git a/src/ripples/images/ae.svg b/src/ripples/images/ae.svg new file mode 100644 index 0000000..036c402 --- /dev/null +++ b/src/ripples/images/ae.svg @@ -0,0 +1,10 @@ + + + + \ No newline at end of file diff --git a/src/ripples/images/ai.svg b/src/ripples/images/ai.svg new file mode 100644 index 0000000..21de76c --- /dev/null +++ b/src/ripples/images/ai.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/au.svg b/src/ripples/images/au.svg new file mode 100644 index 0000000..b2cc455 --- /dev/null +++ b/src/ripples/images/au.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/b.svg b/src/ripples/images/b.svg new file mode 100644 index 0000000..d508256 --- /dev/null +++ b/src/ripples/images/b.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/base.svg b/src/ripples/images/base.svg new file mode 100644 index 0000000..df3dd05 --- /dev/null +++ b/src/ripples/images/base.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/c.svg b/src/ripples/images/c.svg new file mode 100644 index 0000000..5d002a8 --- /dev/null +++ b/src/ripples/images/c.svg @@ 
-0,0 +1 @@ + diff --git a/src/ripples/images/ch.svg b/src/ripples/images/ch.svg new file mode 100644 index 0000000..7aff701 --- /dev/null +++ b/src/ripples/images/ch.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/d.svg b/src/ripples/images/d.svg new file mode 100644 index 0000000..cd63e99 --- /dev/null +++ b/src/ripples/images/d.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/e.svg b/src/ripples/images/e.svg new file mode 100644 index 0000000..e5f7e56 --- /dev/null +++ b/src/ripples/images/e.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/eu.svg b/src/ripples/images/eu.svg new file mode 100644 index 0000000..9e00d45 --- /dev/null +++ b/src/ripples/images/eu.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/f.svg b/src/ripples/images/f.svg new file mode 100644 index 0000000..5231b46 --- /dev/null +++ b/src/ripples/images/f.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/g.svg b/src/ripples/images/g.svg new file mode 100644 index 0000000..f3d644c --- /dev/null +++ b/src/ripples/images/g.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/h.svg b/src/ripples/images/h.svg new file mode 100644 index 0000000..1e41a7d --- /dev/null +++ b/src/ripples/images/h.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/i.svg b/src/ripples/images/i.svg new file mode 100644 index 0000000..135072d --- /dev/null +++ b/src/ripples/images/i.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/ii.svg b/src/ripples/images/ii.svg new file mode 100644 index 0000000..c0880bd --- /dev/null +++ b/src/ripples/images/ii.svg @@ -0,0 +1,7 @@ + + + \ No newline at end of file diff --git a/src/ripples/images/j.svg b/src/ripples/images/j.svg new file mode 100644 index 0000000..bf0162b --- /dev/null +++ b/src/ripples/images/j.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/k.svg b/src/ripples/images/k.svg new file mode 100644 index 0000000..aa263cb --- /dev/null +++ b/src/ripples/images/k.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/kh.svg b/src/ripples/images/kh.svg new 
file mode 100644 index 0000000..1c63983 --- /dev/null +++ b/src/ripples/images/kh.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/l.svg b/src/ripples/images/l.svg new file mode 100644 index 0000000..3dbf17f --- /dev/null +++ b/src/ripples/images/l.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/m.svg b/src/ripples/images/m.svg new file mode 100644 index 0000000..a22ac98 --- /dev/null +++ b/src/ripples/images/m.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/n.svg b/src/ripples/images/n.svg new file mode 100644 index 0000000..219d28e --- /dev/null +++ b/src/ripples/images/n.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/o.svg b/src/ripples/images/o.svg new file mode 100644 index 0000000..3f91fe1 --- /dev/null +++ b/src/ripples/images/o.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/oi.svg b/src/ripples/images/oi.svg new file mode 100644 index 0000000..1031607 --- /dev/null +++ b/src/ripples/images/oi.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/p.svg b/src/ripples/images/p.svg new file mode 100644 index 0000000..8887080 --- /dev/null +++ b/src/ripples/images/p.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/ph.svg b/src/ripples/images/ph.svg new file mode 100644 index 0000000..941ab00 --- /dev/null +++ b/src/ripples/images/ph.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/q.svg b/src/ripples/images/q.svg new file mode 100644 index 0000000..f7e1251 --- /dev/null +++ b/src/ripples/images/q.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/r.svg b/src/ripples/images/r.svg new file mode 100644 index 0000000..5eab2b4 --- /dev/null +++ b/src/ripples/images/r.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/s.svg b/src/ripples/images/s.svg new file mode 100644 index 0000000..e286220 --- /dev/null +++ b/src/ripples/images/s.svg @@ -0,0 +1,14 @@ + + + + + \ No newline at end of file diff --git a/src/ripples/images/sh.svg b/src/ripples/images/sh.svg new file mode 100644 index 0000000..bfc1afb --- /dev/null +++ 
b/src/ripples/images/sh.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/t.svg b/src/ripples/images/t.svg new file mode 100644 index 0000000..7bd7cb5 --- /dev/null +++ b/src/ripples/images/t.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/th.svg b/src/ripples/images/th.svg new file mode 100644 index 0000000..8bed1e7 --- /dev/null +++ b/src/ripples/images/th.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/ts.svg b/src/ripples/images/ts.svg new file mode 100644 index 0000000..2084423 --- /dev/null +++ b/src/ripples/images/ts.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/u.svg b/src/ripples/images/u.svg new file mode 100644 index 0000000..d804501 --- /dev/null +++ b/src/ripples/images/u.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/v.svg b/src/ripples/images/v.svg new file mode 100644 index 0000000..bf358ce --- /dev/null +++ b/src/ripples/images/v.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/vh.svg b/src/ripples/images/vh.svg new file mode 100644 index 0000000..fba2d3a --- /dev/null +++ b/src/ripples/images/vh.svg @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/src/ripples/images/w.svg b/src/ripples/images/w.svg new file mode 100644 index 0000000..bfbf39c --- /dev/null +++ b/src/ripples/images/w.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/x.svg b/src/ripples/images/x.svg new file mode 100644 index 0000000..bbf22c8 --- /dev/null +++ b/src/ripples/images/x.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/y.svg b/src/ripples/images/y.svg new file mode 100644 index 0000000..a8e0128 --- /dev/null +++ b/src/ripples/images/y.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/images/z.svg b/src/ripples/images/z.svg new file mode 100644 index 0000000..8762924 --- /dev/null +++ b/src/ripples/images/z.svg @@ -0,0 +1 @@ + diff --git a/src/ripples/translate_orientation.json b/src/ripples/translate_orientation.json new file mode 100644 index 0000000..53ea708 --- /dev/null +++ b/src/ripples/translate_orientation.json @@ -0,0 +1,42 
@@ +{ + "0": { + "N": "N", + "NE": "NE", + "E": "E", + "SE": "SE", + "S": "S", + "SW": "SW", + "W": "W", + "NW": "NW" + }, + "1": { + "N": "E", + "NE": "SE", + "E": "S", + "SE": "SW", + "S": "W", + "SW": "NW", + "W": "N", + "NW": "NE" + }, + "2": { + "N": "S", + "NE": "SW", + "E": "W", + "SE": "NW", + "S": "N", + "SW": "NE", + "W": "E", + "NW": "SE" + }, + "3": { + "N": "W", + "NE": "NW", + "E": "N", + "SE": "NE", + "S": "E", + "SW": "SE", + "W": "S", + "NW": "SW" + } +} \ No newline at end of file diff --git a/src/utils/drawing.py b/src/utils/drawing.py new file mode 100644 index 0000000..8df94c2 --- /dev/null +++ b/src/utils/drawing.py @@ -0,0 +1,54 @@ +from pathlib import Path +import xml.etree.ElementTree as ET + + +def create_drawing_XML( + tokens: list[str], + orientations: list[int], + view_box_val: int = 500, + scale: float = 0.65, + stroke_width: int = 5, + path_to_ripple_images: Path = Path(f'src/ripples/images') +) -> ET.Element: + element_to_return: ET.ElementTree = ET.ElementTree( + ET.fromstring( + f''' + + + ''' + ) + ) + element_to_return_root: ET.Element = element_to_return.getroot() + + num_tokens: int = len(tokens) + for i, token in enumerate(tokens): + tokenElement: ET.Element = ET.parse(str(path_to_ripple_images / Path(f"{token}.svg"))).getroot() + scale_value = scale ** (num_tokens - (i + 1)) + tokenElement.tag = 'g' + tokenElement.attrib = { + 'id': f'{token}_{i}', + 'xmlns': 'http://www.w3.org/2000/svg', + 'transform-origin': f'{view_box_val//2} {view_box_val//2}', + 'transform': f'scale({scale_value}) rotate({90 * orientations[i]})', + } + + _enforce_non_scaling_stroke_and_stroke_width_on_path_elements(tokenElement, stroke_width) + + + element_to_return_root.append(tokenElement) + + + return element_to_return + +def _enforce_non_scaling_stroke_and_stroke_width_on_path_elements(element: ET.Element, stroke_width: int): + for child in element: + if child.tag == 'path': + child.attrib = child.attrib | { + 'vector-effect': 'non-scaling-stroke', 
def choose_orientation(tokens: list[str], density_data: dict, translate_orientation: dict) -> list[int]:
    """Choose a rotation, in quarter turns (0-3), for every ripple token.

    The first token is never rotated. Every following token starts from the
    orientation at which the previous ripple ended (previous orientation plus
    the previous token's ``quarters``) and is turned one quarter at a time
    until the directions where it is dense *below* no longer coincide with the
    directions where the previous ripple is dense *above*.

    Args:
        tokens: Ripple tokens in drawing order.
        density_data: Per-token mapping providing ``'dense_above_dir'`` and
            ``'dense_below_dir'`` (lists of direction names) and
            ``'quarters'`` (an int).
        translate_orientation: Mapping from a quarter-turn count as a string
            (``"0"``..``"3"``) to a mapping of direction name -> rotated
            direction name.

    Returns:
        One orientation per token; an empty list when ``tokens`` is empty.

    Raises:
        KeyError: If a token is missing from ``density_data`` or a direction
            is missing from ``translate_orientation``.
    """
    if not tokens:
        return []

    # The first ripple is drawn as-is, so its dense-above directions are used
    # untranslated.
    first_data = density_data[tokens[0]]
    orientations: list[int] = [0]
    previous_dense_above: set[str] = set(first_data['dense_above_dir'])
    previous_quarters: int = first_data['quarters']

    for token in tokens[1:]:
        token_data = density_data[token]
        dense_below: list[str] = token_data['dense_below_dir']

        # Start where the previous ripple ended.
        start = (orientations[-1] + previous_quarters) % 4

        # Falls back to 0 when all four rotations clash with the previous ripple.
        chosen = 0
        for step in range(4):
            candidate = (start + step) % 4
            rotation = translate_orientation[str(candidate)]
            rotated_below = {rotation[direction] for direction in dense_below}
            if previous_dense_above.isdisjoint(rotated_below):
                # No clash: first valid orientation wins.
                chosen = candidate
                break

        orientations.append(chosen)

        # Remember this ripple's (rotated) dense-above side for the next token.
        chosen_rotation = translate_orientation[str(chosen)]
        previous_dense_above = {
            chosen_rotation[direction] for direction in token_data['dense_above_dir']
        }
        previous_quarters = token_data['quarters']

    return orientations
# Two-character sequences that map to a single ripple.
_DIGRAPHS = frozenset({
    # Consonants ending in 'h'
    'ch', 'kh', 'ph', 'sh', 'th', 'vh',
    # Consonant ending in 's'
    'ts',
    # Vowels ending in 'e'
    'ae',
    # Vowels ending in 'i'
    'ai', 'ii', 'oi',
    # Vowels ending in 'u'
    'au', 'eu',
})


def rules_for_tokens(word: str) -> list[str]:
    """Split a Koilang word into the tokens that map to ripples.

    A ripple is written with at most two characters, so the word is scanned
    left to right: whenever the next two characters form a known digraph they
    become one token, otherwise the next single character is a token. If the
    language evolves with more complex rules, switching to a real parser
    (PEST or something similar) is recommended.

    The word is stripped and lower-cased first, and a single tick (``'``) is
    emitted as a backtick (`````).

    Args:
        word (str): Word to tokenize.

    Returns:
        list[str]: Tokens in order; empty when the word is empty.
    """
    word = word.strip().lower()

    tokens: list[str] = []
    position = 0
    while position < len(word):
        pair = word[position:position + 2]
        if pair in _DIGRAPHS:
            tokens.append(pair)
            position += 2
            continue

        char = word[position]
        # Overwrite single tick to backtick
        tokens.append('`' if char == "'" else char)
        position += 1

    return tokens