Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shaping_differs to allow >=2 items, added defs for nl_Latn and tr_Latn #38

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 81 additions & 54 deletions shaperglot/checks/shaping_differs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from strictyaml import FixedSeq, Map, Int, Optional
from itertools import permutations
from strictyaml import Seq, Map, Int, Optional
from num2words import num2words

from .common import shaping_input_schema, ShaperglotCheck, check_schema
Expand All @@ -10,8 +11,8 @@ class ShapingDiffersCheck(ShaperglotCheck):
name = "shaping_differs"
schema = check_schema(
{
"inputs": FixedSeq([shaping_input_schema, shaping_input_schema]),
Optional("differs"): FixedSeq([cluster_schema, cluster_schema]),
"inputs": Seq(shaping_input_schema),
Optional("differs"): Seq(cluster_schema),
}
)

Expand All @@ -24,27 +25,91 @@ def describe(self):
desc += f" of the {num2words(1+int(differs['cluster']), to='ordinal')} cluster"
cluster_desc.append(desc)
result = (
f"{cluster_desc[0]} of the first output is different to "
f"{cluster_desc[1]} of the second output."
" is different to ".join(
[
f"{cluster_desc[i]} of the {num2words(1+i, to='ordinal')} output"
for i in range(len(cluster_desc))
]
)
+ "."
)
else:
result = "the outputs differ."
full_result = (
f"that, when {self.inputs[0].describe()}, and then {self.inputs[1].describe()}, "
) + result
full_result = f"that, when {self.inputs[0].describe()}, "
for input_index in range(1, len(self.inputs)):
full_result += f"and then {self.inputs[input_index].describe()}, "
full_result += result
if str(self.definition["rationale"]):
full_result += f" This is because {self.definition['rationale']}."
return full_result

def execute(self, checker):
buffers = [i.shape(checker) for i in self.inputs]
if "differs" not in self.definition:
# Any difference is OK
serialized_buf1 = checker.vharfbuzz.serialize_buf(buffers[0])
serialized_buf2 = checker.vharfbuzz.serialize_buf(buffers[1])
if serialized_buf1 != serialized_buf2:
# Additional validation to allow arbitrary number of inputs
assert len(self.inputs) >= 2, "shaping_differs check needs at least two inputs"
if "differs" in self.definition:
assert (
len(self.definition["differs"]) >= 2
), "shaping_differs check needs at least two 'differs' items"
assert len(self.definition["differs"]) == len(
self.inputs
), "'inputs' and 'differs' must have the same length"

# Build permutations of inputs and differs
input_permutations = list(permutations(self.inputs, 2))
differs_permutations = []
if "differs" in self.definition:
differs_permutations = list(permutations(self.definition["differs"], 2))

# Check each permutation
for permutation, inputs in enumerate(input_permutations):
buffers = [i.shape(checker) for i in inputs]
if "differs" not in self.definition:
# Any difference is OK
serialized_buf1 = checker.vharfbuzz.serialize_buf(buffers[0])
serialized_buf2 = checker.vharfbuzz.serialize_buf(buffers[1])
if serialized_buf1 != serialized_buf2:
checker.results.okay(
check_name="shaping-differs",
message=f"{self.definition['rationale']}",
)
else:
checker.results.fail(
check_name="shaping-differs",
result_code="shaping-did-not-differ",
message=f"{self.definition['rationale']}"
+ "; both buffers returned "
+ serialized_buf1,
context={
"input1": inputs[0].check_yaml,
"input2": inputs[1].check_yaml,
},
)
return
# We are looking for a specific difference
glyphs = []
for differs, buffer in zip(differs_permutations[permutation], buffers):
buffer = list(zip(buffer.glyph_infos, buffer.glyph_positions))
if "cluster" in differs:
buffer = [
x for x in buffer if x[0].cluster == int(differs["cluster"])
]
glyph_ix = int(differs["glyph"])
if len(buffer) - 1 < glyph_ix:
checker.results.fail(
check_name="shaping-differs",
result_code="too-few-glyphs",
message=f"Test asked for glyph {glyph_ix} but shaper only returned {len(buffer)} glyphs",
context={
"input1": inputs[0].check_yaml,
"input2": inputs[1].check_yaml,
},
)
return
glyphs.append((buffer[glyph_ix][0].codepoint, buffer[glyph_ix][1]))
if glyphs[0] != glyphs[1]:
checker.results.okay(
check_name="shaping-differs",
result_code="shaping-did-not-differ",
message=f"{self.definition['rationale']}",
)
else:
Expand All @@ -55,45 +120,7 @@ def execute(self, checker):
+ "; both buffers returned "
+ serialized_buf1,
context={
"input1": self.inputs[0].check_yaml,
"input2": self.inputs[0].check_yaml,
"input1": inputs[0].check_yaml,
"input2": inputs[1].check_yaml,
},
)
return
# We are looking for a specific difference
glyphs = []
for differs, buffer in zip(self.definition["differs"], buffers):
buffer = list(zip(buffer.glyph_infos, buffer.glyph_positions))
if "cluster" in differs:
buffer = [x for x in buffer if x[0].cluster == int(differs["cluster"])]
glyph_ix = int(differs["glyph"])
if len(buffer) - 1 < glyph_ix:
checker.results.fail(
check_name="shaping-differs",
result_code="too-few-glyphs",
message=f"Test asked for glyph {glyph_ix} but shaper only returned {len(buffer)} glyphs",
context={
"input1": self.inputs[0].check_yaml,
"input2": self.inputs[0].check_yaml,
},
)
return
glyphs.append((buffer[glyph_ix][0].codepoint, buffer[glyph_ix][1]))
if glyphs[0] != glyphs[1]:
checker.results.okay(
check_name="shaping-differs",
result_code="shaping-did-not-differ",
message=f"{self.definition['rationale']}",
)
else:
checker.results.fail(
check_name="shaping-differs",
result_code="shaping-did-not-differ",
message=f"{self.definition['rationale']}"
+ "; both buffers returned "
+ serialized_buf1,
context={
"input1": self.inputs[0].check_yaml,
"input2": self.inputs[0].check_yaml,
},
)
54 changes: 54 additions & 0 deletions shaperglot/languages/nl_Latn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# shaping_differs checks for Dutch (language tag NLD).
#
# Each check lists three inputs; the unaccented digraph, the digraph with an
# acute on the first letter, and the digraph with an acute on both letters.
# No `differs` key is given in any of these checks.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing.
# `conditions` and `rationale` are placed at check level (siblings of
# `inputs`) -- confirm against the check schema.

# Lowercase ij, default features.
- check: shaping_differs
  inputs:
    - text: ij
      language: NLD
    - text: íj
      language: NLD
    - text: íj́
      language: NLD
  rationale: 'Dutch uses ij with 3 different accent variations'

# Uppercase IJ, default features.
- check: shaping_differs
  inputs:
    - text: IJ
      language: NLD
    - text: ÍJ
      language: NLD
    - text: ÍJ́
      language: NLD
  rationale: 'Dutch uses IJ with 3 different accent variations'

# Lowercase ij with the smcp feature switched on for every input.
- check: shaping_differs
  inputs:
    - text: ij
      language: NLD
      features:
        smcp: true
    - text: íj
      language: NLD
      features:
        smcp: true
    - text: íj́
      language: NLD
      features:
        smcp: true
  # Presumably limits this check to fonts that provide smcp -- TODO confirm.
  conditions:
    features:
      - smcp
  rationale: 'Dutch uses ij with 3 different accent variations'

# Uppercase IJ with the c2sc feature switched on for every input.
- check: shaping_differs
  inputs:
    - text: IJ
      language: NLD
      features:
        c2sc: true
    - text: ÍJ
      language: NLD
      features:
        c2sc: true
    - text: ÍJ́
      language: NLD
      features:
        c2sc: true
  # Presumably limits this check to fonts that provide c2sc -- TODO confirm.
  conditions:
    features:
      - c2sc
  rationale: 'Dutch uses IJ with 3 different accent variations'
28 changes: 28 additions & 0 deletions shaperglot/languages/tr_Latn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# shaping_differs checks for Turkish (language tag TRK).
#
# Each check shapes the dotless and dotted form of the letter i with a
# small-caps feature enabled on both inputs.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing.
# `conditions` and `rationale` are placed at check level (siblings of
# `inputs`) -- confirm against the check schema.

# Lowercase dotless ı vs dotted i under smcp.
- check: shaping_differs
  inputs:
    - text: ı
      language: TRK
      features:
        smcp: true
    - text: i
      language: TRK
      features:
        smcp: true
  # Presumably limits this check to fonts that provide smcp -- TODO confirm.
  conditions:
    features:
      - smcp
  rationale: 'Turkish has i and ı which must be uniquely-shaped even in small-caps (smcp)'

# Uppercase dotless I vs dotted İ under c2sc.
- check: shaping_differs
  inputs:
    - text: I
      language: TRK
      features:
        c2sc: true
    - text: İ
      language: TRK
      features:
        c2sc: true
  # Presumably limits this check to fonts that provide c2sc -- TODO confirm.
  conditions:
    features:
      - c2sc
  rationale: 'Turkish has I and İ which must be uniquely-shaped even in caps-to-small-caps (c2sc)'
Loading