Skip to content

Commit 36750ff

Browse files
committed
Fixes #86. "gethexaformat" messes up column widths
1 parent fa4fcc5 commit 36750ff

File tree

4 files changed

+138
-124
lines changed

4 files changed

+138
-124
lines changed

NStack/unicode/Rune.ColumnWidth.cs

+20-61
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,9 @@
44
//
55
using NStack;
66

7-
namespace System
8-
{
9-
public partial struct Rune
10-
{
11-
static uint[,] combining = new uint[,] {
7+
namespace System {
8+
public partial struct Rune {
9+
static uint [,] combining = new uint [,] {
1210
{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
1311
{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
1412
{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
@@ -48,16 +46,16 @@ public partial struct Rune
4846
{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
4947
{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
5048
{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
51-
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x2e9a, 0x2e9a },
52-
{ 0x2ef4, 0x2eff }, { 0x2fd6, 0x2fef }, { 0x2ffc, 0x2fff },
53-
{ 0x31e4, 0x31ef }, { 0x321f, 0x321f }, { 0xA48D, 0xA48F },
49+
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x2E9A, 0x2E9A },
50+
{ 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
51+
{ 0x31E4, 0x31EF }, { 0x321F, 0x321F }, { 0xA48D, 0xA48F },
5452
{ 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 },
5553
{ 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE1A, 0xFE1F },
5654
{ 0xFE20, 0xFE23 }, { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 },
5755
{ 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
5856
};
5957

60-
static uint[,] combiningWideChars = new uint[,] {
58+
static uint [,] combiningWideChars = new uint [,] {
6159
/* Hangul Jamo init. consonants - 0x1100, 0x11ff */
6260
/* Miscellaneous Technical - 0x2300, 0x23ff */
6361
/* Hangul Syllables - 0x11a8, 0x11c2 */
@@ -84,22 +82,21 @@ public partial struct Rune
8482
{ 0x3131, 0x318e }, { 0x3190, 0x3247 }, { 0x3250, 0x4dbf },
8583
{ 0x4e00, 0xa4c6 }, { 0xa960, 0xa97c }, { 0xac00 ,0xd7a3 },
8684
{ 0xf900, 0xfaff }, { 0xfe10, 0xfe1f }, { 0xfe30 ,0xfe6b },
87-
{ 0xff01, 0xff60 }, { 0xffe0, 0xffe6 }
85+
{ 0xff01, 0xff60 }, { 0xffe0, 0xffe6 }, { 0x10000, 0x10ffff }
8886
};
8987

90-
static int bisearch(uint rune, uint[,] table, int max)
88+
static int bisearch (uint rune, uint [,] table, int max)
9189
{
9290
int min = 0;
9391
int mid;
9492

95-
if (rune < table[0, 0] || rune > table[max, 1])
93+
if (rune < table [0, 0] || rune > table [max, 1])
9694
return 0;
97-
while (max >= min)
98-
{
95+
while (max >= min) {
9996
mid = (min + max) / 2;
100-
if (rune > table[mid, 1])
97+
if (rune > table [mid, 1])
10198
min = mid + 1;
102-
else if (rune < table[mid, 0])
99+
else if (rune < table [mid, 0])
103100
max = mid - 1;
104101
else
105102
return 1;
@@ -127,82 +124,44 @@ static int bisearch(uint rune, uint[,] table, int max)
127124
// return false;
128125
//}
129126

130-
static uint gethexaformat(uint rune, int length)
131-
{
132-
var hex = rune.ToString($"x{length}");
133-
var hexstr = hex.Substring(hex.Length - length, length);
134-
return (uint)int.Parse(hexstr, System.Globalization.NumberStyles.HexNumber);
135-
}
136-
137127
/// <summary>
138128
/// Check if the rune is a non-spacing character.
139129
/// </summary>
140130
/// <param name="rune">The rune.</param>
141131
/// <returns>True if it is a non-spacing character, false otherwise.</returns>
142-
public static bool IsNonSpacingChar(uint rune)
132+
public static bool IsNonSpacingChar (uint rune)
143133
{
144-
return bisearch(rune, combining, combining.GetLength(0) - 1) != 0;
134+
return bisearch (rune, combining, combining.GetLength (0) - 1) != 0;
145135
}
146136

147137
/// <summary>
148138
/// Check if the rune is a wide character.
149139
/// </summary>
150140
/// <param name="rune">The rune.</param>
151141
/// <returns>True if it is a wide character, false otherwise.</returns>
152-
public static bool IsWideChar(uint rune)
142+
public static bool IsWideChar (uint rune)
153143
{
154-
return bisearch(gethexaformat(rune, 4), combiningWideChars, combiningWideChars.GetLength(0) - 1) != 0;
144+
return bisearch (rune, combiningWideChars, combiningWideChars.GetLength (0) - 1) != 0;
155145
}
156146

157-
static char firstSurrogatePairChar = '\0';
158-
159147
/// <summary>
160148
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
161149
/// </summary>
162150
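/// <returns>The width in columns: -1 if the rune is a control character (below 0x20, or from 0x7f up to but not including 0xa0) and therefore not printable, 0 if it is a non-spacing (combining) character, otherwise the number of columns (1 or 2) that the rune occupies.</returns>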
163151
/// <param name="rune">The rune.</param>
164-
public static int ColumnWidth(Rune rune)
152+
public static int ColumnWidth (Rune rune)
165153
{
166-
if (firstSurrogatePairChar != '\0')
167-
firstSurrogatePairChar = '\0';
168154
uint irune = (uint)rune;
169155
if (irune < 0x20 || (irune >= 0x7f && irune < 0xa0))
170156
return -1;
171157
if (irune < 0x7f)
172158
return 1;
173159
/* binary search in table of non-spacing characters */
174-
if (bisearch(gethexaformat(irune, 4), combining, combining.GetLength(0) - 1) != 0)
160+
if (bisearch (irune, combining, combining.GetLength (0) - 1) != 0)
175161
return 0;
176162
/* if we arrive here, ucs is not a combining or C0/C1 control character */
177163
return 1 +
178-
(bisearch(gethexaformat(irune, 4), combiningWideChars, combiningWideChars.GetLength(0) - 1) != 0 ? 1 : 0);
179-
}
180-
181-
/// <summary>
182-
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
183-
/// </summary>
184-
/// <returns>The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columns that the rune occupies.</returns>
185-
/// <param name="c">The char.</param>
186-
public static int ColumnWidth(char c)
187-
{
188-
if (!((Rune)c).IsValid)
189-
{
190-
if (firstSurrogatePairChar == '\0')
191-
{
192-
firstSurrogatePairChar = c;
193-
return 0;
194-
}
195-
else if (firstSurrogatePairChar != '\0')
196-
{
197-
var r = new Rune(firstSurrogatePairChar, c);
198-
firstSurrogatePairChar = '\0';
199-
return ColumnWidth(r);
200-
}
201-
}
202-
if (firstSurrogatePairChar != '\0')
203-
firstSurrogatePairChar = '\0';
204-
205-
return ColumnWidth((Rune)c);
164+
(bisearch (irune, combiningWideChars, combiningWideChars.GetLength (0) - 1) != 0 ? 1 : 0);
206165
}
207166
}
208167
}

NStack/unicode/Rune.cs

+50-37
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ namespace System {
88
/// <remarks>
99
///
1010
/// </remarks>
11-
[StructLayout(LayoutKind.Sequential)]
11+
[StructLayout (LayoutKind.Sequential)]
1212
public partial struct Rune {
1313
// Stores the rune
1414
uint value;
@@ -54,9 +54,8 @@ public partial struct Rune {
5454
/// </remarks>
5555
public Rune (uint rune)
5656
{
57-
if (rune > maxRune)
58-
{
59-
throw new ArgumentOutOfRangeException("Value is beyond the supplementary range!");
57+
if (rune > maxRune) {
58+
throw new ArgumentOutOfRangeException ("Value is beyond the supplementary range!");
6059
}
6160
this.value = rune;
6261
}
@@ -77,43 +76,48 @@ public Rune (char ch)
7776
/// <param name="lowSurrogate">The low surrogate code point.</param>
7877
public Rune (uint highSurrogate, uint lowSurrogate)
7978
{
80-
if (EncodeSurrogatePair(highSurrogate, lowSurrogate, out Rune rune))
81-
{
79+
if (EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune rune)) {
8280
this.value = rune;
83-
}
84-
else if (highSurrogate < highSurrogateMin || lowSurrogate > lowSurrogateMax)
85-
{
86-
throw new ArgumentOutOfRangeException($"Must be between {highSurrogateMin:x} and {lowSurrogateMax:x} inclusive!");
87-
}
88-
else
89-
{
90-
throw new ArgumentOutOfRangeException($"Resulted rune must be less or equal to {(uint)MaxRune:x}!");
81+
} else if (highSurrogate < highSurrogateMin || lowSurrogate > lowSurrogateMax) {
82+
throw new ArgumentOutOfRangeException ($"Must be between {highSurrogateMin:x} and {lowSurrogateMax:x} inclusive!");
83+
} else {
84+
throw new ArgumentOutOfRangeException ($"Resulted rune must be less or equal to {(uint)MaxRune:x}!");
9185
}
9286
}
9387

9488
/// <summary>
9589
/// Gets a value indicating whether this <see cref="T:System.Rune"/> can be encoded as UTF-8
9690
/// </summary>
9791
/// <value><c>true</c> if is valid; otherwise, <c>false</c>.</value>
98-
public bool IsValid => ValidRune(value);
92+
public bool IsValid => ValidRune (value);
9993

10094
/// <summary>
10195
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a surrogate code point.
10296
/// </summary>
10397
/// <returns><c>true</c> if it is a surrogate code point, <c>false</c> otherwise.</returns>
104-
public bool IsSurrogate => IsSurrogateRune(value);
98+
public bool IsSurrogate => IsSurrogateRune (value);
10599

106100
/// <summary>
107101
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a valid surrogate pair.
108102
/// </summary>
109103
/// <returns><c>true</c> if it is a valid surrogate pair, <c>false</c> otherwise.</returns>
110-
public bool IsSurrogatePair => DecodeSurrogatePair(value, out _);
104+
public bool IsSurrogatePair => DecodeSurrogatePair (value, out _);
105+
106+
/// <summary>
107+
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a high surrogate.
108+
/// </summary>
109+
public bool IsHighSurrogate => value >= highSurrogateMin && value <= highSurrogateMax;
110+
111+
/// <summary>
112+
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a low surrogate.
113+
/// </summary>
114+
public bool IsLowSurrogate => value >= lowSurrogateMin && value <= lowSurrogateMax;
111115

112116
/// <summary>
113117
/// Check if the rune is a non-spacing character.
114118
/// </summary>
115119
/// <returns>True if it is a non-spacing character, false otherwise.</returns>
116-
public bool IsNonSpacing => IsNonSpacingChar(value);
120+
public bool IsNonSpacing => IsNonSpacingChar (value);
117121

118122
// Code points in the surrogate range are not valid for UTF-8.
119123
const uint highSurrogateMin = 0xd800;
@@ -539,8 +543,7 @@ public static int InvalidIndex (byte [] buffer)
539543
public static bool ValidRune (Rune rune)
540544
{
541545
if ((0 <= (int)rune.value && rune.value < highSurrogateMin) ||
542-
(lowSurrogateMax < rune.value && rune.value <= MaxRune.value))
543-
{
546+
(lowSurrogateMax < rune.value && rune.value <= MaxRune.value)) {
544547
return true;
545548
}
546549

@@ -552,7 +555,7 @@ public static bool ValidRune (Rune rune)
552555
/// </summary>
553556
/// <param name="rune">The rune.</param>
554557
/// <returns><c>true</c> if it is a surrogate code point, <c>false</c> otherwise.</returns>
555-
public static bool IsSurrogateRune(uint rune)
558+
public static bool IsSurrogateRune (uint rune)
556559
{
557560
return rune >= highSurrogateMin && rune <= lowSurrogateMax;
558561
}
@@ -564,12 +567,11 @@ public static bool IsSurrogateRune(uint rune)
564567
/// <param name="lowSurrogate">The low surrogate code point.</param>
565568
/// <param name="rune">The returning rune.</param>
566569
/// <returns><c>true</c> if the returning rune is greater than 0, <c>false</c> otherwise.</returns>
567-
public static bool EncodeSurrogatePair(uint highsurrogate, uint lowSurrogate, out Rune rune)
570+
public static bool EncodeSurrogatePair (uint highsurrogate, uint lowSurrogate, out Rune rune)
568571
{
569572
rune = 0;
570573
if (highsurrogate >= highSurrogateMin && highsurrogate <= highSurrogateMax &&
571-
lowSurrogate >= lowSurrogateMin && lowSurrogate <= lowSurrogateMax)
572-
{
574+
lowSurrogate >= lowSurrogateMin && lowSurrogate <= lowSurrogateMax) {
573575
//return 0x10000 + ((highsurrogate - highSurrogateMin) * 0x0400) + (lowSurrogate - lowSurrogateMin);
574576
return (rune = 0x10000 + ((highsurrogate - highSurrogateMin) << 10) + (lowSurrogate - lowSurrogateMin)) > 0;
575577
}
@@ -582,14 +584,13 @@ public static bool EncodeSurrogatePair(uint highsurrogate, uint lowSurrogate, ou
582584
/// <param name="rune">The rune</param>
583585
/// <param name="chars">The chars if the rune is a valid surrogate pair. Empty otherwise.</param>
584586
/// <returns><c>true</c> if it is a valid surrogate pair, <c>false</c> otherwise.</returns>
585-
public static bool DecodeSurrogatePair(uint rune, out char [] chars)
587+
public static bool DecodeSurrogatePair (uint rune, out char [] chars)
586588
{
587589
uint s = rune - 0x10000;
588590
uint h = highSurrogateMin + (s >> 10);
589591
uint l = lowSurrogateMin + (s & 0x3FF);
590592

591-
if (EncodeSurrogatePair (h, l, out Rune dsp) && dsp == rune)
592-
{
593+
if (EncodeSurrogatePair (h, l, out Rune dsp) && dsp == rune) {
593594
chars = new char [] { (char)h, (char)l };
594595
return true;
595596
}
@@ -603,13 +604,11 @@ public static bool DecodeSurrogatePair(uint rune, out char [] chars)
603604
/// <param name="str">The string.</param>
604605
/// <param name="chars">The chars if the string is a valid surrogate pair. Empty otherwise.</param>
605606
/// <returns><c>true</c> if it is a valid surrogate pair, <c>false</c> otherwise.</returns>
606-
public static bool DecodeSurrogatePair(string str, out char [] chars)
607+
public static bool DecodeSurrogatePair (string str, out char [] chars)
607608
{
608-
if (str.Length == 2)
609-
{
610-
chars = str.ToCharArray();
611-
if (EncodeSurrogatePair(chars[0], chars[1], out _))
612-
{
609+
if (str.Length == 2) {
610+
chars = str.ToCharArray ();
611+
if (EncodeSurrogatePair (chars [0], chars [1], out _)) {
613612
return true;
614613
}
615614
}
@@ -622,9 +621,9 @@ public static bool DecodeSurrogatePair(string str, out char [] chars)
622621
/// </summary>
623622
/// <returns>The number of UTF8 bytes expected given the first prefix.</returns>
624623
/// <param name="firstByte">Is the first byte of a UTF8 sequence.</param>
625-
public static int ExpectedSizeFromFirstByte(byte firstByte)
624+
public static int ExpectedSizeFromFirstByte (byte firstByte)
626625
{
627-
var x = first[firstByte];
626+
var x = first [firstByte];
628627

629628
// Invalid runes, just return 1 for byte, and let higher level pass to print
630629
if (x == xx)
@@ -806,7 +805,7 @@ public static Rune To (Case toCase, Rune rune)
806805
{
807806
uint rval = rune.value;
808807
switch (toCase) {
809-
case Case.Lower:
808+
case Case.Lower:
810809
return new Rune (NStack.Unicode.To (NStack.Unicode.Case.Lower, rval));
811810
case Case.Title:
812811
return new Rune (NStack.Unicode.To (NStack.Unicode.Case.Title, rval));
@@ -874,6 +873,20 @@ public static Rune To (Case toCase, Rune rune)
874873
/// <param name="rune">Rune.</param>
875874
public static implicit operator uint (Rune rune) => rune.value;
876875

876+
/// <summary>
877+
/// Implicit operator conversion from a C# integer into a rune.
878+
/// </summary>
879+
/// <returns>Rune representing the C# integer</returns>
880+
/// <param name="value">32-bit Integer.</param>
881+
public static implicit operator Rune (int value) => new Rune ((uint)value);
882+
883+
/// <summary>
884+
/// Implicit operator conversion from a byte into a rune.
885+
/// <returns>Rune representing the byte.</returns>
886+
/// <returns>The unsigned integer representation.</returns>
887+
/// <param name="byt">Byte.</param>
888+
public static implicit operator Rune (byte byt) => new Rune (byt);
889+
877890
/// <summary>
878891
/// Implicit operator conversion from a C# char into a rune.
879892
/// </summary>
@@ -905,7 +918,7 @@ public override string ToString ()
905918
{
906919
var buff = new byte [4];
907920
var size = EncodeRune (this, buff, 0);
908-
return System.Text.Encoding.UTF8.GetString(buff, 0, size);
921+
return System.Text.Encoding.UTF8.GetString (buff, 0, size);
909922
}
910923

911924
/// <summary>

0 commit comments

Comments
 (0)