Skip to content

Commit

Permalink
bugzilla 4389 ICE(constfold.c, expression.c), or wrong code: string~=…
Browse files Browse the repository at this point in the history
…dchar in CTFE
  • Loading branch information
Walter Bright committed Dec 28, 2010
1 parent 70889ad commit 5d63a25
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 6 deletions.
16 changes: 11 additions & 5 deletions src/constfold.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "expression.h"
#include "aggregate.h"
#include "declaration.h"
#include "utf.h"

#if __FreeBSD__
#define fmodl fmod // hack for now, fix later
Expand Down Expand Up @@ -1353,23 +1354,28 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)

if (e1->op == TOKnull && (e2->op == TOKint64 || e2->op == TOKstructliteral))
{ e = e2;
t = t1;
goto L2;
}
else if ((e1->op == TOKint64 || e1->op == TOKstructliteral) && e2->op == TOKnull)
{ e = e1;
t = t2;
L2:
Type *tn = e->type->toBasetype();
if (tn->ty == Tchar || tn->ty == Twchar || tn->ty == Tdchar)
{
// Create a StringExp
void *s;
StringExp *es;
size_t len = 1;
int sz = tn->size();
if (t->nextOf())
t = t->nextOf()->toBasetype();
int sz = t->size();

dinteger_t v = e->toInteger();

size_t len = utf_codeLength(sz, v);
s = mem.malloc((len + 1) * sz);
memcpy((unsigned char *)s, &v, sz);
utf_encode(sz, s, v);

// Add terminating 0
memset((unsigned char *)s + len * sz, 0, sz);
Expand Down Expand Up @@ -1459,13 +1465,13 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
StringExp *es1 = (StringExp *)e1;
StringExp *es;
Type *t;
size_t len = es1->len + 1;
int sz = es1->sz;
dinteger_t v = e2->toInteger();

size_t len = es1->len + utf_codeLength(sz, v);
s = mem.malloc((len + 1) * sz);
memcpy(s, es1->string, es1->len * sz);
memcpy((unsigned char *)s + es1->len * sz, &v, sz);
utf_encode(sz, (unsigned char *)s + (sz * es1->len), v);

// Add terminating 0
memset((unsigned char *)s + len * sz, 0, sz);
Expand Down
91 changes: 91 additions & 0 deletions src/utf.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
// http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8

#include <stdio.h>
#include <string.h>
#include <assert.h>

#include "utf.h"
Expand Down Expand Up @@ -227,3 +228,93 @@ const char *utf_decodeWchar(unsigned short *s, size_t len, size_t *pidx, dchar_t
return msg;
}

/**
 * Encode the code point c as UTF-8, storing the result at s.
 * Between one and four code units are written, chosen by the magnitude
 * of c; no terminator is appended. Values above 0x10FFFF hit assert(0)
 * and nothing is stored.
 */
void utf_encodeChar(unsigned char *s, dchar_t c)
{
    if (c <= 0x7F)
    {
        // Fits in one code unit.
        s[0] = (unsigned char) c;
        return;
    }

    // Low six bits, tagged as a continuation unit; shared by every
    // multi-unit form below.
    const unsigned char last = (unsigned char)(0x80 | (c & 0x3F));

    if (c <= 0x7FF)
    {
        s[0] = (unsigned char)(0xC0 | (c >> 6));
        s[1] = last;
        return;
    }

    const unsigned char mid = (unsigned char)(0x80 | ((c >> 6) & 0x3F));

    if (c <= 0xFFFF)
    {
        s[0] = (unsigned char)(0xE0 | (c >> 12));
        s[1] = mid;
        s[2] = last;
        return;
    }

    if (c <= 0x10FFFF)
    {
        s[0] = (unsigned char)(0xF0 | (c >> 18));
        s[1] = (unsigned char)(0x80 | ((c >> 12) & 0x3F));
        s[2] = mid;
        s[3] = last;
        return;
    }

    // Out of range for the four-unit form.
    assert(0);
}

void utf_encodeWchar(unsigned short *s, dchar_t c)
{
if (c <= 0xFFFF)
{
s[0] = (wchar_t) c;
}
else
{
s[0] = (wchar_t) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
s[1] = (wchar_t) (((c - 0x10000) & 0x3FF) + 0xDC00);
}
}


/**
 * Number of UTF-8 code units required to encode c.
 * The result is a count of code units (characters), not of bytes.
 * Values above 0x10FFFF hit an assertion; past it, 6 is returned.
 */
int utf_codeLengthChar(dchar_t c)
{
    if (c <= 0x7F)
        return 1;
    if (c <= 0x7FF)
        return 2;
    if (c <= 0xFFFF)
        return 3;
    if (c <= 0x10FFFF)
        return 4;

    assert(0);
    return 6;
}

/**
 * Number of UTF-16 code units required to encode c:
 * one for values up to 0xFFFF, two for anything larger.
 */
int utf_codeLengthWchar(dchar_t c)
{
    if (c > 0xFFFF)
        return 2;
    return 1;
}

/**
 * Number of code units required to encode c in the encoding selected
 * by sz (1 = utf8, 2 = utf16, 4 = utf32).
 * The result is a count of code units (characters), not of bytes.
 */
int utf_codeLength(int sz, dchar_t c)
{
    switch (sz)
    {
        case 1:
            return utf_codeLengthChar(c);

        case 2:
            return utf_codeLengthWchar(c);

        default:
            // Only utf32 remains; every code point is a single unit there.
            assert(sz == 4);
            return 1;
    }
}

/**
 * Encode c at s in the encoding selected by sz
 * (1 = utf8, 2 = utf16, 4 = utf32), dispatching to the
 * width-specific encoder. For utf32 the value of c is copied verbatim.
 */
void utf_encode(int sz, void *s, dchar_t c)
{
    switch (sz)
    {
        case 1:
            utf_encodeChar((unsigned char *)s, c);
            break;

        case 2:
            utf_encodeWchar((unsigned short *)s, c);
            break;

        default:
            assert(sz == 4);
            memcpy(s, &c, sz);
            break;
    }
}

11 changes: 10 additions & 1 deletion src/utf.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Compiler implementation of the D programming language
// utf.h
// Copyright (c) 2003-2008 by Digital Mars
// Copyright (c) 2003-2010 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
Expand All @@ -23,4 +23,13 @@ const char *utf_validateString(unsigned char *s, size_t len);

extern int isUniAlpha(dchar_t);

// Encode c at s as UTF-8 (1 to 4 code units) or as UTF-16 (1 or 2 code
// units). No terminator is written.
void utf_encodeChar(unsigned char *s, dchar_t c);
void utf_encodeWchar(unsigned short *s, dchar_t c);

// Number of code units (not bytes) that c occupies in UTF-8 / UTF-16.
int utf_codeLengthChar(dchar_t c);
int utf_codeLengthWchar(dchar_t c);

// Generic forms; sz selects the encoding: 1 = utf8, 2 = utf16, 4 = utf32.
int utf_codeLength(int sz, dchar_t c);
void utf_encode(int sz, void *s, dchar_t c);

#endif

0 comments on commit 5d63a25

Please sign in to comment.