Skip to content

Commit 6ba5f23

Browse files
GyumeijieGyumeijie
Gyumeijie
authored and
Gyumeijie
committed
feat: initial success for processing float literal in lexical analysis phase
1 parent bd34b3f commit 6ba5f23

File tree

3 files changed

+142
-36
lines changed

3 files changed

+142
-36
lines changed

Diff for: lex.c

+122-36
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
#include "lex.h"
22
#include <stdio.h>
3+
#include <string.h>
34
#include <stdlib.h>
45

56
static const char* src;
67
static int *symbols;
78

8-
// fields of identifier
9+
// 标识符的描述信息
910
enum {Token, Hash, Name, Type, Class, Value, BType, BClass, BValue, IdSize};
1011

11-
// types of variable/function
12+
// 变量或者函数类型
1213
enum { CHAR, INT, PTR };
1314

14-
// type of declaration.
15+
//声明类型
1516
enum {Global, Local, Extern};
1617

1718

@@ -39,71 +40,94 @@ void next() {
3940
}
4041

4142
else if (token == '#') {
42-
// skip macro, because we will not support it
43+
//跳过宏定义,因为不支持
4344
while (*src != 0 && *src != '\n') {
4445
src++;
4546
}
4647
}
4748

48-
4949
//解析标识符
50-
else if ((token >= 'a' && token <= 'z') || (token >= 'A' && token <= 'Z') || (token == '_')) {
50+
else if (is_valid_starting_character(token)) {
5151

52-
// parse identifier
5352
last_pos = (char*)src - 1;
5453
hash = token;
5554

5655
char block_keyword[32];
57-
while ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') || (*src >= '0' && *src <= '9') || (*src == '_')) {
56+
while (is_valid_identifier_character(*src)) {
5857
hash = hash * 147 + *src;
5958
src++;
6059
}
6160

62-
63-
// look for existing identifier, linear search
6461
// 搜索符号表
6562
// 这里默认设置的IdSize即标识符的长度是10,如果两个符号的前面10个是
6663
// 相同的,那么就区分不出来了,可以根据实际情况下重新设置其大小
6764
current_id = symbols;
65+
int id_len = src - last_pos;
6866
while (current_id[Token]) {
69-
if (current_id[Hash] == hash && !memcmp((char *)current_id[Name], last_pos, src - last_pos)) {
70-
//found one, return
71-
//printf("find token %d\n", current_id[Token]);
67+
if (current_id[Hash] == hash &&
68+
!memcmp((char *)current_id[Name], last_pos, id_len)) {
7269
token = current_id[Token];
70+
7371
return;
7472
}
7573
//查找下一个条目
7674
current_id = current_id + IdSize;
7775
}
7876

79-
// store new ID
77+
//如果没有找到就在新的symbols表项中创建一个ID条目
8078
current_id[Name] = (int)last_pos;
8179
current_id[Hash] = hash;
8280
token = current_id[Token] = Id;
81+
8382
return;
8483
}
8584

86-
//TODO 增加浮点字面量,也就是意味着要
8785
//如果是字面量的话就计算其数值
8886
else if (token >= '0' && token <= '9') {
89-
// parse number, three kinds: dec(123) hex(0x123) oct(017)
87+
//保存浮点数字面量,之后用转换函数进行转换
88+
char float_string[32];
89+
const char* string_begin = src - 1;
90+
9091
token_val = token - '0';
9192
if (token_val > 0) {
92-
// dec, starts with [1-9]
93+
float_string[0] = token;
94+
int idx = 1;
95+
// 十进制
9396
while (*src >= '0' && *src <= '9') {
9497
token_val = token_val*10 + *src++ - '0';
9598
}
99+
100+
//检测是否可能是浮点
101+
if (*src == '.'){
102+
memcpy(&float_string[1], string_begin, src - string_begin);
103+
idx = idx + src - string_begin;
104+
float_string[idx] = '.';
105+
process_fraction(float_string, idx + 1);
106+
token_val = (int)strtod(float_string, NULL);
107+
}
108+
96109
} else {
97-
// starts with number 0
110+
// '0'开头的数,八进制或者十六进制或者是小数
98111
if (*src == 'x' || *src == 'X') {
99-
//hex
112+
// 十六进制
100113
token = *++src;
101-
while ((token >= '0' && token <= '9') || (token >= 'a' && token <= 'f') || (token >= 'A' && token <= 'F')) {
102-
token_val = token_val * 16 + (token & 15) + (token >= 'A' ? 9 : 0);
114+
while ((token >= '0' && token <= '9') ||
115+
(token >= 'a' && token <= 'f') ||
116+
(token >= 'A' && token <= 'F')) {
117+
token_val = token_val*16 + (token&15) + (token >= 'A' ? 9 : 0);
103118
token = *++src;
104119
}
105-
} else {
106-
// oct
120+
//TODO 增加浮点运算
121+
}else if(*src == '.'){
122+
// 小数0.xxxx
123+
float_string[0] = '0';
124+
float_string[1] = '.';
125+
126+
process_fraction(float_string, 2);
127+
128+
token_val = (int)strtod(float_string, NULL);
129+
}else{
130+
// 八进制
107131
while (*src >= '0' && *src <= '7') {
108132
token_val = token_val*8 + *src++ - '0';
109133
}
@@ -114,15 +138,13 @@ void next() {
114138
return;
115139
}
116140

117-
118141
else if (token == '/') {
119142
if (*src == '/') {
120-
// skip comments
143+
//跳过注释
121144
while (*src != 0 && *src != '\n') {
122145
++src;
123146
}
124-
} else {
125-
// divide operator
147+
} else {
126148
token = Div;
127149
return;
128150
}
@@ -158,7 +180,7 @@ void next() {
158180
return;
159181
}
160182
else if (token == '=') {
161-
// parse '==' and '='
183+
// 解析 '==' '='
162184
if (*src == '=') {
163185
src ++;
164186
token = Eq;
@@ -168,7 +190,7 @@ void next() {
168190
return;
169191
}
170192
else if (token == '+') {
171-
// parse '+' and '++'
193+
// 解析 '+' '++'
172194
if (*src == '+') {
173195
src ++;
174196
token = Inc;
@@ -178,7 +200,7 @@ void next() {
178200
return;
179201
}
180202
else if (token == '-') {
181-
// parse '-' and '--'
203+
// 解析 '-' '--'
182204
if (*src == '-') {
183205
src ++;
184206
token = Dec;
@@ -188,15 +210,15 @@ void next() {
188210
return;
189211
}
190212
else if (token == '!') {
191-
// parse '!='
213+
// 解析'!='
192214
if (*src == '=') {
193215
src++;
194216
token = Ne;
195217
}
196218
return;
197219
}
198220
else if (token == '<') {
199-
// parse '<=', '<<' or '<'
221+
// 解析 '<=', '<<' or '<'
200222
if (*src == '=') {
201223
src ++;
202224
token = Le;
@@ -209,7 +231,7 @@ void next() {
209231
return;
210232
}
211233
else if (token == '>') {
212-
// parse '>=', '>>' or '>'
234+
//解析'>=''>>' 或者 '>'
213235
if (*src == '=') {
214236
src ++;
215237
token = Ge;
@@ -222,7 +244,7 @@ void next() {
222244
return;
223245
}
224246
else if (token == '|') {
225-
// parse '|' or '||'
247+
//解析'|''||'
226248
if (*src == '|') {
227249
src ++;
228250
token = Lor;
@@ -232,7 +254,7 @@ void next() {
232254
return;
233255
}
234256
else if (token == '&') {
235-
// parse '&' and '&&'
257+
//解析'&''&&'
236258
if (*src == '&') {
237259
src ++;
238260
token = Lan;
@@ -261,7 +283,15 @@ void next() {
261283
token = Cond;
262284
return;
263285
}
264-
else if (token == '~' || token == ';' || token == '{' || token == '}' || token == '(' || token == ')' || token == ']' || token == ',' || token == ':') {
286+
else if (token == '~' ||
287+
token == ';' ||
288+
token == '{' ||
289+
token == '}' ||
290+
token == '(' ||
291+
token == ')' ||
292+
token == ']' ||
293+
token == ',' ||
294+
token == ':') {
265295
// directly return the character as token;
266296
return;
267297
}
@@ -277,3 +307,59 @@ void match(int tk) {
277307
exit(-1);
278308
}
279309
}
310+
311+
static Boolean is_valid_starting_character(char ch)
312+
{
313+
314+
if ( (ch >= 'a' && ch <= 'z') ||
315+
(ch >= 'A' && ch <= 'Z') ||
316+
(ch == '_')){
317+
return True;
318+
}
319+
320+
return False;
321+
}
322+
323+
324+
static Boolean is_valid_identifier_character(char ch)
325+
{
326+
327+
if (is_valid_starting_character(ch) || is_digit(ch)){
328+
return True;
329+
}
330+
331+
return False;
332+
}
333+
334+
static Boolean is_digit(char ch)
335+
{
336+
return (ch >= '0' && ch <= '9') ? True : False;
337+
}
338+
339+
340+
static void process_fraction(char* float_string, int start_idx)
341+
{
342+
int idx = start_idx;
343+
344+
token = *++src;
345+
while ((token >= '0' && token <= '9')){
346+
float_string[idx] = token;
347+
idx++;
348+
token = *++src;
349+
}
350+
351+
//判断是否是非法的浮点数字面量
352+
printf("trailing charater of float literal '%c'\n", token);
353+
if (! (token == ',' || token == ';' || token == ' ')){
354+
printf("bad float literal\n");
355+
exit(-1);
356+
}
357+
358+
float_string[idx] = '\0';
359+
printf("float val:%lf\n", strtod(float_string, NULL));
360+
361+
362+
}
363+
364+
365+

Diff for: lex.h

+11
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,20 @@
22
#define LEX_H
33

44
#include "symbol.h"
5+
#include "types.h"
56

67
extern void prepare_for_tokenize(const char* src_code, int* symbol_table);
8+
79
extern void next();
10+
811
extern void match(int tk);
912

13+
static Boolean is_valid_starting_character(char ch);
14+
15+
static Boolean is_valid_identifier_character(char ch);
16+
17+
static Boolean is_digit(char ch);
18+
19+
static void process_fraction(char* float_string, int start_idx);
20+
1021
#endif

Diff for: types.h

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#ifndef TYPES_H
2+
#define TYPES_H
3+
4+
/* Minimal boolean type: False is 0 and True is 1. */
typedef enum { False = 0, True = 1 } Boolean;
8+
9+
#endif

0 commit comments

Comments
 (0)