forked from DoctorWkt/acwj
-
Notifications
You must be signed in to change notification settings - Fork 0
/
expr.c
570 lines (484 loc) · 16.8 KB
/
expr.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
#include "defs.h"
#include "data.h"
#include "decl.h"
// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3
// expression_list: <null>
// | expression
// | expression ',' expression_list
// ;
// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point. If no
// expressions are parsed, NULL is returned
struct ASTnode *expression_list(int endtoken) {
struct ASTnode *tree = NULL;
struct ASTnode *child = NULL;
int exprcount = 0;
// Loop until the end token
while (Token.token != endtoken) {
// Parse the next expression and increment the expression count
child = binexpr(0);
exprcount ;
// Build an A_GLUE AST node with the previous tree as the left child
// and the new expression as the right child. Store the expression count.
tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount);
// Stop when we reach the end token
if (Token.token == endtoken)
break;
// Must have a ',' at this point
match(T_COMMA, ",");
}
// Return the tree of expressions
return (tree);
}
// Parse a function call and return its AST
static struct ASTnode *funccall(void) {
struct ASTnode *tree;
struct symtable *funcptr;
// Check that the identifier has been defined as a function,
// then make a leaf node for it.
if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
fatals("Undeclared function", Text);
}
// Get the '('
lparen();
// Parse the argument expression list
tree = expression_list(T_RPAREN);
// XXX Check type of each argument against the function's prototype
// Build the function call AST node. Store the
// function's return type as this node's type.
// Also record the function's symbol-id
tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0);
// Get the ')'
rparen();
return (tree);
}
// Parse the index into an array and return an AST tree for it
static struct ASTnode *array_access(void) {
struct ASTnode *left, *right;
struct symtable *aryptr;
// Check that the identifier has been defined as an array
// then make a leaf node for it that points at the base
if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) {
fatals("Undeclared array", Text);
}
left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0);
// Get the '['
scan(&Token);
// Parse the following expression
right = binexpr(0);
// Get the ']'
match(T_RBRACKET, "]");
// Ensure that this is of int type
if (!inttype(right->type))
fatal("Array index is not of integer type");
// Scale the index by the size of the element's type
right = modify_type(right, left->type, A_ADD);
// Return an AST tree where the array's base has the offset
// added to it, and dereference the element. Still an lvalue
// at this point.
left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0);
left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0);
return (left);
}
// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
static struct ASTnode *member_access(int withpointer) {
struct ASTnode *left, *right;
struct symtable *compvar;
struct symtable *typeptr;
struct symtable *m;
// Check that the identifier has been declared as a
// struct/union or a struct/union pointer
if ((compvar = findsymbol(Text)) == NULL)
fatals("Undeclared variable", Text);
if (withpointer && compvar->type != pointer_to(P_STRUCT)
&& compvar->type != pointer_to(P_UNION))
fatals("Undeclared variable", Text);
if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION)
fatals("Undeclared variable", Text);
// If a pointer to a struct/union, get the pointer's value.
// Otherwise, make a leaf node that points at the base
// Either way, it's an rvalue
if (withpointer) {
left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0);
} else
left = mkastleaf(A_ADDR, compvar->type, compvar, 0);
left->rvalue = 1;
// Get the details of the composite type
typeptr = compvar->ctype;
// Skip the '.' or '->' token and get the member's name
scan(&Token);
ident();
// Find the matching member's name in the type
// Die if we can't find it
for (m = typeptr->member; m != NULL; m = m->next)
if (!strcmp(m->name, Text))
break;
if (m == NULL)
fatals("No member found in struct/union: ", Text);
// Build an A_INTLIT node with the offset
right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn);
// Add the member's offset to the base of the struct/union
// and dereference it. Still an lvalue at this point
left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0);
left = mkastunary(A_DEREF, m->type, left, NULL, 0);
return (left);
}
// Parse a postfix expression and return
// an AST node representing it. The
// identifier is already in Text.
static struct ASTnode *postfix(void) {
struct ASTnode *n;
struct symtable *varptr;
struct symtable *enumptr;
// If the identifier matches an enum value,
// return an A_INTLIT node
if ((enumptr = findenumval(Text)) != NULL) {
scan(&Token);
return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn));
}
// Scan in the next token to see if we have a postfix expression
scan(&Token);
// Function call
if (Token.token == T_LPAREN)
return (funccall());
// An array reference
if (Token.token == T_LBRACKET)
return (array_access());
// Access into a struct or union
if (Token.token == T_DOT)
return (member_access(0));
if (Token.token == T_ARROW)
return (member_access(1));
// A variable. Check that the variable exists.
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE)
fatals("Unknown variable", Text);
switch (Token.token) {
// Post-increment: skip over the token
case T_INC:
scan(&Token);
n = mkastleaf(A_POSTINC, varptr->type, varptr, 0);
break;
// Post-decrement: skip over the token
case T_DEC:
scan(&Token);
n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0);
break;
// Just a variable reference
default:
n = mkastleaf(A_IDENT, varptr->type, varptr, 0);
}
return (n);
}
// Parse a primary factor and return an
// AST node representing it.
static struct ASTnode *primary(void) {
struct ASTnode *n;
int id;
int type = 0;
int size, class;
struct symtable *ctype;
switch (Token.token) {
case T_STATIC:
case T_EXTERN:
fatal("Compiler doesn't support static or extern local declarations");
case T_SIZEOF:
// Skip the T_SIZEOF and ensure we have a left parenthesis
scan(&Token);
if (Token.token != T_LPAREN)
fatal("Left parenthesis expected after sizeof");
scan(&Token);
// Get the type inside the parentheses
type = parse_stars(parse_type(&ctype, &class));
// Get the type's size
size = typesize(type, ctype);
rparen();
// Return a leaf node int literal with the size
return (mkastleaf(A_INTLIT, P_INT, NULL, size));
case T_INTLIT:
// For an INTLIT token, make a leaf AST node for it.
// Make it a P_CHAR if it's within the P_CHAR range
if (Token.intvalue >= 0 && Token.intvalue < 256)
n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue);
else
n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue);
break;
case T_STRLIT:
// For a STRLIT token, generate the assembly for it.
// Then make a leaf AST node for it. id is the string's label.
id = genglobstr(Text);
n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id);
break;
case T_IDENT:
return (postfix());
case T_LPAREN:
// Beginning of a parenthesised expression, skip the '('.
scan(&Token);
// If the token after is a type identifier, this is a cast expression
switch (Token.token) {
case T_IDENT:
// We have to see if the identifier matches a typedef.
// If not, treat it as an expression.
if (findtypedef(Text) == NULL) {
n = binexpr(0);
break;
}
case T_VOID:
case T_CHAR:
case T_INT:
case T_LONG:
case T_STRUCT:
case T_UNION:
case T_ENUM:
// Get the type inside the parentheses
type = parse_cast();
// Skip the closing ')' and then parse the following expression
rparen();
default:
n = binexpr(0); // Scan in the expression
}
// We now have at least an expression in n, and possibly a non-zero type in type
// if there was a cast. Skip the closing ')' if there was no cast.
if (type == 0)
rparen();
else
// Otherwise, make a unary AST node for the cast
n = mkastunary(A_CAST, type, n, NULL, 0);
return (n);
default:
fatals("Expecting a primary expression, got token", Token.tokstr);
}
// Scan in the next token and return the leaf node
scan(&Token);
return (n);
}
// Convert a binary operator token into a binary AST operation.
// We rely on a 1:1 mapping from token to AST operation
static int binastop(int tokentype) {
if (tokentype > T_EOF && tokentype <= T_SLASH)
return (tokentype);
fatald("Syntax error, token", tokentype);
return (0); // Keep -Wall happy
}
// Return true if a token is right-associative,
// false otherwise.
static int rightassoc(int tokentype) {
if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH)
return (1);
return (0);
}
// Operator precedence for each token. Must
// match up with the order of tokens in defs.h
static int OpPrec[] = {
0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS,
10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH,
15, // T_QUESTION,
20, 30, // T_LOGOR, T_LOGAND
40, 50, 60, // T_OR, T_XOR, T_AMPER
70, 70, // T_EQ, T_NE
80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE
90, 90, // T_LSHIFT, T_RSHIFT
100, 100, // T_PLUS, T_MINUS
110, 110 // T_STAR, T_SLASH
};
// Check that we have a binary operator and
// return its precedence.
static int op_precedence(int tokentype) {
int prec;
if (tokentype > T_SLASH)
fatald("Token with no precedence in op_precedence:", tokentype);
prec = OpPrec[tokentype];
if (prec == 0)
fatald("Syntax error, token", tokentype);
return (prec);
}
// prefix_expression: primary
// | '*' prefix_expression
// | '&' prefix_expression
// | '-' prefix_expression
// | ' ' prefix_expression
// | '--' prefix_expression
// ;
// Parse a prefix expression and return
// a sub-tree representing it.
struct ASTnode *prefix(void) {
struct ASTnode *tree;
switch (Token.token) {
case T_AMPER:
// Get the next token and parse it
// recursively as a prefix expression
scan(&Token);
tree = prefix();
// Ensure that it's an identifier
if (tree->op != A_IDENT)
fatal("& operator must be followed by an identifier");
// Now change the operator to A_ADDR and the type to
// a pointer to the original type
tree->op = A_ADDR;
tree->type = pointer_to(tree->type);
break;
case T_STAR:
// Get the next token and parse it
// recursively as a prefix expression
scan(&Token);
tree = prefix();
// For now, ensure it's either another deref or an
// identifier
if (tree->op != A_IDENT && tree->op != A_DEREF)
fatal("* operator must be followed by an identifier or *");
// Prepend an A_DEREF operation to the tree
tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0);
break;
case T_MINUS:
// Get the next token and parse it
// recursively as a prefix expression
scan(&Token);
tree = prefix();
// Prepend a A_NEGATE operation to the tree and
// make the child an rvalue. Because chars are unsigned,
// also widen this to int so that it's signed
tree->rvalue = 1;
tree = modify_type(tree, P_INT, 0);
tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0);
break;
case T_INVERT:
// Get the next token and parse it
// recursively as a prefix expression
scan(&Token);
tree = prefix();
// Prepend a A_INVERT operation to the tree and
// make the child an rvalue.
tree->rvalue = 1;
tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0);
break;
case T_LOGNOT:
// Get the next token and parse it
// recursively as a prefix expression
scan(&Token);
tree = prefix();
// Prepend a A_LOGNOT operation to the tree and
// make the child an rvalue.
tree->rvalue = 1;
tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0);
break;
case T_INC:
// Get the next token and parse it
// recursively as a prefix expression
scan(&Token);
tree = prefix();
// For now, ensure it's an identifier
if (tree->op != A_IDENT)
fatal(" operator must be followed by an identifier");
// Prepend an A_PREINC operation to the tree
tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0);
break;
case T_DEC:
// Get the next token and parse it
// recursively as a prefix expression
scan(&Token);
tree = prefix();
// For now, ensure it's an identifier
if (tree->op != A_IDENT)
fatal("-- operator must be followed by an identifier");
// Prepend an A_PREDEC operation to the tree
tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0);
break;
default:
tree = primary();
}
return (tree);
}
// Return an AST tree whose root is a binary operator.
// Parameter ptp is the previous token's precedence.
struct ASTnode *binexpr(int ptp) {
struct ASTnode *left, *right;
struct ASTnode *ltemp, *rtemp;
int ASTop;
int tokentype;
// Get the tree on the left.
// Fetch the next token at the same time.
left = prefix();
// If we hit one of several terminating tokens, return just the left node
tokentype = Token.token;
if (tokentype == T_SEMI || tokentype == T_RPAREN ||
tokentype == T_RBRACKET || tokentype == T_COMMA ||
tokentype == T_COLON || tokentype == T_RBRACE) {
left->rvalue = 1;
return (left);
}
// While the precedence of this token is more than that of the
// previous token precedence, or it's right associative and
// equal to the previous token's precedence
while ((op_precedence(tokentype) > ptp) ||
(rightassoc(tokentype) && op_precedence(tokentype) == ptp)) {
// Fetch in the next integer literal
scan(&Token);
// Recursively call binexpr() with the
// precedence of our token to build a sub-tree
right = binexpr(OpPrec[tokentype]);
// Determine the operation to be performed on the sub-trees
ASTop = binastop(tokentype);
switch (ASTop) {
case A_TERNARY:
// Ensure we have a ':' token, scan in the expression after it
match(T_COLON, ":");
ltemp= binexpr(0);
// Build and return the AST for this statement. Use the middle
// expression's type as the return type. XXX We should also
// consider the third expression's type.
return (mkastnode(A_TERNARY, right->type, left, right, ltemp, NULL, 0));
case A_ASSIGN:
// Assignment
// Make the right tree into an rvalue
right->rvalue = 1;
// Ensure the right's type matches the left
right = modify_type(right, left->type, 0);
if (right == NULL)
fatal("Incompatible expression in assignment");
// Make an assignment AST tree. However, switch
// left and right around, so that the right expression's
// code will be generated before the left expression
ltemp = left;
left = right;
right = ltemp;
break;
default:
// We are not doing a ternary or assignment, so both trees should
// be rvalues. Convert both trees into rvalue if they are lvalue trees
left->rvalue = 1;
right->rvalue = 1;
// Ensure the two types are compatible by trying
// to modify each tree to match the other's type.
ltemp = modify_type(left, right->type, ASTop);
rtemp = modify_type(right, left->type, ASTop);
if (ltemp == NULL && rtemp == NULL)
fatal("Incompatible types in binary expression");
if (ltemp != NULL)
left = ltemp;
if (rtemp != NULL)
right = rtemp;
}
// Join that sub-tree with ours. Convert the token
// into an AST operation at the same time.
left =
mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0);
// Update the details of the current token.
// If we hit a terminating token, return just the left node
tokentype = Token.token;
if (tokentype == T_SEMI || tokentype == T_RPAREN ||
tokentype == T_RBRACKET || tokentype == T_COMMA ||
tokentype == T_COLON || tokentype == T_RBRACE) {
left->rvalue = 1;
return (left);
}
}
// Return the tree we have when the precedence
// is the same or lower
left->rvalue = 1;
return (left);
}