-
Notifications
You must be signed in to change notification settings - Fork 24
Fix typedef/variable name conflict parsing error #219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 8 commits
3494d12
98004a6
f9a7b12
d8822cc
160543c
d977be5
363114d
5960536
aa40855
66f8d59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,6 +78,10 @@ let smooth_expression lst = | |
|
|
||
| let currentFunctionName = ref "<outside any function>" | ||
|
|
||
| (* Set to true while parsing a typedef declaration's declarator list, so that | ||
| declarator_no_init knows not to call add_identifier for typedef names. *) | ||
| let is_typedef_decl = ref false | ||
|
|
||
| let announceFunctionName ((n, decl, _, _):name) = | ||
| !Lexerhack.add_identifier n; | ||
| (* Start a context that includes the parameter names and the whole body. | ||
|
|
@@ -115,6 +119,7 @@ let applyPointer (ptspecs: attribute list list) (dt: decl_type) | |
| loop ptspecs | ||
|
|
||
| let doDeclaration (loc: cabsloc) (specs: spec_elem list) (nl: init_name list) : definition = | ||
| is_typedef_decl := false; | ||
| if isTypedef specs then begin | ||
| (* Tell the lexer about the new type names *) | ||
| List.iter (fun ((n, _, _, _), _) -> !Lexerhack.add_type n) nl; | ||
|
|
@@ -123,8 +128,8 @@ let doDeclaration (loc: cabsloc) (specs: spec_elem list) (nl: init_name list) : | |
| if nl = [] then | ||
| ONLYTYPEDEF (specs, loc) | ||
| else begin | ||
| (* Tell the lexer about the new variable names *) | ||
| List.iter (fun ((n, _, _, _), _) -> !Lexerhack.add_identifier n) nl; | ||
| (* add_identifier is called in declarator_no_init / declarator_init_start as each | ||
| declarator is parsed (C11 6.2.1.7), so nothing to do here. *) | ||
| DECDEF ((specs, nl), loc) | ||
| end | ||
|
|
||
|
|
@@ -388,7 +393,7 @@ let transformOffsetOf (speclist, dtype) member = | |
|
|
||
| %type <Cabs.init_name> init_declarator | ||
| %type <Cabs.init_name list> init_declarator_list | ||
| %type <Cabs.name> declarator | ||
| %type <Cabs.name> declarator declarator_no_init declarator_init_start | ||
| %type <Cabs.name * expression option> field_decl | ||
| %type <(Cabs.name * expression option) list> field_decl_list | ||
| %type <string * Cabs.decl_type> direct_decl | ||
|
|
@@ -1036,14 +1041,34 @@ init_declarator_attr: | |
|
|
||
| ; | ||
| init_declarator: /* ISO 6.7 */ | ||
| declarator { ($1, NO_INIT) } | ||
| | declarator EQ init_expression location | ||
| { let (n, d, a, l) = $1 in ((n, d, a, joinLoc l $4), $3) } | ||
| declarator_no_init { ($1, NO_INIT) } | ||
| | declarator_init_start init_expression location | ||
| { let (n, d, a, l) = $1 in ((n, d, a, joinLoc l $3), $2) } | ||
| ; | ||
|
|
||
| /* (* Parses "declarator" (without initializer) and immediately adds the declared name | ||
| as a variable identifier in the lexer hack, for non-typedef declarations only, | ||
| so that subsequent declarators in the same declaration see the name as an | ||
| identifier, not as a type (C11 6.2.1.7: scope begins just after the completion | ||
| of its declarator). For typedef declarations (is_typedef_decl = true), the name | ||
| is registered later via add_type in doDeclaration. *) */ | ||
| declarator_no_init: | ||
| declarator { let (n, _, _, _) = $1 in | ||
| if not !is_typedef_decl then !Lexerhack.add_identifier n; | ||
| $1 } | ||
| ; | ||
|
|
||
| /* (* Parses "declarator =" and adds the declared name as a variable identifier | ||
| in the lexer hack, so that in the initializer the name shadows any typedef | ||
| with the same name (C11 6.2.1.7: scope begins just after the completion of the | ||
| declarator). *) */ | ||
| declarator_init_start: | ||
| declarator EQ { let (n, _, _, _) = $1 in !Lexerhack.add_identifier n; $1 } | ||
|
michael-schwarz marked this conversation as resolved.
Comment on lines
+1053
to
+1065
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not do the lexer hack in
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moving the registration into
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An interesting question here is: where exactly in the C standard is it stated that
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apparently, the grammar doesn't explicitly ban it, but one can get some kind of semantic ban out of
C11 draft as typedefs do not define objects.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still, there's no need for two new grammar rules. The Having Instead, I would expect
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure what exactly you suggest here. If you're happy with the rest of it feel free to push directly to this branch. I'll have a look at it and then we can merge. |
||
| ; | ||
|
|
||
| decl_spec_list_common: /* ISO 6.7 */ | ||
| /* ISO 6.7.1 */ | ||
| | TYPEDEF decl_spec_list_opt { SpecTypedef :: $2, $1 } | ||
| | TYPEDEF decl_spec_list_opt { is_typedef_decl := true; SpecTypedef :: $2, $1 } | ||
| | EXTERN decl_spec_list_opt { SpecStorage EXTERN :: $2, $1 } | ||
| | STATIC decl_spec_list_opt { SpecStorage STATIC :: $2, $1 } | ||
| | AUTO decl_spec_list_opt { SpecStorage AUTO :: $2, $1 } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| /* Test for typedef and variable name conflict (issue #114). | ||
| A variable with the same name as a typedef should shadow it in its initializer, | ||
| per C11 6.2.1.7: "Any other identifier has scope that begins just after the | ||
| completion of its declarator." */ | ||
| #include "testharness.h" | ||
| #include <stdint.h> | ||
| #include <stdlib.h> | ||
|
|
||
| typedef struct raxNode { | ||
| uint32_t iskey:1; | ||
| uint32_t isnull:1; | ||
| uint32_t iscompr:1; | ||
| uint32_t size:29; | ||
| unsigned char data[]; | ||
| } raxNode; | ||
|
|
||
| typedef struct rax { | ||
| raxNode *head; | ||
| uint64_t numele; | ||
| uint64_t numnodes; | ||
| } rax; | ||
|
|
||
| typedef int mytype; | ||
|
|
||
| int main() { | ||
| rax *rax = malloc(sizeof(*rax)); /* variable rax shadows typedef rax in initializer */ | ||
| if (rax == 0) E(1); | ||
| free(rax); | ||
|
|
||
| /* NO_INIT variable in the middle of a declaration list shadows a typedef. | ||
| After "mytype a = 1, mytype", the name "mytype" is an identifier (variable). | ||
| So "b = sizeof mytype" (without parens) is only valid when mytype is a variable | ||
| (types require parens: "sizeof(type)"). This fails to parse without the fix. */ | ||
| { | ||
| mytype a = 1, mytype, b = sizeof mytype; | ||
| mytype = 5; | ||
| if (a != 1) E(2); | ||
| if (mytype != 5) E(3); | ||
| if (b != sizeof(int)) E(4); | ||
| } | ||
|
|
||
| SUCCESS; | ||
| } | ||
|
michael-schwarz marked this conversation as resolved.
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see this test testing anything different from the other one. It's just the same situation three times.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 66f8d59. The unique test ( |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| /* Test for typedef/variable name conflict in a declaration list with multiple | ||
| initialized declarators. | ||
|
|
||
| Per C11 6.2.1.7, a declared identifier's scope begins just after the completion | ||
| of its declarator. For an initialized declarator "T = expr", scope begins just | ||
| after the declarator (before the "="), so the initializer of a LATER declarator | ||
| in the same list can refer to T as a variable. | ||
|
|
||
| Concretely, after "T T = 1", the name T is registered as a variable (identifier) | ||
| by the lexer hack, so that the next initializer "T + 1" correctly sees T as a | ||
| variable. | ||
|
|
||
| This test fails to parse (without the fix) because the initializer T + 1 would | ||
| see T as a type name (NAMED_TYPE), making "T + 1" a syntax error. */ | ||
| #include "testharness.h" | ||
|
|
||
| typedef int T; | ||
| typedef int mytype; | ||
|
|
||
| int test_init_chain(void) { | ||
| /* "T T = 1": declarator_init_start registers T as an identifier before "= 1". | ||
| "U = T + 1": T in the initializer must be the variable T (value 1), not the | ||
| typedef. If it's still a type, "T + 1" is a syntax error. */ | ||
| { | ||
| T T = 1, U = T + 1; | ||
| if (T != 1) E(1); | ||
| if (U != 2) E(2); | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| int test_typedef_varname_init(void) { | ||
| /* Original issue: rax-style conflict where a typedef and variable share a name. | ||
| The variable's initializer can use the variable name as a regular identifier. */ | ||
| { | ||
| mytype mytype = 42; | ||
| if (mytype != 42) E(3); | ||
| (void)mytype; | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| int main(void) { | ||
| if (test_init_chain()) return 1; | ||
| if (test_typedef_varname_init()) return 1; | ||
| SUCCESS; | ||
| } | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't looked at this in detail but introducing even more global state into the parser is a bit worrying.
Is
typedefscoping actually defined to work differently like this by the standard? Or maybe it would also be the right thing to do theadd_typestuff earlier than at the very end?Perhaps one
typedefcould also define multiple types with commas (I haven't looked at the standard grammar) and then it would also matter there (and even be more correct)?I might be optimistically hoping for more consistency and less special-casing than C actually has...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot Look up the details of this in the standard and either implement Simmo's suggestion or argue why it is not a good idea.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implemented in aa40855.
C11 6.2.1.7: "Any other identifier has scope that begins just after the completion of its declarator." This applies uniformly to all declarations — typedef names included. So moving
add_typeearlier (per-declarator) is both correct and more standard-conformant.What changed:
declarator_no_initnow handles both cases with a single branch:add_type nwhenis_typedef_decl = true(typedef declarators)add_identifier nwhenis_typedef_decl = false(variable declarators)doDeclarationnow has zero lexer management — it only builds AST nodes (TYPEDEF / ONLYTYPEDEF / DECDEF).On the
is_typedef_declglobal flag: it's still necessary. Yacc reductions are bottom-up — by the timedeclarator_no_initreduces, theTYPEDEFkeyword has already been consumed (indecl_spec_list_common). There's no way to pass this context through grammar parameters in standard yacc without a mid-rule side effect, which is whatis_typedef_declprovides. The flag is set by theTYPEDEFrule and cleared bydoDeclaration, giving it a well-defined lifetime that spans exactly the declarator list of a typedef declaration.On typedef commas: per the C grammar,
typedef int T, *TP;uses the sameinit_declarator_list→init_declaratorpath as variables. With this change,TandTPeach getadd_typecalled immediately after their declarator — so a hypotheticaltypedef int T, T_alias;would seeTas already registered whenT_aliasis parsed. This is C11-correct behaviour.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By the way, nonsense like this is also allowed: