Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions src/frontc/cparser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ let smooth_expression lst =

let currentFunctionName = ref "<outside any function>"

(* Set to true while parsing a typedef declaration's declarator list, so that
declarator_no_init knows not to call add_identifier for typedef names. *)
let is_typedef_decl = ref false

let announceFunctionName ((n, decl, _, _):name) =
!Lexerhack.add_identifier n;
(* Start a context that includes the parameter names and the whole body.
Expand Down Expand Up @@ -115,6 +119,7 @@ let applyPointer (ptspecs: attribute list list) (dt: decl_type)
loop ptspecs

let doDeclaration (loc: cabsloc) (specs: spec_elem list) (nl: init_name list) : definition =
is_typedef_decl := false;
if isTypedef specs then begin
(* Tell the lexer about the new type names *)
List.iter (fun ((n, _, _, _), _) -> !Lexerhack.add_type n) nl;
Expand All @@ -123,8 +128,8 @@ let doDeclaration (loc: cabsloc) (specs: spec_elem list) (nl: init_name list) :
if nl = [] then
ONLYTYPEDEF (specs, loc)
else begin
(* Tell the lexer about the new variable names *)
List.iter (fun ((n, _, _, _), _) -> !Lexerhack.add_identifier n) nl;
(* add_identifier is called in declarator_no_init / declarator_init_start as each
declarator is parsed (C11 6.2.1.7), so nothing to do here. *)
DECDEF ((specs, nl), loc)
end

Expand Down Expand Up @@ -388,7 +393,7 @@ let transformOffsetOf (speclist, dtype) member =

%type <Cabs.init_name> init_declarator
%type <Cabs.init_name list> init_declarator_list
%type <Cabs.name> declarator
%type <Cabs.name> declarator declarator_no_init declarator_init_start
%type <Cabs.name * expression option> field_decl
%type <(Cabs.name * expression option) list> field_decl_list
%type <string * Cabs.decl_type> direct_decl
Expand Down Expand Up @@ -1036,14 +1041,34 @@ init_declarator_attr:

;
init_declarator: /* ISO 6.7 */
declarator { ($1, NO_INIT) }
| declarator EQ init_expression location
{ let (n, d, a, l) = $1 in ((n, d, a, joinLoc l $4), $3) }
declarator_no_init { ($1, NO_INIT) }
| declarator_init_start init_expression location
{ let (n, d, a, l) = $1 in ((n, d, a, joinLoc l $3), $2) }
;

/* (* Parses "declarator" (without initializer) and immediately adds the declared name
as a variable identifier in the lexer hack, for non-typedef declarations only,
so that subsequent declarators in the same declaration see the name as an
identifier, not as a type (C11 6.2.1.7: scope begins just after the completion
of its declarator). For typedef declarations (is_typedef_decl = true), the name
is registered later via add_type in doDeclaration. *) */
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't looked at this in detail but introducing even more global state into the parser is a bit worrying.

Is typedef scoping actually defined to work differently like this by the standard? Or maybe it would also be the right thing to do the add_type stuff earlier than at the very end?
Perhaps one typedef could also define multiple types with commas (I haven't looked at the standard grammar) and then it would also matter there (and even be more correct)?

I might be optimistically hoping for more consistency and less special-casing than C actually has...

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot Look up the details of this in the standard and either implement Simmo's suggestion or argue why it is not a good idea.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implemented in aa40855.

C11 6.2.1.7: "Any other identifier has scope that begins just after the completion of its declarator." This applies uniformly to all declarations — typedef names included. So moving add_type earlier (per-declarator) is both correct and more standard-conformant.

What changed: declarator_no_init now handles both cases with a single branch:

  • add_type n when is_typedef_decl = true (typedef declarators)
  • add_identifier n when is_typedef_decl = false (variable declarators)

doDeclaration now has zero lexer management — it only builds AST nodes (TYPEDEF / ONLYTYPEDEF / DECDEF).

On the is_typedef_decl global flag: it's still necessary. Yacc reductions are bottom-up — by the time declarator_no_init reduces, the TYPEDEF keyword has already been consumed (in decl_spec_list_common). There's no way to pass this context through grammar parameters in standard yacc without a mid-rule side effect, which is what is_typedef_decl provides. The flag is set by the TYPEDEF rule and cleared by doDeclaration, giving it a well-defined lifetime that spans exactly the declarator list of a typedef declaration.

On typedef commas: per the C grammar, typedef int T, *TP; uses the same init_declarator_listinit_declarator path as variables. With this change, T and TP each get add_type called immediately after their declarator — so a hypothetical typedef int T, T_alias; would see T as already registered when T_alias is parsed. This is C11-correct behaviour.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way, nonsense like this is also allowed:

typedef int foo, bar[sizeof(foo)];

int main() {
    _Static_assert(sizeof(foo) == 4);
    _Static_assert(sizeof(bar) == 4*4);
    return 0;
}

declarator_no_init:
declarator { let (n, _, _, _) = $1 in
if not !is_typedef_decl then !Lexerhack.add_identifier n;
$1 }
;

/* (* Parses "declarator =" and adds the declared name as a variable identifier
in the lexer hack, so that in the initializer the name shadows any typedef
with the same name (C11 6.2.1.7: scope begins just after the completion of the
declarator). *) */
declarator_init_start:
declarator EQ { let (n, _, _, _) = $1 in !Lexerhack.add_identifier n; $1 }
Comment thread
michael-schwarz marked this conversation as resolved.
Comment on lines +1053 to +1065
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not do the lexer hack in declarator, instead of duplicating it at both new rules?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving the registration into declarator itself would fire in every context where declarator is used — including field_decl (struct members), parameter_decl (function parameters), function_def_start (function definitions), and old_pardef (K&R-style parameters). All of those must not trigger lexer registration. The two helper rules are the minimal way to isolate the registration to the init_declarator context, while also handling the semantic difference: declarator_no_init must conditionally call add_type or add_identifier (based on is_typedef_decl), whereas declarator_init_start always calls add_identifier (typedef declarations cannot have initializers).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An interesting question here is: where exactly in the C standard is it stated that typedefs cannot have initializers?
Of course, I don't know what it would mean, but it'd also be strange if it isn't forbidden.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently, the grammar doesn't explicitly ban it, but one can get some kind of semantic ban out of

6.7.9 (8) An initializer specifies the initial value stored in an object.

C11 draft

as typedefs do not define objects.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still, there's no need for two new grammar rules. The declarator_no_init would suffice and could be reused also in the EQ case. It just needs a suitable name then.

Having declarator_init_start hardcode add_identifier is odd because the grammar doesn't forbid typedef initializers. As is, the parser accepts such a thing, but adds it to the lexer hack with the wrong kind which will screw up all the following parsing.
It seems more reasonable to me to just use the same logic for both and not try to be clever about it if the grammar was never intended to be so precise.

Instead, I would expect Cabs2cil to reject a typedef with initializer. I haven't checked if it does but surely it cannot use it for anything meaningful.
Either way, then the following lexer hack state wouldn't be affected by the strange but valid grammatical construct.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what exactly you suggest here. If you're happy with the rest of it feel free to push directly to this branch. I'll have a look at it and then we can merge.

;

decl_spec_list_common: /* ISO 6.7 */
/* ISO 6.7.1 */
| TYPEDEF decl_spec_list_opt { SpecTypedef :: $2, $1 }
| TYPEDEF decl_spec_list_opt { is_typedef_decl := true; SpecTypedef :: $2, $1 }
| EXTERN decl_spec_list_opt { SpecStorage EXTERN :: $2, $1 }
| STATIC decl_spec_list_opt { SpecStorage STATIC :: $2, $1 }
| AUTO decl_spec_list_opt { SpecStorage AUTO :: $2, $1 }
Expand Down
43 changes: 43 additions & 0 deletions test/small1/typedef_varname.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/* Test for typedef and variable name conflict (issue #114).
A variable with the same name as a typedef should shadow it in its initializer,
per C11 6.2.1.7: "Any other identifier has scope that begins just after the
completion of its declarator." */
#include "testharness.h"
#include <stdint.h>
#include <stdlib.h>

typedef struct raxNode {
uint32_t iskey:1;
uint32_t isnull:1;
uint32_t iscompr:1;
uint32_t size:29;
unsigned char data[];
} raxNode;

typedef struct rax {
raxNode *head;
uint64_t numele;
uint64_t numnodes;
} rax;

typedef int mytype;

int main() {
rax *rax = malloc(sizeof(*rax)); /* variable rax shadows typedef rax in initializer */
if (rax == 0) E(1);
free(rax);

/* NO_INIT variable in the middle of a declaration list shadows a typedef.
After "mytype a = 1, mytype", the name "mytype" is an identifier (variable).
So "b = sizeof mytype" (without parens) is only valid when mytype is a variable
(types require parens: "sizeof(type)"). This fails to parse without the fix. */
{
mytype a = 1, mytype, b = sizeof mytype;
mytype = 5;
if (a != 1) E(2);
if (mytype != 5) E(3);
if (b != sizeof(int)) E(4);
}

SUCCESS;
}
Comment thread
michael-schwarz marked this conversation as resolved.
48 changes: 48 additions & 0 deletions test/small1/typedef_varname_noinit.c
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see this test testing anything different from the other one. It's just the same situation three times.
Also the name noinit makes no sense: all of the variables are being initialized. It's the other test that also includes a variable without the initializer.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 66f8d59. The unique test (T T = 1, U = T + 1) has been merged into typedef_varname.c and typedef_varname_noinit.c has been deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/* Test for typedef/variable name conflict in a declaration list with multiple
initialized declarators.

Per C11 6.2.1.7, a declared identifier's scope begins just after the completion
of its declarator. For an initialized declarator "T = expr", scope begins just
after the declarator (before the "="), so the initializer of a LATER declarator
in the same list can refer to T as a variable.

Concretely, after "T T = 1", the name T is registered as a variable (identifier)
by the lexer hack, so that the next initializer "T + 1" correctly sees T as a
variable.

This test fails to parse (without the fix) because the initializer T + 1 would
see T as a type name (NAMED_TYPE), making "T + 1" a syntax error. */
#include "testharness.h"

typedef int T;
typedef int mytype;

int test_init_chain(void) {
/* "T T = 1": declarator_init_start registers T as an identifier before "= 1".
"U = T + 1": T in the initializer must be the variable T (value 1), not the
typedef. If it's still a type, "T + 1" is a syntax error. */
{
T T = 1, U = T + 1;
if (T != 1) E(1);
if (U != 2) E(2);
}
return 0;
}

int test_typedef_varname_init(void) {
/* Original issue: rax-style conflict where a typedef and variable share a name.
The variable's initializer can use the variable name as a regular identifier. */
{
mytype mytype = 42;
if (mytype != 42) E(3);
(void)mytype;
}
return 0;
}

int main(void) {
if (test_init_chain()) return 1;
if (test_typedef_varname_init()) return 1;
SUCCESS;
}

2 changes: 2 additions & 0 deletions test/testcil.pl
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,8 @@ sub addToGroup {
addTest("testrun/for1 ");
addTest("testrun/void _GNUCC=1");
addTest("test/voidtypedef ");
addTest("testrun/typedef_varname ");
addTest("testrun/typedef_varname_noinit ");
addTest("testrun/wrongnumargs ");
addBadComment("testrun/wrongnumargs",
"Notbug. Should fail since we don't pad argument lists");
Expand Down
Loading