Skip to content

Commit fe58757

Browse files
Make sure we get a matching number of #if and #endifs before copying function to output
glibc has some weird corner cases in `sysdeps/generic/ldsodefs.h` where a struct definition depends on a macro definition (SHARED), but the attributes of its fields do not. Here we devise a heuristic to try to detect such cases by checking whether the number of #ifs and #endifs in the function matches. If it does not, we fall back to AST dumping. Closes #147 Signed-off-by: Giuliano Belinassi <gbelinassi@suse.de>
1 parent cd62a5c commit fe58757

File tree

5 files changed

+108
-4
lines changed

5 files changed

+108
-4
lines changed

libcextract/LLVMMisc.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,55 @@ bool Is_Decl_Equivalent_To(Decl *a, Decl *b)
315315

316316
return a_str == b_str;
317317
}
318+
319+
#define TOKEN_VECTOR " ().,;+-*/^|&{}[]<>^&|!\r\n\t"
320+
321+
/** Check if string has unmatched #if, #ifdef, #ifndef. */
322+
bool Has_Balanced_Ifdef(const StringRef &string)
323+
{
324+
/* Create a temporary buffer for strtok and copy the string. */
325+
size_t len = string.size();
326+
char buf[len + 1];
327+
memcpy(buf, string.data(), len);
328+
buf[len] = '\0';
329+
330+
/* Count the number of parenthesis problem. ifdef, ifndef, if increases,
331+
endif decreases. */
332+
int balance = 0;
333+
334+
/* Tokenize. */
335+
bool in_comments = false;
336+
char *tok = strtok(buf, TOKEN_VECTOR);
337+
while (tok != nullptr) {
338+
/* There is the silly case in which #ifdef is written as `# ifdef`. */
339+
if (*tok == '#') {
340+
/* Now check if it got into the same token or if we need to pull another
341+
token. */
342+
if (tok[1] == '\0') {
343+
tok = strtok(nullptr, TOKEN_VECTOR);
344+
} else {
345+
tok++;
346+
}
347+
348+
assert(tok != nullptr && "tok is null! why?");
349+
350+
/* Case 1: #ifdef, #ifndef, and #if has "if" as prefix. */
351+
if (prefix("if", tok)) {
352+
balance++;
353+
}
354+
355+
/* Case 2: #endif. Decrease the balance counter. */
356+
else if (strcmp("endif", tok) == 0) {
357+
balance--;
358+
if (balance < 0) {
359+
/* Impossible. This means there is an #endif for no matching #if. */
360+
return false;
361+
}
362+
}
363+
}
364+
365+
tok = strtok(nullptr, TOKEN_VECTOR);
366+
}
367+
368+
return balance == 0;
369+
}

libcextract/LLVMMisc.hh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,6 @@ DeclContextLookupResult Get_Decl_From_Symtab(ASTUnit *ast, const StringRef &name
109109

110110
/** Check if two Decls are equivalent. */
111111
bool Is_Decl_Equivalent_To(Decl *a, Decl *b);
112+
113+
/** Check if every #if, #ifdef and #ifndef in string has a matching #endif.
    Returns true when they are balanced, false otherwise. */
114+
bool Has_Balanced_Ifdef(const StringRef &string);

libcextract/PrettyPrint.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,29 @@ void PrettyPrint::Print_Decl_Raw(Decl *decl)
191191
}
192192
}
193193
} else {
194-
Out << decl_source;
194+
/* Check if the output string has a balanced number of ifdefs. This is
195+
required because of the following construct from glibc:
196+
197+
#ifdef !SHARED
198+
#else
199+
struct rtld_global_ro {
200+
#endif
201+
202+
In case we want to print rtld_global_ro, PrettyPrint will get the text
203+
from struct rtld_global_ro and below, which would only include the
204+
#endif, and not the #ifdef. In case its not balanced we fallback to
205+
AST dumping. */
206+
if (NamedDecl *d = dyn_cast<NamedDecl>(decl)) {
207+
if (d->getName() == "rtld_global_ro") {
208+
llvm::outs() << "debug me \n";
209+
}
210+
}
211+
if (Has_Balanced_Ifdef(decl_source)) {
212+
Out << decl_source;
213+
} else {
214+
/* In case its not balanced, fallback to AST dump. */
215+
decl->print(Out, LangOpts);
216+
}
195217
}
196218
} else {
197219
/* Else, we fallback to AST Dumping. */

testsuite/small/ifdef-1.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
/* { dg-options "-DCE_EXTRACT_FUNCTIONS=function -DCE_NO_EXTERNALIZATION" }*/
2-
/* { dg-xfail } Tests fails because we currently can't track that the endif
3-
comes from #ifdef AAA. */
4-
52

63
//#define AA
74

testsuite/small/ifdef-2.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/* { dg-options "-DCE_EXTRACT_FUNCTIONS=f -DCE_NO_EXTERNALIZATION" }*/
2+
3+
#define SHARED
4+
5+
#define IS_IN(x) 0
6+
7+
#ifndef SHARED
8+
# define GLRO(name) _##name
9+
#else
10+
# if IS_IN (rtld)
11+
# define GLRO(name) _rtld_local_ro._##name
12+
# else
13+
# define GLRO(name) _rtld_global_ro._##name
14+
# endif
15+
struct rtld_global_ro
16+
{
17+
#endif
18+
int field1;
19+
#ifdef SHARED
20+
int field2;
21+
};
22+
#endif
23+
24+
25+
/* Function named in CE_EXTRACT_FUNCTIONS.  It reads a field of
   struct rtld_global_ro, and the dg-final check below expects that struct's
   definition to appear in the output.  */
int f(struct rtld_global_ro *r)
26+
{
27+
return r->field1;
28+
}
29+
30+
/* { dg-final { scan-tree-dump "struct rtld_global_ro ?\n?{" } } */

0 commit comments

Comments
 (0)