Skip to content

Commit 50e7a74

Browse files
authored
Merge pull request #3592 from masatake/perl--revise-heredoc
Perl: skip string literals when collecting heredoc markers
2 parents 2fc641e + 6f5b873 commit 50e7a74

File tree

7 files changed

+225
-27
lines changed

7 files changed

+225
-27
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
--sort=no
2+
--kinds-Perl=+{heredoc}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
f0tag input.pl /^sub f0tag() {}$/;" s
2+
f1tag input.pl /^sub f1tag() {}$/;" s
3+
f2tag input.pl /^sub f2tag() {}$/;" s
4+
f3tag input.pl /^sub f3tag() {}$/;" s
5+
hereodc0tag input.pl /^print 'cat <<<heredoct0notag' . <<hereodc0tag;$/;" h
6+
f4tag input.pl /^sub f4tag() {}$/;" s
7+
hereodc1tag input.pl /^print "cat <<<heredoct1notag" . <<hereodc1tag;$/;" h
8+
f5tag input.pl /^sub f5tag() {}$/;" s
9+
hereodc2tag input.pl /^print `cat <<<heredoct1notag` . <<hereodc2tag;$/;" h
10+
f6tag input.pl /^sub f6tag() {}$/;" s
11+
heredoc3tag input.pl /^print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';$/;" h
12+
heredoc4tag input.pl /^print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';$/;" h
13+
f7tag input.pl /^sub f7tag() {}$/;" s
14+
f8tag input.pl /^sub f8tag() {}$/;" s
15+
f9tag input.pl /^sub f9tag() {}$/;" s
16+
five_sub input-0.pl /^sub five_sub() {$/;" s
17+
five_mark0 input-0.pl /^print 3 + 2 . <<five_mark0;$/;" h
18+
five_mark1 input-0.pl /^print 3 + 2 . <<~five_mark1;$/;" h
19+
five_mark2 input-0.pl /^print 3 + 2 . << "five_mark2";$/;" h
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
print 3 + 2 <<5;
2+
5
3+
;
4+
5+
sub five_sub() {
6+
return 5
7+
}
8+
print 3 + 2 << five_sub;
9+
five
10+
;
11+
12+
print 3 + 2 . <<five_mark0;
13+
a
14+
five_mark0
15+
16+
print 3 + 2 . <<~five_mark1;
17+
a
18+
five_mark1
19+
20+
print 3 + 2 << ~five_sub;
21+
five_sub;
22+
23+
print 3 + 2 . << "five_mark2";
24+
ox
25+
five_mark2
26+
27+
28+
29+
30+
31+
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Derived from #3588 submitted by @petdance
2+
3+
sub f0tag() {}
4+
5+
my $x = '<<NOT_A_HEREDOC0';
6+
7+
sub f1tag() {}
8+
9+
print "<<NOT_A_HEREDOC0\n";
10+
11+
sub f2tag() {}
12+
13+
print `cat <<<BASH_HERE_STRING`;
14+
15+
sub f3tag() {}
16+
17+
print 'cat <<<heredoct0notag' . <<hereodc0tag;
18+
sub f0notag() {}
19+
hereodc0tag
20+
21+
sub f4tag() {}
22+
23+
print "cat <<<heredoct1notag" . <<hereodc1tag;
24+
sub f1notag() {}
25+
hereodc1tag
26+
27+
sub f5tag() {}
28+
29+
print `cat <<<heredoct1notag` . <<hereodc2tag;
30+
sub f2notag() {}
31+
hereodc2tag
32+
33+
sub f6tag() {}
34+
35+
print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';
36+
sub f3notag() {}
37+
heredoc3tag
38+
sub f4notag() {}
39+
heredoc4tag
40+
sub f7tag() {}
41+
42+
sub f8tag() {}
43+
44+
my $i = 1;
45+
print "a" . 3 << $i;
46+
47+
sub f9tag() {}

Units/parser-perl.r/perl-module.d/expected.tags

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@ sort input.pl /^use sort qw(stable _quicksort _mergesort);$/;" M line:10 rol
1010
integer input.pl /^no integer;$/;" M line:12 roles:unused extras:reference
1111
strict input.pl /^no strict 'refs';$/;" M line:13 roles:unused extras:reference
1212
warnings input.pl /^no warnings;$/;" M line:14 roles:unused extras:reference
13+
5.006_001 input.pl /^use 5.006_001;$/;" M line:16 roles:used extras:reference
14+
5.006_001 input.pl /^no 5.006_001;$/;" M line:17 roles:unused extras:reference

Units/parser-perl.r/perl-module.d/input.pl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@
1212
no integer;
1313
no strict 'refs';
1414
no warnings;
15+
16+
use 5.006_001;
17+
no 5.006_001;

parsers/perl.c

Lines changed: 121 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,10 @@
2222
#include "routines.h"
2323
#include "selectors.h"
2424
#include "subparser.h"
25+
#include "trace.h"
2526
#include "vstring.h"
2627
#include "xtag.h"
2728

28-
#define TRACE_PERL_C 0
29-
#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf
30-
3129
/*
3230
* DATA DEFINITIONS
3331
*/
@@ -443,41 +441,123 @@ static unsigned char *readHereDocMarker (unsigned char *line,
443441
return cp;
444442
}
445443

446-
static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
447-
const unsigned char *line)
444+
/*
 * Kind of quoted string a scan position can be inside of.
 *
 * Every member other than STRING_TYPE_NONE has the numeric value of its
 * own quote character, so a member can be compared directly against a
 * byte taken from the input line.
 */
enum stringType {
	STRING_TYPE_NONE    = '\0',	/* not inside a string */
	STRING_TYPE_SINGLEQ = '\'',	/* '...' */
	STRING_TYPE_DOUBLEQ = '"',	/* "..." */
	STRING_TYPE_BACKQ   = '`',	/* `...` */
};
450+
451+
452+
static const unsigned char *escapeFromString (const unsigned char *line,
453+
const unsigned char *end,
454+
enum stringType stype)
455+
{
456+
bool in_escape = false;
457+
const unsigned char *cp = line;
458+
459+
switch (stype)
460+
{
461+
case STRING_TYPE_NONE:
462+
return line;
463+
case STRING_TYPE_SINGLEQ:
464+
case STRING_TYPE_DOUBLEQ:
465+
case STRING_TYPE_BACKQ:
466+
while ((end && cp < end) || (end == NULL && *cp != '\0'))
467+
{
468+
if (in_escape)
469+
{
470+
cp++;
471+
in_escape = false;
472+
}
473+
else if (*cp == '\\')
474+
{
475+
cp++;
476+
in_escape = true;
477+
}
478+
else if (*cp == (unsigned char)stype)
479+
{
480+
cp++;
481+
return cp;
482+
}
483+
else
484+
cp++;
485+
}
486+
return NULL;
487+
default:
488+
AssertNotReached ();
489+
return NULL;
490+
}
491+
}
492+
493+
static enum stringType isInString (const unsigned char *line,
494+
const unsigned char *end)
495+
{
496+
const unsigned char *cp = line;
497+
enum stringType t = STRING_TYPE_NONE;
498+
499+
while (cp && cp < end)
500+
{
501+
switch (*cp)
502+
{
503+
case '\'':
504+
case '\"':
505+
case '`':
506+
t = *cp;
507+
break;
508+
default:
509+
t = STRING_TYPE_NONE;
510+
break;
511+
}
512+
513+
cp++;
514+
if (t != STRING_TYPE_NONE)
515+
cp = escapeFromString (cp, end, t);
516+
}
517+
518+
return (cp == NULL)? t: STRING_TYPE_NONE;
519+
}
520+
521+
522+
static const unsigned char *collectHereDocMarker (struct hereDocMarkerManager *mgr,
523+
const unsigned char *line)
448524
{
449525
unsigned char *starter = (unsigned char*)strstr((char *)line, "<<");
450526
unsigned char *cp = NULL;
451527
bool indented = false;
452528
unsigned char quote_char = 0;
529+
bool space_seen = false;
453530

454531
if (starter == NULL)
455-
return;
532+
return NULL;
533+
534+
enum stringType stype;
535+
if ((stype = isInString(line, starter)) != STRING_TYPE_NONE)
536+
return escapeFromString (starter + 2, NULL, stype);
456537

457538
cp = starter + 2;
458539
while (isspace (*cp))
540+
{
541+
/* To avoid confusing with a shift operator, we track
542+
* spaces after the starter (<<). */
543+
space_seen = true;
459544
cp++;
545+
}
460546

461547
if (*cp == '\0')
462-
return;
463-
464-
/* Is shift operator? */
465-
if (isdigit (*cp))
466-
{
467-
/* Scan the rest of the string. */
468-
collectHereDocMarkers (mgr, ++cp);
469-
return;
470-
}
548+
return NULL;
471549

472550
if (*cp == '~') {
551+
if (space_seen)
552+
return cp + 1;
473553
indented = true;
474554
cp++;
475555
if (*cp == '\0')
476-
return;
556+
return NULL;
477557
while (isspace (*cp))
478558
cp++;
479559
if (*cp == '\0')
480-
return;
560+
return NULL;
481561
}
482562

483563
switch (*cp)
@@ -490,9 +570,13 @@ static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
490570
case '\\':
491571
cp++;
492572
if (*cp == '\0')
493-
return;
573+
return NULL;
494574
break;
495575
default:
576+
if (!isIdentifier1(*cp))
577+
return cp;
578+
if (space_seen)
579+
return cp;
496580
break;
497581
}
498582

@@ -509,7 +593,17 @@ static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
509593
hereDocMarkerDelete (marker);
510594

511595
if (*cp != '\0' && cp != last_cp)
512-
collectHereDocMarkers (mgr, cp);
596+
return cp;
597+
return NULL;
598+
}
599+
600+
/*
 * Process every heredoc starter on LINE.
 *
 * collectHereDocMarker() is called repeatedly with MGR; each call resumes
 * at the position returned by the previous one, and the loop stops when
 * it returns NULL.
 */
static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
								   const unsigned char *line)
{
	const unsigned char *cp = line;
	const unsigned char *last = cp;

	while ((cp = collectHereDocMarker (mgr, cp)) != NULL)
	{
		/* Each round must advance past the previous resume point;
		 * otherwise this loop would never terminate. */
		Assert (last < cp);
		last = cp;
	}
}
514608

515609
static bool isInHereDoc (struct hereDocMarkerManager *mgr,
@@ -661,11 +755,9 @@ static void findPerlTags (void)
661755
while (isspace (*cp))
662756
cp++;
663757

664-
collectHereDocMarkers (&hdoc_mgr, cp);
665-
666758
if (strncmp((const char*) cp, "sub", (size_t) 3) == 0)
667759
{
668-
TRACE("this looks like a sub\n");
760+
TRACE_PRINT("this looks like a sub");
669761
cp += 3;
670762
kind = KIND_PERL_SUBROUTINE;
671763
spaceRequired = true;
@@ -690,7 +782,7 @@ static void findPerlTags (void)
690782
}
691783

692784
vString *module = NULL;
693-
while (isalnum(*cp) || *cp == ':' || *cp == '.') {
785+
while (isalnum(*cp) || *cp == ':' || *cp == '.' || *cp == '_') {
694786
if (!module)
695787
module = vStringNew();
696788
vStringPut(module, *cp);
@@ -750,7 +842,7 @@ static void findPerlTags (void)
750842
while (isspace (*cp))
751843
cp++;
752844
vString *module = NULL;
753-
while (isalnum(*cp) || *cp == ':' || *cp == '.') {
845+
while (isalnum(*cp) || *cp == ':' || *cp == '.' || *cp == '_') {
754846
if (!module)
755847
module = vStringNew();
756848
vStringPut(module, *cp);
@@ -811,14 +903,16 @@ static void findPerlTags (void)
811903
if ((int) *p == ':' && (int) *(p + 1) != ':')
812904
kind = KIND_PERL_LABEL;
813905
}
906+
if (kind != KIND_PERL_LABEL)
907+
collectHereDocMarkers (&hdoc_mgr, cp);
814908
}
815909
if (kind != KIND_PERL_NONE)
816910
{
817-
TRACE("cp0: %s\n", (const char *) cp);
911+
TRACE_PRINT("cp0: %s", (const char *) cp);
818912
if (spaceRequired && *cp && !isspace (*cp))
819913
continue;
820914

821-
TRACE("cp1: %s\n", (const char *) cp);
915+
TRACE_PRINT("cp1: %s", (const char *) cp);
822916
while (isspace (*cp))
823917
cp++;
824918

@@ -846,7 +940,7 @@ static void findPerlTags (void)
846940
vStringCatS (name, "STDOUT");
847941
}
848942

849-
TRACE("name: %s\n", vStringValue (name));
943+
TRACE_PRINT("name: %s", vStringValue (name));
850944

851945
if (0 == vStringLength(name)) {
852946
vStringClear(name);

0 commit comments

Comments
 (0)