Skip to content

Commit 4d1e23c

Browse files
committed
add spacecheck
1 parent d52c3e1 commit 4d1e23c

File tree

2 files changed

+197
-0
lines changed

2 files changed

+197
-0
lines changed

.github/workflows/checksrc.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ jobs:
4747
# shellcheck disable=SC2046
4848
shellcheck $(grep -l -E '^#!(/usr/bin/env bash|/bin/sh|/bin/bash)' $(git ls-files))
4949
50+
- name: 'spacecheck'
51+
run: scripts/spacecheck.pl
52+
5053
- name: 'codespell'
5154
run: |
5255
source ~/venv/bin/activate

scripts/spacecheck.pl

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
#!/usr/bin/env perl
2+
#***************************************************************************
3+
# _ _ ____ _
4+
# Project ___| | | | _ \| |
5+
# / __| | | | |_) | |
6+
# | (__| |_| | _ <| |___
7+
# \___|\___/|_| \_\_____|
8+
#
9+
# Copyright (C) Viktor Szakats
10+
#
11+
# This software is licensed as described in the file COPYING, which
12+
# you should have received as part of this distribution. The terms
13+
# are also available at https://curl.se/docs/copyright.html.
14+
#
15+
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
16+
# copies of the Software, and permit persons to whom the Software is
17+
# furnished to do so, under the terms of the COPYING file.
18+
#
19+
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20+
# KIND, either express or implied.
21+
#
22+
# SPDX-License-Identifier: curl
23+
#
24+
###########################################################################
25+
26+
use strict;
27+
use warnings;
28+
29+
# File masks below are regular expressions matched against the path of each
# file listed by git. They are written as double-quoted strings, so every
# regex backslash must be doubled ("\\." is a literal dot); a single "\."
# degrades to a bare "." that matches any character.

# Files that are allowed to contain tab characters.
my @tabs = (
  "Makefile",
  "\\.sln\$",
  "^testfiles/test.+\\.txt",
);

# Files that are allowed to mix EOL types (also exempt from the LF rule).
my @mixed_eol = (
  "^testfiles/test.+\\.txt",
);

# Files that must use CRLF line endings.
my @need_crlf = (
  "\\.sln\$",
);

# Files that are allowed to have whitespace at the end of a line.
my @space_at_eol = (
  "^testfiles/test.+\\.txt",
);

# Files that are allowed to end without an EOL.
my @eol_at_eof = (
  "^testfiles/test.+\\.txt",
);

# Byte sequences tolerated by the non-ASCII check. They are kept as regex
# escape text (single quotes), to be interpreted by the regex engine.
my @non_ascii_allowed = (
  '\xC3\xA4',  # UTF-8 for https://codepoints.net/U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
  '\xC3\xA5',  # UTF-8 for https://codepoints.net/U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
  '\xC3\xB6',  # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
);

# Comma-separated rendering of the allowed sequences.
my $non_ascii_allowed = join(', ', @non_ascii_allowed);

# Files that are exempt from the non-ASCII check altogether.
my @non_ascii = (
  "THANKS",
);
62+
63+
# Report whether a filename matches at least one mask.
#
#   $filename  path to test
#   @masklist  regular expressions (strings) tried against $filename
#
# Returns 1 when any mask matches, 0 otherwise (including an empty list).
sub fn_match {
  my ($filename, @masklist) = @_;

  # grep in scalar context yields the number of masks that matched
  my $hits = grep { $filename =~ $_ } @masklist;

  return $hits ? 1 : 0;
}
73+
74+
# Classify the line-ending convention used by a chunk of file content.
#
#   $content  the complete file content as a string
#
# Returns:
#   "bin"   no CR and no LF present at all
#   "cr"    only CR characters
#   "lf"    only LF characters
#   "crlf"  every CR and every LF belongs to a CR LF pair
#   ""      anything else, i.e. mixed line endings
sub eol_detect {
  my ($content) = @_;

  # tr/// with an empty replacement only counts, it does not modify
  my $cr = ($content =~ tr/\r//);
  my $lf = ($content =~ tr/\n//);

  return "bin" if $cr == 0 && $lf == 0;
  return "cr" if $lf == 0;
  return "lf" if $cr == 0;

  # Equal CR and LF totals are not enough: "a\rb\n" has one of each but no
  # CRLF pair. Require that every CR and every LF is part of a real pair.
  my $crlf = () = $content =~ /\r\n/g;
  return "crlf" if $crlf == $cr && $crlf == $lf;

  return "";
}
95+
96+
# Number of files with at least one finding; decides the exit status.
my $issues = 0;

# Alternation of the tolerated byte sequences. Each one has to be removed as
# a whole sequence: interpolating the list into a [...] character class would
# strip the individual bytes (and the list separators) instead.
my $allowed_seq = join('|', @non_ascii_allowed);

# Check every file tracked by git.
open my $git_ls_files, '-|', 'git ls-files' or die "Failed running git ls-files: $!";
while(my $filename = <$git_ls_files>) {
  chomp $filename;

  # git can list entries that are directories in the work tree (such as
  # submodules); there is no content to check in those.
  next if -d $filename;

  # Read raw bytes: all checks below are byte- and EOL-oriented.
  open my $fh, '<:raw', $filename or die "Cannot open '$filename': $!";
  my $content = do { local $/; <$fh> };
  close $fh;
  $content = "" if !defined $content;

  my @err = ();

  if(!fn_match($filename, @tabs) &&
     $content =~ /\t/) {
    push @err, "content: has tab";
  }

  my $eol = eol_detect($content);

  if($eol eq "" &&
     !fn_match($filename, @mixed_eol)) {
    push @err, "content: has mixed EOL types";
  }

  if($eol ne "crlf" &&
     fn_match($filename, @need_crlf)) {
    push @err, "content: must use CRLF EOL for this file type";
  }

  if($eol ne "lf" && $content ne "" &&
     !fn_match($filename, @need_crlf) &&
     !fn_match($filename, @mixed_eol)) {
    push @err, "content: must use LF EOL for this file type";
  }

  # Cheap whole-file test first, then locate the offending lines.
  if(!fn_match($filename, @space_at_eol) &&
     $content =~ /[ \t]\n/) {
    my $line = 0;
    for my $text (split(/\n/, $content)) {
      $line++;
      if($text =~ /[ \t]$/) {
        push @err, "line $line: trailing whitespace";
      }
    }
  }

  if($content ne "" &&
     $content !~ /\n\z/ &&
     !fn_match($filename, @eol_at_eof)) {
    push @err, "content: has no EOL at EOF";
  }

  if($content =~ /\n\n\z/ ||
     $content =~ /\r\n\r\n\z/) {
    push @err, "content: has multiple EOL at EOF";
  }

  if($content =~ /\n\n\n\n/ ||
     $content =~ /\r\n\r\n\r\n\r\n/) {
    push @err, "content: has 3 or more consecutive empty lines";
  }

  # Control characters other than TAB, LF and CR.
  if($content =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/) {
    push @err, "content: has binary contents";
  }

  if($filename !~ /tests\/data/) {
    # the tests have no allowed UTF bytes
    $content =~ s/(?:$allowed_seq)//g;
  }

  if(!fn_match($filename, @non_ascii) &&
     $content =~ /[\x80-\xff]/) {
    my $line = 0;
    for my $text (split(/\n/, $content)) {
      $line++;
      # Report the bytes found on this very line, not those of the first
      # match in the file.
      if($text =~ /([\x80-\xff]+)/) {
        my $non = $1;
        my $hex = join(' ', map { sprintf("%02x", ord($_)) } split(//, $non));
        push @err, "line $line: has non-ASCII: '$non' ($hex)";
      }
    }
  }

  if(@err) {
    $issues++;
    foreach my $err (@err) {
      print "$filename: $err\n";
    }
  }
}

# For a piped open, close() returns false when the command exited non-zero
# ($! is then 0 and $? holds the status). Without this check a failing git
# would look like a clean run with zero files.
close $git_ls_files
  or die "Failed running git ls-files: " . ($! ? $! : "exit status " . ($? >> 8));

if($issues) {
  exit 1;
}

0 commit comments

Comments
 (0)