-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathcheck-lang-encoding.sh
More file actions
executable file
·140 lines (118 loc) · 4.07 KB
/
check-lang-encoding.sh
File metadata and controls
executable file
·140 lines (118 loc) · 4.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/bin/bash
# Verifies the character-encoding integrity of the project's language files.
# Can be run by hand or as a CI step; the script exits non-zero when a check
# fails.

# Directory holding the language files, resolved relative to the current
# working directory.
LANG_DIR="src/main/resources/lang"

# Running count of failed checks; the check functions below increment it.
ERRORS=0

printf '%s\n' "Checking language file encoding integrity..."
printf '%s\n' "==========================================="

# Nothing can be checked without the language directory, so bail out early.
[ -d "$LANG_DIR" ] || {
  echo "ERROR: Language directory $LANG_DIR not found!"
  exit 1
}
#######################################
# Verify that a file can be decoded using the expected encoding.
# Globals:   ERRORS (incremented by one on failure)
# Arguments: $1 - path of the file to check
#            $2 - encoding name understood by iconv (e.g. UTF-8, ISO-8859-1)
# Outputs:   one status line on stdout
# Returns:   always 0; failures are recorded in ERRORS
#######################################
check_encoding() {
  local file=$1
  local expected_encoding=$2
  local filename
  filename=$(basename "$file")

  printf '%s' "Checking $filename... "

  # A missing file is not an encoding problem: report it as such instead of
  # letting iconv fail and blaming the encoding.
  if [ ! -f "$file" ]; then
    echo "✗ FAILED (file not found)"
    ERRORS=$((ERRORS + 1))
    return 0
  fi

  # iconv exits non-zero when the input contains byte sequences that are
  # invalid in the source encoding; the converted output itself is discarded.
  if iconv -f "$expected_encoding" -t UTF-8 "$file" > /dev/null 2>&1; then
    echo "✓ OK ($expected_encoding)"
  else
    echo "✗ FAILED (not valid $expected_encoding)"
    ERRORS=$((ERRORS + 1))
  fi
  return 0
}
#######################################
# Report whether a file contains at least one of the expected special
# characters once converted to UTF-8. This is informational only: a miss
# prints a warning and does not count as an error.
# Globals:   none
# Arguments: $1 - path of the file to inspect
#            $2 - encoding of the file, as understood by iconv
#            $3 - whitespace-separated list of characters to look for
# Outputs:   one status line on stdout
# Returns:   always 0
#######################################
check_characters() {
  local file=$1
  local encoding=$2
  local chars=$3
  local filename content char
  local found=false
  local -a wanted=()

  filename=$(basename "$file")
  printf '%s' " Checking for special characters in $filename... "

  # Best effort: search whatever iconv managed to convert. A failed
  # conversion is deliberately not counted as an error in this function.
  content=$(iconv -f "$encoding" -t UTF-8 "$file") || true

  # Split the list on whitespace without pathname expansion, so an entry such
  # as "*" is kept literally instead of expanding to file names.
  read -r -d '' -a wanted <<< "$chars" || true

  for char in "${wanted[@]}"; do
    # -F: match the character literally, never as a regular expression
    # (otherwise "." or "*" would match almost any content).
    if grep -qF -- "$char" <<< "$content"; then
      found=true
      break
    fi
  done

  if [ "$found" = true ]; then
    echo "✓ OK"
  else
    echo "⚠ WARNING (no special characters found - file might be ASCII only)"
  fi
  return 0
}
#######################################
# Scan a file for a known corruption pattern after converting it to UTF-8.
# Globals:   ERRORS (incremented by one on failure)
# Arguments: $1 - path of the file to inspect
#            $2 - encoding of the file, as understood by iconv
# Outputs:   one status line on stdout
# Returns:   always 0; failures are recorded in ERRORS
#######################################
check_corruption() {
  local file=$1
  local encoding=$2
  local filename content

  filename=$(basename "$file")
  printf '%s' " Checking for corruption in $filename... "

  # Decide "cannot read" from iconv's exit status rather than from empty
  # output: an empty file is readable, and a conversion that fails part-way
  # still produces some output. iconv's own diagnostics are suppressed
  # because the status line below reports the failure.
  if ! content=$(iconv -f "$encoding" -t UTF-8 "$file" 2>/dev/null); then
    echo "✗ FAILED (cannot read file)"
    ERRORS=$((ERRORS + 1))
    return 0
  fi

  # Matches literal text such as "<E9>" or "<FC>": "<", then E or F, then one
  # or two uppercase hex digits, then ">". Ordinary markup does not have this
  # shape, so valid HTML in the file is not flagged.
  if grep -Eq '<[EF][0-9A-F]{1,2}>' <<< "$content"; then
    echo "✗ FAILED (HTML entity corruption detected)"
    ERRORS=$((ERRORS + 1))
  else
    echo "✓ OK"
  fi
  return 0
}
# --- Portuguese (pt_BR) ------------------------------------------------------
# This file historically has mixed encoding (mostly UTF-8 with some Latin-1),
# so it is only checked for existence and basic readability.
echo ""
echo "Portuguese (pt_BR):"
pt_br_file="$LANG_DIR/lang_pt_BR.properties"
if [ ! -f "$pt_br_file" ]; then
  echo "✗ FAILED (file not found)"
  ERRORS=$((ERRORS + 1))
else
  echo "Checking lang_pt_BR.properties... ✓ OK (exists)"
  # Only the first 100 lines are sampled; a failure here is a warning, not an
  # error, because mixed encoding is expected for this file.
  if head -n 100 "$pt_br_file" | iconv -f UTF-8 -t UTF-8 > /dev/null 2>&1; then
    echo " First 100 lines are valid UTF-8... ✓ OK"
  else
    echo " WARNING: File has mixed encoding (this is expected for this file)"
  fi
fi

# --- German (de_DE): ISO-8859-1 ----------------------------------------------
echo ""
echo "German (de_DE):"
de_file="$LANG_DIR/lang_de_DE.properties"
check_encoding "$de_file" "ISO-8859-1"
check_characters "$de_file" "ISO-8859-1" "ü ö ä ß"
check_corruption "$de_file" "ISO-8859-1"

# --- French (fr_FR): UTF-8 ---------------------------------------------------
echo ""
echo "French (fr_FR):"
fr_file="$LANG_DIR/lang_fr_FR.properties"
check_encoding "$fr_file" "UTF-8"
check_characters "$fr_file" "UTF-8" "é è à ç"
check_corruption "$fr_file" "UTF-8"

# --- English (en_US, en_GB): UTF-8 or ASCII ----------------------------------
# Both variants get the same two checks; there are no special characters to
# look for.
for english_locale in en_US en_GB; do
  echo ""
  echo "English ($english_locale):"
  check_encoding "$LANG_DIR/lang_${english_locale}.properties" "UTF-8"
  check_corruption "$LANG_DIR/lang_${english_locale}.properties" "UTF-8"
done

# --- Summary -----------------------------------------------------------------
# The exit status reflects whether any check incremented ERRORS.
echo ""
echo "==========================================="
if [ "$ERRORS" -eq 0 ]; then
  echo "✓ All encoding checks passed!"
  exit 0
fi
echo "✗ $ERRORS encoding check(s) failed!"
exit 1