-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnuke_emojis.sh
More file actions
190 lines (169 loc) · 7.46 KB
/
Copy pathnuke_emojis.sh
File metadata and controls
190 lines (169 loc) · 7.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/bin/bash
# ==============================================================================
# nuke_emojis.sh
# ==============================================================================
#
# This script interactively helps you remove emoji characters and normalize
# whitespace in your codebase. It's designed to be Git-aware, processing
# only tracked or untracked (but not ignored) text files.
#
# Key Features:
# - **Emoji Removal:** Strips `\p{Extended_Pictographic}` emojis, Zero Width
# Joiners (\x{200d}), and Variation Selectors (\x{fe0f}).
# - **Whitespace Normalization:** Collapses multiple horizontal spaces, trims
# leading/trailing whitespace from lines, and collapses multiple blank lines.
# - **Interactive Mode:** Prompts for confirmation for each file with a diff
# preview, allowing you to accept, skip, or apply to all remaining.
# - **Dry Run Mode:** Shows changes without modifying any files.
# - **Automatic Application:** Can apply changes to all files without prompting.
# - **Git Integration:** Respects `.gitignore` and can automatically `git add`
# modified files.
# - **Safety:** Creates `.bak` backups of original files and cleans up its
# temporary files automatically.
#
# Usage:
# ./nuke_emojis.sh # Interactive mode
# ./nuke_emojis.sh --dry-run / -d # Show changes, don't modify files
# ./nuke_emojis.sh --yes / -y # Apply all changes automatically
#
# Requires `git`, `perl`, `mktemp`, and a `grep` built with PCRE support (`grep -P`, e.g. GNU grep).
# It's always recommended to back up your repository before running:
# `git commit -am "Pre-emoji cleanup backup"`
#
# ==============================================================================
# Abort the whole script as soon as any command returns a failing status.
set -o errexit
# Self-contained ANSI color escapes. They are stored as literal backslash
# sequences and only turned into real escape bytes by `echo -e` / printf.
NC="\033[0m"
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[0;33m"
BLUE="\033[0;34m"
# ------------------------------------------------------------------------------
# strip_emoji_interactive [--yes|-y] [--dry-run|-d]
#
# Finds every text file known to Git (tracked, or untracked but not ignored)
# that contains an emoji, a Zero Width Joiner or a Variation Selector-16,
# shows a diff of the cleaned version, and - depending on the mode - rewrites
# the file in place, keeps a `<file>.bak` backup and stages the result.
#
# Options:
#   -y, --yes      Apply every change without prompting.
#   -d, --dry-run  Only show the diffs; never modify, back up or stage a file.
#
# Globals:
#   YELLOW, BLUE, GREEN, RED, NC  (read)    color escapes used in messages.
#   STRIP_EMOJI_TMP_FILES         (written) temp files to delete on exit.
#   cleanup                       (defined) EXIT trap handler.
#
# Returns:
#   0  every matching file was cleaned (or previewed in dry-run mode),
#      or there was nothing to do.
#   1  at least one file was skipped by the user or could not be processed.
#   2  unknown option, or not running inside a Git working tree.
#
# NOTE: the `.bak` backups are untracked files themselves; unless they are
# ignored via .gitignore, a later run will list them as candidates too.
# ------------------------------------------------------------------------------
strip_emoji_interactive() {
  # --- Parse Command Line Options ---
  local auto_apply=false          # Apply changes automatically (non-interactive)
  local dry_run=false             # Show diffs but do not modify files
  local apply_all_remaining=false # Set once the user answers "a"

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -y | --yes)
        auto_apply=true
        ;;
      -d | --dry-run)
        dry_run=true
        ;;
      *)
        printf "%bError: Unknown option '%s'%b\n" "$RED" "$1" "$NC" >&2
        echo "Usage: strip_emoji_interactive [--yes|-y] [--dry-run|-d]" >&2
        return 2
        ;;
    esac
    shift
  done

  # Fail loudly instead of silently reporting "0 files" outside a repository.
  if [[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" != "true" ]]; then
    printf '%bError: not inside a Git working tree.%b\n' "$RED" "$NC" >&2
    return 2
  fi

  # --- Temporary File Cleanup Trap ---
  # The list is deliberately global: the EXIT trap fires after this function
  # has returned, when a `local` array would no longer exist. It is only
  # initialised when empty/unset so repeated calls keep earlier entries.
  if [[ -z "${STRIP_EMOJI_TMP_FILES+x}" ]]; then
    STRIP_EMOJI_TMP_FILES=()
  fi
  cleanup() {
    local f
    echo -e "${BLUE}Cleaning up temporary files...${NC}"
    for f in "${STRIP_EMOJI_TMP_FILES[@]}"; do
      if [[ -f "$f" ]]; then
        rm -f -- "$f"
        printf '%b Removed: %s%b\n' "$BLUE" "$f" "$NC"
      fi
    done
  }
  trap cleanup EXIT

  echo -e "${YELLOW}Starting emoji stripper.${NC}"
  echo "Files will be checked based on Git status (tracked or untracked/non-ignored text files)."
  if [[ "$dry_run" == "true" ]]; then
    echo -e "${YELLOW}!!! DRY RUN MODE: No files will be modified. !!!${NC}"
  else
    echo "A .bak file will be created for each modified file."
  fi
  echo "----------------------------------------------------"

  local files_processed=0
  local files_cleaned=0
  local exit_status=0 # Becomes 1 when a file is skipped or fails

  # --- File Discovery ---
  # Collect the whole list BEFORE touching anything. This keeps the loop in the
  # current shell (a `git ls-files | while ...` pipeline would run it in a
  # subshell and lose the counters), leaves stdin free for the prompt, and
  # guarantees freshly written .bak files cannot appear in this run's list.
  # -z: NUL-separated, -c: tracked, -o: untracked, --exclude-standard: honour
  # the usual ignore rules.
  local -a candidates=()
  local file
  while IFS= read -r -d '' file; do
    candidates+=("$file")
  done < <(git ls-files -z -c -o --exclude-standard)

  local tmp_clean reply prompt decision
  printf -v prompt '%b' \
    "${YELLOW}Apply these changes to this file? [y/N/a (all remaining)]${NC} "

  for file in "${candidates[@]}"; do
    # Tracked-but-deleted entries are still listed by `git ls-files -c`.
    [[ -f "$file" ]] || continue

    # One grep pass does both jobs: -I makes binary files count as
    # "no match", -P enables the Unicode property pattern.
    grep -IqP -e '\p{Extended_Pictographic}|\x{200d}|\x{fe0f}' -- "$file" \
      || continue

    files_processed=$((files_processed + 1))
    printf '%bFile: %s%b\n' "$BLUE" "$file" "$NC"

    # Assignment is separate from `local` so a mktemp failure is not masked.
    if ! tmp_clean=$(mktemp); then
      printf '%bError: mktemp failed; skipping %s%b\n' "$RED" "$file" "$NC" >&2
      exit_status=1
      echo "---------------------------------------"
      continue
    fi
    STRIP_EMOJI_TMP_FILES+=("$tmp_clean")

    # Slurp mode (-0777) is required: in line-by-line mode the trailing-space
    # trim would also eat each newline (joining all lines) and the blank-line
    # collapse could never match. \h (horizontal whitespace) plus /m trims per
    # line without touching the newlines. The file is fed via stdin so odd
    # file names are never interpreted by Perl's two-argument open.
    if ! perl -CSD -0777 -pe '
        s/\p{Extended_Pictographic}|\x{200d}|\x{fe0f}//g;  # Strip emojis, ZWJ, VS-16
        s/\h{2,}/ /g;                                      # Collapse runs of horizontal whitespace
        s/^\h+|\h+$//mg;                                   # Trim each line, keep its newline
        s/\n{3,}/\n\n/g;                                   # Collapse runs of blank lines
      ' <"$file" >"$tmp_clean"; then
      printf '%bError: perl failed on %s; file left untouched.%b\n' \
        "$RED" "$file" "$NC" >&2
      exit_status=1
      echo "---------------------------------------"
      continue
    fi

    # `git diff --no-index` exits 1 whenever the two files differ, which is
    # the expected case here; it must not abort the run under `set -e`.
    git --no-pager diff --no-index --color=always -- "$file" "$tmp_clean" || true
    echo

    # --- Decide what to do with this file ---
    if [[ "$dry_run" == "true" ]]; then
      decision=preview
    else
      if [[ "$auto_apply" == "true" || "$apply_all_remaining" == "true" ]]; then
        reply="y"
      else
        # EOF on stdin counts as "no" instead of killing the script.
        read -rp "$prompt" reply || reply=""
      fi
      case "$reply" in
        [Aa])
          apply_all_remaining=true
          decision=apply
          ;;
        [Yy])
          decision=apply
          ;;
        *)
          decision=skip
          ;;
      esac
    fi

    case "$decision" in
      preview)
        echo -e "${YELLOW}DRY RUN: Changes shown above. File not modified.${NC}"
        ;;
      skip)
        printf '%b→ Skipped: %s%b\n' "$RED" "$file" "$NC"
        exit_status=1 # User declined at least one file
        ;;
      apply)
        # Back up first; then overwrite the content in place (rather than
        # moving the 0600 temp file over it) so mode, ownership and symlinks
        # of the original survive.
        if cp -- "$file" "$file.bak" && cat -- "$tmp_clean" >"$file"; then
          printf '%b→ Cleaned: %s%b\n' "$GREEN" "$file" "$NC"
          files_cleaned=$((files_cleaned + 1))
          if git add -- "$file"; then
            printf '%b→ Cleaned and staged: %s%b\n' "$GREEN" "$file" "$NC"
          else
            printf '%bError: could not stage %s%b\n' "$RED" "$file" "$NC" >&2
            exit_status=1
          fi
        else
          printf '%bError: could not update %s (check %s.bak for a backup).%b\n' \
            "$RED" "$file" "$file" "$NC" >&2
          exit_status=1
        fi
        ;;
    esac
    echo "---------------------------------------"
  done

  echo -e "${GREEN}Emoji stripping interactive process completed.${NC}"
  echo -e "${BLUE}Processed $files_processed files, cleaned $files_cleaned files.${NC}"

  # 0 when nothing was skipped or failed, 1 otherwise.
  return "$exit_status"
}
# --- Entry Point ---
# Hand every argument given to the script, unchanged, to the stripper function.
strip_emoji_interactive "${@}"