Skip to content

Commit e638053

Browse files
abhinavclaude
andcommitted
fix: Handle special characters in branch names (#945)
git-spice failed to work with branch names containing non-ASCII characters like θ, ✅, 👨‍💻, café, or 日本語. The issue occurred when reading branch metadata from the state storage, which stores data as Git tree objects. The root cause was that `git ls-tree` without the `-z` flag returns quoted and escaped filenames for "unusual" characters. For example, a branch named `feature/θ-theta` would be returned as `feature/\316\270-theta` (escaped UTF-8 bytes). This change updates both `ListTree` and `MakeTree` operations to use the `-z` flag, which uses NUL-byte termination and preserves UTF-8 characters exactly as they appear. Additionally, ShamHub's find-by-branch API pattern was updated from `{branch}` to `{branch...}` to match branch names containing forward slashes, which are valid Git branch names. Resolves #944 Co-Authored-By: Claude <noreply@anthropic.com>
1 parent e8cf606 commit e638053

File tree

7 files changed

+487
-8
lines changed

7 files changed

+487
-8
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
kind: Fixed
2+
body: >-
3+
Correctly handle unicode in branch names across various commands.
4+
These would previously be stored in state, but not be accessible.
5+
time: 2025-11-20T20:03:42.420684-08:00

internal/forge/shamhub/find.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212

1313
var (
1414
_ = shamhubRESTHandler("GET /{owner}/{repo}/change/{number}", (*ShamHub).handleGetChange)
15-
_ = shamhubRESTHandler("GET /{owner}/{repo}/changes/by-branch/{branch}", (*ShamHub).handleFindChangesByBranch)
15+
_ = shamhubRESTHandler("GET /{owner}/{repo}/changes/by-branch/{branch...}", (*ShamHub).handleFindChangesByBranch)
1616
)
1717

1818
type getChangeRequest struct {

internal/git/integration_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,75 @@ func TestIntegrationCommitListing(t *testing.T) {
112112
}, msgs)
113113
})
114114
}
115+
116+
func TestIntegrationPeelToCommit_specialChars(t *testing.T) {
117+
t.Parallel()
118+
119+
fixture, err := gittest.LoadFixtureScript([]byte(text.Dedent(`
120+
as 'Test <test@example.com>'
121+
at '2025-11-20T10:00:00Z'
122+
123+
git init
124+
git commit --allow-empty -m 'Initial commit'
125+
126+
# Create branches with special characters
127+
git checkout -b 'feature/θ-theta'
128+
git add theta.txt
129+
git commit -m 'Add theta feature'
130+
131+
git checkout -b 'feature/✅-checkmark'
132+
git add checkmark.txt
133+
git commit -m 'Add checkmark feature'
134+
135+
git checkout -b 'feature/👨‍💻-developer'
136+
git add developer.txt
137+
git commit -m 'Add developer feature'
138+
139+
git checkout -b 'feature/café'
140+
git add cafe.txt
141+
git commit -m 'Add café feature'
142+
143+
git checkout -b 'feature/日本語'
144+
git add japanese.txt
145+
git commit -m 'Add Japanese feature'
146+
147+
-- theta.txt --
148+
Theta content
149+
-- checkmark.txt --
150+
Checkmark content
151+
-- developer.txt --
152+
Developer content
153+
-- cafe.txt --
154+
Café content
155+
-- japanese.txt --
156+
Japanese content
157+
`)))
158+
require.NoError(t, err)
159+
160+
ctx := t.Context()
161+
repo, err := git.Open(ctx, fixture.Dir(), git.OpenOptions{
162+
Log: silogtest.New(t),
163+
})
164+
require.NoError(t, err)
165+
166+
tests := []struct {
167+
name string
168+
branch string
169+
}{
170+
{"Greek theta", "feature/θ-theta"},
171+
{"Unicode checkmark", "feature/✅-checkmark"},
172+
{"Combined emoji", "feature/👨‍💻-developer"},
173+
{"Accented character", "feature/café"},
174+
{"Japanese characters", "feature/日本語"},
175+
}
176+
177+
for _, tt := range tests {
178+
t.Run(tt.name, func(t *testing.T) {
179+
ctx := t.Context()
180+
hash, err := repo.PeelToCommit(ctx, tt.branch)
181+
require.NoError(t, err)
182+
assert.NotEmpty(t, hash)
183+
assert.NotEqual(t, git.ZeroHash, hash)
184+
})
185+
}
186+
}

internal/git/tree.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"strings"
1414

1515
"go.abhg.dev/gs/internal/must"
16+
"go.abhg.dev/gs/internal/scanutil"
1617
)
1718

1819
// Mode is the octal file mode of a Git tree entry.
@@ -72,7 +73,7 @@ type TreeEntry struct {
7273
// Returns the hash of the new tree and the number of entries written.
7374
func (r *Repository) MakeTree(ctx context.Context, ents iter.Seq2[TreeEntry, error]) (_ Hash, numEnts int, err error) {
7475
var stdout bytes.Buffer
75-
cmd := r.gitCmd(ctx, "mktree").Stdout(&stdout)
76+
cmd := r.gitCmd(ctx, "mktree", "-z").Stdout(&stdout)
7677
stdin, err := cmd.StdinPipe()
7778
if err != nil {
7879
return ZeroHash, numEnts, fmt.Errorf("pipe: %w", err)
@@ -111,9 +112,9 @@ func (r *Repository) MakeTree(ctx context.Context, ents iter.Seq2[TreeEntry, err
111112
return ZeroHash, numEnts, fmt.Errorf("name %q contains a slash", ent.Name)
112113
}
113114

114-
// mktree expects input in the form:
115-
// <mode> SP <type> SP <hash> TAB <name> NL
116-
_, err := fmt.Fprintf(stdin, "%s %s %s\t%s\n", ent.Mode, ent.Type, ent.Hash, ent.Name)
115+
// mktree -z expects input in the form:
116+
// <mode> SP <type> SP <hash> TAB <name> NUL
117+
_, err := fmt.Fprintf(stdin, "%s %s %s\t%s\x00", ent.Mode, ent.Type, ent.Hash, ent.Name)
117118
if err != nil {
118119
return ZeroHash, numEnts, fmt.Errorf("write: %w", err)
119120
}
@@ -153,6 +154,7 @@ func (r *Repository) ListTree(
153154
) iter.Seq2[TreeEntry, error] {
154155
args := []string{
155156
"ls-tree",
157+
"-z", // NUL-terminate entries for proper handling of special characters
156158
"--full-tree", // don't limit listing to the current working directory
157159
}
158160
if opts.Recurse {
@@ -162,14 +164,14 @@ func (r *Repository) ListTree(
162164

163165
return func(yield func(TreeEntry, error) bool) {
164166
cmd := r.gitCmd(ctx, args...)
165-
for line, err := range cmd.ScanLines(r.exec) {
167+
for line, err := range cmd.Scan(r.exec, scanutil.SplitNull) {
166168
if err != nil {
167169
yield(TreeEntry{}, fmt.Errorf("git ls-tree: %w", err))
168170
return
169171
}
170172

171-
// ls-tree output is in the form:
172-
// <mode> SP <type> SP <hash> TAB <name> NL
173+
// ls-tree -z output is in the form:
174+
// <mode> SP <type> SP <hash> TAB <name> NUL
173175
modeTypeHash, name, ok := bytes.Cut(line, []byte{'\t'})
174176
if !ok {
175177
r.log.Warnf("ls-tree: skipping invalid line: %q", line)

internal/git/tree_test.go

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ package git_test
22

33
import (
44
"bytes"
5+
"os"
6+
"os/exec"
7+
"path/filepath"
58
"testing"
69

710
"github.com/stretchr/testify/assert"
@@ -215,3 +218,130 @@ func TestIntegrationUpdateTree(t *testing.T) {
215218
assert.Empty(t, ents)
216219
})
217220
}
221+
222+
func TestIntegrationListTree_specialCharsFromFiles(t *testing.T) {
223+
t.Parallel()
224+
225+
ctx := t.Context()
226+
dir := t.TempDir()
227+
repo, _, err := git.Init(ctx, dir, git.InitOptions{
228+
Log: silogtest.New(t),
229+
})
230+
require.NoError(t, err)
231+
232+
// Create actual files with special characters in their names
233+
specialNames := []string{
234+
"θ-theta",
235+
"✅-checkmark",
236+
"👨‍💻-developer",
237+
"café",
238+
"日本語",
239+
}
240+
241+
// Create a subdirectory to hold the files
242+
featureDir := filepath.Join(dir, "feature")
243+
require.NoError(t, os.MkdirAll(featureDir, 0o755))
244+
245+
for _, name := range specialNames {
246+
filePath := filepath.Join(featureDir, name)
247+
require.NoError(t, os.WriteFile(filePath, []byte("content"), 0o644))
248+
}
249+
250+
// Add files to git index and commit
251+
addCmd := exec.Command("git", "add", "feature")
252+
addCmd.Dir = dir
253+
require.NoError(t, addCmd.Run())
254+
255+
commitCmd := exec.Command("git", "commit", "-m", "Add files with special characters")
256+
commitCmd.Dir = dir
257+
commitCmd.Env = append(os.Environ(),
258+
"GIT_AUTHOR_NAME=Test",
259+
"GIT_AUTHOR_EMAIL=test@example.com",
260+
"GIT_COMMITTER_NAME=Test",
261+
"GIT_COMMITTER_EMAIL=test@example.com",
262+
)
263+
require.NoError(t, commitCmd.Run())
264+
265+
// Get the commit hash
266+
revParseCmd := exec.Command("git", "rev-parse", "HEAD")
267+
revParseCmd.Dir = dir
268+
commitHashBytes, err := revParseCmd.Output()
269+
require.NoError(t, err)
270+
commitHash := git.Hash(string(bytes.TrimSpace(commitHashBytes)))
271+
272+
// Get the tree from the commit
273+
treeHash, err := repo.PeelToTree(ctx, commitHash.String())
274+
require.NoError(t, err)
275+
276+
// List the tree recursively
277+
ents, err := sliceutil.CollectErr(repo.ListTree(ctx, treeHash, git.ListTreeOptions{
278+
Recurse: true,
279+
}))
280+
require.NoError(t, err)
281+
282+
// Extract names from the returned entries
283+
var gotNames []string
284+
for _, ent := range ents {
285+
gotNames = append(gotNames, ent.Name)
286+
}
287+
288+
// Build expected names with feature/ prefix
289+
var expectedNames []string
290+
for _, name := range specialNames {
291+
expectedNames = append(expectedNames, "feature/"+name)
292+
}
293+
294+
// Verify all special character names are returned exactly as they were created
295+
assert.ElementsMatch(t, expectedNames, gotNames,
296+
"ListTree should return file names with special characters exactly as they were created")
297+
}
298+
299+
func TestIntegrationListTree_specialCharsFromMakeTree(t *testing.T) {
300+
t.Parallel()
301+
302+
ctx := t.Context()
303+
repo, _, err := git.Init(ctx, t.TempDir(), git.InitOptions{
304+
Log: silogtest.New(t),
305+
})
306+
require.NoError(t, err)
307+
308+
emptyFile, err := repo.WriteObject(ctx, git.BlobType, bytes.NewReader(nil))
309+
require.NoError(t, err)
310+
311+
// Create tree entries with special characters in their names
312+
// (without slashes since MakeTree doesn't allow them in individual entry names)
313+
specialNames := []string{
314+
"θ-theta",
315+
"✅-checkmark",
316+
"👨‍💻-developer",
317+
"café",
318+
"日本語",
319+
}
320+
321+
var entries []git.TreeEntry
322+
for _, name := range specialNames {
323+
entries = append(entries, git.TreeEntry{
324+
Type: git.BlobType,
325+
Name: name,
326+
Hash: emptyFile,
327+
})
328+
}
329+
330+
treeHash, numEnts, err := repo.MakeTree(ctx, sliceutil.All2[error](entries))
331+
require.NoError(t, err)
332+
assert.Equal(t, len(specialNames), numEnts)
333+
334+
// List the tree and verify names match exactly
335+
ents, err := sliceutil.CollectErr(repo.ListTree(ctx, treeHash, git.ListTreeOptions{}))
336+
require.NoError(t, err)
337+
338+
// Extract names from the returned entries
339+
var gotNames []string
340+
for _, ent := range ents {
341+
gotNames = append(gotNames, ent.Name)
342+
}
343+
344+
// Verify all special character names are returned exactly as they were created
345+
assert.ElementsMatch(t, specialNames, gotNames,
346+
"ListTree should return names with special characters exactly as they were created")
347+
}

internal/spice/state/storage/git_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,3 +186,70 @@ func TestGitBackend_ConcurrentOperations(t *testing.T) {
186186
require.NoError(t, err)
187187
assert.Equal(t, "final_value", finalValue)
188188
}
189+
190+
func TestGitBackend_SpecialCharacterKeys(t *testing.T) {
191+
ctx := t.Context()
192+
repo, _, err := git.Init(ctx, t.TempDir(), git.InitOptions{
193+
Log: silogtest.New(t),
194+
})
195+
require.NoError(t, err)
196+
197+
backend := NewGitBackend(GitConfig{
198+
Repo: repo,
199+
Ref: "refs/data",
200+
AuthorName: "Test Author",
201+
AuthorEmail: "test@example.com",
202+
Log: silogtest.New(t),
203+
})
204+
205+
tests := []struct {
206+
name string
207+
key string
208+
value string
209+
}{
210+
{"Greek theta", "branches/feature/θ-theta", "theta-value"},
211+
{"Unicode checkmark", "branches/feature/✅-checkmark", "checkmark-value"},
212+
{"Combined emoji", "branches/feature/👨‍💻-developer", "developer-value"},
213+
{"Accented character", "branches/feature/café", "cafe-value"},
214+
{"Japanese characters", "branches/feature/日本語", "japanese-value"},
215+
}
216+
217+
db := NewDB(backend)
218+
219+
// Store all values
220+
for _, tt := range tests {
221+
t.Run("Set/"+tt.name, func(t *testing.T) {
222+
err := db.Set(ctx, tt.key, tt.value, "add "+tt.name)
223+
require.NoError(t, err)
224+
})
225+
}
226+
227+
// Retrieve all values
228+
for _, tt := range tests {
229+
t.Run("Get/"+tt.name, func(t *testing.T) {
230+
var got string
231+
err := backend.Get(ctx, tt.key, &got)
232+
require.NoError(t, err)
233+
assert.Equal(t, tt.value, got)
234+
})
235+
}
236+
237+
// List keys and verify all special character keys are present
238+
t.Run("Keys", func(t *testing.T) {
239+
keys, err := backend.Keys(ctx, "branches")
240+
require.NoError(t, err)
241+
242+
expectedKeys := []string{
243+
"feature/θ-theta",
244+
"feature/✅-checkmark",
245+
"feature/👨‍💻-developer",
246+
"feature/café",
247+
"feature/日本語",
248+
}
249+
250+
for _, expectedKey := range expectedKeys {
251+
assert.Contains(t, keys, expectedKey,
252+
"Keys list should contain %q", expectedKey)
253+
}
254+
})
255+
}

0 commit comments

Comments
 (0)