diff --git a/internal/dhtcrawler/persist.go b/internal/dhtcrawler/persist.go index 9229f462a..67909adbe 100644 --- a/internal/dhtcrawler/persist.go +++ b/internal/dhtcrawler/persist.go @@ -2,6 +2,8 @@ package dhtcrawler import ( "context" + "strconv" + "strings" "time" "github.com/bitmagnet-io/bitmagnet/internal/database/dao" @@ -163,25 +165,34 @@ func createTorrentModel( var filesCount model.NullUint filesStatus := model.FilesStatusSingle + files := make([]model.TorrentFile, 0, min(int(saveFilesThreshold), len(info.Files))) + if len(info.Files) > 0 { filesStatus = model.FilesStatusMulti - filesCount = model.NewNullUint(uint(len(info.Files))) - } + nonPaddingFiles := 0 - files := make([]model.TorrentFile, 0, min(int(saveFilesThreshold), len(info.Files))) + for i, file := range info.Files { + if isPaddingFile(file) { + continue + } - for i, file := range info.Files { - if i >= int(saveFilesThreshold) { - filesStatus = model.FilesStatusOverThreshold - break + if nonPaddingFiles >= int(saveFilesThreshold) { + filesStatus = model.FilesStatusOverThreshold + } else { + // Persist only the subset of files we keep locally; + // the DB entry count still reflects every non-padding file. 
+ files = append(files, model.TorrentFile{ + InfoHash: hash, + Index: uint(i), + Path: file.DisplayPath(&info), + Size: uint(file.Length), + }) + } + + nonPaddingFiles++ } - files = append(files, model.TorrentFile{ - InfoHash: hash, - Index: uint(i), - Path: file.DisplayPath(&info), - Size: uint(file.Length), - }) + filesCount = model.NewNullUint(uint(nonPaddingFiles)) } var pieces model.TorrentPieces @@ -211,6 +222,33 @@ func createTorrentModel( }, nil } +func isPaddingFile(file metainfo.FileInfo) bool { + if strings.Contains(file.Attr, "p") { + return true + } + + pathSegments := file.BestPath() + if len(pathSegments) > 0 { + lastSegment := pathSegments[len(pathSegments)-1] + if strings.HasPrefix(lastSegment, "_____padding_file_") { + return true + } + } + + for i := 0; i < len(pathSegments)-1; i++ { + if pathSegments[i] != ".pad" { + continue + } + + if size, err := strconv.ParseInt(pathSegments[i+1], 10, 64); err == nil && size == file.Length { + // Recommended BEP47 path format: .pad/N, where N is the padding file's length in base 10.
+ return true + } + } + + return false +} + const classifyBatchSize = 100 // runPersistSources waits on the persistSources channel for scraped torrents, and persists sources diff --git a/internal/dhtcrawler/persist_test.go b/internal/dhtcrawler/persist_test.go new file mode 100644 index 000000000..2ab09de37 --- /dev/null +++ b/internal/dhtcrawler/persist_test.go @@ -0,0 +1,92 @@ +package dhtcrawler + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/bitmagnet-io/bitmagnet/internal/model" + "github.com/bitmagnet-io/bitmagnet/internal/protocol" + "github.com/bitmagnet-io/bitmagnet/internal/protocol/metainfo" +) + +func TestCreateTorrentModelSkipsPaddingFiles(t *testing.T) { + t.Parallel() + + hash := protocol.MustParseID("0123456789abcdef0123456789abcdef01234567") + info := metainfo.Info{ + Name: "test", + Files: []metainfo.FileInfo{ + { + Path: []string{"foo.txt"}, + Length: 10, + }, + { + Path: []string{"attr", "padding.bin"}, + ExtendedFileAttrs: metainfo.ExtendedFileAttrs{ + Attr: "p", + }, + Length: 20, + }, + { + Path: []string{".pad", "30"}, + Length: 30, + }, + { + Path: []string{"bar", "baz.bin"}, + Length: 40, + }, + }, + PieceLength: 4, + } + + torrent, err := createTorrentModel(hash, info, false, 10) + require.NoError(t, err) + + require.Len(t, torrent.Files, 2) + assert.Equal(t, uint(0), torrent.Files[0].Index) + assert.Equal(t, uint(3), torrent.Files[1].Index) + + if assert.True(t, torrent.FilesCount.Valid) { + assert.Equal(t, uint(2), torrent.FilesCount.Uint) + } + assert.Equal(t, model.FilesStatusMulti, torrent.FilesStatus) +} + +func TestCreateTorrentModelCountsNonPaddingFilesPastThreshold(t *testing.T) { + t.Parallel() + + hash := protocol.MustParseID("abcdefabcdefabcdefabcdefabcdefabcdefabcd") + info := metainfo.Info{ + Name: "test", + Files: []metainfo.FileInfo{ + { + Path: []string{".pad", "1"}, + ExtendedFileAttrs: metainfo.ExtendedFileAttrs{ + Attr: "p", + }, + Length: 1, + },
+ { + Path: []string{"a.bin"}, + Length: 10, + }, + { + Path: []string{"b.bin"}, + Length: 20, + }, + }, + PieceLength: 4, + } + + torrent, err := createTorrentModel(hash, info, false, 1) + require.NoError(t, err) + + assert.Equal(t, model.FilesStatusOverThreshold, torrent.FilesStatus) + if assert.True(t, torrent.FilesCount.Valid) { + assert.Equal(t, uint(2), torrent.FilesCount.Uint) + } + require.Len(t, torrent.Files, 1) + assert.Equal(t, uint(1), torrent.Files[0].Index) +} diff --git a/internal/protocol/metainfo/metainfo.go b/internal/protocol/metainfo/metainfo.go index 22a92b63b..191715471 100644 --- a/internal/protocol/metainfo/metainfo.go +++ b/internal/protocol/metainfo/metainfo.go @@ -3,3 +3,5 @@ package metainfo import mi "github.com/anacrolix/torrent/metainfo" type Info = mi.Info +type FileInfo = mi.FileInfo +type ExtendedFileAttrs = mi.ExtendedFileAttrs