diff --git a/app/dl/dl.go b/app/dl/dl.go index 5200274e1..aa6a1a4a8 100644 --- a/app/dl/dl.go +++ b/app/dl/dl.go @@ -32,6 +32,7 @@ type Options struct { SkipSame bool Template string URLs []string + Streams []string Files []string Include []string Exclude []string @@ -58,6 +59,16 @@ func Run(ctx context.Context, c *telegram.Client, kvd storage.Storage, opts Opti tclient.NewDefaultMiddlewares(ctx, viper.GetDuration(consts.FlagReconnectTimeout))...) defer multierr.AppendInvoke(&rerr, multierr.Close(pool)) + if len(opts.Streams) > 0 { + if len(opts.URLs) > 0 || len(opts.Files) > 0 { + return errors.New("--stream cannot be combined with --url or --file") + } + if opts.Serve { + return errors.New("--serve is not supported with --stream") + } + return downloadStreams(ctx, pool, opts, viper.GetInt(consts.FlagThreads), viper.GetInt(consts.FlagLimit)) + } + parsers := []parser{ {Data: opts.URLs, Parser: tmessage.FromURL(ctx, pool, kvd, opts.URLs)}, {Data: opts.Files, Parser: tmessage.FromFile(ctx, pool, kvd, opts.Files, true)}, diff --git a/app/dl/stream.go b/app/dl/stream.go new file mode 100644 index 000000000..b4ce929ed --- /dev/null +++ b/app/dl/stream.go @@ -0,0 +1,221 @@ +package dl + +import ( + "context" + "encoding/json" + "fmt" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/fatih/color" + "github.com/gabriel-vasile/mimetype" + "github.com/go-faster/errors" + gotddownloader "github.com/gotd/td/telegram/downloader" + "github.com/gotd/td/tg" + "golang.org/x/sync/errgroup" + + "github.com/iyear/tdl/core/dcpool" + coredownloader "github.com/iyear/tdl/core/downloader" + "github.com/iyear/tdl/core/util/fsutil" + "github.com/iyear/tdl/core/util/tutil" + "github.com/iyear/tdl/pkg/utils" +) + +type streamPayload struct { + DCID int `json:"dcId"` + Location streamFileLocation `json:"location"` + Size int64 `json:"size"` + MIMEType string `json:"mimeType"` + FileName string `json:"fileName"` +} + +type streamFileLocation struct { + Type string `json:"_"` + ID string `json:"id"` + AccessHash string `json:"access_hash"` + FileReference []byte `json:"file_reference"` +} + +func downloadStreams(ctx context.Context, pool dcpool.Pool, opts Options, threads int, limit int) error { + if err := os.MkdirAll(opts.Dir, 0o755); err != nil { + return errors.Wrap(err, "create dir") + } + + streams := make([]*streamPayload, 0, len(opts.Streams)) + for _, s := range opts.Streams { + payload, err := parseStreamPayload(s) + if err != nil { + return errors.Wrap(err, "parse stream") + } + streams = append(streams, payload) + } + + color.Green("All files will be downloaded to '%s' dir", opts.Dir) + + wg, wgctx := errgroup.WithContext(ctx) + wg.SetLimit(limit) + for _, payload := range streams { + payload := payload + wg.Go(func() error { + return downloadStream(wgctx, pool, opts, threads, payload) + }) + } + return wg.Wait() +} + +func downloadStream(ctx context.Context, pool dcpool.Pool, opts Options, threads int, payload *streamPayload) error { + location, err := payload.Location.TG() + if err != nil { + return errors.Wrap(err, "stream location") + } + + fileName := filepath.Base(strings.TrimSpace(payload.FileName)) + if fileName == "." || fileName == string(filepath.Separator) || fileName == "" { + fileName = payload.Location.ID + if ext := mimetype.Lookup(payload.MIMEType); ext != nil { + fileName += ext.Extension() + } + } + + if len(opts.Include) > 0 && !hasExt(opts.Include, fileName) { + return nil + } + if len(opts.Exclude) > 0 && hasExt(opts.Exclude, fileName) { + return nil + } + + finalPath := uniqueStreamPath(opts.Dir, fileName) + if opts.SkipSame { + if stat, err := os.Stat(finalPath); err == nil && stat.Size() == payload.Size { + return nil + } + } + + tempPath := finalPath + tempExt + tempFile, err := os.Create(tempPath) + if err != nil { + return errors.Wrap(err, "create file") + } + defer func() { + _ = tempFile.Close() + if _, err := os.Stat(tempPath); err == nil { + _ = os.Remove(tempPath) + } + }() + + client := pool.Client(ctx, payload.DCID) + if opts.Takeout { + client = pool.Takeout(ctx, payload.DCID) + } + + _, err = gotddownloader.NewDownloader(). + WithPartSize(coredownloader.MaxPartSize). + Download(client, location). + WithThreads(tutil.BestThreads(payload.Size, threads)). + Parallel(ctx, tempFile) + if err != nil { + if strings.Contains(err.Error(), "FILE_REFERENCE_EXPIRED") { + return errors.New("stream file reference expired; refresh the Telegram Web page and copy a fresh stream payload, or use the original message link") + } + return errors.Wrap(err, "download stream") + } + + if err := tempFile.Close(); err != nil { + return errors.Wrap(err, "close file") + } + + newPath := finalPath + if opts.RewriteExt { + mime, err := mimetype.DetectFile(tempPath) + if err != nil { + return errors.Wrap(err, "detect mime") + } + if ext := mime.Extension(); ext != "" && filepath.Ext(newPath) != ext { + newPath = filepath.Join(filepath.Dir(newPath), fsutil.GetNameWithoutExt(filepath.Base(newPath))+ext) + newPath = uniqueStreamPath(filepath.Dir(newPath), filepath.Base(newPath)) + } + } + + if err := os.Rename(tempPath, newPath); err != nil { + return errors.Wrap(err, "rename file") + } + + color.Green("Downloaded %s (%s)", newPath, utils.Byte.FormatBinaryBytes(payload.Size)) + return nil +} + +func parseStreamPayload(input string) (*streamPayload, error) { + raw := strings.TrimSpace(input) + if !strings.HasPrefix(raw, "stream/") { + return nil, errors.New(`stream payload must start with "stream/"`) + } + raw = strings.TrimPrefix(raw, "stream/") + + decoded, err := url.PathUnescape(raw) + if err != nil { + return nil, err + } + + var payload streamPayload + if err := json.Unmarshal([]byte(decoded), &payload); err != nil { + return nil, err + } + if payload.Location.Type != "inputDocumentFileLocation" { + return nil, fmt.Errorf("unsupported stream location %q", payload.Location.Type) + } + if payload.DCID == 0 { + return nil, errors.New("missing dcId") + } + if payload.Size <= 0 { + return nil, errors.New("missing size") + } + return &payload, nil +} + +func (s streamFileLocation) TG() (*tg.InputDocumentFileLocation, error) { + id, err := strconv.ParseInt(s.ID, 10, 64) + if err != nil { + return nil, errors.Wrap(err, "parse id") + } + accessHash, err := strconv.ParseInt(s.AccessHash, 10, 64) + if err != nil { + return nil, errors.Wrap(err, "parse access hash") + } + if len(s.FileReference) == 0 { + return nil, errors.New("missing file reference") + } + return &tg.InputDocumentFileLocation{ + ID: id, + AccessHash: accessHash, + FileReference: s.FileReference, + }, nil +} + +func hasExt(exts []string, name string) bool { + ext := filepath.Ext(name) + for _, candidate := range exts { + if fsutil.AddPrefixDot(candidate) == ext { + return true + } + } + return false +} + +func uniqueStreamPath(dir string, name string) string { + candidate := filepath.Join(dir, name) + if _, err := os.Stat(candidate); os.IsNotExist(err) { + return candidate + } + + ext := filepath.Ext(name) + stem := strings.TrimSuffix(name, ext) + for i := 1; ; i++ { + candidate = filepath.Join(dir, fmt.Sprintf("%s (%d)%s", stem, i, ext)) + if _, err := os.Stat(candidate); os.IsNotExist(err) { + return candidate + } + } +} diff --git a/app/dl/stream_test.go b/app/dl/stream_test.go new file mode 100644 index 000000000..6d863cb66 --- /dev/null +++ b/app/dl/stream_test.go @@ -0,0 +1,34 @@ +package dl + +import ( + "net/url" + "testing" + + "github.com/stretchr/testify/require" +) + +const testStreamJSON = `{"dcId":1,"location":{"_":"inputDocumentFileLocation","id":"5046488299653301873","access_hash":"6151466871329478284","file_reference":[1,2,3]},"size":209528276,"mimeType":"video/mp4","fileName":"video.mp4"}` + +func TestParseStreamPayload(t *testing.T) { + t.Run("encoded stream path", func(t *testing.T) { + payload, err := parseStreamPayload("stream/" + url.PathEscape(testStreamJSON)) + require.NoError(t, err) + require.Equal(t, 1, payload.DCID) + require.Equal(t, "5046488299653301873", payload.Location.ID) + require.Equal(t, "6151466871329478284", payload.Location.AccessHash) + require.Equal(t, []byte{1, 2, 3}, payload.Location.FileReference) + require.Equal(t, int64(209528276), payload.Size) + require.Equal(t, "video.mp4", payload.FileName) + }) + + t.Run("reject copied video tag", func(t *testing.T) { + input := `` + _, err := parseStreamPayload(input) + require.ErrorContains(t, err, `stream payload must start with "stream/"`) + }) +} + +func TestParseStreamPayloadValidation(t *testing.T) { + _, err := parseStreamPayload("stream/" + url.PathEscape(`{"dcId":1,"location":{"_":"inputPhotoFileLocation"},"size":1}`)) + require.ErrorContains(t, err, `unsupported stream location`) +} diff --git a/cmd/dl.go b/cmd/dl.go index 6540ee8e9..992a17e62 100644 --- a/cmd/dl.go +++ b/cmd/dl.go @@ -23,8 +23,8 @@ func NewDownload() *cobra.Command { Short: "Download anything from Telegram (protected) chat", GroupID: groupTools.ID, RunE: func(cmd *cobra.Command, args []string) error { - if len(opts.URLs) == 0 && len(opts.Files) == 0 { - return fmt.Errorf("no urls or files provided") + if len(opts.URLs) == 0 && len(opts.Files) == 0 && len(opts.Streams) == 0 { + return fmt.Errorf("no urls, files or streams provided") } opts.Template = viper.GetString(consts.FlagDlTemplate) @@ -46,6 +46,7 @@ func NewDownload() *cobra.Command { cmd.Flags().StringSliceVarP(&opts.URLs, "url", "u", []string{}, "telegram message links") cmd.Flags().StringSliceVarP(&opts.Files, file, "f", []string{}, "official client exported files") + cmd.Flags().StringArrayVar(&opts.Streams, "stream", []string{}, "Telegram Web stream payloads") cmd.Flags().String(consts.FlagDlTemplate, `{{ .DialogID }}_{{ .MessageID }}_{{ filenamify .FileName }}`, "download file name template") diff --git a/docs/content/en/guide/download.md b/docs/content/en/guide/download.md index 5d301f0e0..9ac182250 100644 --- a/docs/content/en/guide/download.md +++ b/docs/content/en/guide/download.md @@ -42,6 +42,21 @@ Please refer to [Export Messages](/guide/tools/export-messages) tdl dl -f result1.json -f result2.json {{< /command >}} +## From Telegram Web Streams: + +Telegram Web may expose a media element whose `src` starts with `stream/`. +Copy and pass that stream path directly: + +{{< command >}} +tdl dl --stream 'stream/%7B%22dcId%22%3A1...%7D' +{{< /command >}} + +{{< hint warning >}} +Telegram stream payloads contain temporary file references. If the reference has +expired, refresh Telegram Web and copy a fresh `stream/` value, or use the +original message link when available. +{{< /hint >}} + ## Combine Sources: {{< command >}} diff --git a/docs/content/zh/guide/download.md b/docs/content/zh/guide/download.md index 49451112c..f8a02f3de 100644 --- a/docs/content/zh/guide/download.md +++ b/docs/content/zh/guide/download.md @@ -39,6 +39,20 @@ tdl dl -u https://t.me/tdl/1 -u https://t.me/tdl/2 tdl dl -f result1.json -f result2.json {{< /command >}} +## 从 Telegram Web Stream 下载: + +Telegram Web 中的媒体元素可能包含以 `stream/` 开头的 `src`。 +复制并直接传入这个 stream 路径: + +{{< command >}} +tdl dl --stream 'stream/%7B%22dcId%22%3A1...%7D' +{{< /command >}} + +{{< hint warning >}} +Telegram stream payload 包含临时 file reference。如果已过期,请刷新 +Telegram Web 后复制新的 `stream/` 值;如果可以拿到原始消息链接,也可以优先使用消息链接。 +{{< /hint >}} + ## 合并下载: {{< command >}}