Skip to content

Make parsing log filenames future-proof #4518

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 10 additions & 21 deletions pkg/pillar/cmd/loguploader/loguploader.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ func doFetchSend(ctx *loguploaderContext, zipDir string, iter *int) int {
if !strings.HasSuffix(f.Name(), ".gz") {
continue
}
timestamp, err := types.GetTimestampFromGzipName(f.Name())
timestamp, err := types.GetTimestampFromFileName(f.Name())
if err != nil {
continue
}
Expand Down Expand Up @@ -722,22 +722,14 @@ func doFetchSend(ctx *loguploaderContext, zipDir string, iter *int) int {
}

func buildAppUUIDMap(fName string) {
var appUUID string
if strings.HasPrefix(fName, types.AppPrefix) && strings.HasSuffix(fName, ".gz") {
fStr1 := strings.TrimPrefix(fName, types.AppPrefix)
fStr := strings.Split(fStr1, types.AppSuffix)
if len(fStr) != 2 {
err := fmt.Errorf("app split is not 2")
log.Error(err)
return
}
appUUID = fStr[0]
appUUID, err := types.GetUUIDFromFileName(fName)
if err != nil {
log.Errorf("buildAppUUIDMap: cannot parse app log filename %s: %v", fName, err)
return
}

if len(appUUID) > 0 {
if _, ok := appGzipMap[appUUID]; !ok {
appGzipMap[appUUID] = true
}
if _, ok := appGzipMap[appUUID]; !ok {
appGzipMap[appUUID] = true
}
}

Expand Down Expand Up @@ -769,13 +761,10 @@ func sendToCloud(ctx *loguploaderContext, data []byte, iter int, fName string, f
var logsURL, appLogURL string
var sentFailed, serviceUnavailable bool
if isApp {
fStr1 := strings.TrimPrefix(fName, types.AppPrefix)
fStr := strings.Split(fStr1, types.AppSuffix)
if len(fStr) != 2 {
err := fmt.Errorf("app split is not 2")
log.Fatal(err)
appUUID, err := types.GetUUIDFromFileName(fName)
if err != nil {
return false, fmt.Errorf("sendToCloud: cannot parse app file name %s: %v", fName, err)
}
appUUID := fStr[0]
if ctx.zedcloudCtx.V2API {
appLogURL = fmt.Sprintf("apps/instanceid/%s/newlogs", appUUID)
} else {
Expand Down
2 changes: 1 addition & 1 deletion pkg/pillar/cmd/volumemgr/sizemgmt.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func getRemainingDiskSpace(ctxPtr *volumemgrContext) (uint64, error) {
// Everything in /persist except these directories/datasets counts
// as EVE overhead.
// Note that we also exclude /persist/newlog here since it maintains its own
// size limit (GlobalValueInt(types.LogRemainToSendMBytes)) the caller
// size limit (GlobalValueInt(types.MaxGzipLogMBytesInPersist)) the caller
// needs to consider as EVE overhead.
var excludeDirs = append(types.AppPersistPaths, types.NewlogDir)

Expand Down
2 changes: 1 addition & 1 deletion pkg/pillar/diskmetrics/usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ func Dom0DiskReservedSize(log *base.LogObject, globalConfig *types.ConfigItemVal
(float64(dom0MinDiskUsagePercent) * 0.01))
staticMaxDom0DiskSize := uint64(globalConfig.GlobalValueInt(
types.Dom0DiskUsageMaxBytes))
newlogReserved := uint64(globalConfig.GlobalValueInt(types.LogRemainToSendMBytes))
newlogReserved := uint64(globalConfig.GlobalValueInt(types.MaxGzipLogMBytesInPersist))
// Always leave space for /persist/newlogd
maxDom0DiskSize := newlogReserved
// Select the larger of the current overhead usage and the configured
Expand Down
8 changes: 4 additions & 4 deletions pkg/pillar/types/global.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ const (
AppContainerStatsInterval GlobalSettingKey = "timer.appcontainer.stats.interval"
// VaultReadyCutOffTime global setting key
VaultReadyCutOffTime GlobalSettingKey = "timer.vault.ready.cutoff"
// LogRemainToSendMBytes Max gzip log files remain on device to be sent in Mbytes
LogRemainToSendMBytes GlobalSettingKey = "newlog.gzipfiles.ondisk.maxmegabytes"
// MaxGzipLogMBytesInPersist Max size of gzip log files in persist in Mbytes
MaxGzipLogMBytesInPersist GlobalSettingKey = "newlog.gzipfiles.ondisk.maxmegabytes"

// ForceFallbackCounter global setting key
ForceFallbackCounter = "force.fallback.counter"
Expand Down Expand Up @@ -972,8 +972,8 @@ func NewConfigItemSpecMap() ConfigItemSpecMap {
eveMemoryLimitInMiB, 0xFFFFFFFF)
// Limit manual vmm overhead override to 1 PiB
configItemSpecMap.AddIntItem(VmmMemoryLimitInMiB, 0, 0, uint32(1024*1024*1024))
// LogRemainToSendMBytes - Default is 2 Gbytes, minimum is 10 Mbytes
configItemSpecMap.AddIntItem(LogRemainToSendMBytes, 2048, 10, 0xFFFFFFFF)
// MaxGzipLogMBytesInPersist - Default is 2 Gbytes, minimum is 10 Mbytes
configItemSpecMap.AddIntItem(MaxGzipLogMBytesInPersist, 2048, 10, 0xFFFFFFFF)
configItemSpecMap.AddIntItem(DownloadMaxPortCost, 0, 0, 255)
configItemSpecMap.AddIntItem(BlobDownloadMaxRetries, 5, 1, 10)

Expand Down
2 changes: 1 addition & 1 deletion pkg/pillar/types/global_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func TestNewConfigItemSpecMap(t *testing.T) {
Dom0DiskUsageMaxBytes,
StorageZfsReserved,
ForceFallbackCounter,
LogRemainToSendMBytes,
MaxGzipLogMBytesInPersist,
DownloadMaxPortCost,
BlobDownloadMaxRetries,
KubevirtDrainTimeout,
Expand Down
60 changes: 45 additions & 15 deletions pkg/pillar/types/newlogtypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package types

import (
"fmt"
"regexp"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -85,21 +86,50 @@ type NewlogMetrics struct {
AppMetrics logfileMetrics // App metrics
}

// GetTimestampFromGzipName - get timestamp from gzip file name
func GetTimestampFromGzipName(fName string) (time.Time, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I see, we still use this func in another package, edgeveiew:

timestamp, err := types.GetTimestampFromGzipName(f.Name())
and
timestamp, err := types.GetTimestampFromGzipName(l.Name())

So, we should be careful here...

// here are example file names:
// app.6656f860-7563-4bbf-8bba-051f5942982b.log.1730464687367.gz
// dev.log.keep.1730404601953.gz
// dev.log.upload.1730404601953.gz
// the timestamp is the number between the last two dots
nameParts := strings.Split(fName, ".")
if len(nameParts) < 2 {
return time.Time{}, fmt.Errorf("getTimestampFromGzipName: invalid log file name %s", fName)
var (
timestampRegex *regexp.Regexp
uuidRegex *regexp.Regexp
)

func init() {
// Regular expression to match a timestamp
timestampRegex = regexp.MustCompile(`^\d+$`)

// UUID regex pattern (supports v4 UUIDs like "123e4567-e89b-12d3-a456-426614174000")
uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`)
}

// GetTimestampFromFileName extracts a millisecond timestamp from a filename
func GetTimestampFromFileName(filename string) (time.Time, error) {
// Split the filename into parts using dots
parts := strings.Split(filename, ".")

// Check each part for a timestamp match
for _, part := range parts {
if timestampRegex.MatchString(part) {
// Convert the matched timestamp string to an integer
timestamp, err := strconv.ParseInt(part, 10, 64)
if err != nil {
return time.Time{}, fmt.Errorf("failed to parse timestamp: %s", err)
}
return time.Unix(0, timestamp*int64(time.Millisecond)), nil // Return the first valid timestamp found
}
}
timeStr := nameParts[len(nameParts)-2]
fTime, err := strconv.Atoi(timeStr)
if err != nil {
return time.Time{}, fmt.Errorf("getTimestampFromGzipName: %w", err)

return time.Time{}, fmt.Errorf("no timestamp found in filename: %s", filename)
}

// GetUUIDFromFileName extracts a UUID from a filename with dot-delimited parts
func GetUUIDFromFileName(filename string) (string, error) {
// Split the filename into parts using dots
parts := strings.Split(filename, ".")

// Check each part for a UUID match
for _, part := range parts {
if uuidRegex.MatchString(part) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or call the UUID parser function we use elsewhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we mostly use uuid.FromString, which returns a UUID type and I'm fine with string

return part, nil // Return the first UUID found
}
}
return time.Unix(0, int64(fTime)*int64(time.Millisecond)), nil

return "", fmt.Errorf("no UUID found in filename: %s", filename)
}
186 changes: 186 additions & 0 deletions pkg/pillar/types/newlogtypes_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
// Copyright (c) 2025 Zededa, Inc.
// SPDX-License-Identifier: Apache-2.0

package types

import (
"testing"
"time"

"github.com/onsi/gomega"
)

func TestGetTimestampFromFileName(t *testing.T) {
t.Parallel()
g := gomega.NewWithT(t)

tests := []struct {
name string
filename string
wantTime time.Time
wantError bool
}{
{
name: "Valid timestamp in filename",
filename: "dev.log.1731491904032.gz",
wantTime: time.Unix(0, 1731491904032*int64(time.Millisecond)),
wantError: false,
},
{
name: "Valid timestamp in regular filename",
filename: "dev.log.1731491904032",
wantTime: time.Unix(0, 1731491904032*int64(time.Millisecond)),
wantError: false,
},
{
name: "Valid timestamp with UUID",
filename: "app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496.gz",
wantTime: time.Unix(0, 1731935033496*int64(time.Millisecond)),
wantError: false,
},
{
name: "Two timestamps in filename",
filename: "dev.log.1731935033496.123.gz",
wantTime: time.Unix(0, 1731935033496*int64(time.Millisecond)),
wantError: false,
},
{
name: "Invalid timestamp in filename",
filename: "dev.log.invalidtimestamp.gz",
wantTime: time.Time{},
wantError: true,
},
{
name: "No timestamp in filename",
filename: "dev.log.gz",
wantTime: time.Time{},
wantError: true,
},
{
name: "Old timestamp (short format) in filename",
filename: "dev.log.123.gz",
wantTime: time.Unix(0, 123*int64(time.Millisecond)),
wantError: false,
},
{
name: "Old timestamp (long format) in filename",
filename: "dev.log.0000000000123.gz",
wantTime: time.Unix(0, 123*int64(time.Millisecond)),
wantError: false,
},
}

for _, tt := range tests {
tt := tt // create a new variable to hold the value of tt to avoid being overwritten by the next iteration (needed until Go 1.23)
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
gotTime, err := GetTimestampFromFileName(tt.filename)
if tt.wantError {
g.Expect(err).To(gomega.HaveOccurred())
} else {
g.Expect(err).NotTo(gomega.HaveOccurred())
g.Expect(gotTime).To(gomega.Equal(tt.wantTime))
}
})
}
}

func FuzzGetTimestampFromFileName(f *testing.F) {
testcases := []string{
"dev.log.1731491904032.gz",
"app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496.gz",
"dev.log.invalidtimestamp.gz",
"dev.log.gz",
"dev.log.123456789012.gz",
"dev.log.1234567890123456.gz",
}

for _, tc := range testcases {
f.Add(tc)
}

f.Fuzz(func(t *testing.T, filename string) {
_, _ = GetTimestampFromFileName(filename)
})
}

func TestGetUUIDFromFileName(t *testing.T) {
t.Parallel()
g := gomega.NewWithT(t)

tests := []struct {
name string
filename string
wantUUID string
wantError bool
}{
{
name: "Valid UUID in filename",
filename: "app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496.gz",
wantUUID: "8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d",
wantError: false,
},
{
name: "Valid UUID in regular filename",
filename: "app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496",
wantUUID: "8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d",
wantError: false,
},
{
name: "Valid UUID with timestamp",
filename: "app.123e4567-e89b-12d3-a456-426614174000.log.1731935033496.gz",
wantUUID: "123e4567-e89b-12d3-a456-426614174000",
wantError: false,
},
{
name: "No UUID in filename",
filename: "dev.log.1731491904032.gz",
wantUUID: "",
wantError: true,
},
{
name: "Invalid UUID in filename",
filename: "app.invalid-uuid-string.log.1731935033496.gz",
wantUUID: "",
wantError: true,
},
{
name: "UUID at the end of filename",
filename: "app.log.1731935033496.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.gz",
wantUUID: "8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d",
wantError: false,
},
}

for _, tt := range tests {
tt := tt // create a new variable to hold the value of tt to avoid being overwritten by the next iteration (needed until Go 1.23)
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
gotUUID, err := GetUUIDFromFileName(tt.filename)
if tt.wantError {
g.Expect(err).To(gomega.HaveOccurred())
} else {
g.Expect(err).NotTo(gomega.HaveOccurred())
g.Expect(gotUUID).To(gomega.Equal(tt.wantUUID))
}
})
}
}

func FuzzGetUUIDFromFileName(f *testing.F) {
testcases := []string{
"app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496.gz",
"app.123e4567-e89b-12d3-a456-426614174000.log.1731935033496.gz",
"dev.log.1731491904032.gz",
"app.invalid-uuid-string.log.1731935033496.gz",
"app.log.1731935033496.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.gz",
}

for _, tc := range testcases {
f.Add(tc)
}

f.Fuzz(func(t *testing.T, filename string) {
_, _ = GetUUIDFromFileName(filename)
})
}
Loading