Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/openapi/rest.json
Original file line number Diff line number Diff line change
Expand Up @@ -2711,6 +2711,14 @@
"transforms": {
"description": "Optional transformations to apply",
"properties": {
"lexicographic_sort": {
"description": "Sort all CSV files lexicographically before zipping. Use 'asc' for ascending or 'desc' for descending order.",
"enum": [
"asc",
"desc"
],
"type": "string"
},
"normalize_timezones": {
"description": "Normalize timezone names (e.g., US/Pacific -\u003e America/Los_Angeles)",
"type": "boolean"
Expand Down
27 changes: 27 additions & 0 deletions server/rest/feed_version_export.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ type ExportTransforms struct {
UseBasicRouteTypes bool `json:"use_basic_route_types,omitempty"`
// Entity value overrides (filename.entity_id.field = value)
SetValues map[string]string `json:"set_values,omitempty"`
// Lexicographic sort order for CSV files ("asc" or "desc"). If set, all CSV files will be sorted lexicographically before zipping.
LexicographicSort string `json:"lexicographic_sort,omitempty"`
}

// FeedVersionExportOpenAPIRequest defines OpenAPI schema for export endpoint
Expand Down Expand Up @@ -145,6 +147,13 @@ func (r FeedVersionExportOpenAPIRequest) RequestInfo() RequestInfo {
},
},
},
"lexicographic_sort": &oa.SchemaRef{
Value: &oa.Schema{
Type: &oa.Types{"string"},
Description: "Sort all CSV files lexicographically before zipping. Use 'asc' for ascending or 'desc' for descending order.",
Enum: []any{"asc", "desc"},
},
},
},
},
},
Expand Down Expand Up @@ -259,6 +268,17 @@ func feedVersionExportHandler(graphqlHandler http.Handler, w http.ResponseWriter
}
_ = cpResult

// Apply lexicographic sort if requested
if req.Transforms != nil && req.Transforms.LexicographicSort != "" {
if zipAdapter, ok := csvWriter.WriterAdapter.(*tlcsv.ZipWriterAdapter); ok {
if err := zipAdapter.SortCSVFiles(req.Transforms.LexicographicSort); err != nil {
log.For(ctx).Error().Err(err).Msg("failed to sort CSV files")
util.WriteJsonError(w, "failed to sort CSV files", http.StatusInternalServerError)
return
}
}
}

if err := csvWriter.Close(); err != nil {
log.For(ctx).Error().Err(err).Msg("failed to close CSV writer")
util.WriteJsonError(w, "failed to close CSV writer", http.StatusInternalServerError)
Expand Down Expand Up @@ -349,6 +369,13 @@ func validateExportRequest(req *FeedVersionExportRequest) error {
return util.NewBadRequestError("only 'gtfs_zip' format is currently supported", nil)
}

// Validate lexicographic sort order if provided
if req.Transforms != nil && req.Transforms.LexicographicSort != "" {
if req.Transforms.LexicographicSort != "asc" && req.Transforms.LexicographicSort != "desc" {
return util.NewBadRequestError(fmt.Sprintf("invalid lexicographic_sort value: %s (must be 'asc' or 'desc')", req.Transforms.LexicographicSort), nil)
Comment thread
irees marked this conversation as resolved.
Outdated
}
}

return nil
}

Expand Down
91 changes: 91 additions & 0 deletions server/rest/feed_version_export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,84 @@ func TestFeedVersionExportRequest(t *testing.T) {
}
})

t.Run("export with lexicographic sort ascending", func(t *testing.T) {
reqBody := FeedVersionExportRequest{
FeedVersionKeys: []string{caltrainFv},
Transforms: &ExportTransforms{
LexicographicSort: "asc",
},
}
rr := makeExportRequest(t, reqBody, asAdmin)

assert.Equal(t, 200, rr.Result().StatusCode, "status code")
validateZipResponse(t, rr, map[string]int{
"agency.txt": 1,
"calendar.txt": 27,
"calendar_dates.txt": 36,
"fare_attributes.txt": 6,
"fare_rules.txt": 216,
"routes.txt": 6,
"shapes.txt": 3008,
"stop_times.txt": 2853,
"stops.txt": 64,
"trips.txt": 185,
})
// Verify stops are sorted lexicographically (ascending)
if err := makeTempReader(t, rr.Body.Bytes(), func(t *testing.T, reader *tlcsv.Reader) {
var stopIds []string
for ent := range reader.Stops() {
stopIds = append(stopIds, ent.StopID.Val)
}
// Check that stops are sorted
for i := 1; i < len(stopIds); i++ {
if stopIds[i-1] > stopIds[i] {
t.Errorf("stops not sorted ascending: %s > %s", stopIds[i-1], stopIds[i])
}
}
}); err != nil {
t.Fatalf("test failed: %v", err)
}
})

t.Run("export with lexicographic sort descending", func(t *testing.T) {
reqBody := FeedVersionExportRequest{
FeedVersionKeys: []string{caltrainFv},
Transforms: &ExportTransforms{
LexicographicSort: "desc",
},
}
rr := makeExportRequest(t, reqBody, asAdmin)

assert.Equal(t, 200, rr.Result().StatusCode, "status code")
validateZipResponse(t, rr, map[string]int{
"agency.txt": 1,
"calendar.txt": 27,
"calendar_dates.txt": 36,
"fare_attributes.txt": 6,
"fare_rules.txt": 216,
"routes.txt": 6,
"shapes.txt": 3008,
"stop_times.txt": 2853,
"stops.txt": 64,
"trips.txt": 185,
})
// Verify stops are sorted lexicographically (descending)
if err := makeTempReader(t, rr.Body.Bytes(), func(t *testing.T, reader *tlcsv.Reader) {
var stopIds []string
for ent := range reader.Stops() {
stopIds = append(stopIds, ent.StopID.Val)
}
// Check that stops are sorted descending
for i := 1; i < len(stopIds); i++ {
if stopIds[i-1] < stopIds[i] {
t.Errorf("stops not sorted descending: %s < %s", stopIds[i-1], stopIds[i])
}
}
}); err != nil {
t.Fatalf("test failed: %v", err)
}
})

t.Run("export by feed version ID", func(t *testing.T) {
reqBody := FeedVersionExportRequest{
FeedVersionKeys: []string{fmt.Sprintf("%d", fvidBySha1[caltrainFv])}, // Using ID instead of SHA1
Expand Down Expand Up @@ -314,6 +392,19 @@ func TestFeedVersionExportRequest(t *testing.T) {
assert.Contains(t, rr.Body.String(), "does not allow redistribution", "error message")
})

t.Run("bad request - invalid lexicographic_sort value", func(t *testing.T) {
reqBody := FeedVersionExportRequest{
FeedVersionKeys: []string{caltrainFv},
Transforms: &ExportTransforms{
LexicographicSort: "invalid",
},
}
rr := makeExportRequest(t, reqBody, asAdmin)

assert.Equal(t, 400, rr.Result().StatusCode, "should be bad request")
assert.Contains(t, rr.Body.String(), "invalid lexicographic_sort", "error message")
})

t.Run("bad request - feed version not imported", func(t *testing.T) {
// This test would need a feed version that exists but hasn't been imported
// The test database may not have such a case, so this is a placeholder
Expand Down
109 changes: 109 additions & 0 deletions tlcsv/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,115 @@ func NewZipWriterAdapter(path string) *ZipWriterAdapter {
}
}

// SortCSVFiles sorts all CSV files in the temporary directory lexicographically.
// sortOrder should be "asc" for ascending or "desc" for descending.
// Only sorts .txt files that were actually written to the adapter.
func (adapter *ZipWriterAdapter) SortCSVFiles(sortOrder string) error {
// Collect filenames before closing files
var filenamesToSort []string
for filename := range adapter.DirAdapter.files {
if strings.HasSuffix(filename, ".txt") {
filenamesToSort = append(filenamesToSort, filename)
}
}

// Close all open files first
for filename, f := range adapter.DirAdapter.files {
if err := f.Close(); err != nil {
return fmt.Errorf("failed to close file %s: %w", filename, err)
}
delete(adapter.DirAdapter.files, filename)
}
Comment thread
irees marked this conversation as resolved.
Outdated

// Sort each CSV file that was written
for _, filename := range filenamesToSort {

fullPath := filepath.Join(adapter.DirAdapter.path, filename)

// Read all rows
file, err := os.Open(fullPath)
if err != nil {
return fmt.Errorf("failed to open file %s: %w", filename, err)
}

reader := csv.NewReader(file)
allRows, err := reader.ReadAll()
file.Close()
if err != nil {
return fmt.Errorf("failed to read file %s: %w", filename, err)
}
Comment thread
irees marked this conversation as resolved.
Outdated

if len(allRows) == 0 {
continue
}

// Separate header from data rows
header := allRows[0]
dataRows := allRows[1:]

// Sort data rows lexicographically (by first column, then second, etc.)
if sortOrder == "desc" {
sort.Slice(dataRows, func(i, j int) bool {
return compareRowsLexicographic(dataRows[j], dataRows[i]) < 0
})
} else {
sort.Slice(dataRows, func(i, j int) bool {
return compareRowsLexicographic(dataRows[i], dataRows[j]) < 0
})
}

// Write sorted rows back to file
file, err = os.Create(fullPath)
if err != nil {
return fmt.Errorf("failed to create file %s: %w", filename, err)
}

writer := csv.NewWriter(file)
if err := writer.Write(header); err != nil {
file.Close()
return fmt.Errorf("failed to write header to %s: %w", filename, err)
}
if err := writer.WriteAll(dataRows); err != nil {
file.Close()
return fmt.Errorf("failed to write rows to %s: %w", filename, err)
}
writer.Flush()
if err := writer.Error(); err != nil {
file.Close()
return fmt.Errorf("failed to flush writer for %s: %w", filename, err)
}
file.Close()
Comment thread
irees marked this conversation as resolved.
Outdated
}

return nil
}

// compareRowsLexicographic orders two CSV rows column by column using plain
// string comparison. A column missing from the shorter row is treated as the
// empty string, so rows that differ only by trailing empty cells are equal.
// The result is -1 when row1 sorts before row2, 1 when it sorts after, and 0
// when no column differs.
func compareRowsLexicographic(row1, row2 []string) int {
	// cell returns the value at col, or "" when the row has no such column.
	cell := func(row []string, col int) string {
		if col >= len(row) {
			return ""
		}
		return row[col]
	}

	// Walk as many columns as the wider of the two rows has.
	width := len(row1)
	if width < len(row2) {
		width = len(row2)
	}

	for col := 0; col < width; col++ {
		left, right := cell(row1, col), cell(row2, col)
		switch {
		case left < right:
			return -1
		case left > right:
			return 1
		}
	}
	return 0
}

// Close creates a zip archive of all the written files at the specified destination.
func (adapter *ZipWriterAdapter) Close() error {
// Flush any buffered GeoJSON files first
Expand Down
Loading