Skip to content

Commit ca36ec7

Browse files
committed
Added support for CLDF sources in stats cmd
1 parent 00e00ee commit ca36ec7

7 files changed

Lines changed: 78 additions & 15 deletions

File tree

cldf/dataset.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"errors"
55
"fmt"
66
"gocldf/internal/jsonutil"
7+
"gocldf/internal/pathutil"
78
"path/filepath"
89
"slices"
910
"strings"
@@ -75,6 +76,17 @@ func GetLoadedDataset(mdPath string, noChecks bool) (ds *Dataset, err error) {
7576
return ds, nil
7677
}
7778

79+
func (dataset *Dataset) TablePath(tbl *Table) (string, error) {
80+
res := filepath.Join(filepath.Dir(dataset.MetadataPath), tbl.Url)
81+
if !pathutil.PathExists(res) {
82+
res += ".zip"
83+
}
84+
if !pathutil.PathExists(res) {
85+
return "", errors.New("table path does not exist")
86+
}
87+
return res, nil
88+
}
89+
7890
func (dataset *Dataset) LoadData(noChecks bool) error {
7991
results := make(chan TableRead, len(dataset.Tables))
8092
for _, tbl := range dataset.Tables {

cldf/sources.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ func NewSource(entry *bibtex.BibEntry) *Source {
3333
}
3434

3535
// Sources is the collection of Source entries that NewSources builds from a
// single file.
type Sources struct {
36+
// Path is the path reported by pathutil.Reader for the file that was read
// (NewSources stores the first return value of that call here).
Path string
3637
// Items holds one *Source per entry collected by NewSources.
Items []*Source
3738
// FieldNames is the list of field names gathered by NewSources.
// NOTE(review): presumably the union of field names seen across all
// entries; confirm against the elided part of NewSources.
FieldNames []string
3839
}
@@ -61,7 +62,7 @@ func normalizeBibtex(r io.Reader) (io.Reader, error) {
6162
}
6263

6364
func NewSources(p string) (sources *Sources, err error) {
64-
f, err := pathutil.Reader(p)
65+
pp, f, err := pathutil.Reader(p)
6566
if err != nil {
6667
return nil, err
6768
}
@@ -90,8 +91,7 @@ func NewSources(p string) (sources *Sources, err error) {
9091
}
9192
}
9293
}
93-
return &Sources{Items: res, FieldNames: fields}, nil
94-
94+
return &Sources{Path: pp, Items: res, FieldNames: fields}, nil
9595
}
9696

9797
func (s *Sources) SqlCreate() string {

cldf/table.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ func (tbl *Table) Read(dir string, dialect *Dialect, noChecks bool, ch chan<- Ta
159159
rows [][]string
160160
err error
161161
)
162-
r, err := pathutil.Reader(fp)
162+
_, r, err := pathutil.Reader(fp)
163163
if err != nil {
164164
ch <- TableRead{tbl.Url, err}
165165
return

cmd/stats.go

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ import (
44
"encoding/json"
55
"fmt"
66
"gocldf/cldf"
7+
"gocldf/internal/pathutil"
78
"io"
9+
"path/filepath"
810
"text/tabwriter"
911

1012
"github.com/spf13/cobra"
@@ -34,15 +36,42 @@ func stats(out io.Writer, mdPath string, withMetadata bool) error {
3436
}
3537
w := tabwriter.NewWriter(out, 0, 0, 1, ' ', tabwriter.Debug)
3638
// noinspection GoUnhandledErrorResultInspection
37-
fmt.Fprintf(w, "%v\t%v\t%v\t%v\n", "Filename", "Component", "Rows", "FKs")
39+
fmt.Fprintf(w, "%v\t%v\t%v\t%v\n", "Filename", "Component", "Rows", "Size")
3840
// noinspection GoUnhandledErrorResultInspection
39-
fmt.Fprintf(w, "%v\t%v\t%v\t%v\n", "--------", "---------", "----", "---")
41+
fmt.Fprintf(w, "%v\t%v\t%v\t%v\n", "--------", "---------", "--------", "----------")
4042
for _, table := range ds.Tables {
43+
path, err := ds.TablePath(table)
44+
if err != nil {
45+
return err
46+
}
47+
size, err := pathutil.GetFormattedSize(path)
48+
if err != nil {
49+
return err
50+
}
4151
cname := ""
4252
if table.Comp != "" {
4353
cname = table.CanonicalName
4454
}
45-
fmt.Fprintf(w, "%v\t%v\t%v\t%v\n", table.Url, cname, len(table.Data), len(table.ForeignKeys))
55+
fmt.Fprintf(
56+
w,
57+
"%v\t%v\t%v\t%v\n",
58+
filepath.Base(path),
59+
cname,
60+
fmt.Sprintf("%8s", fmt.Sprintf("%v", len(table.Data))),
61+
fmt.Sprintf("%10s", size))
62+
}
63+
if ds.Sources != nil {
64+
size, err := pathutil.GetFormattedSize(ds.Sources.Path)
65+
if err != nil {
66+
return err
67+
}
68+
fmt.Fprintf(
69+
w,
70+
"%v\t%v\t%v\t%v\n",
71+
filepath.Base(ds.Sources.Path),
72+
"SourceTable",
73+
fmt.Sprintf("%8s", fmt.Sprintf("%v", len(ds.Sources.Items))),
74+
fmt.Sprintf("%10s", size))
4675
}
4776
// noinspection GoUnhandledErrorResultInspection
4877
w.Flush()

internal/pathutil/path.go

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,29 @@ import (
44
"archive/zip"
55
"bytes"
66
"errors"
7+
"fmt"
78
"io"
89
"os"
910
)
1011

12+
// GetFormattedSize returns the size of the file at path as a short
// human-readable string such as "512.0bytes", "1.5KB" or "3.2MB".
//
// If os.Stat fails (for example because path does not exist), that error is
// returned unchanged together with an empty string.
func GetFormattedSize(path string) (string, error) {
	info, err := os.Stat(path)
	if err != nil {
		return "", err
	}
	return formatSize(info.Size()), nil
}

// formatSize renders a byte count with one decimal place and a unit suffix,
// scaling by factors of 1024 through bytes, KB, MB and GB, and falling back
// to TB for anything larger.
func formatSize(n int64) string {
	size := float64(n)
	for _, unit := range []string{"bytes", "KB", "MB", "GB"} {
		if size < 1024.0 && size > -1024.0 {
			return fmt.Sprintf("%.1f%v", size, unit)
		}
		size /= 1024.0
	}
	// TB uses the same one-decimal layout as the smaller units so that a
	// column of sizes stays uniform (%g would print e.g. "1.1875TB").
	return fmt.Sprintf("%.1fTB", size)
}
29+
1130
func PathExists(path string) bool {
1231
_, err := os.Stat(path)
1332
if err == nil {
@@ -75,17 +94,17 @@ Usage:
7594
}
7695
}(reader)
7796
*/
78-
func Reader(p string) (r any, err error) {
97+
func Reader(p string) (pp string, r any, err error) {
7998
// p is not present as-is: fall back to the archive at p + ".zip".
if !PathExists(p) {
8099
zippedBytes, err := readZipped(p + ".zip")
81100
if err != nil {
82-
return nil, err
101+
return "", nil, err
83102
}
84-
return bytes.NewReader(zippedBytes), nil
103+
// Report the archive path that was actually used; the reader serves the
// bytes returned by readZipped from memory.
return p + ".zip", bytes.NewReader(zippedBytes), nil
85104
}
86105
// p exists: hand back the opened file itself. This function does not
// close it.
file, err := os.Open(p)
87106
if err != nil {
88-
return nil, err
107+
return "", nil, err
89108
}
90-
return file, nil
109+
// The first result is the path that was opened, here p unchanged.
return p, file, nil
91110
}

internal/pathutil/path_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77
)
88

99
func read(fname string) (string, error) {
10-
r, err := Reader(filepath.Join("testdata", fname))
10+
_, r, err := Reader(filepath.Join("testdata", fname))
1111
if err != nil {
1212
return "", err
1313
}

test/test_regression.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import pathlib
88
import subprocess
99

10+
from clldutils.misc import format_size
11+
1012
PROJECTS = pathlib.Path(os.path.expanduser('~')) / "projects"
1113
DATASETS = [
1214
"grambank/grambank-cldf/cldf/StructureDataset-metadata.json",
@@ -28,11 +30,12 @@ def run():
2830
shutil.copy(wd / "gocldf", bin)
2931
assert bin.exists()
3032
for ds in DATASETS:
33+
out = temp / "db.sqlite"
3134
s = time.time()
3235
print("{} ...".format(ds))
33-
res = subprocess.check_output([str(bin), "createdb", str(PROJECTS / ds), "db.sqlite", "-f"])
36+
res = subprocess.check_output([str(bin), "createdb", str(PROJECTS / ds), str(out), "-f"])
3437
assert "Loaded" in res.decode("utf8")
35-
print("... {:.1f}s".format(time.time()-s))
38+
print("... {:.1f}s\t{}".format(time.time()-s, format_size(out.stat().st_size)))
3639

3740
if __name__ == "__main__":
3841
run()

0 commit comments

Comments
 (0)