Skip to content

Commit 8530a05

Browse files
authored
Merge pull request #11 from loleg/master
Remote loading of data packages
2 parents 14f6c81 + f94a09f commit 8530a05

File tree

9 files changed

+94
-16
lines changed

9 files changed

+94
-16
lines changed

README.md

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,25 @@ Initially this package is primarily targeted and depends on [TableSchema.jl](htt
2323

2424
:construction: This package is pre-release and under heavy development. Please visit the [issues page](https://github.com/frictionlessdata/datapackage-jl/issues) to contribute and make suggestions. For questions that need a real-time response, reach out via [Gitter](https://gitter.im/frictionlessdata/chat). Thanks! :construction:
2525

26+
Please visit [our wiki](https://github.com/frictionlessdata/datapackage-jl/wiki) for a list of related projects that we are tracking, and suggest use cases there or as enhancement [issues](https://github.com/frictionlessdata/datapackage-jl/issues).
27+
2628
# Usage
2729

28-
Please visit [our wiki](https://github.com/frictionlessdata/datapackage-jl/wiki) for a list of related projects that we are tracking, and suggest use cases there or as enhancement [issues](https://github.com/frictionlessdata/datapackage-jl/issues).
30+
Install *tableschema-jl* (temporarily obtained from GitHub until released):
31+
32+
`$ julia -e 'Pkg.clone("https://github.com/frictionlessdata/tableschema-jl")'`
33+
34+
You may also need to explicitly install *HTTP*:
35+
36+
`$ julia -e 'Pkg.add("HTTP")'`
37+
38+
Update to the latest version with:
39+
40+
`$ julia -e 'Pkg.update("TableSchema")'`
2941

30-
Install *tableschema-jl* first:
42+
Clone this repository and enter it:
3143

32-
`julia -e 'Pkg.clone("https://github.com/loleg/TableSchema.jl")'`
44+
`$ git clone https://github.com/frictionlessdata/datapackage-jl && cd datapackage-jl`
3345

3446
See *examples* folder and unit tests in [runtests.jl](test/runtests.jl) for current usage, e.g.:
3547

REQUIRE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
julia 0.6
2+
HTTP

examples/datahub.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
include("../src/DataPackage.jl")
2+
using DataPackage
3+
import DataPackage: Package, read
4+
5+
data_url = "https://datahub.io/core/pharmaceutical-drug-spending/datapackage.json"
6+
7+
# to load Data Package into storage
8+
package = Package(data_url)
9+
10+
# to load only tabular data
11+
resources = package.resources
12+
for resource in resources
13+
if resource.profile == "tabular-data-resource"
14+
data = read(resource)
15+
println(data)
16+
end
17+
end

examples/infer.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import DataPackage: add_resource
77

88
# Initialise a resource from a CSV file
99
# Source: https://opendata.swiss/en/dataset/kennzahlen-der-schweizer-pflegeheime
10-
res = Resource("data/health/hopitaux_ch_2016_Sample_fr.csv", false, "hopitaux")
10+
res = Resource("data/health/hopitaux_ch_2016_Sample_fr.csv", strict=false, name="hopitaux")
1111

1212
# Add this resource to a blank data package
1313
p = Package()

examples/remote.jl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
using TableSchema
2+
import TableSchema: read, validate
3+
4+
include("../src/DataPackage.jl")
5+
using DataPackage
6+
import DataPackage: Resource, add_resource, get_table, read
7+
8+
# Initialise a resource from a remote file
9+
REMOTE_URL = "https://raw.githubusercontent.com/frictionlessdata/tableschema-jl/master/data/data_simple.csv"
10+
res = Resource(REMOTE_URL, strict=false, name="sample")
11+
12+
# Add this resource to a blank data package
13+
p = Package()
14+
add_resource(p, res)
15+
16+
# Read the data from the table
17+
data = read(res)
18+
println( "The number of cells is ", length(data[:,1]) )
19+
println( "Column 1 is called ", res.schema.fields[1].name )
20+
println( join([ row for row in data[:,1] ], ", ", " and ") )
21+
22+
# Validate the table
23+
table = get_table(res)
24+
if validate(table); println("The table is valid according to the Schema"); end

src/DataPackage.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ using JSON
1717

1818
using TableSchema
1919
import TableSchema: Schema
20+
import TableSchema: read, infer, validate, is_empty
21+
22+
import HTTP: request
2023

2124
include("exceptions.jl")
2225
include("resource.jl")

src/package.jl

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,33 @@ mutable struct Package
1313
resources = []
1414
if haskey(d, "resources")
1515
for r in d["resources"]
16-
t = Resource(r, strict)
17-
push!(resources, t)
16+
if !isempty(r)
17+
t = Resource(r, strict=strict)
18+
push!(resources, t)
19+
end
1820
end
1921
end
2022
new(d, strict, [], resources)
2123
end
2224

2325
Package(filename::String, strict::Bool=false) =
24-
Package(JSON.parsefile(filename), strict)
26+
Package(fetch_json(filename), strict)
2527

2628
Package(strict::Bool=false) =
2729
Package(Dict(), strict)
2830
end
2931

32+
"""
    fetch_json(filename::String)

Load a JSON document from `filename` and return the parsed result.

If `filename` starts with `http://` or `https://`, it is fetched with an HTTP
GET request; otherwise it is treated as a local file path and parsed with
`JSON.parsefile`. When the parsed document is empty, a message is printed and
the (empty) result is still returned.
"""
function fetch_json(filename::String)
    if match(r"^https?://", filename) !== nothing
        resp = request("GET", filename)
        # NOTE(review): HTTP.jl exposes the response body as raw bytes, while
        # JSON.parse expects text (or an IO), so decode the body explicitly.
        j = JSON.parse(String(resp.body))
    else
        j = JSON.parsefile(filename)
    end
    isempty(j) && println("JSON could not be loaded")
    return j
end
42+
3043
add_resource(p::Package, r::Resource) =
3144
push!(p.resources, r)
3245

src/resource.jl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,22 @@ mutable struct Resource
1313
schema::Schema
1414
errors::Array{PackageError}
1515

16-
function Resource(d::Dict, strict::Bool=false)
16+
function Resource(d::Dict ; strict::Bool=false)
1717
schema = haskey(d, "schema") ?
18-
Schema(d["schema"], strict) : nothing
18+
Schema(d["schema"], strict) : Schema()
1919
name = haskey(d, "name") ?
2020
d["name"] : nothing
2121
path = haskey(d, "path") ?
2222
d["path"] : nothing
2323
profile = haskey(d, "profile") ?
2424
d["profile"] : nothing
2525
dialect = haskey(d, "dialect") ?
26-
d["dialect"] : nothing
26+
d["dialect"] : Dict()
2727

2828
new(d, strict, name, path, profile, dialect, schema, [])
2929
end
3030

31-
function Resource(path::String, strict::Bool=false, name::String=nothing)
31+
function Resource(path::String ; strict::Bool=false, name::String=nothing)
3232
name = isempty(name) ? path.split('/')[-1] : name
3333
new(
3434
Dict(), strict, name, path, "tabular-data-resource", Dict(), Schema(), []
@@ -48,14 +48,14 @@ function fields_to_descriptor(s::Schema)
4848
end
4949

5050
function get_table(r::Resource)
51-
if TableSchema.is_empty(r.schema)
51+
if is_empty(r.schema)
5252
s = Schema()
5353
t = Table(r.path)
54-
tr = TableSchema.read(t, cast=false)
55-
TableSchema.infer(s, tr, t.headers)
54+
tr = read(t, cast=false)
55+
infer(s, tr, t.headers)
5656
s.errors = []
5757
s.descriptor = fields_to_descriptor(s)
58-
TableSchema.validate(s, r.strict)
58+
validate(s, r.strict)
5959
r.schema = t.schema = s
6060
t
6161
else
@@ -66,7 +66,7 @@ end
6666
function read(r::Resource)
6767
if r.profile == "tabular-data-resource"
6868
t = get_table(r)
69-
TableSchema.read(t, cast=false)
69+
read(t, cast=false)
7070
else
7171
throw(ErrorException("Not supported"))
7272
end

test/read.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,12 @@
1818
@test data[2,1] == "London"
1919
end
2020

21+
@testset "Read remote data package" begin
22+
p = Package("https://raw.githubusercontent.com/frictionlessdata/datapackage-jl/master/data/cities/datapackage.json")
23+
r = get_resource(p, "cities")
24+
r.path = "https://raw.githubusercontent.com/frictionlessdata/datapackage-jl/master/data/cities/cities.csv"
25+
data = read(r)
26+
@test data[2,1] == "London"
27+
end
28+
2129
end

0 commit comments

Comments
 (0)