Herb-AI
diff --git a/‎src/data/DeepCoder_2016/DeepCoder_2016.jl‎
Lines changed: 95 additions & 0 deletions b/‎src/data/DeepCoder_2016/DeepCoder_2016.jl‎
Lines changed: 95 additions & 0 deletions
diff --git a/‎src/data/DeepCoder_2016/README.md‎
Lines changed: 21 additions & 0 deletions b/‎src/data/DeepCoder_2016/README.md‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎src/data/DeepCoder_2016/base_grammar.jl‎
Lines changed: 48 additions & 0 deletions b/‎src/data/DeepCoder_2016/base_grammar.jl‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎src/data/DeepCoder_2016/citation.bib‎
Lines changed: 8 additions & 0 deletions b/‎src/data/DeepCoder_2016/citation.bib‎
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,95 @@
+module DeepCoder_2016
+
+using HerbCore
+using HerbSpecification
+using HerbGrammar
+
+using JSON
+
+include("data.jl")
+include("base_grammar.jl")
+include("grammars.jl")
+
+include("list_functions.jl")
+
+# Public API of the module. Names must be comma-separated: without the comma the
+# export list ends after the first name and the second is never exported.
+export
+    parse_deepcoder_problem_and_grammar,
+    base_grammar_deepcoder
+
+"""
+    parse_deepcoder_problem_and_grammar(filename::AbstractString, base_grammar::AbstractGrammar)
+
+Parse the DeepCoder problem stored as JSON in `filename` and build a grammar for it.
+
+The JSON document must contain an `"examples"` array, whose entries each have an
+`"input"` and an `"output"` field, and a `"name"` string containing the problem number.
+
+# Returns
+A tuple `(problem, grammar)`:
+- `problem` is a `Problem` named `"problem_NNN"` (number left-padded with zeros to
+  three characters) holding one `IOExample` per JSON example;
+- `grammar` is a deep copy of `base_grammar` extended with a `Start` rule for the
+  output non-terminal and one rule per input argument. `base_grammar` is not mutated.
+
+# Throws
+- `ArgumentError` if the file contains no examples.
+- `ErrorException` if no number can be found in the `"name"` field.
+"""
+function parse_deepcoder_problem_and_grammar(filename::AbstractString,
+                                             base_grammar::AbstractGrammar)
+    raw = JSON.parsefile(filename)
+
+    examples = IOExample[]
+    for ex in raw["examples"]
+        args = split_inputs(ex["input"])
+        out  = normalize_value(ex["output"])
+        push!(examples, IOExample(args, out))
+    end
+    # The signature is inferred from the first example below, so at least one is required.
+    isempty(examples) && throw(ArgumentError("No examples found in: $filename"))
+
+    number = match(r"\d+", raw["name"])
+    number === nothing && error("Could not extract problem number from: $filename")
+    problem_name = "problem_" * lpad(number.match, 3, '0')
+    problem = Problem(problem_name, examples)
+
+    # infer from first example (DeepCoder tasks are consistent)
+    sig = infer_signature(examples[1].in)
+    start_nt = infer_output_nt(examples[1].out)
+
+    # combine base + extras; copy first so the caller's grammar stays untouched
+    g = deepcopy(base_grammar)
+    add_extras!(g, sig, start_nt)
+
+    return problem, g
+end
+
+"""
+    split_inputs(raw_in) -> Dict{Symbol,Any}
+
+Convert the raw `"input"` array of a DeepCoder example into an argument dictionary.
+
+`raw_in` must be a `Vector` with one or two entries. Entry `i` is stored under the key
+`:_arg_i`; a list entry becomes a `Vector{Int}` and a scalar entry becomes an `Int`.
+"""
+function split_inputs(raw_in)::Dict{Symbol,Any}
+    @assert raw_in isa Vector "DeepCoder 'input' must be an array"
+    n = length(raw_in)
+    @assert 1 <= n <= 2 "Expected 1 or 2 inputs, got $n"
+
+    # A typed comprehension pins the element type to `Int` even for an empty list.
+    # `map(Int, v)` on an empty `Vector{Any}` relies on inference for its element type
+    # and may yield a `Vector{Any}`, which `infer_signature` would then reject.
+    tojl(v) = v isa Vector ? Int[Int(x) for x in v] : Int(v)
+
+    args = Dict{Symbol,Any}()
+    for (i, v) in enumerate(raw_in)
+        args[Symbol("_arg_", i)] = tojl(v)
+    end
+    return args
+end
+
+"""
+    infer_signature(args::Dict{Symbol,Any}) -> Dict{Symbol,Symbol}
+
+Map every argument name in `args` to the grammar non-terminal matching its value:
+`:ExprArr` for a vector of integers, `:ExprNum` for a single integer. Any other value
+raises an error naming the offending argument and its type.
+"""
+function infer_signature(args::Dict{Symbol,Any})::Dict{Symbol,Symbol}
+    # Classify a single value; the vector check comes first, as in the scalar fallback.
+    classify(name, value) =
+        value isa AbstractVector{<:Integer} ? :ExprArr :
+        value isa Integer ? :ExprNum :
+        error("Unsupported input type for $(name): $(typeof(value))")
+
+    return Dict{Symbol,Symbol}(name => classify(name, value) for (name, value) in args)
+end
+
+"""
+    add_extras!(g::AbstractGrammar, sig::Dict{Symbol,Symbol}, start_nt)
+
+Extend `g` in place with the problem-specific rules and return it.
+
+Adds the rule `Start = <start_nt>` followed by one rule `<nt> = <arg>` for every
+`arg => nt` pair in `sig`. `start_nt` may be given as a string or a symbol.
+"""
+function add_extras!(g::AbstractGrammar, sig::Dict{Symbol,Symbol},
+                     start_nt::Union{AbstractString,Symbol})
+    # `make_sym_rule` takes symbols on both sides, so a string name is converted first.
+    add_rule!(g, make_sym_rule(:Start, Symbol(start_nt)))
+    for (arg, nt) in sig
+        add_rule!(g, make_sym_rule(nt, arg))
+    end
+    return g
+end
+
+"""
+    infer_output_nt(out) -> String
+
+Return the name of the non-terminal producing values like `out`: `"ExprArr"` for any
+vector and `"ExprNum"` for an integer. Other values raise an error.
+"""
+function infer_output_nt(out)::String
+    if out isa AbstractVector{<:Any}
+        return "ExprArr"
+    elseif out isa Integer
+        return "ExprNum"
+    end
+    return error("Unsupported output type: $(typeof(out)): $out")
+end
+                                
+"""
+    normalize_value(x)
+
+Convert a raw JSON value to integers: a vector becomes a `Vector{Int}`, anything else
+is converted with `Int(x)`.
+
+The typed comprehension guarantees a `Vector{Int}` even for an empty list, where the
+element type chosen by `map` depends on inference.
+"""
+normalize_value(x) = x isa AbstractVector ? Int[Int(v) for v in x] : Int(x)
+
+"""
+    make_sym_rule(lhs::Symbol, rhs::Symbol) -> Expr
+
+Build the assignment expression `lhs = rhs` from two symbols.
+"""
+function make_sym_rule(lhs::Symbol, rhs::Symbol)::Expr
+    return :($lhs = $rhs)
+end
+
+end # module DeepCoder_2016
@@ -0,0 +1,21 @@
+# DeepCoder Benchmark
+
+The DeepCoder benchmark targets functional programs that manipulate lists.
+Each problem is written as a set of input-output examples.
+
+The DeepCoder benchmark is derived from Balog et al. (2016) using the setup from Neo (Feng et al., 2018), because the original DeepCoder evaluation benchmarks are not publicly available.
+Neo thus generated 100 benchmarks following this workflow:
+
+> We enumerate DSL programs with
+> at least 5 components and randomly generate inputs and the
+> corresponding output. This procedure is repeated for a fixed
+> number of times until we either obtain 5 valid input-output
+> examples or no examples have been found within the
+> iteration limit. In the latter case, we restart this process and
+> randomly search for a different program.
+
+See
+
+> Balog, M., Gaunt, A. L., Brockschmidt, M., Nowozin, S., & Tarlow, D. (2016). Deepcoder: Learning to write programs. arXiv preprint arXiv:1611.01989.
+
+and
+
+> Feng, Y., Martins, R., Bastani, O., & Dillig, I. (2018). Program synthesis using conflict-driven learning. ACM SIGPLAN Notices, 53(4), 420-435.
+
@@ -0,0 +1,48 @@
+# Base grammar shared by all DeepCoder problems.
+#
+# It has two expression non-terminals, `ExprNum` and `ExprArr`, plus `Int` for
+# constants. It contains no `Start` rule and no rules for input arguments.
+#
+# Every operator rule is followed by `:=` and a tuple of predicates written over `y`,
+# `x1` and `x2`. Presumably `y` is the rule's output and `x1`/`x2` its first and second
+# argument; TODO confirm how these annotations are consumed.
+#
+# Operators such as `countSt`, `filterSt`, `mapPlus`, `zipwithMax`, `scanl1Plus`, `drop`
+# and `take` are not defined in this file; presumably they come from another file of
+# the module (verify).
+#
+# NOTE(review): this is a non-const global binding.
+base_grammar_deepcoder = @csgrammar begin
+    # Integer constants; `|(-3:3)` is presumably the range shorthand giving one rule per
+    # value in -3:3 (confirm against HerbGrammar).
+    Int = |(-3:3)
+
+    ExprNum = Int
+
+    # Array -> number reductions and element access.
+    ExprNum = maximum(ExprArr)              := (length(x1) > 1, maximum(y) == maximum(x1), minimum(y) > minimum(x1))
+    ExprNum = minimum(ExprArr)              := (length(x1) > 1, maximum(y) < maximum(x1), minimum(y) == minimum(x1))
+    ExprNum = sum(ExprArr)                  := (length(x1) > 1)
+    ExprNum = first(ExprArr)                := (length(x1) > 1, maximum(y) <= maximum(x1), minimum(y) >= minimum(x1), first(y) == first(x1), last(y) == first(x1))
+    ExprNum = last(ExprArr)                 := (length(x1) > 1, maximum(y) <= maximum(x1), minimum(y) >= minimum(x1), first(y) == last(x1), last(y) == last(x1))
+    ExprNum = getindex(ExprArr, ExprNum)    := (length(x1) > 1, maximum(y) <= maximum(x1), minimum(y) >= minimum(x1), first(x2) > 0, length(x1) > first(x2))
+
+    # Counting operators; the second argument is an `Int` constant.
+    ExprNum = countSt(ExprArr, Int)         := (length(x1) > 1, last(y) <= length(x1), last(y) >= 0)
+    ExprNum = countGt(ExprArr, Int)         := (length(x1) > 1, last(y) <= length(x1), last(y) >= 0)
+    ExprNum = countEq(ExprArr, Int)         := (length(x1) > 1, last(y) <= length(x1), last(y) >= 0)
+    ExprNum = countNeq(ExprArr, Int)        := (length(x1) > 1, last(y) <= length(x1), last(y) >= 0)
+    ExprNum = countMod(ExprArr, Int)        := (length(x1) > 1, last(y) <= length(x1), last(y) >= 0)
+    ExprNum = countNmod(ExprArr, Int)       := (length(x1) > 1, last(y) <= length(x1), last(y) >= 0)
+
+    # Array -> array operators: slicing and reordering.
+    ExprArr = drop(ExprArr, ExprNum)        := (length(y) < length(x1), maximum(y) <= maximum(x1), minimum(y) >= minimum(x1), last(y) == last(x1), first(x2) > 0, length(x1) > maximum(x2))
+    ExprArr = take(ExprArr, ExprNum)        := (length(y) < length(x1), maximum(y) <= maximum(x1), minimum(y) >= minimum(x1), first(y) == first(x1), first(x2) > 0, length(x1) > maximum(x2))
+    ExprArr = sort(ExprArr)                 := (length(y) == length(x1), maximum(y) == maximum(x1), minimum(y) == minimum(x1), first(y) == minimum(x1), last(y) == maximum(x1))
+    ExprArr = reverse(ExprArr)              := (length(y) == length(x1), maximum(y) == maximum(x1), minimum(y) == minimum(x1), first(y) == last(x1), last(y) == first(x1))
+
+    # Filters; the second argument is an `Int` constant.
+    ExprArr = filterSt(ExprArr, Int)        := (length(y) < length(x1), maximum(y) <= maximum(x1), minimum(y) >= minimum(x1))
+    ExprArr = filterGt(ExprArr, Int)        := (length(y) < length(x1), maximum(y) <= maximum(x1), minimum(y) >= minimum(x1))
+    ExprArr = filterEq(ExprArr, Int)        := (length(y) < length(x1), maximum(y) <= maximum(x1), minimum(y) >= minimum(x1))
+    ExprArr = filterNeq(ExprArr, Int)       := (length(y) < length(x1), maximum(y) <= maximum(x1), minimum(y) >= minimum(x1))
+    ExprArr = filterMod(ExprArr, Int)       := (length(y) < length(x1), maximum(y) <= maximum(x1), minimum(y) >= minimum(x1))
+    ExprArr = filterNmod(ExprArr, Int)      := (length(y) < length(x1), maximum(y) <= maximum(x1), minimum(y) >= minimum(x1))
+
+    # Element-wise maps with an `Int` constant; annotated as length-preserving.
+    ExprArr = mapPlus(ExprArr, Int)         := (length(y) == length(x1))
+    ExprArr = mapMult(ExprArr, Int)         := (length(y) == length(x1))
+    ExprArr = mapDiv(ExprArr, Int)          := (length(y) == length(x1))
+    ExprArr = mapPow(ExprArr, Int)          := (length(y) == length(x1))
+
+    # Binary zips over two arrays; annotated with equal lengths for output and both inputs.
+    ExprArr = zipwithMax(ExprArr, ExprArr)      := (length(y) == length(x1), length(y) == length(x2))
+    ExprArr = zipwithMin(ExprArr, ExprArr)      := (length(y) == length(x1), length(y) == length(x2))
+    ExprArr = zipwithPlus(ExprArr, ExprArr)     := (length(y) == length(x1), length(y) == length(x2))
+    ExprArr = zipwithMinus(ExprArr, ExprArr)    := (length(y) == length(x1), length(y) == length(x2))
+    ExprArr = zipwithMult(ExprArr, ExprArr)     := (length(y) == length(x1), length(y) == length(x2))
+
+    # Scans; annotated as length-preserving and keeping the first element.
+    ExprArr = scanl1Plus(ExprArr)           := (length(y) == length(x1), first(y) == first(x1))
+    ExprArr = scanl1Minus(ExprArr)          := (length(y) == length(x1), first(y) == first(x1))
+    ExprArr = scanl1Mult(ExprArr)           := (length(y) == length(x1), first(y) == first(x1))
+    ExprArr = scanl1Max(ExprArr)            := (length(y) == length(x1), first(y) == first(x1), maximum(y) == maximum(x1), minimum(y) >= minimum(x1), last(y) == maximum(x1))
+    ExprArr = scanl1Min(ExprArr)            := (length(y) == length(x1), first(y) == first(x1), maximum(y) <= maximum(x1), minimum(y) == minimum(x1), last(y) == minimum(x1))
+end
@@ -0,0 +1,8 @@
+@article{article,
+author = {Balog, Matej and Gaunt, Alexander and Brockschmidt, Marc and Nowozin, Sebastian and Tarlow, Daniel},
+year = {2016},
+month = {11},
+pages = {},
+title = {DeepCoder: Learning to Write Programs},
+journal = {arXiv preprint arXiv:1611.01989},
+doi = {10.48550/arXiv.1611.01989}
+}