
Commit c734c51

Merge branch 'docs'
2 parents 994b008 + d47000c

5 files changed: 116 additions & 11 deletions


blue/cli.ml

Lines changed: 4 additions & 4 deletions
@@ -49,13 +49,13 @@ let speclist =
     ( "-t",
       Arg.Set_float _obs_time_delay,
       ": Set the constant time delay between observations in seconds. Defaults \
-         to 5.0 seconds." );
+         to 5.0 seconds.\n" );
     ( "-f",
       Arg.Set_float _acceptable_fraction,
       ": Set the acceptable fraction of OK responses for a positive reward. \
         E.g. for a request rate of 20 RPS (Requests Per Second), give a \
         positive reward for response rates > (0.8 * 20), where 0.8 is the \
-         acceptable fraction. Defaults to '0.8'." );
+         acceptable fraction. Defaults to '0.8'.\n" );
     ( "-i",
       Arg.Set_float _request_interval,
       ": Set the client request interval (delay between requests) in seconds. \
@@ -76,12 +76,12 @@ let speclist =
         received by the client. Defaults to '/dev/ttyUSB0'. If a filesystem \
         address, a serial reader is initialised; if a network address in the \
         form '172.0.1.3:8081', a UDP reader is initialised. In the UDP case, \
-         the IP address should be the IP *used by the sender*." );
+         the IP address should be the IP *used by the sender*.\n" );
     ( "-s",
       Arg.Set_float _rolling_window_secs,
       ": Set the length of the rolling window used to evaluate the average OK \
         response rate indicated by the data received over the out-of-band \
-         channel with the client. Defaults to 3.0." );
+         channel with the client. Defaults to 3.0.\n" );
   ]

 let log_path () =
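The [-f] and [-i] options above jointly define the reward rule: the client's OK-response rate is compared against an acceptable fraction of the request rate. A minimal OCaml sketch of that rule (the function name [reward_of_ok_rate] and the 1.0 / -1.0 reward values are illustrative assumptions; the real logic lives in [Blue.Reward]):

{[
(* Positive reward iff the observed OK rate exceeds the acceptable
   fraction of the configured request rate. *)
let reward_of_ok_rate ~request_interval ~acceptable_fraction ~ok_rate =
  (* A request interval of 0.05 s corresponds to 20 RPS. *)
  let request_rate = 1.0 /. request_interval in
  if ok_rate > acceptable_fraction *. request_rate then 1.0 else -1.0

let () =
  (* With 20 RPS and fraction 0.8, the threshold is 16 OK responses/s. *)
  reward_of_ok_rate ~request_interval:0.05 ~acceptable_fraction:0.8
    ~ok_rate:18.0
  |> Printf.printf "reward = %.1f\n"
]}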

blue/countBasedPolicy.ml

Lines changed: 13 additions & 7 deletions
@@ -58,15 +58,21 @@ let infer policy (state, reward) =
   Log.write_msg @@ Reward.string_of_state_reward (state, reward);

   let config = config_of_state state in
-  let policy' = push (config, reward) policy in
   let goal, _ = most_valuable policy in
-  let chosen_eff =
+  let chosen_eff, policy' =
     (* Exploration. *)
-    if policy'.n_steps < Cli.n_exploration_steps () then System.random_eff ()
-    else if (* Exploitation. *)
-            config = goal then System.(Wait)
-    else if config.green = goal.green then System.(ToggleRed)
-    else System.(ToggleGreen)
+    if policy.n_steps < Cli.n_exploration_steps () then
+      let policy' = push (config, reward) policy in
+      (System.random_eff (), policy')
+    else
+      (* Exploitation. *)
+      let eff =
+        if config = goal then System.(Wait)
+        else if config.green = goal.green then System.(ToggleRed)
+        else System.(ToggleGreen)
+      in
+      (* Freeze the policy, [policy' = policy]. *)
+      (eff, policy)
   in

   (* Log chosen effect. *)
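The rewrite above makes the explore/exploit split explicit: the policy is only updated (via [push]) while exploring, and is frozen once exploitation begins - previously the policy was pushed on every step, and [most_valuable] was queried before the push. A self-contained sketch of the resulting decision logic, using toy stand-ins for Blue's types (the [config] record is an assumption; only its [green] field is implied by the diff):

{[
(* Toy stand-ins mirroring the constructors used in [infer] above. *)
type eff = Wait | ToggleRed | ToggleGreen
type config = { green : bool; red : bool }

let random_eff () =
  match Random.int 3 with 0 -> Wait | 1 -> ToggleRed | _ -> ToggleGreen

(* While exploring, act randomly; once the policy is frozen, walk the
   current configuration towards the goal one toggle at a time. *)
let choose ~exploring ~(config : config) ~(goal : config) =
  if exploring then random_eff ()
  else if config = goal then Wait
  else if config.green = goal.green then ToggleRed
  else ToggleGreen
]}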

doc/blue/dune

Lines changed: 5 additions & 0 deletions
@@ -1,2 +1,7 @@
 (documentation
  (package blue))
+
+(install
+ (section doc)
+ (files odoc-config.sexp)
+ (package blue))
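The new [install] stanza ships [odoc-config.sexp] alongside the package's documentation: dune's [doc] section installs into [<prefix>/doc/<package>]. After installation the file can be located with, for example (assuming an opam-managed prefix):

{@bash[
# Print the installed path of the odoc config (opam layout assumed).
ls "$(opam var doc)/blue/odoc-config.sexp"
]}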

doc/blue/index.mld

Lines changed: 92 additions & 0 deletions
@@ -1 +1,93 @@
 {0 The [blue] program and library}
+[blue] exposes an executable and a supporting library of helper functions. The executable {b initialises and runs an online learning agent} - repeatedly collecting information from the surrounding cyber system and selecting actions whose execution causes effects in that system.
+
+[blue] is a major component of the {{:https://github.com/edchapman88/r3ace}R{^ 3}ACE} project: {e Replicable} and {e Reproducible} {e Real-World} Autonomous Cyber Environments.
+
+[blue] is also an {b implementation of the {{!/markov/page-index}markov} interface library}. The [markov] library defines a {e reusable} abstract software interface for {b online learning agents} that interact with {b real systems}.
+
+[blue] is implemented so that the blue agent faces a specific decision problem - outlined as the {{:https://github.com/edchapman88/r3ace/#a-simple-decision-problem}Simple Decision Problem} in the R{^ 3}ACE docs.
+
+{1 Usage}
+The [blue] executable exposes a documented Command Line Interface (CLI). To learn about the CLI options and arguments, run:
+{@bash[
+blue --help
+]}
+
+The output is as follows:
+{@plaintext[
+Usage:
+  blue [-l <log-dir>] [-e <n-exploration-steps>] [-t <obs-time-delay>] [-f <acceptable-fraction>] [-i <request-interval>] [-g <green-ip>] [-r <red-ip>] [-a <response-signal-addr>] [-s <rolling-window-secs>]
+
+Example:
+  blue -l /home/blue/blue_logs -i 0.05 -g 172.0.0.2 -r 172.0.0.3 -a /dev/ttyUSB0 -s 3.0
+
+Options:
+  -l : Optionally write a log file in the specified directory with information about the sequence of states observed and actions taken. If no log file is specified, the information is written to stdout.
+
+  -e : Set the number of observations used by the policy for exploration, after which actions are selected for exploitation. Defaults to 300.
+
+  -t : Set the constant time delay between observations in seconds. Defaults to 5.0 seconds.
+
+  -f : Set the acceptable fraction of OK responses for a positive reward. E.g. for a request rate of 20 RPS (Requests Per Second), give a positive reward for response rates > (0.8 * 20), where 0.8 is the acceptable fraction. Defaults to '0.8'.
+
+  -i : Set the client request interval (delay between requests) in seconds. Compared with the OK response rate reported by the client to determine the reward. Defaults to 1.0.
+
+  -g : Set the IP address of the client (the green host), defaults to '172.0.0.2'.
+
+  -r : Set the IP address of the adversary (the red host), defaults to '172.0.0.3'.
+
+  -a : Set the address of the out-of-band channel used by the client over which 1's and 0's are sent to indicate successful and failed responses received by the client. Defaults to '/dev/ttyUSB0'. If a filesystem address, a serial reader is initialised; if a network address in the form '172.0.1.3:8081', a UDP reader is initialised. In the UDP case, the IP address should be the IP *used by the sender*.
+
+  -s : Set the length of the rolling window used to evaluate the average OK response rate indicated by the data received over the out-of-band channel with the client. Defaults to 3.0.
+]}
+
+{1 Installation}
+[blue] can be installed locally with:
+{[
+opam install .
+]}
+
+[blue] is also installable as a Nix package (there is a {{:https://github.com/edchapman88/blue/blob/main/flake.nix}[flake.nix]} file at the root of the repository).
+
+{1 Library Overview}
+{2 Defining the [Agent] module}
+The {{!Blue.MarkovCompressor}[MarkovCompressor]}, {{!Blue.Reward}[Reward]} and {{!Blue.CountBasedPolicy}[CountBasedPolicy]} modules implement the interface defined in the {{!/markov/page-index}markov} library. Together they parameterise the {{!/markov/Markov.Agent}[Markov.Agent]} module.
+
+The source code for the [blue] executable is simply:
+{[
+open Blue
+
+module Agent = Markov.Agent.Make (MarkovCompressor) (Reward) (CountBasedPolicy)
+
+let () =
+  Cli.arg_parse ();
+  Agent.act (Agent.init_policy ())
+]}
+
+A [Markov.Agent] module is instantiated, and an [Agent.act] loop is started.
+
+{2 The {{!Blue.System}[System]} module}
+The [System] module defines the particular functions used by the Agent to interact with the surrounding cyber system. Its implementation is {e specific} to the {{:https://github.com/edchapman88/r3ace#a-simple-decision-problem}Simple Decision Problem}, defined in the R{^ 3}ACE project.
+
+{1 Defining, Training and Evaluating a new Agent}
+An Agent can be defined with {b any policy that implements the {{!/markov/Markov.Agent.RLPolicyType}[RLPolicyType]} interface} in the {{!/markov/page-index}markov} library.
+
+For example, replacing the executable source code ({{:https://github.com/edchapman88/blue/blob/main/bin/main.ml}/bin/main.ml}) with:
+{[
+open Blue
+
+module CleverPolicy = struct
+  (* Your implementation here *) ...
+end
+
+module Agent = Markov.Agent.Make (MarkovCompressor) (Reward) (CleverPolicy)
+
+let () =
+  Cli.arg_parse ();
+  Agent.act (Agent.init_policy ())
+]}
+
+Running this executable on {{:https://github.com/edchapman88/r3ace}R{^ 3}ACE} infrastructure will train or evaluate your [CleverPolicy].
+
+{2 Training vs. Evaluating}
+This distinction comes down to whether or not the policy implementation is 'self-optimising' (i.e. whether the program mutates the policy based on the rewards returned by the reward function). In the case of the {{!Blue.CountBasedPolicy}[Blue.CountBasedPolicy]}, the policy 'trains' for a specific number of policy steps (configured via the [blue] CLI), after which the policy is frozen and the subsequent period can be considered policy evaluation.
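The number of training (exploration) steps is the [-e] option from the Usage section above. For example (flag default and log path taken from the CLI examples earlier on this page):

{@bash[
# Explore for 300 observations (the default), then continue with the
# frozen policy; the sequence of states observed and actions taken is
# logged to the given directory.
blue -e 300 -l /home/blue/blue_logs
]}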

doc/blue/odoc-config.sexp

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+(libraries markov)
+(packages markov)
