Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions .github/workflows/eera.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: actions/checkout@v2
with:
repository: ScottishCovidResponse/data_pipeline_api
path: data_pipeline_api

- name: Install Dependencies ( Ubuntu )
run: |
sudo apt-get update
Expand All @@ -39,6 +44,16 @@ jobs:
run : brew update && brew install gsl cppcheck lcov poppler htmldoc graphviz doxygen
if: matrix.os == 'macos-latest'

- name: Data pipeline Dependencies
run : |
sudo apt-get update && sudo apt-get install -y python3-setuptools python3-venv && sudo rm -rf /var/lib/apt/lists/*
python3 -m venv .venv
source .venv/bin/activate
pip install wheel
pip install -r data_pipeline_api/bindings/cpp/requirements.txt
pip install data_pipeline_api
if: matrix.os == 'ubuntu-20.04'

- name: Format Code ( Ubuntu GCC Master )
run: |
git config --local user.email "[email protected]"
Expand All @@ -56,18 +71,33 @@ jobs:
force: false
if: matrix.os == 'ubuntu-20.04' && matrix.config.compiler == 'gcc' && github.ref == 'refs/heads/master'

- name: Data pipeline API Compile
env:
CC: ${{ matrix.config.compiler }}
CXX: ${{ matrix.config.compilerpp }}
run : |
source .venv/bin/activate
cd data_pipeline_api/bindings/cpp
cmake -H. -Bbuild
cmake --build build
if: matrix.os == 'ubuntu-20.04'

- name: Compile
env:
CC: ${{ matrix.config.compiler }}
CXX: ${{ matrix.config.compilerpp }}
run: |
source .venv/bin/activate
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the reason why the CI is currently failing for macOS is that you do this source .venv/bin/activate for both the macOS and Ubuntu builds, but the virtual environment has only been set up for Ubuntu in the "Data pipeline Dependencies" step.

I guess you would need to split the compile into two: one for MacOS, and one for Ubuntu, to fix it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a fair point. I'm not familiar enough with MacOS to know quite what to do to fix without needing to go starting looking things up. Hopefully it isn't too difficult.

mkdir build
cd build
cmake .. -DCODE_COVERAGE=ON -DCLANG_TIDY=ON
cmake .. -DCODE_COVERAGE=ON -DCLANG_TIDY=ON -DDATA_PIPELINE=$GITHUB_WORKSPACE/data_pipeline_api
make 2>&1 | tee clang_tidy_build_results.log

- name: Run regression tests
env:
PYTHONPATH: data_pipeline_api
run: |
source .venv/bin/activate
./scripts/RunRegressionTests.sh 1 24
if [ $? -eq 0 ]; then
echo "Regression tests completed successfully"
Expand All @@ -78,8 +108,12 @@ jobs:
fi

- name: Run unit tests
env:
PYTHONPATH: data_pipeline_api
run: |
./build/bin/Covid19EERAModel-unit_tests
source .venv/bin/activate
cd build
./bin/Covid19EERAModel-unit_tests
if [ $? -eq 0 ]; then
echo "Unit tests completed successfully"
exit 0
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ site/*
index.html
src/tclap/
.DS_Store

test/datapipeline/access-*.yaml
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set(PROJECT_NAME Covid19EERAModel)

project(${PROJECT_NAME} VERSION 0.10.0 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for requiring C++17 support?

My concern would be that only relatively up-to-date compilers support it: people working on older systems may struggle to build this code if we require 17. But if we need a feature from it, then it's okay.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The datapipeline.hh file is currently using std::is_same_v which is a C++17 feature.

set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake_modules)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "-DROOT_DIR=\\\"${CMAKE_SOURCE_DIR}\\\" -DVERSION=\\\"${PROJECT_VERSION}\\\" ")
Expand Down
77 changes: 73 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ The model requires a number of input files to run, in addition to the command li
| scot_age.csv | Proportion of health board populations in each age group | All |
| scot_data.csv | Timeseries of observed disease cases, by health board | Inference only|
| scot_deaths.csv | Timeseries of observed disease deaths, by health board | Inference only |
| scot_frail.csv | Probability of frailty, by age group | All |
| waifw_home.csv | Age Mixing Matrix (Home)| All |
| waifw_norm.csv | Age Mixing Matrix (All contact included)| All |
| waifw_sdist.csv | Age Mixing Matrix (Social Distancing)| All |
Expand Down Expand Up @@ -178,9 +177,6 @@ CSV file containing the proportion of people in each age group, per health board
#### scot_data.csv, scot_deaths.csv
CSV file containing the timeseries of cases and deaths, per health board. Each row corresponds to a different health board, while ach column is a day in the time series. The first column is the toal population of the health board.

#### scot_frail.csv
CSV file containing the probabilities of frailty for each age group, by health board. Each column is an age group. Each row is a health board, with the exception of the last row, which is for the whole of Scotland.

#### waifw_home.csv, waifw_norm.csv, waifw_sdist.csv
CSV files containing the age mixing matrices for people (1) isolating at home, (2) behaving normally, and (3) socially distancing.

Expand All @@ -193,6 +189,68 @@ Index,p_inf,p_hcw,c_hcw,d,q,p_s,rrd,intro, T_lat, juvp_s, T_inf, T_rec, T_sym, T
```
Each row in the file contains 17 entries: the first is the index of the row; the following 8 are the inferred posterior parameters; and the remaining 8 are model fixed parameters. The row selected for use in the prediction run will be that specified by the index argument on the command line (see Prediction Mode discussion below).

### Input - Data pipeline

The intention with the data pipeline is to obtain relevant input data from a shared remote source and to return any results similarly to a shared remote destination. The workflow involves a distinct download stage of the data before running the model and also an upload step after the model has completed.

The download is carried out using the `pipeline_download` script that is supplied with the [Data Pipeline API]([email protected]:ScottishCovidResponse/data_pipeline_api.git). See instruction in that repository for setting up.

To action the download, a config `.yaml` file must be supplied similar to this:

```
pipeline_download --config <path>/config.yaml
```

For this model, the following elements are expected to be available via the data pipeline download. An example `config.yaml` file is located in `test/datapipeline/config.yaml` in the git repository:

| Local item | Data pipeline |
| ------------- |:-------------:|
| \[Fixed Parameters\], T\_lat | "fixed-parameters/T\_lat", "T\_lat" |
| \[Fixed Parameters\], juvp\_s | "fixed-parameters/juvp\_s", "juvp\_s" |
| \[Fixed Parameters\], T\_inf | "fixed-parameters/T\_inf", "T\_inf" |
| \[Fixed Parameters\], T\_rec | "fixed-parameters/T\_rec", "T\_rec" |
| \[Fixed Parameters\], T\_sym | "fixed-parameters/T\_sym", "T\_sym" |
| \[Fixed Parameters\], T\_hos | "fixed-parameters/T\_hos", "T\_hos" |
| \[Fixed Parameters\], K | "fixed-parameters/K", "K" |
| \[Fixed Parameters\], inf\_asym | "fixed-parameters/inf\_asym", "inf\_asym" |
| \[Fixed Parameters\], totN\_hcw | "fixed-parameters/total\_hcw", "total\_hcw" |
| \[Fixed Parameters\], day\_shut | "fixed-parameters/day\_shut", "day\_shut" |
| \[Priors Settings\], prior\_pinf\_shape1 | "prior-distributions/pinf", "pinf", "alpha" |
| \[Priors Settings\], prior\_pinf\_shape2 | "prior-distributions/pinf", "pinf", "beta" |
| \[Priors Settings\], prior\_phcw\_shape1 | "prior-distributions/phcw", "phcw", "alpha" |
| \[Priors Settings\], prior\_phcw\_shape2 | "prior-distributions/phcw", "phcw", "beta" |
| \[Priors Settings\], prior\_chcw\_mean | "prior-distributions/chcw", "chcw", "lambda" |
| \[Priors Settings\], prior\_d\_shape1 | "prior-distributions/d", "d", "alpha" |
| \[Priors Settings\], prior\_d\_shape2 | "prior-distributions/d", "d", "beta" |
| \[Priors Settings\], prior\_q\_shape1 | "prior-distributions/q", "q", "alpha" |
| \[Priors Settings\], prior\_q\_shape2 | "prior-distributions/q", "q", "beta" |
| \[Priors Settings\], prior\_lambda\_shape1 | "prior-distributions/lambda", "lambda", "a" |
| \[Priors Settings\], prior\_lambda\_shape2 | "prior-distributions/lambda", "lambda", "b" |
| \[Priors Settings\], prior\_ps\_shape1 | "prior-distributions/ps", "ps", "alpha" |
| \[Priors Settings\], prior\_ps\_shape2 | "prior-distributions/ps", "ps", "beta" |
| \[Priors Settings\], prior\_rrd\_shape1 | "prior-distributions/rrd", "rrd", "k" |
| \[Priors Settings\], prior\_rrd\_shape2 | "prior-distributions/rrd", "rrd", "theta" |
| scot\_data.csv | "population-data/data\_for\_scotland", "data" |
| scot\_age.csv | "population-data/data\_for\_scotland", "age" |
| scot\_deaths.csv | "population-data/data\_for\_scotland", "deaths" |
| waifw\_norm.csv | "contact-data/who\_acquired\_infection\_from\_whom", "norm" |
| waifw\_home.csv | "contact-data/who\_acquired\_infection\_from\_whom", "home" |
| waifw\_sdist.csv | "contact-data/who\_acquired\_infection\_from\_whom", "sdist" |
| cfr\_byage.csv | "prob\_hosp\_and\_cfr/data\_for\_scotland", "cfr\_byage" |
| posterior\_parameters.csv | "posterior\_parameters/data\_for\_scotland", "posterior\_parameters" |

Once the data has been successfully downloaded the model may be run as specified above but with the addition of the `-c` option indicating to use the data pipeline for the above elements instead of local files.

This requires visibility of the `data_pipeline_api` for Python. If it has been installed via `pip` or `conda` this will already be the case, if the API has been cloned only then `PYTHONPATH` needs amending for this:
```
$ export PYTHONPATH=<clone path>/data_pipeline_api:$PYTHONPATH
```
The command is then:
```
$ build/bin/Covid19EERAModel -s original -m inference -c <path>/config.yaml
```
Once completed, results should be uploaded, which is TBD.

### Prediction mode
The model can be run in a prediction mode, where a fixed set of parameters is supplied to the model,
and the model is run for a fixed number of simulation steps.
Expand Down Expand Up @@ -268,6 +326,17 @@ The regression test script automatically configures each run in line with the ta

The default option uses local data to perform the run. The addition of a "-d" flag will switch the regression test to use the data pipeline locally stored test data instead.

This requires visibility of the `data_pipeline_api` for Python. If it has been installed via `pip` or `conda` this will already be the case, if the API has been clone only then `PYTHONPATH` needs amending for this:

```
$ export PYTHONPATH=<clone path>/data_pipeline_api:$PYTHONPATH
```
Then run as follows:

```
$ ./scripts/RunRegressionTests 4 9 -d
```

**Note:** The regression tests are an aid to refactoring with confidence: they should not be considered confirmation of the code's correctness. The reference outputs are updated periodically based on changes in the core model logic.

### Unit tests
Expand Down
6 changes: 5 additions & 1 deletion cmake/git_watcher.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ set(_state_variable_names
# >>>
# 1. Add the name of the additional git variable you're interested in monitoring
# to this list.
GIT_REMOTE_ORIGIN_URL
)


Expand Down Expand Up @@ -165,7 +166,10 @@ function(GetGitState _working_dir)
# "execute_process()" command. Be sure to set them in
# the environment using the same variable name you added
# to the "_state_variable_names" list.

RunGitCommand(config --get remote.origin.url)
if(exit_code EQUAL 0)
set(ENV{GIT_REMOTE_ORIGIN_URL} "${output}")
endif()
endfunction()


Expand Down
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ find_package(GSL REQUIRED)
set(PRE_CONFIGURE_FILE "Git.cpp.in")
set(POST_CONFIGURE_FILE "${CMAKE_CURRENT_BINARY_DIR}/Git.cpp")
include(${CMAKE_SOURCE_DIR}/cmake/git_watcher.cmake)
add_library(git SHARED ${POST_CONFIGURE_FILE})
add_library(git STATIC ${POST_CONFIGURE_FILE})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the change to STATIC?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cmake for libdatapipeline.a is not building with '-fPIC' so needs STATIC here. A better fix might be to add '-fPIC'/SHARED there and keep SHARED here.

target_include_directories(git PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
add_dependencies(git check_git)

Expand All @@ -16,7 +16,7 @@ configure_file(${PRECONFIGURE_DEPENDENCY_FILE} ${POSTCONFIGURE_DEPENDENCY_FILE}
list(APPEND src_files ${POSTCONFIGURE_DEPENDENCY_FILE})

set (PROJECT_LIBS ${PROJECT_NAME}-lib)
add_library(${PROJECT_LIBS} SHARED ${src_files})
add_library(${PROJECT_LIBS} STATIC ${src_files})
target_link_libraries(${PROJECT_LIBS} PUBLIC GSL::gsl GSL::gslcblas)
target_include_directories(${PROJECT_LIBS} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${PROJECT_LIBS} PUBLIC ${TCLAP})
Expand Down
3 changes: 3 additions & 0 deletions src/Git.cpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,7 @@ std::string GitMetadata::CommitDate() {
}
std::string GitMetadata::Tag() {
return "@GIT_TAG@";
}
std::string GitMetadata::URL() {
return "@GIT_REMOTE_ORIGIN_URL@";
}
2 changes: 2 additions & 0 deletions src/Git.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,6 @@ class GitMetadata {
static std::string CommitDate();
// The commit tag
static std::string Tag();
// The Remote Origin URL
static std::string URL();
};
Loading