-
Notifications
You must be signed in to change notification settings - Fork 0
Andy bennett/684 continue adding datapipeline #91
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev-pipeline
Are you sure you want to change the base?
Changes from all commits
260a261
5af4aba
b64fe23
7c846b3
fa85135
57723cf
eda1a28
d5adf3a
d47fe72
8570e4c
93e66b1
ab90abe
4f509e3
5711255
a161257
3e0a7da
d5cb3db
186e789
b9434e1
23f9df8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,11 @@ jobs: | |
| steps: | ||
| - uses: actions/checkout@v2 | ||
|
|
||
| - uses: actions/checkout@v2 | ||
| with: | ||
| repository: ScottishCovidResponse/data_pipeline_api | ||
| path: data_pipeline_api | ||
|
|
||
| - name: Install Dependencies ( Ubuntu ) | ||
| run: | | ||
| sudo apt-get update | ||
|
|
@@ -39,6 +44,16 @@ jobs: | |
| run : brew update && brew install gsl cppcheck lcov poppler htmldoc graphviz doxygen | ||
| if: matrix.os == 'macos-latest' | ||
|
|
||
| - name: Data pipeline Dependencies | ||
| run : | | ||
| sudo apt-get update && sudo apt-get install -y python3-setuptools python3-venv && sudo rm -rf /var/lib/apt/lists/* | ||
| python3 -m venv .venv | ||
| source .venv/bin/activate | ||
| pip install wheel | ||
| pip install -r data_pipeline_api/bindings/cpp/requirements.txt | ||
| pip install data_pipeline_api | ||
| if: matrix.os == 'ubuntu-20.04' | ||
|
|
||
| - name: Format Code ( Ubuntu GCC Master ) | ||
| run: | | ||
| git config --local user.email "[email protected]" | ||
|
|
@@ -56,18 +71,33 @@ jobs: | |
| force: false | ||
| if: matrix.os == 'ubuntu-20.04' && matrix.config.compiler == 'gcc' && github.ref == 'refs/heads/master' | ||
|
|
||
| - name: Data pipeline API Compile | ||
| env: | ||
| CC: ${{ matrix.config.compiler }} | ||
| CXX: ${{ matrix.config.compilerpp }} | ||
| run : | | ||
| source .venv/bin/activate | ||
| cd data_pipeline_api/bindings/cpp | ||
| cmake -H. -Bbuild | ||
| cmake --build build | ||
| if: matrix.os == 'ubuntu-20.04' | ||
|
|
||
| - name: Compile | ||
| env: | ||
| CC: ${{ matrix.config.compiler }} | ||
| CXX: ${{ matrix.config.compilerpp }} | ||
| run: | | ||
| source .venv/bin/activate | ||
| mkdir build | ||
| cd build | ||
| cmake .. -DCODE_COVERAGE=ON -DCLANG_TIDY=ON | ||
| cmake .. -DCODE_COVERAGE=ON -DCLANG_TIDY=ON -DDATA_PIPELINE=$GITHUB_WORKSPACE/data_pipeline_api | ||
| make 2>&1 | tee clang_tidy_build_results.log | ||
|
|
||
| - name: Run regression tests | ||
| env: | ||
| PYTHONPATH: data_pipeline_api | ||
| run: | | ||
| source .venv/bin/activate | ||
| ./scripts/RunRegressionTests.sh 1 24 | ||
| if [ $? -eq 0 ]; then | ||
| echo "Regression tests completed successfully" | ||
|
|
@@ -78,8 +108,12 @@ jobs: | |
| fi | ||
|
|
||
| - name: Run unit tests | ||
| env: | ||
| PYTHONPATH: data_pipeline_api | ||
| run: | | ||
| ./build/bin/Covid19EERAModel-unit_tests | ||
| source .venv/bin/activate | ||
| cd build | ||
| ./bin/Covid19EERAModel-unit_tests | ||
| if [ $? -eq 0 ]; then | ||
| echo "Unit tests completed successfully" | ||
| exit 0 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,4 +10,4 @@ site/* | |
| index.html | ||
| src/tclap/ | ||
| .DS_Store | ||
|
|
||
| test/datapipeline/access-*.yaml | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ set(PROJECT_NAME Covid19EERAModel) | |
|
|
||
| project(${PROJECT_NAME} VERSION 0.10.0 LANGUAGES CXX) | ||
|
|
||
| set(CMAKE_CXX_STANDARD 11) | ||
| set(CMAKE_CXX_STANDARD 17) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the reason for requiring C++17 support? My concern would be that only relatively up-to-date compilers support it: people working on older systems may struggle to build this code if we require 17. But if we need a feature from it, then it's okay.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The datapipeline.hh file is currently using std::is_same_v which is a C++17 feature. |
||
| set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake_modules) | ||
| set(CMAKE_CXX_STANDARD_REQUIRED ON) | ||
| set(CMAKE_CXX_FLAGS "-DROOT_DIR=\\\"${CMAKE_SOURCE_DIR}\\\" -DVERSION=\\\"${PROJECT_VERSION}\\\" ") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -110,7 +110,6 @@ The model requires a number of input files to run, in addition to the command li | |
| | scot_age.csv | Proportion of health board populations in each age group | All | | ||
| | scot_data.csv | Timeseries of observed disease cases, by health board | Inference only| | ||
| | scot_deaths.csv | Timeseries of observed disease deaths, by health board | Inference only | | ||
| | scot_frail.csv | Probability of frailty, by age group | All | | ||
| | waifw_home.csv | Age Mixing Matrix (Home)| All | | ||
| | waifw_norm.csv | Age Mixing Matrix (All contact included)| All | | ||
| | waifw_sdist.csv | Age Mixing Matrix (Social Distancing)| All | | ||
|
|
@@ -178,9 +177,6 @@ CSV file containing the proportion of people in each age group, per health board | |
| #### scot_data.csv, scot_deaths.csv | ||
| CSV file containing the timeseries of cases and deaths, per health board. Each row corresponds to a different health board, while ach column is a day in the time series. The first column is the toal population of the health board. | ||
|
|
||
| #### scot_frail.csv | ||
| CSV file containing the probabilities of frailty for each age group, by health board. Each column is an age group. Each row is a health board, with the exception of the last row, which is for the whole of Scotland. | ||
|
|
||
| #### waifw_home.csv, waifw_norm.csv, waifw_sdist.csv | ||
| CSV files containing the age mixing matrices for people (1) isolating at home, (2) behaving normally, and (3) socially distancing. | ||
|
|
||
|
|
@@ -193,6 +189,68 @@ Index,p_inf,p_hcw,c_hcw,d,q,p_s,rrd,intro, T_lat, juvp_s, T_inf, T_rec, T_sym, T | |
| ``` | ||
| Each row in the file contains 17 entries: the first is the index of the row; the following 8 are the inferred posterior parameters; and the remaining 8 are model fixed parameters. The row selected for use in the prediction run will be that specified by the index argument on the command line (see Prediction Mode discussion below). | ||
|
|
||
| ### Input - Data pipeline | ||
|
|
||
| The intention with the data pipeline is to obtain relevant input data from a shared remote source and to return any results similarly to a shared remote destination. The workflow involves a distinct download stage of the data before running the model and also an upload step after the model has completed. | ||
|
|
||
| The download is carried out using the `pipeline_download` script that is supplied with the [Data Pipeline API]([email protected]:ScottishCovidResponse/data_pipeline_api.git). See instruction in that repository for setting up. | ||
|
|
||
| To action the download, a config `.yaml` file must be supplied similar to this: | ||
|
|
||
| ``` | ||
| pipeline_download --config <path>/config.yaml | ||
| ``` | ||
|
|
||
| For this model, the following elements are expected to be available via the data pipeline download. An example `config.yaml` file is located in `test/datapipeline/config.yaml` in the git repository: | ||
|
|
||
| | Local item | Data pipeline | | ||
| | ------------- |:-------------:| | ||
| | \[Fixed Parameters\], T\_lat | "fixed-parameters/T\_lat", "T\_lat" | | ||
| | \[Fixed Parameters\], juvp\_s | "fixed-parameters/juvp\_s", "juvp\_s" | | ||
| | \[Fixed Parameters\], T\_inf | "fixed-parameters/T\_inf", "T\_inf" | | ||
| | \[Fixed Parameters\], T\_rec | "fixed-parameters/T\_rec", "T\_rec" | | ||
| | \[Fixed Parameters\], T\_sym | "fixed-parameters/T\_sym", "T\_sym" | | ||
| | \[Fixed Parameters\], T\_hos | "fixed-parameters/T\_hos", "T\_hos" | | ||
| | \[Fixed Parameters\], K | "fixed-parameters/K", "K" | | ||
| | \[Fixed Parameters\], inf\_asym | "fixed-parameters/inf\_asym", "inf\_asym" | | ||
| | \[Fixed Parameters\], totN\_hcw | "fixed-parameters/total\_hcw", "total\_hcw" | | ||
| | \[Fixed Parameters\], day\_shut | "fixed-parameters/day\_shut", "day\_shut" | | ||
| | \[Priors Settings\], prior\_pinf\_shape1 | "prior-distributions/pinf", "pinf", "alpha" | | ||
| | \[Priors Settings\], prior\_pinf\_shape2 | "prior-distributions/pinf", "pinf", "beta" | | ||
| | \[Priors Settings\], prior\_phcw\_shape1 | "prior-distributions/phcw", "phcw", "alpha" | | ||
| | \[Priors Settings\], prior\_phcw\_shape2 | "prior-distributions/phcw", "phcw", "beta" | | ||
| | \[Priors Settings\], prior\_chcw\_mean | "prior-distributions/chcw", "chcw", "lambda" | | ||
| | \[Priors Settings\], prior\_d\_shape1 | "prior-distributions/d", "d", "alpha" | | ||
| | \[Priors Settings\], prior\_d\_shape2 | "prior-distributions/d", "d", "beta" | | ||
| | \[Priors Settings\], prior\_q\_shape1 | "prior-distributions/q", "q", "alpha" | | ||
| | \[Priors Settings\], prior\_q\_shape2 | "prior-distributions/q", "q", "beta" | | ||
| | \[Priors Settings\], prior\_lambda\_shape1 | "prior-distributions/lambda", "lambda", "a" | | ||
| | \[Priors Settings\], prior\_lambda\_shape2 | "prior-distributions/lambda", "lambda", "b" | | ||
| | \[Priors Settings\], prior\_ps\_shape1 | "prior-distributions/ps", "ps", "alpha" | | ||
| | \[Priors Settings\], prior\_ps\_shape2 | "prior-distributions/ps", "ps", "beta" | | ||
| | \[Priors Settings\], prior\_rrd\_shape1 | "prior-distributions/rrd", "rrd", "k" | | ||
| | \[Priors Settings\], prior\_rrd\_shape2 | "prior-distributions/rrd", "rrd", "theta" | | ||
| | scot\_data.csv | "population-data/data\_for\_scotland", "data" | | ||
| | scot\_age.csv | "population-data/data\_for\_scotland", "age" | | ||
| | scot\_deaths.csv | "population-data/data\_for\_scotland", "deaths" | | ||
| | waifw\_norm.csv | "contact-data/who\_acquired\_infection\_from\_whom", "norm" | | ||
| | waifw\_home.csv | "contact-data/who\_acquired\_infection\_from\_whom", "home" | | ||
| | waifw\_sdist.csv | "contact-data/who\_acquired\_infection\_from\_whom", "sdist" | | ||
| | cfr\_byage.csv | "prob\_hosp\_and\_cfr/data\_for\_scotland", "cfr\_byage" | | ||
| | posterior\_parameters.csv | "posterior\_parameters/data\_for\_scotland", "posterior\_parameters" | | ||
|
|
||
| Once the data has been successfully downloaded the model may be run as specified above but with the addition of the `-c` option indicating to use the data pipeline for the above elements instead of local files. | ||
|
|
||
| This requires visibility of the `data_pipeline_api` for Python. If it has been installed via `pip` or `conda` this will already be the case, if the API has been cloned only then `PYTHONPATH` needs amending for this: | ||
| ``` | ||
| $ export PYTHONPATH=<clone path>/data_pipeline_api:$PYTHONPATH | ||
| ``` | ||
| The command is then: | ||
| ``` | ||
| $ build/bin/Covid19EERAModel -s original -m inference -c <path>/config.yaml | ||
| ``` | ||
| Once completed, results should be uploaded, which is TBD. | ||
|
|
||
| ### Prediction mode | ||
| The model can be run in a prediction mode, where a fixed set of parameters is supplied to the model, | ||
| and the model is run for a fixed number of simulation steps. | ||
|
|
@@ -268,6 +326,17 @@ The regression test script automatically configures each run in line with the ta | |
|
|
||
| The default option uses local data to perform the run. The addition of a "-d" flag will switch the regression test to use the data pipeline locally stored test data instead. | ||
|
|
||
| This requires visibility of the `data_pipeline_api` for Python. If it has been installed via `pip` or `conda` this will already be the case, if the API has been clone only then `PYTHONPATH` needs amending for this: | ||
|
|
||
| ``` | ||
| $ export PYTHONPATH=<clone path>/data_pipeline_api:$PYTHONPATH | ||
| ``` | ||
| Then run as follows: | ||
|
|
||
| ``` | ||
| $ ./scripts/RunRegressionTests 4 9 -d | ||
| ``` | ||
|
|
||
| **Note:** The regression tests are an aid to refactoring with confidence: they should not be considered confirmation of the code's correctness. The reference outputs are updated periodically based on changes in the core model logic. | ||
|
|
||
| ### Unit tests | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,7 @@ find_package(GSL REQUIRED) | |
| set(PRE_CONFIGURE_FILE "Git.cpp.in") | ||
| set(POST_CONFIGURE_FILE "${CMAKE_CURRENT_BINARY_DIR}/Git.cpp") | ||
| include(${CMAKE_SOURCE_DIR}/cmake/git_watcher.cmake) | ||
| add_library(git SHARED ${POST_CONFIGURE_FILE}) | ||
| add_library(git STATIC ${POST_CONFIGURE_FILE}) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why the change to STATIC?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cmake for libdatapipeline.a is not building with '-fPIC' so needs STATIC here. A better fix might be to add '-fPIC'/SHARED there and keep SHARED here. |
||
| target_include_directories(git PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) | ||
| add_dependencies(git check_git) | ||
|
|
||
|
|
@@ -16,7 +16,7 @@ configure_file(${PRECONFIGURE_DEPENDENCY_FILE} ${POSTCONFIGURE_DEPENDENCY_FILE} | |
| list(APPEND src_files ${POSTCONFIGURE_DEPENDENCY_FILE}) | ||
|
|
||
| set (PROJECT_LIBS ${PROJECT_NAME}-lib) | ||
| add_library(${PROJECT_LIBS} SHARED ${src_files}) | ||
| add_library(${PROJECT_LIBS} STATIC ${src_files}) | ||
| target_link_libraries(${PROJECT_LIBS} PUBLIC GSL::gsl GSL::gslcblas) | ||
| target_include_directories(${PROJECT_LIBS} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) | ||
| target_link_libraries(${PROJECT_LIBS} PUBLIC ${TCLAP}) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the reason why the CI is currently failing for macOS is that you do this
source .venv/bin/activatefor both the macOS and Ubuntu builds, but the virtual environment has only been set up for Ubuntu in the "Data pipeline Dependencies" step.I guess you would need to split the compile into two: one for MacOS, and one for Ubuntu, to fix it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a fair point. I'm not familiar enough with MacOS to know quite what to do to fix without needing to go starting looking things up. Hopefully it isn't too difficult.