ScottishCovidResponse · andyb96247 · Aug 10, 2020 · Aug 10, 2020 · Aug 13, 2020 · Aug 13, 2020
diff --git a/.github/workflows/eera.yml b/.github/workflows/eera.yml
@@ -23,6 +23,11 @@ jobs:
     steps:
     - uses: actions/checkout@v2
 
+    - uses: actions/checkout@v2
+      with:
+        repository: ScottishCovidResponse/data_pipeline_api
+        path: data_pipeline_api
+
     - name: Install Dependencies ( Ubuntu )
       run: |
            sudo apt-get update
@@ -39,6 +44,16 @@ jobs:
       run : brew update && brew install gsl cppcheck lcov poppler htmldoc graphviz doxygen
       if: matrix.os == 'macos-latest'
 
+    - name: Data pipeline Dependencies
+      run : |
+            sudo apt-get update && sudo apt-get install -y python3-setuptools python3-venv && sudo rm -rf /var/lib/apt/lists/*
+            python3 -m venv .venv
+            source .venv/bin/activate
+            pip install wheel
+            pip install -r data_pipeline_api/bindings/cpp/requirements.txt
+            pip install data_pipeline_api
+      if: matrix.os == 'ubuntu-20.04'
+
     - name: Format Code ( Ubuntu GCC Master )
       run: |
            git config --local user.email "[email protected]"
@@ -56,18 +71,33 @@ jobs:
         force: false
       if: matrix.os == 'ubuntu-20.04' && matrix.config.compiler == 'gcc' && github.ref == 'refs/heads/master'
 
+    - name: Data pipeline API Compile
+      env:
+              CC: ${{ matrix.config.compiler }}
+              CXX: ${{ matrix.config.compilerpp }}
+      run : |
+            source .venv/bin/activate
+            cd data_pipeline_api/bindings/cpp
+            cmake -H. -Bbuild
+            cmake --build build
+      if: matrix.os == 'ubuntu-20.04'
+
     - name: Compile
       env:
               CC: ${{ matrix.config.compiler }}
               CXX: ${{ matrix.config.compilerpp }}
       run: |
+        source .venv/bin/activate
         mkdir build
         cd build
-        cmake .. -DCODE_COVERAGE=ON -DCLANG_TIDY=ON
+        cmake .. -DCODE_COVERAGE=ON -DCLANG_TIDY=ON -DDATA_PIPELINE=$GITHUB_WORKSPACE/data_pipeline_api
         make 2>&1 | tee clang_tidy_build_results.log
 
     - name: Run regression tests
+      env:
+            PYTHONPATH: data_pipeline_api
       run: |
+        source .venv/bin/activate
         ./scripts/RunRegressionTests.sh 1 24
         if [ $? -eq 0 ]; then
           echo "Regression tests completed successfully"
@@ -78,8 +108,12 @@ jobs:
         fi
 
     - name: Run unit tests
+      env:
+            PYTHONPATH: data_pipeline_api
       run: |
-        ./build/bin/Covid19EERAModel-unit_tests
+        source .venv/bin/activate
+        cd build
+        ./bin/Covid19EERAModel-unit_tests
         if [ $? -eq 0 ]; then
           echo "Unit tests completed successfully"
           exit 0

diff --git a/.gitignore b/.gitignore
@@ -10,4 +10,4 @@ site/*
 index.html
 src/tclap/
 .DS_Store
-
+test/datapipeline/access-*.yaml
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -4,7 +4,7 @@ set(PROJECT_NAME Covid19EERAModel)
 
 project(${PROJECT_NAME} VERSION 0.10.0 LANGUAGES CXX)
 
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake_modules)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_FLAGS "-DROOT_DIR=\\\"${CMAKE_SOURCE_DIR}\\\" -DVERSION=\\\"${PROJECT_VERSION}\\\" ")

diff --git a/README.md b/README.md
@@ -110,7 +110,6 @@ The model requires a number of input files to run, in addition to the command li
 | scot_age.csv | Proportion of health board populations in each age group      | All |
 | scot_data.csv | Timeseries of observed disease cases, by health board      | Inference only|
 | scot_deaths.csv | Timeseries of observed disease deaths, by health board      | Inference only |
-| scot_frail.csv | Probability of frailty, by age group      | All |
 | waifw_home.csv | Age Mixing Matrix (Home)| All |
 | waifw_norm.csv | Age Mixing Matrix (All contact included)| All |
 | waifw_sdist.csv |  Age Mixing Matrix (Social Distancing)| All |
@@ -178,9 +177,6 @@ CSV file containing the proportion of people in each age group, per health board
 #### scot_data.csv, scot_deaths.csv
 CSV file containing the timeseries of cases and deaths, per health board. Each row corresponds to a different health board, while ach column is a day in the time series. The first column is the toal population of the health board.
 
-#### scot_frail.csv
-CSV file containing the probabilities of frailty for each age group, by health board. Each column is an age group. Each row is a health board, with the exception of the last row, which is for the whole of Scotland.
-
 #### waifw_home.csv, waifw_norm.csv, waifw_sdist.csv
 CSV files containing the age mixing matrices for people (1) isolating at home, (2) behaving normally, and (3) socially distancing. 
 
@@ -193,6 +189,68 @@ Index,p_inf,p_hcw,c_hcw,d,q,p_s,rrd,intro, T_lat, juvp_s, T_inf, T_rec, T_sym, T
 ```
 Each row in the file contains 17 entries: the first is the index of the row; the following 8 are the inferred posterior parameters; and the remaining 8 are model fixed parameters. The row selected for use in the prediction run will be that specified by the index argument on the command line (see Prediction Mode discussion below).
 
+### Input - Data pipeline
+
+The intention with the data pipeline is to obtain relevant input data from a shared remote source and to return any results similarly to a shared remote destination. The workflow involves a distinct download stage of the data before running the model and also an upload step after the model has completed.
+
+The download is carried out using the `pipeline_download` script that is supplied with the [Data Pipeline API](https://github.com/ScottishCovidResponse/data_pipeline_api). See the instructions in that repository for setup.
+
+To run the download, pass a `.yaml` config file to the script, for example:
+
+```
+pipeline_download --config <path>/config.yaml
+```
+
+For this model, the following elements are expected to be available via the data pipeline download. An example `config.yaml` file is located in `test/datapipeline/config.yaml` in the git repository:
+
+| Local item        | Data pipeline           |
+| ------------- |:-------------:|
+| \[Fixed Parameters\], T\_lat | "fixed-parameters/T\_lat", "T\_lat" |
+| \[Fixed Parameters\], juvp\_s | "fixed-parameters/juvp\_s", "juvp\_s" |
+| \[Fixed Parameters\], T\_inf | "fixed-parameters/T\_inf", "T\_inf" |
+| \[Fixed Parameters\], T\_rec | "fixed-parameters/T\_rec", "T\_rec" |
+| \[Fixed Parameters\], T\_sym | "fixed-parameters/T\_sym", "T\_sym" |
+| \[Fixed Parameters\], T\_hos | "fixed-parameters/T\_hos", "T\_hos" |
+| \[Fixed Parameters\], K | "fixed-parameters/K", "K" |
+| \[Fixed Parameters\], inf\_asym | "fixed-parameters/inf\_asym", "inf\_asym" |
+| \[Fixed Parameters\], totN\_hcw   | "fixed-parameters/total\_hcw", "total\_hcw" |
+| \[Fixed Parameters\], day\_shut | "fixed-parameters/day\_shut", "day\_shut" |
+| \[Priors Settings\], prior\_pinf\_shape1 | "prior-distributions/pinf", "pinf", "alpha" |
+| \[Priors Settings\], prior\_pinf\_shape2 | "prior-distributions/pinf", "pinf", "beta" |
+| \[Priors Settings\], prior\_phcw\_shape1 | "prior-distributions/phcw", "phcw", "alpha" |
+| \[Priors Settings\], prior\_phcw\_shape2 | "prior-distributions/phcw", "phcw", "beta" |
+| \[Priors Settings\], prior\_chcw\_mean | "prior-distributions/chcw", "chcw", "lambda" |
+| \[Priors Settings\], prior\_d\_shape1 | "prior-distributions/d", "d", "alpha" |
+| \[Priors Settings\], prior\_d\_shape2 | "prior-distributions/d", "d", "beta" |
+| \[Priors Settings\], prior\_q\_shape1 | "prior-distributions/q", "q", "alpha" |
+| \[Priors Settings\], prior\_q\_shape2 | "prior-distributions/q", "q", "beta" |
+| \[Priors Settings\], prior\_lambda\_shape1 | "prior-distributions/lambda", "lambda", "a" |
+| \[Priors Settings\], prior\_lambda\_shape2 | "prior-distributions/lambda", "lambda", "b" |
+| \[Priors Settings\], prior\_ps\_shape1 | "prior-distributions/ps", "ps", "alpha" |
+| \[Priors Settings\], prior\_ps\_shape2 | "prior-distributions/ps", "ps", "beta" |
+| \[Priors Settings\], prior\_rrd\_shape1 | "prior-distributions/rrd", "rrd", "k" |
+| \[Priors Settings\], prior\_rrd\_shape2 | "prior-distributions/rrd", "rrd", "theta" |
+| scot\_data.csv       | "population-data/data\_for\_scotland", "data" |
+| scot\_age.csv        | "population-data/data\_for\_scotland", "age" |
+| scot\_deaths.csv     | "population-data/data\_for\_scotland", "deaths" |
+| waifw\_norm.csv      | "contact-data/who\_acquired\_infection\_from\_whom", "norm" |
+| waifw\_home.csv      | "contact-data/who\_acquired\_infection\_from\_whom", "home" |
+| waifw\_sdist.csv     | "contact-data/who\_acquired\_infection\_from\_whom", "sdist" |
+| cfr\_byage.csv       | "prob\_hosp\_and\_cfr/data\_for\_scotland", "cfr\_byage" |
+| posterior\_parameters.csv | "posterior\_parameters/data\_for\_scotland", "posterior\_parameters" |
+
+Once the data has been successfully downloaded, the model may be run as specified above, with the addition of the `-c` option, which tells the model to use the data pipeline for the above elements instead of local files.
+
+This requires the `data_pipeline_api` to be visible to Python. If it has been installed via `pip` or `conda`, this will already be the case; if the API has only been cloned, `PYTHONPATH` needs amending:
+```
+$ export PYTHONPATH=<clone path>/data_pipeline_api:$PYTHONPATH
+```
+The command is then:
+```
+$ build/bin/Covid19EERAModel -s original -m inference -c <path>/config.yaml
+```
+Once the run has completed, the results should be uploaded; the upload procedure is still to be determined (TBD).
+
 ### Prediction mode
 The model can be run in a prediction mode, where a fixed set of parameters is supplied to the model,
 and the model is run for a fixed number of simulation steps.
@@ -268,6 +326,17 @@ The regression test script automatically configures each run in line with the ta
 
 The default option uses local data to perform the run. The addition of a "-d" flag will switch the regression test to use the data pipeline locally stored test data instead.
 
+This requires the `data_pipeline_api` to be visible to Python. If it has been installed via `pip` or `conda`, this will already be the case; if the API has only been cloned, `PYTHONPATH` needs amending:
+
+```
+$ export PYTHONPATH=<clone path>/data_pipeline_api:$PYTHONPATH
+```
+Then run as follows:
+
+```
+$ ./scripts/RunRegressionTests.sh 4 9 -d
+```
+
 **Note:** The regression tests are an aid to refactoring with confidence: they should not be considered confirmation of the code's correctness. The reference outputs are updated periodically based on changes in the core model logic.
 
 ### Unit tests

diff --git a/cmake/git_watcher.cmake b/cmake/git_watcher.cmake
@@ -88,6 +88,7 @@ set(_state_variable_names
     # >>>
     # 1. Add the name of the additional git variable you're interested in monitoring
     #    to this list.
+    GIT_REMOTE_ORIGIN_URL
 )
 
 
@@ -165,7 +166,10 @@ function(GetGitState _working_dir)
     #    "execute_process()" command. Be sure to set them in
     #    the environment using the same variable name you added
     #    to the "_state_variable_names" list.
-
+    RunGitCommand(config --get remote.origin.url)
+    if(exit_code EQUAL 0)
+        set(ENV{GIT_REMOTE_ORIGIN_URL} "${output}")
+    endif()
 endfunction()
 
 

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -6,7 +6,7 @@ find_package(GSL REQUIRED)
 set(PRE_CONFIGURE_FILE "Git.cpp.in")
 set(POST_CONFIGURE_FILE "${CMAKE_CURRENT_BINARY_DIR}/Git.cpp")
 include(${CMAKE_SOURCE_DIR}/cmake/git_watcher.cmake)
-add_library(git SHARED ${POST_CONFIGURE_FILE})
+add_library(git STATIC ${POST_CONFIGURE_FILE})
 target_include_directories(git PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 add_dependencies(git check_git)
 
@@ -16,7 +16,7 @@ configure_file(${PRECONFIGURE_DEPENDENCY_FILE} ${POSTCONFIGURE_DEPENDENCY_FILE}
 list(APPEND src_files ${POSTCONFIGURE_DEPENDENCY_FILE})
 
 set (PROJECT_LIBS ${PROJECT_NAME}-lib)
-add_library(${PROJECT_LIBS} SHARED ${src_files})
+add_library(${PROJECT_LIBS} STATIC ${src_files})
 target_link_libraries(${PROJECT_LIBS} PUBLIC GSL::gsl GSL::gslcblas)
 target_include_directories(${PROJECT_LIBS} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 target_link_libraries(${PROJECT_LIBS} PUBLIC ${TCLAP})

diff --git a/src/Git.cpp.in b/src/Git.cpp.in
@@ -44,4 +44,7 @@ std::string GitMetadata::CommitDate() {
 }
 std::string GitMetadata::Tag() {
     return "@GIT_TAG@";
+}
+std::string GitMetadata::URL() {
+    return "@GIT_REMOTE_ORIGIN_URL@";
 }
diff --git a/src/Git.h b/src/Git.h
@@ -47,4 +47,6 @@ class GitMetadata {
   static std::string CommitDate();
   // The commit tag
   static std::string Tag();
+  // The Remote Origin URL
+  static std::string URL();
 };