1+ #! /bin/bash
2+
3+ # demographic_events_validation.sh
4+ # Statistical validation of demographic event handling (admixture, ancient samples, etc.)
5+ # Compares tskit-integrated version against legacy baseline using nicestats
6+
7+ set -e
8+
9+ # Configuration
10+ SCRIPT_DIR=" $( cd " $( dirname " ${BASH_SOURCE[0]} " ) " && pwd ) "
11+ ROOT_DIR=" $( cd " $SCRIPT_DIR /../.." && pwd ) "
12+ WORK_DIR=" $ROOT_DIR /work/demographic_validation_$( date +%Y%m%d_%H%M%S) "
13+ NREPS=${1:- 100} # Number of replicates (default 100)
14+
15+ # Colors for output
16+ RED=' \033[0;31m'
17+ GREEN=' \033[0;32m'
18+ YELLOW=' \033[1;33m'
19+ NC=' \033[0m' # No Color
20+
21+ # Ensure we're in the root directory
22+ cd " $ROOT_DIR "
23+
24+ echo " =========================================="
25+ echo " Demographic Events Statistical Validation"
26+ echo " =========================================="
27+ echo " Replicates: $NREPS "
28+ echo " Work directory: $WORK_DIR "
29+ echo
30+
31+ # Create work directory
32+ mkdir -p " $WORK_DIR "
33+
34+ # Build executables
35+ echo " Building executables..."
36+ make discoal_edited > /dev/null 2>&1
37+ make discoal_legacy_backup > /dev/null 2>&1
38+ make niceStats > /dev/null 2>&1
39+
40+ # Copy executables to work directory for consistency
41+ cp build/discoal_edited " $WORK_DIR /"
42+ cp build/discoal_legacy_backup " $WORK_DIR /"
43+ cp build/niceStats " $WORK_DIR /"
44+
45+ cd " $WORK_DIR "
46+
47+ # Function to run simulation and analyze with nicestats
48+ run_and_analyze () {
49+ local exec_name=$1
50+ local output_prefix=$2
51+ local params=" ${@: 3} "
52+
53+ echo -n " Running $exec_name with: $params ... "
54+
55+ # Run simulation
56+ ./$exec_name $params > ${output_prefix} .ms 2> ${output_prefix} .err
57+
58+ if [ $? -ne 0 ]; then
59+ echo -e " ${RED} FAILED${NC} "
60+ echo " Error output:"
61+ cat ${output_prefix} .err
62+ return 1
63+ fi
64+
65+ # Check for -1 values (lost ancestral material)
66+ if grep -q " ^-1" ${output_prefix} .ms; then
67+ echo -e " ${RED} FAILED${NC} - Found -1 values (lost ancestral material)"
68+ return 1
69+ fi
70+
71+ # Run nicestats
72+ ./niceStats < ${output_prefix} .ms > ${output_prefix} .stats 2> ${output_prefix} .stats.err
73+
74+ if [ $? -ne 0 ]; then
75+ echo -e " ${RED} FAILED${NC} - nicestats error"
76+ cat ${output_prefix} .stats.err
77+ return 1
78+ fi
79+
80+ echo -e " ${GREEN} OK${NC} "
81+ return 0
82+ }
83+
84+ # Function to compare statistics
85+ compare_stats () {
86+ local test_name=$1
87+ local legacy_stats=$2
88+ local edited_stats=$3
89+ local output_file=$4
90+
91+ echo " === $test_name ===" >> $output_file
92+ echo >> $output_file
93+
94+ # Extract all statistics from nicestats output
95+ # nicestats outputs: pi, ss, D, thetaH, H, tajd, fusf, fusfs, rm, rmmg, nhaps, hdiv, wallb, wallq, rosasR, sweepK
96+
97+ # Create temporary files for comparison
98+ grep -E " ^pi:|^ss:|^D:|^thetaH:|^H:|^tajd:|^fusf:|^fusfs:|^rm:|^rmmg:|^nhaps:|^hdiv:|^wallb:|^wallq:|^rosasR:|^sweepK:" $legacy_stats | sort > legacy_sorted.tmp
99+ grep -E " ^pi:|^ss:|^D:|^thetaH:|^H:|^tajd:|^fusf:|^fusfs:|^rm:|^rmmg:|^nhaps:|^hdiv:|^wallb:|^wallq:|^rosasR:|^sweepK:" $edited_stats | sort > edited_sorted.tmp
100+
101+ # Compare each statistic
102+ local all_match=true
103+
104+ while IFS=: read -r stat_name legacy_values; do
105+ # Get corresponding values from edited version
106+ edited_line=$( grep " ^$stat_name :" edited_sorted.tmp || echo " " )
107+
108+ if [ -z " $edited_line " ]; then
109+ echo " ERROR: Statistic '$stat_name ' missing in edited version" >> $output_file
110+ all_match=false
111+ continue
112+ fi
113+
114+ edited_values=$( echo " $edited_line " | cut -d: -f2-)
115+
116+ # Clean up whitespace
117+ legacy_values=$( echo " $legacy_values " | xargs)
118+ edited_values=$( echo " $edited_values " | xargs)
119+
120+ # Compare values
121+ if [ " $legacy_values " = " $edited_values " ]; then
122+ echo " ✓ $stat_name : MATCH" >> $output_file
123+ else
124+ echo " ✗ $stat_name : DIFFER" >> $output_file
125+ echo " Legacy: $legacy_values " >> $output_file
126+ echo " Edited: $edited_values " >> $output_file
127+ all_match=false
128+
129+ # Try to compute relative difference for numeric values
130+ # This is a simple comparison - could be enhanced with proper statistical tests
131+ if [[ " $legacy_values " =~ ^[0-9.]+$ ]] && [[ " $edited_values " =~ ^[0-9.]+$ ]]; then
132+ rel_diff=$( awk " BEGIN {if ($legacy_values != 0) print abs(($edited_values - $legacy_values ) / $legacy_values * 100); else print 0}" )
133+ echo " Relative difference: ${rel_diff} %" >> $output_file
134+ fi
135+ fi
136+ done < legacy_sorted.tmp
137+
138+ # Clean up temp files
139+ rm -f legacy_sorted.tmp edited_sorted.tmp
140+
141+ echo >> $output_file
142+
143+ if [ " $all_match " = true ]; then
144+ echo " Result: ALL STATISTICS MATCH ✓" >> $output_file
145+ return 0
146+ else
147+ echo " Result: STATISTICS DIFFER ✗" >> $output_file
148+ return 1
149+ fi
150+ }
151+
152+ # Test suite
153+ echo
154+ echo " Running demographic event tests..."
155+ echo
156+
157+ # Summary file
158+ SUMMARY_FILE=" summary.txt"
159+ echo " Demographic Events Validation Summary" > $SUMMARY_FILE
160+ echo " ====================================" >> $SUMMARY_FILE
161+ echo " Date: $( date) " >> $SUMMARY_FILE
162+ echo " Replicates: $NREPS " >> $SUMMARY_FILE
163+ echo >> $SUMMARY_FILE
164+
165+ # Track overall success
166+ all_tests_passed=true
167+
168+ # Test 1: Basic admixture event
169+ echo -e " \n${YELLOW} Test 1: Basic admixture event${NC} "
170+ echo " Parameters: 6 $NREPS 100 -t 2 -r 2.4 -p 3 2 2 2 -ed 1.0 0 1 -ed 5.0 1 2 -ea 0.02 0 1 0 0.15 -d 1300526888 1325944824"
171+
172+ if run_and_analyze " discoal_legacy_backup" " test1_legacy" 6 $NREPS 100 -t 2 -r 2.4 -p 3 2 2 2 -ed 1.0 0 1 -ed 5.0 1 2 -ea 0.02 0 1 0 0.15 -d 1300526888 1325944824 &&
173+ run_and_analyze " discoal_edited" " test1_edited" 6 $NREPS 100 -t 2 -r 2.4 -p 3 2 2 2 -ed 1.0 0 1 -ed 5.0 1 2 -ea 0.02 0 1 0 0.15 -d 1300526888 1325944824; then
174+ compare_stats " Test 1: Admixture Event" " test1_legacy.stats" " test1_edited.stats" " $SUMMARY_FILE "
175+ if [ $? -ne 0 ]; then all_tests_passed=false; fi
176+ else
177+ echo " Test 1: FAILED - Simulation error" >> $SUMMARY_FILE
178+ all_tests_passed=false
179+ fi
180+
181+ # Test 2: Ancient samples
182+ echo -e " \n${YELLOW} Test 2: Ancient samples${NC} "
183+ echo " Parameters: 10 $NREPS 100 -t 2 -r 2 -p 2 5 5 -M 1.0 -A 2 0 0.1 -A 2 1 0.2 -d 123 456"
184+
185+ if run_and_analyze " discoal_legacy_backup" " test2_legacy" 10 $NREPS 100 -t 2 -r 2 -p 2 5 5 -M 1.0 -A 2 0 0.1 -A 2 1 0.2 -d 123 456 &&
186+ run_and_analyze " discoal_edited" " test2_edited" 10 $NREPS 100 -t 2 -r 2 -p 2 5 5 -M 1.0 -A 2 0 0.1 -A 2 1 0.2 -d 123 456; then
187+ compare_stats " Test 2: Ancient Samples" " test2_legacy.stats" " test2_edited.stats" " $SUMMARY_FILE "
188+ if [ $? -ne 0 ]; then all_tests_passed=false; fi
189+ else
190+ echo " Test 2: FAILED - Simulation error" >> $SUMMARY_FILE
191+ all_tests_passed=false
192+ fi
193+
194+ # Test 3: Population merge
195+ echo -e " \n${YELLOW} Test 3: Population merge${NC} "
196+ echo " Parameters: 8 $NREPS 100 -t 3 -r 1.5 -p 2 4 4 -ed 0.5 0 1 -d 999 888"
197+
198+ if run_and_analyze " discoal_legacy_backup" " test3_legacy" 8 $NREPS 100 -t 3 -r 1.5 -p 2 4 4 -ed 0.5 0 1 -d 999 888 &&
199+ run_and_analyze " discoal_edited" " test3_edited" 8 $NREPS 100 -t 3 -r 1.5 -p 2 4 4 -ed 0.5 0 1 -d 999 888; then
200+ compare_stats " Test 3: Population Merge" " test3_legacy.stats" " test3_edited.stats" " $SUMMARY_FILE "
201+ if [ $? -ne 0 ]; then all_tests_passed=false; fi
202+ else
203+ echo " Test 3: FAILED - Simulation error" >> $SUMMARY_FILE
204+ all_tests_passed=false
205+ fi
206+
207+ # Test 4: Complex demographic scenario
208+ echo -e " \n${YELLOW} Test 4: Complex demographic scenario${NC} "
209+ echo " Parameters: 12 $NREPS 100 -t 4 -r 3 -p 4 3 3 3 3 -M 0.5 -en 0.1 0 2.0 -en 0.2 1 0.5 -ed 0.3 2 0 -ed 0.4 3 1 -ea 0.15 1 0 2 0.3 -d 111 222"
210+
211+ if run_and_analyze " discoal_legacy_backup" " test4_legacy" 12 $NREPS 100 -t 4 -r 3 -p 4 3 3 3 3 -M 0.5 -en 0.1 0 2.0 -en 0.2 1 0.5 -ed 0.3 2 0 -ed 0.4 3 1 -ea 0.15 1 0 2 0.3 -d 111 222 &&
212+ run_and_analyze " discoal_edited" " test4_edited" 12 $NREPS 100 -t 4 -r 3 -p 4 3 3 3 3 -M 0.5 -en 0.1 0 2.0 -en 0.2 1 0.5 -ed 0.3 2 0 -ed 0.4 3 1 -ea 0.15 1 0 2 0.3 -d 111 222; then
213+ compare_stats " Test 4: Complex Demographics" " test4_legacy.stats" " test4_edited.stats" " $SUMMARY_FILE "
214+ if [ $? -ne 0 ]; then all_tests_passed=false; fi
215+ else
216+ echo " Test 4: FAILED - Simulation error" >> $SUMMARY_FILE
217+ all_tests_passed=false
218+ fi
219+
220+ # Test 5: Migration with admixture
221+ echo -e " \n${YELLOW} Test 5: Migration with admixture${NC} "
222+ echo " Parameters: 8 $NREPS 100 -t 2.5 -r 2 -p 3 3 3 2 -m 0 1 2.0 -m 1 0 2.0 -m 1 2 1.0 -m 2 1 1.0 -ea 0.1 0 1 2 0.4 -d 777 666"
223+
224+ if run_and_analyze " discoal_legacy_backup" " test5_legacy" 8 $NREPS 100 -t 2.5 -r 2 -p 3 3 3 2 -m 0 1 2.0 -m 1 0 2.0 -m 1 2 1.0 -m 2 1 1.0 -ea 0.1 0 1 2 0.4 -d 777 666 &&
225+ run_and_analyze " discoal_edited" " test5_edited" 8 $NREPS 100 -t 2.5 -r 2 -p 3 3 3 2 -m 0 1 2.0 -m 1 0 2.0 -m 1 2 1.0 -m 2 1 1.0 -ea 0.1 0 1 2 0.4 -d 777 666; then
226+ compare_stats " Test 5: Migration + Admixture" " test5_legacy.stats" " test5_edited.stats" " $SUMMARY_FILE "
227+ if [ $? -ne 0 ]; then all_tests_passed=false; fi
228+ else
229+ echo " Test 5: FAILED - Simulation error" >> $SUMMARY_FILE
230+ all_tests_passed=false
231+ fi
232+
233+ # Final summary
234+ echo >> $SUMMARY_FILE
235+ echo " ====================================" >> $SUMMARY_FILE
236+ if [ " $all_tests_passed " = true ]; then
237+ echo " OVERALL RESULT: ALL TESTS PASSED ✓" >> $SUMMARY_FILE
238+ echo -e " \n${GREEN} All demographic event tests passed!${NC} "
239+ else
240+ echo " OVERALL RESULT: SOME TESTS FAILED ✗" >> $SUMMARY_FILE
241+ echo -e " \n${RED} Some demographic event tests failed!${NC} "
242+ fi
243+
244+ # Display summary
245+ echo
246+ echo " ====================================="
247+ cat $SUMMARY_FILE
248+ echo " ====================================="
249+
250+ # Save detailed results
251+ echo -e " \nDetailed results saved in: $WORK_DIR "
252+ echo " Summary file: $WORK_DIR /summary.txt"
253+
254+ # Return appropriate exit code
255+ if [ " $all_tests_passed " = true ]; then
256+ exit 0
257+ else
258+ exit 1
259+ fi
0 commit comments