test(android-test-app): unify presentation framework with evidence collection

Implement P0-P5 directives for operator clarity, consistent outcomes, and
easy evidence capture across all test phases.

Changes:
- alarm-test-lib.sh: Add evidence collection (capture_alarms, capture_logcat,
  capture_screenshot), verdict functions (verdict_pass/warn/fail), run directory
  management, and release gating support (RELEASE_GATE_PHASE3)

- test-phase1.sh: Refactor to unified framework with CLI modes (--setup,
  --run, --smoke, --all, --ci), micro-prompts, evidence capture, and verdict
  blocks for all 5 tests

- test-phase2.sh: Add evidence capture, verdict blocks, and STRICTNESS policy
  (soft/hard) for warn vs fail behavior

- test-phase3.sh: Add evidence capture, verdict blocks, release gating
  (--gate-phase3), and fatigue reduction (time estimates, automation hints)

- RUNBOOK-TESTING.md: New comprehensive operator guide (669 lines) covering
  prerequisites, all phases, evidence locations, verdict interpretation,
  common failures, and troubleshooting

All test scripts now use consistent UI helpers (section, substep, info, ok,
warn, error), standardized evidence collection, and clear verdict reporting.
Evidence is saved to timestamped run directories (runs/<RUN_ID>/) with alarms,
logs, and screenshots organized by test phase and scenario.

Tests pass with consistent presentation and reproducible evidence collection.
This commit is contained in:
Matthew Raymer
2025-12-24 12:01:16 +00:00
parent 973af9b688
commit ac39255672
5 changed files with 2373 additions and 870 deletions

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail
IFS=$'\n\t'
# ========================================
# Phase 3 Testing Script - Boot Recovery
@@ -10,11 +11,22 @@ set -euo pipefail
# Resolve the directory that contains this script so the shared library can be
# sourced regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Source the shared library that sits next to this script.
source "${SCRIPT_DIR}/alarm-test-lib.sh"
# Initialize run directory (P1)
# NOTE(review): ensure_run_dir and error are not defined in the visible part of
# this file; presumably they come from alarm-test-lib.sh - confirm.
# If initialization fails, report it and abort the whole script.
ensure_run_dir || {
error "Failed to initialize run directory"
exit 1
}
# Phase 3 specific configuration
# Log tags / patterns (matched to actual ReactivationManager logs)
# Scenario names that the tests below compare against the detected $scenario.
BOOT_SCENARIO_VALUE="BOOT"
NONE_SCENARIO_VALUE="NONE"
# Release gating config (P4.1)
# 0 = advisory mode (default): failures become warnings, continue
# 1 = release-blocking mode: failures exit with nonzero
# The ':=' expansion assigns 0 only when the variable is unset or empty, so a
# value already provided through the environment is kept.
: "${RELEASE_GATE_PHASE3:=0}"
# Allow selecting specific tests on the command line (e.g. ./test-phase3.sh 1 3)
# Starts empty; main() fills it from the non-option arguments.
SELECTED_TESTS=()
@@ -39,16 +51,32 @@ extract_scenario_from_logs() {
test1_boot_future_alarms() {
section "TEST 1: Boot with Future Alarms"
echo "Purpose: Verify alarms are recreated on boot when schedules have future run times."
info "Purpose: Verify alarms are recreated on boot when schedules have future run times."
info "Expected time: 2-3 minutes (includes 30-60s reboot)"
info "Automatable: Partial (requires manual reboot confirmation)"
info "If you see: 'Boot recovery not detected' → Check boot receiver registration and BOOT_COMPLETED permission"
echo ""
pause
# Capture initial state
capture_alarms "phase3_test1_initial"
capture_logcat "phase3_test1_initial" "DNP" 50
substep "Step 1: Launch app & check plugin status"
launch_app
ui_prompt "In the app UI, verify plugin status:\n\n ⚙️ Plugin Settings: ✅ Configured\n 🔌 Native Fetcher: ✅ Configured\n\nIf either shows ❌ or 'Not configured', click 'Configure Plugin', wait until both are ✅, then press Enter."
ui_prompt "1) In the app UI, verify plugin status:
ui_prompt "Now schedule at least one future notification (e.g., click 'Test Notification' to schedule for a few minutes in the future)."
⚙️ Plugin Settings: ✅ Configured
🔌 Native Fetcher: ✅ Configured
If either shows ❌ or 'Not configured', click 'Configure Plugin', wait until both are ✅, then press Enter."
ui_prompt "2) Now schedule at least one future notification (e.g., click 'Test Notification' to schedule for a few minutes in the future)."
# Capture state before reboot
capture_alarms "phase3_test1_before_reboot"
substep "Step 2: Verify alarms are scheduled"
show_alarms
@@ -76,6 +104,12 @@ test1_boot_future_alarms() {
substep "Step 4: Collect boot recovery logs"
info "Collecting recovery logs from boot..."
sleep 2 # Give recovery a moment to complete
# Capture state after reboot
capture_alarms "phase3_test1_after_reboot"
capture_logcat "phase3_test1_after_reboot" "DNP-REACTIVATION" 250
capture_screenshot "phase3_test1_after_reboot"
local logs
logs="$(get_recovery_logs)"
echo "$logs"
@@ -96,8 +130,29 @@ test1_boot_future_alarms() {
echo " errors = ${errors}"
echo
# Determine verdict
local test1_passed=false
local test1_message=""
if [[ "$errors" -gt 0 ]]; then
error "Recovery reported errors>0 (errors=$errors)"
test1_message="Recovery reported errors (errors=$errors)"
fi
if [[ "$scenario" == "$BOOT_SCENARIO_VALUE" && "$rescheduled" -gt 0 ]]; then
ok "TEST 1 PASSED: Boot recovery detected and alarms rescheduled (scenario=$scenario, rescheduled=$rescheduled)."
test1_passed=true
test1_message="Boot recovery detected and alarms rescheduled (scenario=$scenario, rescheduled=$rescheduled)"
elif echo "$logs" | grep -qi "Starting boot recovery\|boot recovery"; then
if [[ "$rescheduled" -gt 0 ]]; then
ok "TEST 1 PASSED: Boot recovery ran and alarms rescheduled (rescheduled=$rescheduled)."
test1_passed=true
test1_message="Boot recovery ran and alarms rescheduled (rescheduled=$rescheduled)"
else
test1_message="Boot recovery ran but rescheduled=0. Check implementation or logs."
fi
else
test1_message="Boot recovery not clearly detected. Review logs and boot receiver implementation (scenario=${scenario:-<none>}, rescheduled=$rescheduled)"
fi
substep "Step 5: Verify alarms were recreated"
@@ -108,18 +163,22 @@ test1_boot_future_alarms() {
info "Plugin alarms after boot: $after_count (expected: 1)"
info "System/other alarms: $system_after (for context)"
if [[ "$scenario" == "$BOOT_SCENARIO_VALUE" && "$rescheduled" -gt 0 ]]; then
ok "TEST 1 PASSED: Boot recovery detected and alarms rescheduled (scenario=$scenario, rescheduled=$rescheduled)."
elif echo "$logs" | grep -qi "Starting boot recovery\|boot recovery"; then
if [[ "$rescheduled" -gt 0 ]]; then
ok "TEST 1 PASSED: Boot recovery ran and alarms rescheduled (rescheduled=$rescheduled)."
else
warn "TEST 1: Boot recovery ran but rescheduled=0. Check implementation or logs."
fi
else
warn "TEST 1: Boot recovery not clearly detected. Review logs and boot receiver implementation."
info "Scenario detected: ${scenario:-<none>}, rescheduled=$rescheduled"
if [[ "$after_count" -eq 0 && "$test1_passed" == "true" ]]; then
warn "Alarms were not recreated despite recovery success. Check alarm scheduling logic."
test1_message="Boot recovery succeeded but alarms not recreated (rescheduled=$rescheduled, after_count=$after_count)"
test1_passed=false
elif [[ "$after_count" -gt 0 && "$test1_passed" == "true" ]]; then
ok "Alarms successfully recreated after boot (after_count=$after_count)"
fi
# Emit verdict
if [[ "$test1_passed" == "true" ]]; then
verdict_pass "phase3_test1_boot_future_alarms" "$test1_message"
else
verdict_fail "phase3_test1_boot_future_alarms" "$test1_message"
fi
evidence_block "phase3_test1_boot_future_alarms"
}
# ------------------------------------------------------------------------------
@@ -129,22 +188,44 @@ test1_boot_future_alarms() {
test2_boot_past_alarms() {
section "TEST 2: Boot with Past Alarms"
echo "Purpose: Verify missed alarms are detected and next occurrence is scheduled on boot."
info "Purpose: Verify missed alarms are detected and next occurrence is scheduled on boot."
info "Expected time: 5-6 minutes (includes 3min wait + 30-60s reboot)"
info "Automatable: Partial (requires manual time advancement or wait)"
info "If you see: 'No missed alarms detected' → Verify alarm time actually passed before reboot"
info "Automation hint: Use 'adb shell date' to check current time, advance if needed"
echo ""
pause
# Capture initial state
capture_alarms "phase3_test2_initial"
capture_logcat "phase3_test2_initial" "DNP" 50
substep "Step 1: Launch app & ensure plugin configured"
launch_app
ui_prompt "In the app UI, verify plugin status:\n\n ⚙️ Plugin Settings: ✅ Configured\n 🔌 Native Fetcher: ✅ Configured\n\nIf needed, click 'Configure Plugin', then press Enter."
ui_prompt "1) In the app UI, verify plugin status:
ui_prompt "Click 'Test Notification' to schedule a notification for 2 minutes in the future.\n\nAfter scheduling, we'll wait for the alarm time to pass, then reboot."
⚙️ Plugin Settings: ✅ Configured
🔌 Native Fetcher: ✅ Configured
If needed, click 'Configure Plugin', then press Enter."
ui_prompt "2) Click 'Test Notification' to schedule a notification for 2 minutes in the future.
After scheduling, we'll wait for the alarm time to pass, then reboot."
# Capture state before the wait
capture_alarms "phase3_test2_before_wait"
substep "Step 2: Wait for alarm time to pass"
info "Waiting 3 minutes for scheduled alarm time to pass..."
warn "You can manually advance system time if needed (requires root/emulator)"
sleep 180 # Wait 3 minutes
# Capture state after the wait
capture_alarms "phase3_test2_after_wait"
substep "Step 3: Verify alarm time has passed"
info "Alarm time should now be in the past"
show_alarms
@@ -159,6 +240,12 @@ test2_boot_past_alarms() {
substep "Step 5: Collect boot recovery logs"
info "Collecting recovery logs from boot..."
sleep 2
# Capture state after reboot
capture_alarms "phase3_test2_after_reboot"
capture_logcat "phase3_test2_after_reboot" "DNP-REACTIVATION" 250
capture_screenshot "phase3_test2_after_reboot"
local logs
logs="$(get_recovery_logs)"
echo "$logs"
@@ -182,17 +269,33 @@ test2_boot_past_alarms() {
echo " errors = ${errors}"
echo
# Determine verdict
local test2_passed=false
local test2_message=""
if [[ "$errors" -gt 0 ]]; then
error "Recovery reported errors>0 (errors=$errors)"
test2_message="Recovery reported errors (errors=$errors)"
fi
if [[ "$missed" -ge 1 && "$rescheduled" -ge 1 ]]; then
ok "TEST 2 PASSED: Past alarms detected and next occurrence scheduled (missed=$missed, rescheduled=$rescheduled)."
test2_passed=true
test2_message="Past alarms detected and next occurrence scheduled (missed=$missed, rescheduled=$rescheduled)"
elif [[ "$missed" -ge 1 ]]; then
warn "TEST 2: Past alarms detected (missed=$missed) but rescheduled=$rescheduled. Check reschedule logic."
test2_message="Past alarms detected (missed=$missed) but rescheduled=$rescheduled. Check reschedule logic."
else
warn "TEST 2: No missed alarms detected. Verify alarm time actually passed before reboot."
test2_message="No missed alarms detected. Verify alarm time actually passed before reboot (missed=$missed, rescheduled=$rescheduled)"
fi
# Emit verdict
if [[ "$test2_passed" == "true" ]]; then
verdict_pass "phase3_test2_boot_past_alarms" "$test2_message"
else
verdict_fail "phase3_test2_boot_past_alarms" "$test2_message"
fi
evidence_block "phase3_test2_boot_past_alarms"
}
# ------------------------------------------------------------------------------
@@ -202,10 +305,17 @@ test2_boot_past_alarms() {
test3_boot_no_schedules() {
section "TEST 3: Boot with No Schedules"
echo "Purpose: Verify boot recovery handles empty database gracefully."
info "Purpose: Verify boot recovery handles empty database gracefully."
info "Expected time: 2-3 minutes (includes 30-60s reboot)"
info "Automatable: Yes"
info "If you see: 'rescheduled>0 on first launch' → Check that boot recovery isn't misfiring"
echo ""
pause
# Capture initial state (before uninstall)
capture_alarms "phase3_test3_initial"
substep "Step 1: Uninstall app to clear DB/state"
set +e
$ADB_BIN uninstall "$APP_ID" >/dev/null 2>&1
@@ -221,7 +331,7 @@ test3_boot_no_schedules() {
fi
info "Clearing logcat..."
$ADB_BIN logcat -c
clear_logs
ok "Logs cleared"
pause
@@ -235,6 +345,12 @@ test3_boot_no_schedules() {
substep "Step 4: Collect boot recovery logs"
info "Collecting recovery logs from boot..."
sleep 2
# Capture state after reboot
capture_alarms "phase3_test3_after_reboot"
capture_logcat "phase3_test3_after_reboot" "DNP-REACTIVATION" 250
capture_screenshot "phase3_test3_after_reboot"
local logs
logs="$(get_recovery_logs)"
echo "$logs"
@@ -251,20 +367,37 @@ test3_boot_no_schedules() {
echo " missed = ${missed}"
echo
# Determine verdict
local test3_passed=false
local test3_message=""
if [[ -z "$logs" ]]; then
ok "TEST 3 PASSED: No recovery logs when there are no schedules (safe behavior)."
return
fi
if echo "$logs" | grep -qiE "No schedules found|No schedules present"; then
test3_passed=true
test3_message="No recovery logs when there are no schedules (safe behavior)"
elif echo "$logs" | grep -qiE "No schedules found|No schedules present"; then
ok "TEST 3 PASSED: Explicit 'No schedules found' message logged with no rescheduling."
test3_passed=true
test3_message="Explicit 'No schedules found' message logged with no rescheduling"
elif [[ "$scenario" == "$NONE_SCENARIO_VALUE" && "$rescheduled" -eq 0 ]]; then
ok "TEST 3 PASSED: NONE scenario detected with no rescheduling."
test3_passed=true
test3_message="NONE scenario detected with no rescheduling (scenario=$scenario, rescheduled=$rescheduled)"
elif [[ "$rescheduled" -gt 0 ]]; then
warn "TEST 3: rescheduled>0 on first launch / empty DB. Check that boot recovery isn't misfiring."
test3_message="rescheduled>0 on first launch / empty DB. Check that boot recovery isn't misfiring (rescheduled=$rescheduled)"
else
info "TEST 3: Logs present but no rescheduling; review scenario handling to ensure it's explicit about NONE / NO_SCHEDULES."
test3_passed=true # Not a failure, just needs review
test3_message="Logs present but no rescheduling; review scenario handling to ensure it's explicit about NONE / NO_SCHEDULES (scenario=${scenario:-<none>}, rescheduled=$rescheduled)"
fi
# Emit verdict
if [[ "$test3_passed" == "true" ]]; then
verdict_pass "phase3_test3_boot_no_schedules" "$test3_message"
else
verdict_fail "phase3_test3_boot_no_schedules" "$test3_message"
fi
evidence_block "phase3_test3_boot_no_schedules"
}
# ------------------------------------------------------------------------------
@@ -274,16 +407,32 @@ test3_boot_no_schedules() {
test4_silent_boot_recovery() {
section "TEST 4: Silent Boot Recovery (App Never Opened)"
echo "Purpose: Verify boot recovery occurs even when the app is never opened after reboot."
info "Purpose: Verify boot recovery occurs even when the app is never opened after reboot."
info "Expected time: 2-3 minutes (includes 30-60s reboot)"
info "Automatable: Partial (requires manual verification that app was not opened)"
info "If you see: 'Boot recovery not detected' → Verify boot receiver is registered and has BOOT_COMPLETED permission"
echo ""
pause
# Capture initial state
capture_alarms "phase3_test4_initial"
capture_logcat "phase3_test4_initial" "DNP" 50
substep "Step 1: Launch app & ensure plugin configured"
launch_app
ui_prompt "In the app UI, verify plugin status:\n\n ⚙️ Plugin Settings: ✅ Configured\n 🔌 Native Fetcher: ✅ Configured\n\nIf needed, click 'Configure Plugin', then press Enter."
ui_prompt "1) In the app UI, verify plugin status:
ui_prompt "Click 'Test Notification' to schedule a notification for a few minutes in the future."
⚙️ Plugin Settings: ✅ Configured
🔌 Native Fetcher: ✅ Configured
If needed, click 'Configure Plugin', then press Enter."
ui_prompt "2) Click 'Test Notification' to schedule a notification for a few minutes in the future."
# Capture state before reboot
capture_alarms "phase3_test4_before_reboot"
substep "Step 2: Verify alarms are scheduled"
show_alarms
@@ -312,6 +461,12 @@ test4_silent_boot_recovery() {
substep "Step 4: Collect boot recovery logs (without opening app)"
info "Collecting recovery logs from boot (app was NOT opened)..."
sleep 2
# Capture state after reboot (without opening the app)
capture_alarms "phase3_test4_after_reboot"
capture_logcat "phase3_test4_after_reboot" "DNP-REACTIVATION" 250
capture_screenshot "phase3_test4_after_reboot"
local logs
logs="$(get_recovery_logs)"
echo "$logs"
@@ -340,15 +495,37 @@ test4_silent_boot_recovery() {
info "Plugin alarms after boot (app never opened): $after_count (expected: 1)"
info "System/other alarms: $system_after (for context)"
# Determine verdict
local test4_passed=false
local test4_message=""
if [[ "$errors" -gt 0 ]]; then
error "Recovery reported errors>0 (errors=$errors)"
test4_message="Recovery reported errors (errors=$errors)"
fi
if [[ "$after_count" -gt 0 && "$rescheduled" -gt 0 ]]; then
ok "TEST 4 PASSED: Boot recovery occurred silently and alarms were recreated (rescheduled=$rescheduled) without app launch."
test4_passed=true
test4_message="Boot recovery occurred silently and alarms were recreated (rescheduled=$rescheduled, after_count=$after_count) without app launch"
elif [[ "$rescheduled" -gt 0 ]]; then
ok "TEST 4 PASSED: Boot recovery occurred silently (rescheduled=$rescheduled), but alarm count check unclear."
test4_passed=true
test4_message="Boot recovery occurred silently (rescheduled=$rescheduled), but alarm count unclear (after_count=$after_count)"
elif echo "$logs" | grep -qi "Starting boot recovery\|boot recovery"; then
warn "TEST 4: Boot recovery ran but alarms may not have been recreated. Check logs and implementation."
test4_message="Boot recovery ran but alarms may not have been recreated. Check logs and implementation (rescheduled=$rescheduled, after_count=$after_count)"
else
warn "TEST 4: Boot recovery not detected. Verify boot receiver is registered and has BOOT_COMPLETED permission."
test4_message="Boot recovery not detected. Verify boot receiver is registered and has BOOT_COMPLETED permission (scenario=${scenario:-<none>}, rescheduled=$rescheduled)"
fi
# Emit verdict
if [[ "$test4_passed" == "true" ]]; then
verdict_pass "phase3_test4_silent_boot_recovery" "$test4_message"
else
verdict_fail "phase3_test4_silent_boot_recovery" "$test4_message"
fi
evidence_block "phase3_test4_silent_boot_recovery"
}
# ------------------------------------------------------------------------------
@@ -356,32 +533,63 @@ test4_silent_boot_recovery() {
# ------------------------------------------------------------------------------
main() {
# Allow selecting specific tests: e.g. `./test-phase3.sh 1 3`
if [[ "$#" -gt 0 && ( "$1" == "-h" || "$1" == "--help" ) ]]; then
echo "Usage: $0 [TEST_IDS...]"
echo
echo "If no TEST_IDS are given, all tests (1, 2, 3, 4) will run."
echo "Examples:"
echo " $0 # run all tests"
echo " $0 1 # run only TEST 1"
echo " $0 2 3 # run only TEST 2 and TEST 3"
echo " $0 4 # run only TEST 4 (silent boot recovery)"
return 0
# Parse CLI args for --gate-phase3 flag
local gate_phase3=0
local test_args=()
while [[ $# -gt 0 ]]; do
case "$1" in
-h|--help)
echo "Usage: $0 [--gate-phase3] [TEST_IDS...]"
echo
echo "If no TEST_IDS are given, all tests (1, 2, 3, 4) will run."
echo
echo "Options:"
echo " --gate-phase3 Enable release gating (failures exit with non-zero)"
echo " Equivalent to: RELEASE_GATE_PHASE3=1 $0"
echo
echo "Environment:"
echo " RELEASE_GATE_PHASE3=0|1 Release gating mode (default: 0)"
echo " 0 = advisory (warn and continue)"
echo " 1 = release-blocking (fail and exit)"
echo
echo "Examples:"
echo " $0 # run all tests (advisory mode)"
echo " $0 1 # run only TEST 1 (advisory mode)"
echo " $0 --gate-phase3 # run all tests (release-blocking mode)"
echo " $0 --gate-phase3 2 3 # run TEST 2 and 3 (release-blocking mode)"
echo " RELEASE_GATE_PHASE3=1 $0 # same as --gate-phase3"
return 0
;;
--gate-phase3)
gate_phase3=1
shift
;;
*)
test_args+=("$1")
shift
;;
esac
done
# Set RELEASE_GATE_PHASE3 if flag was provided
if [[ "$gate_phase3" -eq 1 ]]; then
RELEASE_GATE_PHASE3=1
fi
SELECTED_TESTS=("$@")
SELECTED_TESTS=("${test_args[@]}")
echo
echo "========================================"
echo "Phase 3 Testing Script Boot Recovery"
echo "========================================"
echo
echo "This script will guide you through Phase 3 tests."
echo "You'll be prompted when UI interaction is needed."
echo
echo "⚠️ WARNING: This script will reboot the emulator multiple times."
echo " Each reboot takes 30-60 seconds."
echo
section "Phase 3 Testing Script Boot Recovery"
info "Mode: ${RELEASE_GATE_PHASE3:-0} (0=advisory, 1=release-blocking)"
info "Run ID: ${RUN_ID}"
info "Evidence directory: $(get_run_dir)"
echo ""
info "This script will guide you through Phase 3 tests."
info "You'll be prompted when UI interaction is needed."
echo ""
warn "⚠️ WARNING: This script will reboot the emulator multiple times."
info " Each reboot takes 30-60 seconds."
echo ""
pause
@@ -410,28 +618,23 @@ main() {
section "Testing Complete"
echo "Test Results Summary (see logs above for details):"
echo
echo "TEST 1: Boot with Future Alarms"
echo " - Check logs for scenario=$BOOT_SCENARIO_VALUE and rescheduled>0"
echo
echo "TEST 2: Boot with Past Alarms"
echo " - Check that missed>=1 and rescheduled>=1"
echo
echo "TEST 3: Boot with No Schedules"
echo " - Check that no recovery runs, or NONE scenario is logged with rescheduled=0"
echo
echo "TEST 4: Silent Boot Recovery"
echo " - Check that boot recovery occurred and alarms were recreated without app launch"
echo
info "Test Results Summary:"
echo ""
echo "All test verdicts are shown above with evidence locations."
echo "Review evidence in: $(get_run_dir)"
echo ""
echo "Release gating mode: ${RELEASE_GATE_PHASE3:-0}"
echo " - 0 (advisory): Failures become warnings, script continues"
echo " - 1 (release-blocking): Failures cause script to exit with non-zero"
echo ""
ok "Phase 3 testing script complete!"
echo
echo ""
echo "Next steps:"
echo " - Review logs above"
echo " - Capture snippets into PHASE3-EMULATOR-TESTING.md"
echo " - Update PHASE3-VERIFICATION.md and unified directive status matrix"
echo
echo " - Review evidence in: $(get_run_dir)"
echo " - Verify all test verdicts above"
echo " - Update documentation with test results"
echo ""
}
main "$@"