diff --git a/README.md b/README.md
index c0e794952909051009f574a9094d61a491074b63..e44a0dbc88d44459bb93c704cb55068ffdb06666 100644
--- a/README.md
+++ b/README.md
@@ -15,3 +15,9 @@ reporting
 ---------
 
 Tools for generating usage reports (e.g. rough number of file downloads) of InvenioRDM-based websites.
+
+
+health
+------
+
+Utilities for checking the general health of InvenioRDM instances.
diff --git a/health/README.md b/health/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2625db764701270eb39a95626eb7704f6f2d5765
--- /dev/null
+++ b/health/README.md
@@ -0,0 +1,28 @@
+# Utilities for health-checking InvenioRDM
+
+These utilities provide means to check some health aspects of an InvenioRDM system from an outside perspective.
+
+
+## `check-records.sh`
+
+This utility uses the REST API to list all available records and checks each record's landing page.
+If any of the records' landing pages returns a failure status code, the record's identifier is printed alongside its landing page URL.
+In the end, a short summary is shown about how many records were fine and how many resulted in failure.
+
+
+### Prerequisites
+
+* `jq`
+* `curl`
+
+
+### Example
+
+```bash
+# check the records in the InvenioRDM sandbox instance
+./check-records.sh inveniordm.web.cern.ch
+
+# alternative to above
+export BASE_URL=inveniordm.web.cern.ch
+./check-records.sh
+```
diff --git a/health/check-records.sh b/health/check-records.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ec09f6e05572aedda76803d0b90e798c9d80f34f
--- /dev/null
+++ b/health/check-records.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+#
+# simple script for checking all publicly available records
+# if their landing pages are okay
+#
+# usage: check-records.sh [BASE_URL]
+#
+# the base URL is taken from the first argument, falling back to the
+# BASE_URL environment variable and finally to a built-in default
+#
+# exit status: 0 if all records could be listed and every landing page was
+# okay; otherwise the number of failed records (at least 1, at most 255)
+#
+# requires `curl` and `jq` to be installed
+
+base_url="${BASE_URL:-https://test.researchdata.tuwien.ac.at}"
+
+if [[ $# -gt 0 ]]; then
+    base_url="$1"
+fi
+
+# bail early if a required tool is missing; without `jq` the number of
+# records would end up empty and the script would wrongly report success
+for tool in curl jq; do
+    if ! command -v "${tool}" > /dev/null; then
+        echo "ERROR: required tool '${tool}' is not installed" >&2
+        exit 1
+    fi
+done
+
+# prefix the base url with "https://" unless http/https is specified
+if [[ ! "${base_url}" =~ ^https?:// ]]; then
+    base_url="https://${base_url}"
+fi
+
+# fetch the list of records and bail if it doesn't work out
+if ! response=$(curl -s --fail "${base_url}/api/records?size=100"); then
+    echo "ERROR: couldn't fetch list of records from ${base_url}" >&2
+    exit 1
+fi
+
+# the number of hits drives the loop below, so it has to be a plain number
+num_hits=$(jq -r ".hits.total" <<< "${response}")
+if [[ ! "${num_hits}" =~ ^[0-9]+$ ]]; then
+    echo "ERROR: unexpected list of records received from ${base_url}" >&2
+    exit 1
+fi
+
+num_ok=0
+num_fail=0
+num_total=0
+
+# set if the list of records couldn't be walked through until the end
+incomplete=0
+
+offset=0
+for (( idx = 1; idx <= num_hits; idx++ )); do
+    printf '\r%s/%s... ' "${idx}" "${num_hits}"
+
+    # take pagination into account for the index
+    response_idx=$(( idx - offset - 1 ))
+
+    # check if we need to fetch the next page of results
+    if ! jq -e ".hits.hits[${response_idx}]" <<< "${response}" > /dev/null 2>&1; then
+        next_url=$(jq -r ".links.next // empty" <<< "${response}")
+
+        # a missing link, a failed request or a page without any records all
+        # mean that the remaining records can't be checked; stop here instead
+        # of counting bogus failures for records that were never looked at
+        if [[ -z "${next_url}" ]] \
+            || ! response=$(curl -s --fail "${next_url}") \
+            || ! jq -e ".hits.hits[0]" <<< "${response}" > /dev/null 2>&1; then
+            echo "ERROR: couldn't fetch the next page of records" >&2
+            incomplete=1
+            break
+        fi
+
+        offset=$(( idx - 1 ))
+        response_idx=0
+    fi
+
+    # get the record's PID and landing page URL
+    pid=$(jq -r ".hits.hits[${response_idx}].id" <<< "${response}")
+    landing_page_url=$(jq -r ".hits.hits[${response_idx}].links.self_html" <<< "${response}")
+
+    # check the landing page
+    if curl -s --fail "${landing_page_url}" > /dev/null; then
+        num_ok=$(( num_ok + 1 ))
+    else
+        echo "FAIL: ${pid} (${landing_page_url})" >&2
+        num_fail=$(( num_fail + 1 ))
+    fi
+    num_total=$(( num_total + 1 ))
+done
+
+# only announce completion once the last record has actually been checked
+if [[ ${num_hits} -gt 0 && ${incomplete} -eq 0 ]]; then
+    echo "done."
+fi
+
+echo "${num_ok} out of ${num_total} records are okay (${num_fail} failed)"
+
+# exit statuses wrap around at 256 (256 failures would read as success), so
+# clamp the value; an incomplete run must never exit with 0 either
+exit_code=${num_fail}
+if [[ ${incomplete} -ne 0 && ${exit_code} -eq 0 ]]; then
+    exit_code=1
+fi
+if [[ ${exit_code} -gt 255 ]]; then
+    exit_code=255
+fi
+exit "${exit_code}"