From 4673ad6c89bbdca632b22edfc2ef35486b7a635b Mon Sep 17 00:00:00 2001 From: Jelle van der Waa Date: Sat, 1 Jul 2023 15:21:32 +0200 Subject: feat(search): add subcommand to search across the packaging group Search for an expression across the GitLab packaging group. To use a filter, include it in your query. You may use wildcards (*) to use glob matching. Available filters for the blobs scope: path, extension. Every usage of the search command must be authenticated. Consult the 'pkgctl auth' command to authenticate with GitLab or view the authentication status. This command uses bat for pretty printing the results including line numbers and syntax highlighting. Component: pkgctl search Co-authored-by: Christian Heusel Co-authored-by: Levente Polyak --- README.md | 1 + contrib/completion/bash/devtools.in | 9 ++ contrib/completion/zsh/_devtools.in | 8 ++ doc/man/pkgctl-search.1.asciidoc | 58 ++++++++++ doc/man/pkgctl.1.asciidoc | 4 + src/lib/api/gitlab.sh | 174 ++++++++++++++++++++++++++-- src/lib/cache.sh | 22 ++++ src/lib/search.sh | 221 ++++++++++++++++++++++++++++++++++++ src/pkgctl.in | 9 ++ 9 files changed, 499 insertions(+), 7 deletions(-) create mode 100644 doc/man/pkgctl-search.1.asciidoc create mode 100644 src/lib/cache.sh create mode 100644 src/lib/search.sh diff --git a/README.md b/README.md index a1b6d42..6c36a37 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,7 @@ Component: pkgctl db remove - arch-install-scripts - awk - bash +- bat - binutils - coreutils - diffutils diff --git a/contrib/completion/bash/devtools.in b/contrib/completion/bash/devtools.in index b974257..155bb7e 100644 --- a/contrib/completion/bash/devtools.in +++ b/contrib/completion/bash/devtools.in @@ -139,6 +139,7 @@ _pkgctl_cmds=( diff release repo + search version ) _pkgctl_args=( @@ -331,6 +332,14 @@ _pkgctl_repo_web_args=( _pkgctl_repo_web_opts() { _filedir -d; } +_pkgctl_search_args=( + --json + --no-default-filter + -h --help +) +_pkgctl_search_opts() { :; } + + 
_pkgctl_diff_args=( -l --list -d --diffoscope diff --git a/contrib/completion/zsh/_devtools.in b/contrib/completion/zsh/_devtools.in index 35ab2dc..120b47a 100644 --- a/contrib/completion/zsh/_devtools.in +++ b/contrib/completion/zsh/_devtools.in @@ -139,6 +139,13 @@ _pkgctl_repo_web_args=( '*:git_dir:_files -/' ) +_pkgctl_search_args=( + '--json[Enable printing results in JSON]' + '--no-default-filter[Do not apply default filter (like -path:keys/pgp/*.asc)]' + '(-h --help)'{-h,--help}'[Display usage]' + '1:query' +) + _arch_nspawn_args=( '-C[Location of a pacman config file]:pacman_config:_files -g "*.conf(.)"' '-M[Location of a makepkg config file]:makepkg_config:_files -g "*.conf(.)"' @@ -252,6 +259,7 @@ _pkgctl_cmds=( "diff[Compare package files using different modes]" "release[Release step to commit, tag and upload build artifacts]" "repo[Manage Git packaging repositories and their configuration]" + "search[Search for an expression across the GitLab packaging group]" "version[Show pkgctl version information]" ) diff --git a/doc/man/pkgctl-search.1.asciidoc b/doc/man/pkgctl-search.1.asciidoc new file mode 100644 index 0000000..fb79b88 --- /dev/null +++ b/doc/man/pkgctl-search.1.asciidoc @@ -0,0 +1,58 @@ +pkgctl-search(1) +================ + +Name +---- +pkgctl-search - Search for an expression across the GitLab packaging group + +Synopsis +-------- +pkgctl search [OPTIONS] QUERY + +Description +----------- + +Search for an expression across the GitLab packaging group. + +To use a filter, include it in your query. You may use wildcards (*) to +use glob matching. + +Available filters for the blobs scope: path, extension + +Every usage of the search command must be authenticated. Consult the +'pkgctl auth' command to authenticate with GitLab or view the authentication +status. 
+ +Search Tips +----------- + + Syntax Description Example + ─────────────────────────────────────── + " Exact search "gem sidekiq" + ~ Fuzzy search J~ Doe + | Or display | banner + + And display +banner + - Exclude display -banner + * Partial bug error 50* + \ Escape \*md + # Issue ID #23456 + ! Merge request !23456 + +Options +------- + +*--json*:: + Enable printing results in JSON + +*--no-default-filter*:: + Do not apply default filter (like -path:keys/pgp/*.asc) + +*-h, --help*:: + Show a help text + +See Also +-------- + +linkman:pkgctl-auth[1] + +include::include/footer.asciidoc[] diff --git a/doc/man/pkgctl.1.asciidoc b/doc/man/pkgctl.1.asciidoc index 74edf68..1164561 100644 --- a/doc/man/pkgctl.1.asciidoc +++ b/doc/man/pkgctl.1.asciidoc @@ -44,6 +44,9 @@ pkgctl release:: pkgctl repo:: Manage Git packaging repositories and their configuration +pkgctl search:: + Search for an expression across the GitLab packaging group + pkgctl version:: Show pkgctl version information @@ -56,6 +59,7 @@ linkman:pkgctl-db[1] linkman:pkgctl-diff[1] linkman:pkgctl-release[1] linkman:pkgctl-repo[1] +linkman:pkgctl-search[1] linkman:pkgctl-version[1] include::include/footer.asciidoc[] diff --git a/src/lib/api/gitlab.sh b/src/lib/api/gitlab.sh index e5f4237..e4b8a9d 100644 --- a/src/lib/api/gitlab.sh +++ b/src/lib/api/gitlab.sh @@ -13,13 +13,63 @@ source "${_DEVTOOLS_LIBRARY_DIR}"/lib/config.sh set -e +graphql_api_call() { + local outfile=$1 + local request=$2 + local node_type=$3 + local data=$4 + local hasNextPage cursor + + # empty token + if [[ -z "${GITLAB_TOKEN}" ]]; then + msg_error " api call failed: No token provided" + return 1 + fi + + [[ -z ${WORKDIR:-} ]] && setup_workdir + api_workdir=$(mktemp --tmpdir="${WORKDIR}" --directory pkgctl-gitlab-api.XXXXXXXXXX) + + # normalize graphql data and prepare query + data="${data//\"/\\\"}" + data='{ + "query": "'"${data}"'" + }' + data="${data//$'\t'/ }" + data="${data//$'\n'/}" + + cursor="" + hasNextPage=true + while [[ 
${hasNextPage} == true ]]; do + data=$(sed -E 's|after: \\"[a-zA-Z0-9]*\\"|after: \\"'"${cursor}"'\\"|' <<< "${data}") + result="${api_workdir}/result.${cursor}" + + if ! curl --request "${request}" \ + --url "https://${GITLAB_HOST}/api/graphql" \ + --header "Authorization: Bearer ${GITLAB_TOKEN}" \ + --header "Content-Type: application/json" \ + --data "${data}" \ + --output "${result}" \ + --silent; then + msg_error " api call failed: $(cat "${outfile}")" + return 1 + fi + + hasNextPage=$(jq --raw-output ".data | .${node_type} | .pageInfo | .hasNextPage" < "${result}") + cursor=$(jq --raw-output ".data | .${node_type} | .pageInfo | .endCursor" < "${result}") + + cp "${result}" "${api_workdir}/tmp" + jq ".data.${node_type}.nodes" "${api_workdir}/tmp" > "${result}" + done + + jq --slurp add "${api_workdir}"/result.* > "${outfile}" + return 0 +} gitlab_api_call() { local outfile=$1 local request=$2 local endpoint=$3 local data=${4:-} - local error # empty token if [[ -z "${GITLAB_TOKEN}" ]]; then @@ -38,27 +88,102 @@ gitlab_api_call() { return 1 fi + if ! gitlab_check_api_errors "${outfile}"; then + return 1 + fi + + return 0 +} + +gitlab_api_call_paged() { + local outfile=$1 + local request=$2 + local endpoint=$3 + local data=${4:-} + local result header + + # empty token + if [[ -z "${GITLAB_TOKEN}" ]]; then + msg_error " api call failed: No token provided" + return 1 + fi + + [[ -z ${WORKDIR:-} ]] && setup_workdir + api_workdir=$(mktemp --tmpdir="${WORKDIR}" --directory pkgctl-gitlab-api.XXXXXXXXXX) + + next_page=1 + while [[ -n "${next_page}" ]]; do + result="${api_workdir}/result.${next_page}" + header="${api_workdir}/header" + if ! 
curl --request "${request}" \ + --get \ + --url "https://${GITLAB_HOST}/api/v4/${endpoint}&per_page=100&page=${next_page}" \ + --header "PRIVATE-TOKEN: ${GITLAB_TOKEN}" \ + --header "Content-Type: application/json" \ + --data-urlencode "${data}" \ + --dump-header "${header}" \ + --output "${result}" \ + --silent; then + msg_error " api call failed: $(cat "${result}")" + return 1 + fi + + if ! gitlab_check_api_errors "${result}"; then + return 1 + fi + + next_page=$(grep "x-next-page" "${header}" | tr -d '\r' | awk '{ print $2 }') + done + + jq --slurp add "${api_workdir}"/result.* > "${outfile}" + return 0 +} + +gitlab_check_api_errors() { + local file=$1 + local error + + # search API only returns an array, no errors + if [[ $(jq --raw-output 'type' < "${file}") == "array" ]]; then + return 0 + fi + # check for general purpose api error - if error=$(jq --raw-output --exit-status '.error' < "${outfile}"); then + if error=$(jq --raw-output --exit-status '.error' < "${file}"); then msg_error " api call failed: ${error}" return 1 fi # check for api specific error messages - if ! jq --raw-output --exit-status '.id' < "${outfile}" >/dev/null; then - if jq --raw-output --exit-status '.message | keys[]' < "${outfile}" &>/dev/null; then + if ! 
jq --raw-output --exit-status '.id' < "${file}" >/dev/null; then + if jq --raw-output --exit-status '.message | keys[]' < "${file}" &>/dev/null; then while read -r error; do msg_error " api call failed: ${error}" - done < <(jq --raw-output --exit-status '.message|to_entries|map("\(.key) \(.value[])")[]' < "${outfile}") - elif error=$(jq --raw-output --exit-status '.message' < "${outfile}"); then + done < <(jq --raw-output --exit-status '.message|to_entries|map("\(.key) \(.value[])")[]' < "${file}") + elif error=$(jq --raw-output --exit-status '.message' < "${file}"); then msg_error " api call failed: ${error}" fi return 1 fi - return 0 } +graphql_check_api_errors() { + local file=$1 + local error + + # early exit if we do not have errors + if ! jq --raw-output --exit-status '.errors[]' < "${file}" &>/dev/null; then + return 0 + fi + + # check for api specific error messages + while read -r error; do + msg_error " api call failed: ${error}" + done < <(jq --raw-output --exit-status '.errors[].message' < "${file}") + return 1 +} + gitlab_api_get_user() { local outfile username @@ -81,6 +206,23 @@ gitlab_api_get_user() { return 0 } +gitlab_api_get_project_name_mapping() { + local query=$1 + local outfile + + [[ -z ${WORKDIR:-} ]] && setup_workdir + outfile=$(mktemp --tmpdir="${WORKDIR}" pkgctl-gitlab-api.XXXXXXXXXX) + + # query project id to name mapping via the GraphQL projects query + if ! graphql_api_call "${outfile}" POST projects "${query}"; then + msg_warn " Invalid token provided?" + exit 1 + fi + + cat "${outfile}" + return 0 +} + # Convert arbitrary project names to GitLab valid path names. 
# # GitLab has several limitations on project and group names and also maintains @@ -130,3 +272,21 @@ gitlab_api_create_project() { printf "%s" "${path}" return 0 } + +# TODO: parallelize +# https://docs.gitlab.com/ee/api/search.html#scope-blobs +gitlab_api_search() { + local search=$1 + local outfile + + [[ -z ${WORKDIR:-} ]] && setup_workdir + outfile=$(mktemp --tmpdir="${WORKDIR}" pkgctl-gitlab-api.XXXXXXXXXX) + + if ! gitlab_api_call_paged "${outfile}" GET "/groups/archlinux%2fpackaging%2fpackages/search?scope=blobs" "search=${search}"; then + return 1 + fi + + cat "${outfile}" + + return 0 +} diff --git a/src/lib/cache.sh b/src/lib/cache.sh new file mode 100644 index 0000000..24056fa --- /dev/null +++ b/src/lib/cache.sh @@ -0,0 +1,22 @@ +#!/hint/bash +# +# SPDX-License-Identifier: GPL-3.0-or-later + +[[ -z ${DEVTOOLS_INCLUDE_CACHE_SH:-} ]] || return 0 +DEVTOOLS_INCLUDE_CACHE_SH=1 + +set -e + +readonly XDG_DEVTOOLS_CACHE_DIR="${XDG_CACHE_HOME:-$HOME/.cache}/devtools" + +get_cache_file() { + local filename=$1 + local path="${XDG_DEVTOOLS_CACHE_DIR}/${filename}" + + mkdir --parents -- "$(dirname -- "$path")" + if [[ ! 
-f ${path} ]]; then + touch -- "${path}" + fi + + printf '%s' "${path}" +} diff --git a/src/lib/search.sh b/src/lib/search.sh new file mode 100644 index 0000000..cf64db3 --- /dev/null +++ b/src/lib/search.sh @@ -0,0 +1,221 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-3.0-or-later + +[[ -z ${DEVTOOLS_INCLUDE_SEARCH_SH:-} ]] || return 0 +DEVTOOLS_INCLUDE_SEARCH_SH=1 + +_DEVTOOLS_LIBRARY_DIR=${_DEVTOOLS_LIBRARY_DIR:-@pkgdatadir@} +# shellcheck source=src/lib/common.sh +source "${_DEVTOOLS_LIBRARY_DIR}"/lib/common.sh +# shellcheck source=src/lib/cache.sh +source "${_DEVTOOLS_LIBRARY_DIR}"/lib/cache.sh +# shellcheck source=src/lib/api/gitlab.sh +source "${_DEVTOOLS_LIBRARY_DIR}"/lib/api/gitlab.sh + +source /usr/share/makepkg/util/message.sh + +set -eo pipefail + + +pkgctl_search_usage() { + local -r COMMAND=${_DEVTOOLS_COMMAND:-${BASH_SOURCE[0]##*/}} + cat <<- _EOF_ + Usage: ${COMMAND} [OPTIONS] QUERY + + Search for an expression across the GitLab packaging group. + + To use a filter, include it in your query. You may use wildcards (*) to + use glob matching. + + Available filters for the blobs scope: path, extension + + Every usage of the search command must be authenticated. Consult the + 'pkgctl auth' command to authenticate with GitLab or view the + authentication status. + + SEARCH TIPS + Syntax Description Example + ─────────────────────────────────────── + " Exact search "gem sidekiq" + ~ Fuzzy search J~ Doe + | Or display | banner + + And display +banner + - Exclude display -banner + * Partial bug error 50* + \\ Escape \\*md + # Issue ID #23456 + ! 
Merge request !23456 + + OPTIONS + --json Enable printing results in JSON + --no-default-filter Do not apply default filter (like -path:keys/pgp/*.asc) + -h, --help Show this help text + + EXAMPLES + $ ${COMMAND} linux + $ ${COMMAND} '"pytest -v" +PYTHONPATH' +_EOF_ +} + +pkgctl_search() { + if (( $# < 1 )); then + pkgctl_search_usage + exit 0 + fi + + # options + local search + local formatter=pretty + local use_default_filter=1 + + # variables + local default_filter="-path:keys/pgp/*.asc" + local graphql_lookup_batch=200 + local output result query entries from until length + local project_name_cache_file project_name_lookup project_ids project_id project_name project_slice + local mapping_output path startline data + + while (( $# )); do + case $1 in + -h|--help) + pkgctl_search_usage + exit 0 + ;; + --json) + formatter=json + shift + ;; + --no-default-filter) + use_default_filter=0 + shift + ;; + --) + shift + break + ;; + -*) + die "invalid argument: %s" "$1" + ;; + *) + break + ;; + esac + done + + if (( $# == 0 )); then + pkgctl_search_usage + exit 1 + fi + + # assign search parameter + search="${*}" + if (( use_default_filter )); then + search+=" ${default_filter}" + fi + + stat_busy "Querying gitlab search api" + output=$(gitlab_api_search "${search}") + stat_done + + project_name_cache_file=$(get_cache_file gitlab/project_id_to_name) + lock 11 "${project_name_cache_file}" "Locking project name cache" + mapfile -t project_ids < <( + jq --raw-output '[.[].project_id] | unique[]' <<< "${output}" | \ + grep --invert-match --file <(awk '{ print $1 }' < "${project_name_cache_file}" )) + + stat_busy "Querying project names" + local entries="${#project_ids[@]}" + local until=0 + while (( until < entries )); do + from=${until} + until=$(( until + graphql_lookup_batch )) + if (( until > entries )); then + until=${entries} + fi + length=$(( until - from )) + + project_slice=("${project_ids[@]:${from}:${length}}") + printf -v projects '"gid://gitlab/Project/%s",' 
"${project_slice[@]}" + query='{ + projects(after: "" ids: ['"${projects}"']) { + pageInfo { + startCursor + endCursor + hasNextPage + } + nodes { + id + name + } + } + }' + mapping_output=$(gitlab_api_get_project_name_mapping "${query}") + + # update cache + while read -r project_id project_name; do + printf "%s %s\n" "${project_id}" "${project_name}" >> "${project_name_cache_file}" + done < <(jq --raw-output \ + '.[] | "\(.id | rindex("/") as $lastSlash | .[$lastSlash+1:]) \(.name)"' \ + <<< "${mapping_output}") + done + stat_done + + # read project_id to name mapping from cache + declare -A project_name_lookup=() + while read -r project_id project_name; do + project_name_lookup[${project_id}]=${project_name} + done < "${project_name_cache_file}" + + # close project name cache lock + lock_close 11 + + # output mode JSON + if [[ ${formatter} == json ]]; then + jq --from-file <( + for project_id in $(jq '.[].project_id' <<< "${output}"); do + project_name=${project_name_lookup[${project_id}]} + printf 'map(if .project_id == %s then . + {"project_name": "%s"} else . end) | ' \ + "${project_id}" "${project_name}" + done + printf . 
+ ) <<< "${output}" + exit 0 + fi + + # pretty print each result + while read -r result; do + # read properties from search result + mapfile -t data < <(jq --raw-output ".data" <<< "${result}") + { read -r project_id; read -r path; read -r startline; } < <( + jq --raw-output ".project_id, .path, .startline" <<< "${result}" + ) + project_name=${project_name_lookup[${project_id}]} + + # remove trailing newline for multiline results + if (( ${#data[@]} > 1 )) && [[ ${data[-1]} == "" ]]; then + unset "data[${#data[@]}-1]" + fi + + # prepend empty lines to match startline + if (( startline > 1 )); then + mapfile -t data < <( + printf '%.0s\n' $(seq 1 "$(( startline - 1 ))") + printf "%s\n" "${data[@]}" + ) + fi + + bat \ + --file-name="${project_name}/${path}" \ + --line-range "${startline}:" \ + --paging=never \ + --force-colorization \ + --map-syntax "PKGBUILD:Bourne Again Shell (bash)" \ + --map-syntax ".SRCINFO:INI" \ + --map-syntax "*install:Bourne Again Shell (bash)" \ + --map-syntax "*sysusers*:Bourne Again Shell (bash)" \ + --map-syntax "*tmpfiles*:Bourne Again Shell (bash)" \ + --map-syntax "*.hook:INI" \ + <(printf "%s\n" "${data[@]}") + done < <(jq --compact-output '.[]' <<< "${output}") +} diff --git a/src/pkgctl.in b/src/pkgctl.in index ad215ac..10a2348 100644 --- a/src/pkgctl.in +++ b/src/pkgctl.in @@ -25,6 +25,7 @@ usage() { diff Compare package files using different modes release Release step to commit, tag and upload build artifacts repo Manage Git packaging repositories and their configuration + search Search for an expression across the GitLab packaging group version Show pkgctl version information OPTIONS @@ -96,6 +97,14 @@ while (( $# )); do pkgctl_release "$@" exit 0 ;; + search) + _DEVTOOLS_COMMAND+=" $1" + shift + # shellcheck source=src/lib/search.sh + source "${_DEVTOOLS_LIBRARY_DIR}"/lib/search.sh + pkgctl_search "$@" + exit 0 + ;; version|--version|-V) _DEVTOOLS_COMMAND+=" $1" shift -- cgit v1.2.3-70-g09d2