From f8df4cf6bb9f37f800fbbb089163f7edd0703703 Mon Sep 17 00:00:00 2001
From: Rob Taylor
Date: Wed, 11 Mar 2026 02:44:12 +0000
Subject: [PATCH] fix: Fix dpkg registration and tar permissions on cache restore

After cache restore, dpkg had no record of the installed packages because:

1. Only preinst/postinst scripts were cached from /var/lib/dpkg/info/,
   missing .list, .md5sums, .conffiles, and other metadata files
2. The dpkg status database (/var/lib/dpkg/status) was never updated

This meant dpkg -s, apt list --installed, and anything checking package
state would not see the restored packages.

Fix:
- Cache all /var/lib/dpkg/info/${package_name}.* files (not just install
  scripts)
- Save each package's dpkg status entry to a .dpkg-status file
- On restore, append status entries to /var/lib/dpkg/status (skipping
  packages that are already registered)

Additionally:
- Include directories in tar archives so that tar preserves their
  ownership and permissions on restore (prevents 0077 umask issues on
  GPU runners)
- Include architecture qualifier (e.g., :i386) from apt's Unpacking log
  in get_installed_packages, so multi-arch variants get separate cache
  entries instead of being deduplicated
- When registering restored packages with dpkg, compare cached vs
  installed versions and handle upgrades by replacing the old status
  entry

Co-developed-by: Claude Code v2.1.58 (claude-opus-4-6)
---
 install_and_cache_pkgs.sh | 27 +++++++++++-----
 lib.sh                    |  8 ++---
 restore_pkgs.sh           | 68 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/install_and_cache_pkgs.sh b/install_and_cache_pkgs.sh
index 1a544ad..70636ac 100755
--- a/install_and_cache_pkgs.sh
+++ b/install_and_cache_pkgs.sh
@@ -101,24 +101,35 @@ for installed_package in ${installed_packages}; do
     read package_name package_ver < <(get_package_name_ver "${installed_package}")
     log " * Caching ${package_name} to ${cache_filepath}..."
 
-    # Pipe all package files (no folders), including symlinks, their targets, and installation control data to Tar.
-    tar -cf "${cache_filepath}" -C / --verbatim-files-from --files-from <(
-      { dpkg -L "${package_name}" &&
-        get_install_script_filepath "" "${package_name}" "preinst" &&
-        get_install_script_filepath "" "${package_name}" "postinst" ; } |
+    # Pipe all package files, directories, and symlinks (plus symlink targets
+    # and dpkg metadata) to Tar. Directories are included so that tar
+    # preserves their ownership and permissions on restore — without them,
+    # tar auto-creates parent directories using the current umask, which on
+    # some runners (e.g. GPU-optimized images) defaults to 0077, leaving
+    # restored trees inaccessible to non-root users.
+    tar -cf "${cache_filepath}" -C / --no-recursion --verbatim-files-from --files-from <(
+      { dpkg -L "${package_name}" | grep -vxF -e '/.' -e '.' -e '/' || true
+        # Include all dpkg info files for this package (list, md5sums, conffiles,
+        # triggers, maintainer scripts, ...) so dpkg recognizes it after restore.
+        # Each listing ends in "|| true": a glob with no match must not skip the next.
+        ls -1 "/var/lib/dpkg/info/${package_name}".* 2>/dev/null || true
+        ls -1 "/var/lib/dpkg/info/${package_name}":*.* 2>/dev/null || true ; } |
       while IFS= read -r f; do
-        if test -f "${f}" -o -L "${f}"; then
-          get_tar_relpath "${f}"
+        if [ -f "${f}" ] || [ -L "${f}" ] || [ -d "${f}" ]; then
+          echo "${f#/}"
           if [ -L "${f}" ]; then
             target="$(readlink -f "${f}")"
             if [ -f "${target}" ]; then
-              get_tar_relpath "${target}"
+              echo "${target#/}"
             fi
           fi
         fi
       done
     )
 
+    # Save the dpkg status entry for registration on restore; drop the file if dpkg -s fails so no empty entry is cached.
+    dpkg -s "${package_name}" > "${cache_dir}/${installed_package}.dpkg-status" 2>/dev/null || rm -f "${cache_dir}/${installed_package}.dpkg-status"
+
     log " done (compressed size $(du -h "${cache_filepath}" | cut -f1))."
   fi
 
diff --git a/lib.sh b/lib.sh
index 755d939..b0cb54a 100755
--- a/lib.sh
+++ b/lib.sh
@@ -54,14 +54,14 @@ function get_install_script_filepath {
 # The list of colon delimited action syntax pairs with each pair equals
 # delimited. : :...
 ###############################################################################
-function get_installed_packages {
+function get_installed_packages {
   local install_log_filepath="${1}"
-  local regex="^Unpacking ([^ :]+)([^ ]+)? (\[[^ ]+\]\s)?\(([^ )]+)"
-  local dep_packages=""
+  local regex="^Unpacking ([^ :]+)([^ ]+)? (\[[^ ]+\]\s)?\(([^ )]+)"
+  local dep_packages=""
   while read -r line; do
     # ${regex} should be unquoted since it isn't a literal.
     if [[ "${line}" =~ ${regex} ]]; then
-      dep_packages="${dep_packages}${BASH_REMATCH[1]}=${BASH_REMATCH[4]} "
+      dep_packages="${dep_packages}${BASH_REMATCH[1]}${BASH_REMATCH[2]}=${BASH_REMATCH[4]} "
     else
       log_err "Unable to parse package name and version from \"${line}\""
       exit 2
diff --git a/restore_pkgs.sh b/restore_pkgs.sh
index 4556265..b336bc7 100755
--- a/restore_pkgs.sh
+++ b/restore_pkgs.sh
@@ -50,7 +50,7 @@ for cached_filepath in ${cached_filepaths}; do
   sudo tar -xf "${cached_filepath}" -C "${cache_restore_root}" > /dev/null
   log " done"
 
-  # Execute install scripts if available.
+  # Execute install scripts if available.
   if test ${execute_install_scripts} == "true"; then
     # May have to add more handling for extracting pre-install script before extracting all files.
     # Keeping it simple for now.
@@ -59,3 +59,69 @@ for cached_filepath in ${cached_filepaths}; do
   fi
 done
 log "done"
+
+log_empty_line
+
+# Register packages with dpkg so they appear as installed.
+# The tar extraction restores dpkg info files (list, md5sums, etc.) but the
+# main status database (/var/lib/dpkg/status) also needs updating.
+dpkg_status_dir="${cache_dir}"
+status_files=$(ls -1 "${dpkg_status_dir}"/*.dpkg-status 2>/dev/null || true)
+if test -n "${status_files}"; then
+  log "Registering restored packages with dpkg..."
+  dpkg_status_path="${cache_restore_root}var/lib/dpkg/status"
+  for status_file in ${status_files}; do
+    pkg_name=$(grep '^Package:' "${status_file}" | head -1 | sed 's/^Package: //')
+    cached_ver=$(grep '^Version:' "${status_file}" | head -1 | sed 's/^Version: //')
+    cached_arch=$(grep '^Architecture:' "${status_file}" | head -1 | sed 's/^Architecture: //')
+    # An empty or malformed status file has nothing to register; skip it.
+    [ -n "${pkg_name}" ] || continue
+
+    # Build architecture-qualified name for dpkg queries.
+    dpkg_query_name="${pkg_name}"
+    if [ -n "${cached_arch}" ] && [ "${cached_arch}" != "all" ]; then
+      dpkg_query_name="${pkg_name}:${cached_arch}"
+    fi
+
+    if existing_entry=$(dpkg -s "${dpkg_query_name}" 2>/dev/null); then
+      existing_status=$(printf '%s\n' "${existing_entry}" | grep '^Status:' | head -1)
+      existing_ver=$(printf '%s\n' "${existing_entry}" | grep '^Version:' | head -1 | sed 's/^Version: //')
+
+      if echo "${existing_status}" | grep -q 'install ok installed' &&
+        [ "${existing_ver}" = "${cached_ver}" ]; then
+        log "- ${dpkg_query_name} already at version ${cached_ver}, skipping."
+        continue
+      fi
+      # dpkg already has an entry for this package (another version, or a
+      # not-fully-installed state such as config-files). Remove it first so the
+      # cached entry appended below does not create a duplicate stanza.
+      log "- ${dpkg_query_name} updating from ${existing_ver} to ${cached_ver}..."
+      sudo python3 -c "
+import sys
+pkg, arch, path = sys.argv[1], sys.argv[2], sys.argv[3]
+with open(path, 'r') as f:
+    content = f.read()
+entries = content.split('\n\n')
+kept = []
+for entry in entries:
+    if not entry.strip():
+        continue
+    lines = entry.strip().split('\n')
+    match_pkg = any(l == 'Package: ' + pkg for l in lines)
+    match_arch = any(l == 'Architecture: ' + arch for l in lines)
+    if match_pkg and match_arch:
+        continue
+    kept.append(entry.strip())
+with open(path, 'w') as f:
+    f.write('\n\n'.join(kept))
+    if kept:
+        f.write('\n\n')
+" "${pkg_name}" "${cached_arch}" "${dpkg_status_path}"
+    fi
+
+    # Append the status entry (with blank line separator) to the dpkg database.
+    { echo ""; cat "${status_file}"; } | sudo tee -a "${dpkg_status_path}" > /dev/null
+    log "- ${dpkg_query_name} registered."
+  done
+  log "done"
+fi