fix: Fix dpkg registration and tar permissions on cache restore

After cache restore, dpkg had no record of the installed packages because:
1. Only preinst/postinst scripts were cached from /var/lib/dpkg/info/,
   missing .list, .md5sums, .conffiles, and other metadata files
2. The dpkg status database (/var/lib/dpkg/status) was never updated

This meant dpkg -s, apt list --installed, and anything checking package
state would not see the restored packages.

Fix:
- Cache all /var/lib/dpkg/info/<package>.* files (not just install scripts)
- Save each package's dpkg status entry to a .dpkg-status file
- On restore, append status entries to /var/lib/dpkg/status (skipping
  packages that are already registered at the same version)

Additionally:
- Include directories in tar archives so that tar preserves their ownership
  and permissions on restore (prevents 0077 umask issues on GPU runners)
- Include architecture qualifier (e.g., :i386) from apt's Unpacking log
  in get_installed_packages, so multi-arch variants get separate cache
  entries instead of being deduplicated
- When registering restored packages with dpkg, compare cached vs installed
  versions and handle upgrades by replacing the old status entry

Co-developed-by: Claude Code v2.1.58 (claude-opus-4-6)
This commit is contained in:
Rob Taylor 2026-03-11 02:44:12 +00:00
parent acb598e5dd
commit f8df4cf6bb
3 changed files with 90 additions and 13 deletions

View file

@ -101,24 +101,35 @@ for installed_package in ${installed_packages}; do
read package_name package_ver < <(get_package_name_ver "${installed_package}")
log " * Caching ${package_name} to ${cache_filepath}..."
# Pipe all package files (no folders), including symlinks, their targets, and installation control data to Tar.
tar -cf "${cache_filepath}" -C / --verbatim-files-from --files-from <(
{ dpkg -L "${package_name}" &&
get_install_script_filepath "" "${package_name}" "preinst" &&
get_install_script_filepath "" "${package_name}" "postinst" ; } |
# Pipe all package files, directories, and symlinks (plus symlink targets
# and dpkg metadata) to Tar. Directories are included so that tar
# preserves their ownership and permissions on restore — without them,
# tar auto-creates parent directories using the current umask, which on
# some runners (e.g. GPU-optimized images) defaults to 0077, leaving
# restored trees inaccessible to non-root users.
tar -cf "${cache_filepath}" -C / --no-recursion --verbatim-files-from --files-from <(
{ dpkg -L "${package_name}" | grep -vxF -e '/.' -e '.' -e '/' &&
# Include all dpkg info files for this package (list, md5sums,
# conffiles, triggers, preinst, postinst, prerm, postrm, etc.)
# so dpkg recognizes the package after cache restore.
ls -1 /var/lib/dpkg/info/${package_name}.* 2>/dev/null &&
ls -1 /var/lib/dpkg/info/${package_name}:*.* 2>/dev/null ; } |
while IFS= read -r f; do
if test -f "${f}" -o -L "${f}"; then
get_tar_relpath "${f}"
if [ -f "${f}" ] || [ -L "${f}" ] || [ -d "${f}" ]; then
echo "${f#/}"
if [ -L "${f}" ]; then
target="$(readlink -f "${f}")"
if [ -f "${target}" ]; then
get_tar_relpath "${target}"
echo "${target#/}"
fi
fi
fi
done
)
# Save the dpkg status entry so we can register the package on restore.
dpkg -s "${package_name}" > "${cache_dir}/${installed_package}.dpkg-status" 2>/dev/null || true
log " done (compressed size $(du -h "${cache_filepath}" | cut -f1))."
fi

8
lib.sh
View file

@ -54,14 +54,14 @@ function get_install_script_filepath {
# The list of colon delimited action syntax pairs with each pair equals
# delimited. <name>:<version> <name>:<version>...
###############################################################################
function get_installed_packages {
function get_installed_packages {
local install_log_filepath="${1}"
local regex="^Unpacking ([^ :]+)([^ ]+)? (\[[^ ]+\]\s)?\(([^ )]+)"
local dep_packages=""
local regex="^Unpacking ([^ :]+)([^ ]+)? (\[[^ ]+\]\s)?\(([^ )]+)"
local dep_packages=""
while read -r line; do
# ${regex} should be unquoted since it isn't a literal.
if [[ "${line}" =~ ${regex} ]]; then
dep_packages="${dep_packages}${BASH_REMATCH[1]}=${BASH_REMATCH[4]} "
dep_packages="${dep_packages}${BASH_REMATCH[1]}${BASH_REMATCH[2]}=${BASH_REMATCH[4]} "
else
log_err "Unable to parse package name and version from \"${line}\""
exit 2

View file

@ -50,7 +50,7 @@ for cached_filepath in ${cached_filepaths}; do
sudo tar -xf "${cached_filepath}" -C "${cache_restore_root}" > /dev/null
log " done"
# Execute install scripts if available.
# Execute install scripts if available.
if test ${execute_install_scripts} == "true"; then
# May have to add more handling for extracting pre-install script before extracting all files.
# Keeping it simple for now.
@ -59,3 +59,69 @@ for cached_filepath in ${cached_filepaths}; do
fi
done
log "done"
log_empty_line
# Register packages with dpkg so they appear as installed.
# The tar extraction restores dpkg info files (list, md5sums, etc.) but the
# main status database (/var/lib/dpkg/status) also needs updating.
dpkg_status_dir="${cache_dir}"
dpkg_status_path="${cache_restore_root}var/lib/dpkg/status"

# Python program that drops every stanza matching both a package name and an
# architecture from a dpkg status database.
#   argv[1] package name, argv[2] architecture, argv[3] status database path.
# The database is decoded as UTF-8 with surrogateescape so that the result does
# not depend on the locale of the runner and undecodable bytes round-trip.
# The program is single-quoted for the shell, so it must not contain
# apostrophes.
dpkg_status_prune_py='
import sys

pkg, arch, path = sys.argv[1], sys.argv[2], sys.argv[3]
with open(path, "r", encoding="utf-8", errors="surrogateescape") as f:
    content = f.read()

kept = []
for entry in content.split("\n\n"):
    entry = entry.strip()
    if not entry:
        continue
    lines = entry.split("\n")
    if ("Package: " + pkg) in lines and ("Architecture: " + arch) in lines:
        continue
    kept.append(entry)

with open(path, "w", encoding="utf-8", errors="surrogateescape") as f:
    f.write("\n\n".join(kept))
    if kept:
        f.write("\n\n")
'

###############################################################################
# Prints the value of the first "<field>: " line of a dpkg control stanza.
# Arguments:
#   Field name without the trailing colon (e.g. Package, Version).
#   File to read, or "-" to read the stanza from stdin.
# Returns:
#   The field value on stdout; prints nothing when the field is absent.
###############################################################################
function dpkg_status_field {
  local field="${1}"
  local source="${2}"
  # index() == 1 anchors the match at column 0, so continuation lines (which
  # start with a space) can never be mistaken for a field.
  awk -v key="${field}: " \
    'index($0, key) == 1 { print substr($0, length(key) + 1); exit }' \
    "${source}"
}

# Collect the cached status entries with a glob rather than by parsing ls, so
# a cache directory containing whitespace is handled correctly.
status_files=()
for status_file in "${dpkg_status_dir}"/*.dpkg-status; do
  if [[ -f "${status_file}" ]]; then
    status_files+=("${status_file}")
  fi
done

if (( ${#status_files[@]} > 0 )); then
  log "Registering restored packages with dpkg..."
  for status_file in "${status_files[@]}"; do
    pkg_name="$(dpkg_status_field "Package" "${status_file}")"
    cached_ver="$(dpkg_status_field "Version" "${status_file}")"
    cached_arch="$(dpkg_status_field "Architecture" "${status_file}")"

    # A status file is empty when dpkg had no record of the package at cache
    # time. Appending it would only add junk to the database.
    if [[ -z "${pkg_name}" || -z "${cached_ver}" ]]; then
      log "- ${status_file} has no usable dpkg status entry, skipping."
      continue
    fi

    # Build architecture-qualified name for dpkg queries.
    dpkg_query_name="${pkg_name}"
    if [[ -n "${cached_arch}" && "${cached_arch}" != "all" ]]; then
      dpkg_query_name="${pkg_name}:${cached_arch}"
    fi

    # Query dpkg once and reuse the stanza for both fields.
    if existing_entry="$(dpkg -s "${dpkg_query_name}" 2> /dev/null)"; then
      existing_status="$(dpkg_status_field "Status" - <<< "${existing_entry}")"
      existing_ver="$(dpkg_status_field "Version" - <<< "${existing_entry}")"
      if [[ "${existing_status}" == *"install ok installed"* ]]; then
        if [[ "${existing_ver}" == "${cached_ver}" ]]; then
          log "- ${dpkg_query_name} already at version ${cached_ver}, skipping."
          continue
        fi
        # Package is installed at a different version (was upgraded during
        # the original install). Remove the old dpkg status entry so we can
        # replace it with the cached (upgraded) version.
        log "- ${dpkg_query_name} updating from ${existing_ver} to ${cached_ver}..."
      else
        # dpkg knows the package but it is not fully installed (for example
        # only its config files remain). Replace that stanza as well instead
        # of leaving two stanzas for the same package in the database.
        log "- ${dpkg_query_name} replacing stale dpkg entry (${existing_status})..."
      fi
      if ! sudo python3 -c "${dpkg_status_prune_py}" \
        "${pkg_name}" "${cached_arch}" "${dpkg_status_path}"; then
        # Best effort: the cached stanza is still appended below.
        log "- ${dpkg_query_name} could not remove the old dpkg entry."
      fi
    fi

    # Append the status entry (with blank line separator) to the dpkg database.
    if { printf '\n'; cat "${status_file}"; } |
      sudo tee -a "${dpkg_status_path}" > /dev/null; then
      log "- ${dpkg_query_name} registered."
    else
      log "- ${dpkg_query_name} could not be written to ${dpkg_status_path}."
    fi
  done
  log "done"
fi