[cli/core/downloader/utils] Add download reordering optimization

This is an optimization that aims to fix issues with some titles,
such as World War Z, that have lots of duplicated files, resulting
in a very high runtime cache requirement.

The basic idea is to group files that share lots of chunks together
so the data can be removed from the cache sooner.

For most games this has little to no effect. For some titles with heavy
duplication, however, it can reduce RAM usage significantly. For
instance, the RAM requirements for World War Z are reduced from 5.3 GiB
to 499 MiB.

Partially fixes #17
This commit is contained in:
derrod 2020-05-04 13:59:04 +02:00
parent 3f27ea0296
commit 69383c4788
3 changed files with 74 additions and 13 deletions

View file

@ -274,6 +274,7 @@ class LegendaryCLI:
logger.info('Preparing download...') logger.info('Preparing download...')
# todo use status queue to print progress from CLI # todo use status queue to print progress from CLI
# This has become a little ridiculous hasn't it?
dlm, analysis, igame = self.core.prepare_download(game=game, base_game=base_game, base_path=args.base_path, dlm, analysis, igame = self.core.prepare_download(game=game, base_game=base_game, base_path=args.base_path,
force=args.force, max_shm=args.shared_memory, force=args.force, max_shm=args.shared_memory,
max_workers=args.max_workers, game_folder=args.game_folder, max_workers=args.max_workers, game_folder=args.game_folder,
@ -284,7 +285,8 @@ class LegendaryCLI:
platform_override=args.platform_override, platform_override=args.platform_override,
file_prefix_filter=args.file_prefix, file_prefix_filter=args.file_prefix,
file_exclude_filter=args.file_exclude_prefix, file_exclude_filter=args.file_exclude_prefix,
file_install_tag=args.install_tag) file_install_tag=args.install_tag,
dl_optimizations=args.order_opt)
# game is either up to date or hasn't changed, so we have nothing to do # game is either up to date or hasn't changed, so we have nothing to do
if not analysis.dl_size: if not analysis.dl_size:
@ -467,6 +469,8 @@ def main():
type=str, help='Exclude files starting with <prefix> (case insensitive)') type=str, help='Exclude files starting with <prefix> (case insensitive)')
install_parser.add_argument('--install-tag', dest='install_tag', action='store', metavar='<tag>', install_parser.add_argument('--install-tag', dest='install_tag', action='store', metavar='<tag>',
type=str, help='Only download files with the specified install tag (testing)') type=str, help='Only download files with the specified install tag (testing)')
install_parser.add_argument('--enable-reordering', dest='order_opt', action='store_true',
help='Enable reordering to attempt to optimize RAM usage during download')
launch_parser.add_argument('--offline', dest='offline', action='store_true', launch_parser.add_argument('--offline', dest='offline', action='store_true',
default=False, help='Skip login and launch game without online authentication') default=False, help='Skip login and launch game without online authentication')

View file

@ -25,6 +25,7 @@ from legendary.models.exceptions import *
from legendary.models.game import * from legendary.models.game import *
from legendary.models.json_manifest import JSONManifest from legendary.models.json_manifest import JSONManifest
from legendary.models.manifest import Manifest, ManifestMeta from legendary.models.manifest import Manifest, ManifestMeta
from legendary.utils.game_workarounds import is_opt_enabled
# ToDo: instead of true/false return values for success/failure actually raise an exception that the CLI/GUI # ToDo: instead of true/false return values for success/failure actually raise an exception that the CLI/GUI
@ -361,8 +362,8 @@ class LegendaryCore:
game_folder: str = '', override_manifest: str = '', game_folder: str = '', override_manifest: str = '',
override_old_manifest: str = '', override_base_url: str = '', override_old_manifest: str = '', override_base_url: str = '',
platform_override: str = '', file_prefix_filter: str = '', platform_override: str = '', file_prefix_filter: str = '',
file_exclude_filter: str = '', file_install_tag: str = '' file_exclude_filter: str = '', file_install_tag: str = '',
) -> (DLManager, AnalysisResult, ManifestMeta): dl_optimizations: bool = False) -> (DLManager, AnalysisResult, ManifestMeta):
# load old manifest # load old manifest
old_manifest = None old_manifest = None
@ -408,10 +409,6 @@ class LegendaryCore:
install_path = os.path.join(base_path, game_folder) install_path = os.path.join(base_path, game_folder)
# todo move this somewhere else so the directory only gets created once the download is started
if not os.path.exists(install_path):
os.makedirs(install_path)
self.log.info(f'Install path: {install_path}') self.log.info(f'Install path: {install_path}')
if not force: if not force:
@ -432,13 +429,20 @@ class LegendaryCore:
if not max_shm: if not max_shm:
max_shm = self.lgd.config.getint('Legendary', 'max_memory', fallback=1024) max_shm = self.lgd.config.getint('Legendary', 'max_memory', fallback=1024)
if dl_optimizations or is_opt_enabled(game.app_name):
self.log.info('Download order optimizations are enabled.')
process_opt = True
else:
process_opt = False
dlm = DLManager(install_path, base_url, resume_file=resume_file, status_q=status_q, dlm = DLManager(install_path, base_url, resume_file=resume_file, status_q=status_q,
max_shared_memory=max_shm * 1024 * 1024, max_workers=max_workers) max_shared_memory=max_shm * 1024 * 1024, max_workers=max_workers)
anlres = dlm.run_analysis(manifest=new_manifest, old_manifest=old_manifest, anlres = dlm.run_analysis(manifest=new_manifest, old_manifest=old_manifest,
patch=not disable_patching, resume=not force, patch=not disable_patching, resume=not force,
file_prefix_filter=file_prefix_filter, file_prefix_filter=file_prefix_filter,
file_exclude_filter=file_exclude_filter, file_exclude_filter=file_exclude_filter,
file_install_tag=file_install_tag) file_install_tag=file_install_tag,
processing_optimization=process_opt)
prereq = None prereq = None
if new_manifest.meta.prereq_ids: if new_manifest.meta.prereq_ids:

View file

@ -242,7 +242,8 @@ class DLManager(Process):
def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None, def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
patch=True, resume=True, file_prefix_filter=None, patch=True, resume=True, file_prefix_filter=None,
file_exclude_filter=None, file_install_tag=None) -> AnalysisResult: file_exclude_filter=None, file_install_tag=None,
processing_optimization=False) -> AnalysisResult:
""" """
Run analysis on manifest and old manifest (if not None) and return a result Run analysis on manifest and old manifest (if not None) and return a result
with a summary resources required in order to install the provided manifest. with a summary resources required in order to install the provided manifest.
@ -253,6 +254,8 @@ class DLManager(Process):
:param resume: Continue based on resume file if it exists :param resume: Continue based on resume file if it exists
:param file_prefix_filter: Only download files that start with this prefix :param file_prefix_filter: Only download files that start with this prefix
:param file_exclude_filter: Exclude files with this prefix from download :param file_exclude_filter: Exclude files with this prefix from download
:param file_install_tag: Only install files with the specified tag
:param processing_optimization: Attempt to optimize processing order and RAM usage
:return: AnalysisResult :return: AnalysisResult
""" """
@ -324,9 +327,19 @@ class DLManager(Process):
analysis_res.unchanged = len(mc.unchanged) analysis_res.unchanged = len(mc.unchanged)
self.log.debug(f'{analysis_res.unchanged} unchanged files') self.log.debug(f'{analysis_res.unchanged} unchanged files')
if processing_optimization and len(manifest.file_manifest_list.elements) > 8_000:
self.log.warning('Manifest contains too many files, processing optimizations will be disabled.')
processing_optimization = False
elif processing_optimization:
self.log.info('Processing order optimization is enabled, analysis may take a few seconds longer...')
# count references to chunks for determining runtime cache size later # count references to chunks for determining runtime cache size later
references = Counter() references = Counter()
for fm in manifest.file_manifest_list.elements: file_to_chunks = defaultdict(set)
fmlist = sorted(manifest.file_manifest_list.elements,
key=lambda a: a.filename.lower())
for fm in fmlist:
# chunks of unchanged files are not downloaded so we can skip them # chunks of unchanged files are not downloaded so we can skip them
if fm.filename in mc.unchanged: if fm.filename in mc.unchanged:
analysis_res.unchanged += fm.file_size analysis_res.unchanged += fm.file_size
@ -334,6 +347,46 @@ class DLManager(Process):
for cp in fm.chunk_parts: for cp in fm.chunk_parts:
references[cp.guid_num] += 1 references[cp.guid_num] += 1
if processing_optimization:
file_to_chunks[fm.filename].add(cp.guid_num)
if processing_optimization:
# reorder the file manifest list to group files that share many chunks
# 5 is mostly arbitrary but has shown in testing to be a good choice
min_overlap = 5
# enumerate the file list to try and find a "partner" for
# each file that shares the most chunks with it.
partners = dict()
filenames = [fm.filename for fm in fmlist]
for num, filename in enumerate(filenames[:int((len(filenames)+1)/2)]):
chunks = file_to_chunks[filename]
max_overlap = min_overlap
for other_file in filenames[num+1:]:
overlap = len(chunks & file_to_chunks[other_file])
if overlap > max_overlap:
partners[filename] = other_file
max_overlap = overlap
# iterate over all the files again and this time around place each file's partner (if any) right after it
_fmlist = []
processed = set()
for fm in fmlist:
if fm.filename in processed:
continue
_fmlist.append(fm)
processed.add(fm.filename)
# try to find the file's "partner"
partner = partners.get(fm.filename, None)
if not partner or partner in processed:
continue
partner_fm = manifest.file_manifest_list.get_file_by_path(partner)
_fmlist.append(partner_fm)
processed.add(partner)
fmlist = _fmlist
# determine reusable chunks and prepare lookup table for reusable ones # determine reusable chunks and prepare lookup table for reusable ones
re_usable = defaultdict(dict) re_usable = defaultdict(dict)
@ -367,8 +420,7 @@ class DLManager(Process):
# run through the list of files and create the download jobs and also determine minimum # run through the list of files and create the download jobs and also determine minimum
# runtime cache requirement by simulating adding/removing from cache during download. # runtime cache requirement by simulating adding/removing from cache during download.
self.log.debug('Creating filetasks and chunktasks...') self.log.debug('Creating filetasks and chunktasks...')
for current_file in sorted(manifest.file_manifest_list.elements, for current_file in fmlist:
key=lambda a: a.filename.lower()):
# skip unchanged and empty files # skip unchanged and empty files
if current_file.filename in mc.unchanged: if current_file.filename in mc.unchanged:
continue continue
@ -440,7 +492,8 @@ class DLManager(Process):
if analysis_res.min_memory > self.max_shared_memory: if analysis_res.min_memory > self.max_shared_memory:
shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB' shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB' required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
raise MemoryError(f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}') raise MemoryError(f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}. '
f'Try running legendary with the --enable-reordering flag to reduce memory usage.')
# calculate actual dl and patch write size. # calculate actual dl and patch write size.
analysis_res.dl_size = \ analysis_res.dl_size = \