From 455b6ff5ecb23dcc45fe511e57385dbe8e2197af Mon Sep 17 00:00:00 2001
From: Roman513
Date: Wed, 25 Dec 2024 17:02:28 +0400
Subject: [PATCH] Make reading from existing files optional with fallback,
 provide cmd parameter and config option

---
 README.md                          |  4 +++
 legendary/cli.py                   |  3 +++
 legendary/core.py                  | 30 +++++++++++++++++-----
 legendary/downloader/mp/manager.py | 40 ++++++++++++++++--------------
 4 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 32fa4af..a5cedbd 100644
--- a/README.md
+++ b/README.md
@@ -459,6 +459,8 @@ optional arguments:
   --exclude <prefix>    Exclude files starting with <prefix> (case
                         insensitive)
   --install-tag <tag>   Only download files with the specified install tag
+  --read-files          Read duplicated parts from already saved files instead
+                        of keeping them in RAM
   --enable-reordering   Enable reordering optimization to reduce RAM
                         requirements during download (may have adverse
                         results for some titles)
@@ -670,6 +672,8 @@ log_level = debug
 max_memory = 2048
 ; maximum number of worker processes when downloading (fewer workers will be slower, but also use less system resources)
 max_workers = 8
+; enables reading duplicated data from already saved files during download (decreases RAM usage but increases disk I/O)
+read_files = false
 ; default install directory
 install_dir = /mnt/tank/games
 ; locale override, must be in RFC 1766 format (e.g. "en-US")
diff --git a/legendary/cli.py b/legendary/cli.py
index 9d8430d..0dbc33b 100644
--- a/legendary/cli.py
+++ b/legendary/cli.py
@@ -971,6 +971,7 @@ class LegendaryCLI:
                 file_prefix_filter=args.file_prefix,
                 file_exclude_filter=args.file_exclude_prefix,
                 file_install_tag=args.install_tag,
+                read_files=args.read_files,
                 dl_optimizations=args.order_opt,
                 dl_timeout=args.dl_timeout,
                 repair=args.repair_mode,
@@ -2768,6 +2769,8 @@ def main():
                                 type=str, help='Exclude files starting with <prefix> (case insensitive)')
     install_parser.add_argument('--install-tag', dest='install_tag', action='append', metavar='<tag>',
                                 type=str, help='Only download files with the specified install tag')
+    install_parser.add_argument('--read-files', dest='read_files', action='store_true',
+                                help='Read duplicated parts from already saved files instead of keeping them in memory')
     install_parser.add_argument('--enable-reordering', dest='order_opt', action='store_true',
                                 help='Enable reordering optimization to reduce RAM requirements '
                                      'during download (may have adverse results for some titles)')
diff --git a/legendary/core.py b/legendary/core.py
index 04344e3..4e57475 100644
--- a/legendary/core.py
+++ b/legendary/core.py
@@ -1327,6 +1327,7 @@ class LegendaryCore:
                          override_old_manifest: str = '', override_base_url: str = '',
                          platform: str = 'Windows', file_prefix_filter: list = None,
                          file_exclude_filter: list = None, file_install_tag: list = None,
+                         read_files: bool = False,
                          dl_optimizations: bool = False, dl_timeout: int = 10,
                          repair: bool = False, repair_use_latest: bool = False,
                          disable_delta: bool = False, override_delta_manifest: str = '',
@@ -1487,6 +1488,9 @@
         if not max_shm:
             max_shm = self.lgd.config.getint('Legendary', 'max_memory', fallback=2048)

+        if not read_files:
+            read_files = self.lgd.config.getboolean('Legendary', 'read_files', fallback=False)
+
         if dl_optimizations or is_opt_enabled(game.app_name, new_manifest.meta.build_version):
             self.log.info('Download order optimizations are enabled.')
             process_opt = True
@@ -1499,12 +1503,26 @@
         dlm = DLManager(install_path, base_url, resume_file=resume_file, status_q=status_q,
                         max_shared_memory=max_shm * 1024 * 1024, max_workers=max_workers,
                         dl_timeout=dl_timeout, bind_ip=bind_ip)
-        anlres = dlm.run_analysis(manifest=new_manifest, old_manifest=old_manifest,
-                                  patch=not disable_patching, resume=not force,
-                                  file_prefix_filter=file_prefix_filter,
-                                  file_exclude_filter=file_exclude_filter,
-                                  file_install_tag=file_install_tag,
-                                  processing_optimization=process_opt)
+
+        analysis_kwargs = dict(
+            old_manifest=old_manifest,
+            patch=not disable_patching, resume=not force,
+            file_prefix_filter=file_prefix_filter,
+            file_exclude_filter=file_exclude_filter,
+            file_install_tag=file_install_tag,
+            processing_optimization=process_opt
+        )
+
+        try:
+            anlres = dlm.run_analysis(manifest=new_manifest, **analysis_kwargs, read_files=read_files)
+        except MemoryError:
+            if read_files:
+                raise
+            self.log.warning('Memory error encountered, retrying with file read enabled...')
+            dlm = DLManager(install_path, base_url, resume_file=resume_file, status_q=status_q,
+                            max_shared_memory=max_shm * 1024 * 1024, max_workers=max_workers,
+                            dl_timeout=dl_timeout, bind_ip=bind_ip)
+            anlres = dlm.run_analysis(manifest=new_manifest, **analysis_kwargs, read_files=True)

         prereq = None
         if new_manifest.meta.prereq_ids:
diff --git a/legendary/downloader/mp/manager.py b/legendary/downloader/mp/manager.py
index 2df74c1..b360d45 100644
--- a/legendary/downloader/mp/manager.py
+++ b/legendary/downloader/mp/manager.py
@@ -82,6 +82,7 @@ class DLManager(Process):
     def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
                      patch=True, resume=True, file_prefix_filter=None,
                      file_exclude_filter=None, file_install_tag=None,
+                     read_files=False,
                      processing_optimization=False) -> AnalysisResult:
         """
         Run analysis on manifest and old manifest (if not None) and return a result
@@ -94,6 +95,7 @@
         :param file_prefix_filter: Only download files that start with this prefix
         :param file_exclude_filter: Exclude files with this prefix from download
         :param file_install_tag: Only install files with the specified tag
+        :param read_files: Allow reading from already finished files
         :param processing_optimization: Attempt to optimize processing order and RAM usage
         :return: AnalysisResult
         """
@@ -320,25 +322,27 @@

         # determine whether a chunk part is currently in written files
         reusable_written = defaultdict(dict)
-        cur_written_cps = defaultdict(list)
-        for cur_file in fmlist:
-            cur_file_cps = dict()
-            cur_file_offset = 0
-            for cp in cur_file.chunk_parts:
-                key = (cp.guid_num, cp.offset, cp.size)
-                for wr_file_name, wr_file_offset, wr_cp_offset, wr_cp_end_offset in cur_written_cps[cp.guid_num]:
-                    # check if new chunk part is wholly contained in a written chunk part
-                    cur_cp_end_offset = cp.offset + cp.size
-                    if wr_cp_offset <= cp.offset and wr_cp_end_offset >= cur_cp_end_offset:
-                        references[cp.guid_num] -= 1
-                        reuse_offset = wr_file_offset + (cp.offset - wr_cp_offset)
-                        reusable_written[cur_file.filename][key] = (wr_file_name, reuse_offset)
-                        break
-                cur_file_cps[cp.guid_num] = (cur_file.filename, cur_file_offset, cp.offset, cp.offset + cp.size)
-                cur_file_offset += cp.size
+        if read_files:
+            self.log.debug('Analyzing manifest for re-usable chunks in saved files...')
+            cur_written_cps = defaultdict(list)
+            for cur_file in fmlist:
+                cur_file_cps = dict()
+                cur_file_offset = 0
+                for cp in cur_file.chunk_parts:
+                    key = (cp.guid_num, cp.offset, cp.size)
+                    for wr_file_name, wr_file_offset, wr_cp_offset, wr_cp_end_offset in cur_written_cps[cp.guid_num]:
+                        # check if new chunk part is wholly contained in a written chunk part
+                        cur_cp_end_offset = cp.offset + cp.size
+                        if wr_cp_offset <= cp.offset and wr_cp_end_offset >= cur_cp_end_offset:
+                            references[cp.guid_num] -= 1
+                            reuse_offset = wr_file_offset + (cp.offset - wr_cp_offset)
+                            reusable_written[cur_file.filename][key] = (wr_file_name, reuse_offset)
+                            break
+                    cur_file_cps[cp.guid_num] = (cur_file.filename, cur_file_offset, cp.offset, cp.offset + cp.size)
+                    cur_file_offset += cp.size

-            for guid, value in cur_file_cps.items():
-                cur_written_cps[guid].append(value)
+                for guid, value in cur_file_cps.items():
+                    cur_written_cps[guid].append(value)

         last_cache_size = current_cache_size = 0
         # set to determine whether a file is currently cached or not
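
For context, the reuse check this patch gates behind read_files is an interval-containment test: a chunk part may be read back from an already written file only if some previously written chunk part of the same chunk wholly contains it, in which case the chunk's reference count is decremented and a (file, offset) location is recorded. A minimal standalone sketch of that test follows; the names are hypothetical, and ChunkPart is a simplified stand-in for legendary's manifest class, not the real type:

    from collections import namedtuple

    # simplified stand-in for a manifest chunk part: source chunk id,
    # offset inside that chunk, and number of bytes covered
    ChunkPart = namedtuple('ChunkPart', 'guid_num offset size')

    def find_reuse(cp, written_parts):
        """Return (file_name, file_offset) if `cp` is wholly contained in an
        already written chunk part of the same chunk, else None.

        `written_parts` holds (file_name, file_offset, cp_offset, cp_end_offset)
        tuples, playing the role of cur_written_cps[cp.guid_num] in the patch.
        """
        cp_end = cp.offset + cp.size
        for file_name, file_offset, wr_offset, wr_end in written_parts:
            # wholly contained: the written part starts at or before `cp`
            # and ends at or after it
            if wr_offset <= cp.offset and wr_end >= cp_end:
                # translate the chunk-relative offset into a file offset
                return file_name, file_offset + (cp.offset - wr_offset)
        return None

    # bytes 0..100 of chunk 7 were already written to 'a.bin' at file offset 0
    written = [('a.bin', 0, 0, 100)]
    print(find_reuse(ChunkPart(7, 10, 30), written))  # ('a.bin', 10)
    print(find_reuse(ChunkPart(7, 90, 30), written))  # None, runs past the end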
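
The core.py change pairs the new option with a fallback: analysis first runs with the caller's read_files setting, and only if that raises MemoryError does it retry with read_files=True, trading RAM for disk I/O. Note that the patch also constructs a fresh DLManager for the retry, since the failed run may have left the first manager's state partially consumed. The shape of that fallback, reduced to a hedged sketch in which make_manager is a hypothetical factory standing in for the DLManager construction:

    import logging

    log = logging.getLogger('example')

    def analyze_with_fallback(make_manager, read_files=False):
        """Run analysis in-memory first; on MemoryError, retry once with
        read_files=True. Mirrors the fallback shape in core.py, not its API."""
        try:
            return make_manager().run_analysis(read_files=read_files)
        except MemoryError:
            if read_files:
                raise  # already reading from files, nothing left to trade off
            log.warning('Memory error encountered, retrying with file read enabled...')
            return make_manager().run_analysis(read_files=True)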