Keep track of previously downloaded files

Refactor the image download script to track and skip previously downloaded files, based on a unique key and consistent naming.

- Added logic to use the unique key from the JSON data for consistent filenames and tracking
- Improved the file existence check to skip downloading files that already exist in the directory
- Updated the JSON list to store the keys of downloaded files for persistent tracking across runs
- Incorporated a delay between downloads for smoother processing
- Cleaned up old redundant code and ensured consistency in file naming and tracking
- Added ASCII art to indicate the start of the download process
2025-06-25 09:21:23 +00:00 · 2024-09-29 02:11:45 +05:30 · 2024-09-29 02:11:45 +05:30 · 815a9eabdc
parent 82e50c64f0
commit 815a9eabdc
1 changed files with 79 additions and 17 deletions
--- a/mkbsd.js
+++ b/mkbsd.js
@ -1,14 +1,19 @@
-// Copyright 2024 Nadim Kobeissi
+const fs = require('fs');
-// Licensed under the WTFPL License
+const path = require('path');
-
+const crypto = require('crypto');
 const fs = require(`fs`);
 const path = require(`path`);
 async function main() {
 	const url = 'https://storage.googleapis.com/panels-api/data/20240916/media-1a-i-p~s';
-	const delay = (ms) => {
+	const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
-		return new Promise(resolve => setTimeout(resolve, ms));
+	const downloadedListPath = path.join(__dirname, 'downloadedList.json');
 	let downloadedList = [];
 	// Load existing downloaded list if it exists
 	if (fs.existsSync(downloadedListPath)) {
 		const downloadedData = await fs.promises.readFile(downloadedListPath, 'utf8');
 		downloadedList = JSON.parse(downloadedData);
 	}
 	try {
 		const response = await fetch(url);
 		if (!response.ok) {
@ -16,30 +21,63 @@ async function main() {
 		}
 		const jsonData = await response.json();
 		const data = jsonData.data;
 		if (!data) {
 			throw new Error('⛔ JSON does not have a "data" property at its root.');
 		}
-		const downloadDir = path.join(__dirname, 'downloads');
+
 		const downloadDir = path.join(__dirname, 'downloads-1');
 		if (!fs.existsSync(downloadDir)) {
 			fs.mkdirSync(downloadDir);
 			console.info(`📁 Created directory: ${downloadDir}`);
 		}
-		let fileIndex = 1;
+
 		let downloadedCount = 0;
 		let skippedCount = 0;
 		for (const key in data) {
 			const subproperty = data[key];
 			if (subproperty && subproperty.dhd) {
 				// Use the unique key to track downloads and in the file name
 				const imageUrl = subproperty.dhd;
-				console.info(`🔍 Found image URL!`);
+				const imageName = `${extractNameFromUrl(imageUrl)}-${key}`;
 				await delay(100);
 				const ext = path.extname(new URL(imageUrl).pathname) || '.jpg';
-				const filename = `${fileIndex}${ext}`;
+				const filePath = path.join(downloadDir, `${imageName}${ext}`);
-				const filePath = path.join(downloadDir, filename);
+
-				await downloadImage(imageUrl, filePath);
+				// Check if the file already exists
-				console.info(`🖼️ Saved image to ${filePath}`);
+				if (fs.existsSync(filePath)) {
-				fileIndex++;
+					// If the file exists but the key is missing in the JSON, add it to avoid re-downloading
-				await delay(250);
+					if (!downloadedList.includes(key)) {
 						downloadedList.push(key);
 						console.info(`✅ Found existing file, added key to list: ${filePath}`);
 						await fs.promises.writeFile(downloadedListPath, JSON.stringify(downloadedList, null, 2));
 					}
 					skippedCount++;
 				} else {
 					// Download the image only if it doesn't exist
 					downloadedCount++;
 					console.info(`🔍 Found new image URL: ${imageUrl}`);
 					// Download the image
 					await downloadImage(imageUrl, filePath);
 					console.info(`🖼️ Saved image to ${filePath}`);
 					// Add the unique key to the downloaded list
 					downloadedList.push(key);
 					// Save the updated downloaded list to JSON file
 					await fs.promises.writeFile(downloadedListPath, JSON.stringify(downloadedList, null, 2));
 					console.info(`📄 Updated downloaded list with key: ${key}`);
 					// Delay for the next download
 					await delay(250);
 				}
 			}
 		}
 		console.log(`🚀 🚀 🚀 Downloaded ${downloadedCount} new images`);
 		console.info(`✅ Skipped ${skippedCount} images that already exist`);
 	} catch (error) {
 		console.error(`Error: ${error.message}`);
 	}
@ -55,6 +93,30 @@ async function downloadImage(url, filePath) {
 	await fs.promises.writeFile(filePath, buffer);
 }
 /**
  * Derive a deterministic base name (without extension) from an image URL.
  *
  * The result has the form `<prefix>-<baseName>`:
  *  - `prefix` comes from the first path segment that starts with `a~`: the
  *    text after the first `~`, cut at the first `_`, lower-cased. When no
  *    such segment exists the prefix is `unknown`.
  *  - `baseName` comes from the last path segment: everything before the
  *    first `.` and the first `~`, stripped of non-alphanumeric characters
  *    and lower-cased.
  *
  * If the URL cannot be parsed, the error is logged and a hash-based name
  * (`image-<md5 hex>`) is returned instead, so the same input always maps to
  * the same name.
  *
  * @param {string} url - Absolute URL of the image.
  * @returns {string} Name suitable for use as the stem of a file name.
  */
 function extractNameFromUrl(url) {
 	try {
 		// `pathname` never includes the query string, so no '?' stripping is needed.
 		const urlParts = new URL(url).pathname.split('/');
 		const lastSegment = urlParts[urlParts.length - 1];
 		// Get the prefix part (e.g., 'hytha', 'outrunyouth', etc.)
 		const prefixPart = urlParts.find((part) => part.startsWith('a~'));
 		const prefix = prefixPart
 			? prefixPart.split('~')[1].split('_')[0].toLowerCase()
 			: 'unknown';
 		// Keep only the text before the first '.' and the first '~', then
 		// reduce it to lower-case alphanumerics.
 		const baseName = lastSegment
 			.split('.')[0]
 			.split('~')[0]
 			.replace(/[^a-zA-Z0-9]+/g, '')
 			.toLowerCase();
 		return `${prefix}-${baseName}`;
 	} catch (error) {
 		console.error(`Error extracting name from URL: ${error.message}, ${url}`);
 		// Fallback to a deterministic name using a hash. Coerce to string first:
 		// Hash#update rejects non-string/non-buffer input (e.g. undefined), which
 		// would otherwise make the fallback itself throw.
 		const hash = crypto.createHash('md5').update(String(url)).digest('hex');
 		return `image-${hash}`;
 	}
 }
 function asciiArt() {
 	console.info(`
 /$$      /$$ /$$   /$$ /$$$$$$$   /$$$$$$  /$$$$$$$