Keep track of previously downloaded files

Refactor image download script to track and skip previously downloaded files based on unique key and consistent naming.

- Added logic to use unique key from JSON data for consistent filenames and tracking
- Improved file existence check to skip downloading files that already exist in the directory
- Updated JSON list to store keys of downloaded files for persistent tracking across runs
- Incorporated delay between downloads for smoother processing
- Cleaned up old redundant code and ensured consistency in file naming and tracking
- Added ASCII art to indicate the start of the download process
2025-10-16 12:07:34 +00:00 · 2024-09-29 02:11:45 +05:30 · 2024-09-29 02:11:45 +05:30 · 815a9eabdc
parent 82e50c64f0
commit 815a9eabdc
1 changed files with 79 additions and 17 deletions
--- a/mkbsd.js
+++ b/mkbsd.js
@ -1,14 +1,19 @@
-// Copyright 2024 Nadim Kobeissi
-// Licensed under the WTFPL License
-
-const fs = require(`fs`);
-const path = require(`path`);
+const fs = require('fs');
+const path = require('path');
+const crypto = require('crypto');

 async function main() {
 	const url = 'https://storage.googleapis.com/panels-api/data/20240916/media-1a-i-p~s';
-	const delay = (ms) => {
-		return new Promise(resolve => setTimeout(resolve, ms));
+	const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
+	const downloadedListPath = path.join(__dirname, 'downloadedList.json');
+	let downloadedList = [];
+
+	// Load existing downloaded list if it exists
+	if (fs.existsSync(downloadedListPath)) {
+		const downloadedData = await fs.promises.readFile(downloadedListPath, 'utf8');
+		downloadedList = JSON.parse(downloadedData);
 	}
+
 	try {
 		const response = await fetch(url);
 		if (!response.ok) {
@ -16,30 +21,63 @@ async function main() {
 		}
 		const jsonData = await response.json();
 		const data = jsonData.data;
+
 		if (!data) {
 			throw new Error('⛔ JSON does not have a "data" property at its root.');
 		}
-		const downloadDir = path.join(__dirname, 'downloads');
+
+		const downloadDir = path.join(__dirname, 'downloads-1');
 		if (!fs.existsSync(downloadDir)) {
 			fs.mkdirSync(downloadDir);
 			console.info(`📁 Created directory: ${downloadDir}`);
 		}
-		let fileIndex = 1;
+
+		let downloadedCount = 0;
+		let skippedCount = 0;
+
 		for (const key in data) {
 			const subproperty = data[key];
 			if (subproperty && subproperty.dhd) {
+				// Use the unique key to track downloads and in the file name
 				const imageUrl = subproperty.dhd;
-				console.info(`🔍 Found image URL!`);
-				await delay(100);
+				const imageName = `${extractNameFromUrl(imageUrl)}-${key}`;
 				const ext = path.extname(new URL(imageUrl).pathname) || '.jpg';
-				const filename = `${fileIndex}${ext}`;
-				const filePath = path.join(downloadDir, filename);
+				const filePath = path.join(downloadDir, `${imageName}${ext}`);
+
+				// Check if the file already exists
+				if (fs.existsSync(filePath)) {
+					// If the file exists but the key is missing in the JSON, add it to avoid re-downloading
+					if (!downloadedList.includes(key)) {
+						downloadedList.push(key);
+						console.info(`✅ Found existing file, added key to list: ${filePath}`);
+						await fs.promises.writeFile(downloadedListPath, JSON.stringify(downloadedList, null, 2));
+					}
+					skippedCount++;
+				} else {
+					// Download the image only if it doesn't exist
+					downloadedCount++;
+					console.info(`🔍 Found new image URL: ${imageUrl}`);
+
+					// Download the image
 					await downloadImage(imageUrl, filePath);
 					console.info(`🖼️ Saved image to ${filePath}`);
-				fileIndex++;
+
+					// Add the unique key to the downloaded list
+					downloadedList.push(key);
+
+					// Save the updated downloaded list to JSON file
+					await fs.promises.writeFile(downloadedListPath, JSON.stringify(downloadedList, null, 2));
+					console.info(`📄 Updated downloaded list with key: ${key}`);
+
+					// Delay for the next download
 					await delay(250);
 				}
 			}
+		}
+
+		console.log(`🚀 🚀 🚀 Downloaded ${downloadedCount} new images`);
+		console.info(`✅ Skipped ${skippedCount} images that already exist`);
+
 	} catch (error) {
 		console.error(`Error: ${error.message}`);
 	}
@ -55,6 +93,30 @@ async function downloadImage(url, filePath) {
 	await fs.promises.writeFile(filePath, buffer);
 }

+// Build a "<prefix>-<basename>" label for an image URL; main() uses it in the saved file's name.
+// If anything in the parsing throws, log it and return "image-<md5 hex of url>" instead.
+function extractNameFromUrl(url) {
+	try {
+		const segments = new URL(url).pathname.split('/');
+		// File name = final path segment, cut at any '?'.
+		const [fileName] = segments[segments.length - 1].split('?');
+		// Prefix comes from the first segment starting with 'a~': the text after its first '~'
+		// (up to the next '~'), cut at the first '_', lowercased. No such segment -> 'unknown'.
+		const tagged = segments.find((segment) => segment.startsWith('a~'));
+		let prefix = 'unknown';
+		if (tagged) {
+			prefix = tagged.split('~')[1].split('_')[0].toLowerCase();
+		}
+		// Base name = file name up to its first '.' and first '~', keeping only [a-zA-Z0-9], lowercased.
+		const [stem] = fileName.split('.');
+		const [head] = stem.split('~');
+		return [prefix, head.replace(/[^a-zA-Z0-9]+/g, '').toLowerCase()].join('-');
+	} catch (error) {
+		console.error(`Error extracting name from URL: ${error.message}, ${url}`);
+		const digest = crypto.createHash('md5').update(url).digest('hex');
+		return `image-${digest}`;
+	}
+}
+
 function asciiArt() {
 	console.info(`
 /$$      /$$ /$$   /$$ /$$$$$$$   /$$$$$$  /$$$$$$$