Refactor image download logic to group images by artist name

- Extract the artist name from the URL (the segment after 'a~', up to the first underscore), dropping the unique ID suffix.
- Ensure all images from the same artist (e.g., 'justinmaller') are saved in a single folder.
- Support dynamic file extensions (e.g., .jpg, .png) when saving images.
- Create artist-specific directories based on the artist's name, ensuring proper organization of images.
This commit is contained in:
SimplyJanDE 2024-09-27 13:27:32 +02:00
parent 7fa0915766
commit 7ea8a0fb94
2 changed files with 28 additions and 22 deletions

View file

@ -15,11 +15,6 @@ async function main() {
if (!data) { if (!data) {
throw new Error('⛔ JSON does not have a "data" property at its root.'); throw new Error('⛔ JSON does not have a "data" property at its root.');
} }
const downloadDir = path.join(__dirname, 'downloads');
if (!fs.existsSync(downloadDir)) {
fs.mkdirSync(downloadDir);
console.info(`📁 Created directory: ${downloadDir}`);
}
for (const key in data) { for (const key in data) {
const subproperty = data[key]; const subproperty = data[key];
@ -27,10 +22,20 @@ async function main() {
const imageUrl = subproperty.dhd; const imageUrl = subproperty.dhd;
console.info(`🔍 Found image URL!`); console.info(`🔍 Found image URL!`);
// Extrahiere den Dateinamen aus der URL // Extrahiere den Künstlernamen vor dem Unterstrich
const artistNameMatch = imageUrl.match(/a~([^_/]+)/);
const artistName = artistNameMatch ? artistNameMatch[1] : 'unknown_artist';
const artistDir = path.join(__dirname, 'downloads', artistName);
if (!fs.existsSync(artistDir)) {
fs.mkdirSync(artistDir, { recursive: true });
console.info(`📁 Created directory: ${artistDir}`);
}
// Extrahiere den Dateinamen und die Endung
const urlPath = new URL(imageUrl).pathname; const urlPath = new URL(imageUrl).pathname;
const fileName = path.basename(urlPath, '.jpg'); // Name ohne '.jpg' const fileName = path.basename(urlPath); // Name inklusive Endung (z.B. .jpg oder .png)
const filePath = path.join(downloadDir, `${fileName}.jpg`); const filePath = path.join(artistDir, fileName);
await downloadImage(imageUrl, filePath); await downloadImage(imageUrl, filePath);
console.info(`🖼️ Saved image to ${filePath}`); console.info(`🖼️ Saved image to ${filePath}`);

View file

@ -1,5 +1,3 @@
# Licensed under the WTFPL License
import os import os
import time import time
import aiohttp import aiohttp
@ -34,20 +32,23 @@ async def main():
if not data: if not data:
raise Exception('⛔ JSON does not have a "data" property at its root.') raise Exception('⛔ JSON does not have a "data" property at its root.')
download_dir = os.path.join(os.getcwd(), 'downloads') for key, subproperty in data.items():
if not os.path.exists(download_dir):
os.makedirs(download_dir)
print(f"📁 Created directory: {download_dir}")
for file_index, (key, subproperty) in enumerate(data.items(), start=1):
if subproperty and subproperty.get('dhd'): if subproperty and subproperty.get('dhd'):
image_url = subproperty['dhd'] image_url = subproperty['dhd']
print(f"🔍 Found image URL!") print(f"🔍 Found image URL!")
parsed_url = urlparse(image_url)
# Extrahiere den Dateinamen ohne .jpg # Extrahiere den Künstlernamen vor dem Unterstrich
filename = os.path.basename(parsed_url.path).replace('.jpg', '') or f'image_{file_index}' parsed_url = urlparse(image_url)
file_path = os.path.join(download_dir, f"{filename}.jpg") artist_name = image_url.split('a~')[1].split('_')[0]
artist_dir = os.path.join(os.getcwd(), 'downloads', artist_name)
if not os.path.exists(artist_dir):
os.makedirs(artist_dir)
print(f"📁 Created directory: {artist_dir}")
# Extrahiere den Dateinamen und die Endung
filename = os.path.basename(parsed_url.path) # Name inklusive Endung
file_path = os.path.join(artist_dir, filename)
await download_image(session, image_url, file_path) await download_image(session, image_url, file_path)
print(f"🖼️ Saved image to {file_path}") print(f"🖼️ Saved image to {file_path}")