"""Normalise and de-duplicate magnet links read from a text file.

Each input line that starts with the BitTorrent magnet prefix is reduced to
``magnet:?xt=urn:btih:<lowercase hex hash>&dn=<cleaned title>``; the first
occurrence of each hash wins and the results are appended to the output file.
"""
import string

MAGNET_PREFIX = 'magnet:?xt=urn:btih:'
# What MAGNET_PREFIX looks like once punctuation has been stripped from it.
STRIPPED_PREFIX = 'magnetxturnbtih'
_HEX_DIGITS = frozenset(string.hexdigits)


def fix(line, data):
    """Parse one magnet link and append its parts to ``data``.

    Args:
        line: A string assumed to start with ``magnet:?xt=urn:btih:``; the
            first 20 characters are skipped without being checked.
        data: List that receives ``[prefix, hash, '&dn=', title]`` entries.

    Returns:
        The same ``data`` list. It is returned unchanged when the hash is
        empty or not purely hexadecimal, so ``data = fix(line, data)`` never
        loses the accumulator.
    """
    remainder = line[len(MAGNET_PREFIX):]
    # The hash runs from the end of the prefix to the first '&', or to the
    # end of the line when there are no further parameters.
    info_hash, _, params = remainder.partition('&')
    info_hash = info_hash.lower()
    # Stricter than int(x, 16), which also accepts '0x', signs and '_'.
    if not info_hash or not set(info_hash) <= _HEX_DIGITS:
        return data

    # The title is the value of the first 'dn=' parameter; anything after the
    # next '&' (trackers etc.) is dropped.
    title = ''
    for param in params.split('&'):
        if param.startswith('dn='):
            title = param[len('dn='):]
            break
    title = ''.join(ch for ch in title if ord(ch) < 128)  # strip non-ascii

    data.append([MAGNET_PREFIX, info_hash, '&dn=', title])
    return data


def _extract_magnets(line):
    """Split ``line`` into the magnet strings it contains.

    A line may carry a second magnet after the first, either intact or with
    its punctuation stripped (``magnetxturnbtih<hash>dn<title>``). Embedded
    magnets are returned before the primary one, matching the order in which
    they are recorded.
    """
    magnets = []

    split_at = line.find(MAGNET_PREFIX, len(MAGNET_PREFIX))
    if split_at != -1:
        magnets.append(line[split_at:])
        line = line[:split_at]

    split_at = line.find(STRIPPED_PREFIX, len(MAGNET_PREFIX))
    if split_at != -1:
        hash_start = split_at + len(STRIPPED_PREFIX)
        # A hex hash cannot contain 'n', so the first 'dn' after the prefix is
        # the stripped '&dn=' marker. Cutting at 'dn' rather than at 'n' keeps
        # the marker's 'd' (itself a hex digit) out of the hash.
        marker = line.find('dn', hash_start)
        if marker == -1:
            info_hash, title = line[hash_start:], ''
        else:
            info_hash, title = line[hash_start:marker], line[marker + 2:]
        magnets.append(MAGNET_PREFIX + info_hash + '&dn=' + title)
        line = line[:split_at]

    magnets.append(line)
    return magnets


def main(input_path='everything.txt', output_path='output.txt'):
    """Read magnets from ``input_path`` and append cleaned ones to ``output_path``.

    Returns:
        The list of output lines (without trailing newlines) that were written.
    """
    data = []  # lists within list
    with open(input_path, encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line.startswith(MAGNET_PREFIX):
                continue
            for magnet in _extract_magnets(line):
                fix(magnet, data)

    # Titles keep '.' and '-'; every other punctuation character is removed
    # and spaces become dots, all in a single translate pass.
    symbols = string.punctuation.replace('.', '').replace('-', '')
    title_table = str.maketrans(' ', '.', symbols)

    seen = set()
    results = []
    for prefix, info_hash, dn_marker, title in data:
        magnet_key = prefix + info_hash
        if magnet_key in seen:
            continue  # first occurrence of a hash wins
        seen.add(magnet_key)
        results.append(magnet_key + dn_marker + title.translate(title_table))

    # Append mode: earlier contents of the output file are preserved.
    with open(output_path, 'a', encoding='utf-8') as output:
        output.writelines(result + '\n' for result in results)
    return results


if __name__ == '__main__':
    main()