From 6c6386913ef603b4eddcf7224d82fdae231e5aef Mon Sep 17 00:00:00 2001 From: Zed <zedeus@pm.me> Date: Tue, 11 Jan 2022 03:10:42 +0100 Subject: [PATCH] Remove nim-regex dependency, improve performance --- nitter.nimble | 1 - src/formatters.nim | 53 +++++++++++++++++++++++---------------------- src/parserutils.nim | 9 ++++---- src/utils.nim | 2 +- 4 files changed, 33 insertions(+), 32 deletions(-) diff --git a/nitter.nimble b/nitter.nimble index 428c308..ce9d783 100644 --- a/nitter.nimble +++ b/nitter.nimble @@ -14,7 +14,6 @@ requires "nim >= 1.4.8" requires "jester >= 0.5.0" requires "karax#c71bc92" requires "sass#e683aa1" -requires "regex#eeefb4f" requires "nimcrypto#a5742a9" requires "markdown#abdbe5e" requires "packedjson#d11d167" diff --git a/src/formatters.nim b/src/formatters.nim index 161505d..8ac61b7 100644 --- a/src/formatters.nim +++ b/src/formatters.nim @@ -1,10 +1,17 @@ # SPDX-License-Identifier: AGPL-3.0-only import strutils, strformat, times, uri, tables, xmltree, htmlparser, htmlgen -import std/enumerate -import regex +import std/[enumerate, re] import types, utils, query const + cards = "cards.twitter.com/cards" + tco = "https://t.co" + twitter = parseUri("https://twitter.com") + +let + twRegex = re"(?<=(?<!\S)https:\/\/|(?<=\s))(www\.|mobile\.)?twitter\.com" + twLinkRegex = re"""<a href="https:\/\/twitter.com([^"]+)">twitter\.com(\S+)</a>""" + ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)" igRegex = re"(www\.)?instagram\.com" @@ -15,20 +22,11 @@ const # Images aren't supported due to errors from Teddit when the image # wasn't first displayed via a post on the Teddit instance. - twRegex = re"(?<=(?<!\S)https:\/\/|(?<=\s))(www\.|mobile\.)?twitter\.com" - twLinkRegex = re"""<a href="https:\/\/twitter.com([^"]+)">twitter\.com(\S+)</a>""" - - cards = "cards.twitter.com/cards" - tco = "https://t.co" - wwwRegex = re"https?://(www[0-9]?\.)?" m3u8Regex = re"""url="(.+.m3u8)"""" - manifestRegex = re"\/(.+(.ts|.m4s|.m3u8|.vmap|.mp4))" userPicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$" extRegex = re"(\.[A-z]+)$" - illegalXmlRegex = re"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u10000-\u10FFFF]" - - twitter = parseUri("https://twitter.com") + illegalXmlRegex = re"(*UTF8)[^\x09\x0A\x0D\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]" proc getUrlPrefix*(cfg: Config): string = if cfg.useHttps: https & cfg.hostname @@ -54,45 +52,48 @@ proc shortLink*(text: string; length=28): string = proc replaceUrls*(body: string; prefs: Prefs; absolute=""): string = result = body - if prefs.replaceYouTube.len > 0 and ytRegex in result: + if prefs.replaceYouTube.len > 0 and "youtu" in result: result = result.replace(ytRegex, prefs.replaceYouTube) if prefs.replaceYouTube in result: result = result.replace("/c/", "/") - if prefs.replaceTwitter.len > 0 and - (twRegex in result or twLinkRegex in result or tco in result): + if prefs.replaceTwitter.len > 0 and ("twitter.com" in body or tco in body): result = result.replace(tco, https & prefs.replaceTwitter & "/t.co") result = result.replace(cards, prefs.replaceTwitter & "/cards") result = result.replace(twRegex, prefs.replaceTwitter) result = result.replace(twLinkRegex, a( prefs.replaceTwitter & "$2", href = https & prefs.replaceTwitter & "$1")) - if prefs.replaceReddit.len > 0 and (rdRegex in result or "redd.it" in result): + if prefs.replaceReddit.len > 0 and ("reddit.com" in result or "redd.it" in result): result = result.replace(rdShortRegex, prefs.replaceReddit & "/comments/") result = result.replace(rdRegex, prefs.replaceReddit) if prefs.replaceReddit in result and "/gallery/" in result: result = result.replace("/gallery/", "/comments/") - if prefs.replaceInstagram.len > 0 and igRegex in result: + if prefs.replaceInstagram.len > 0 and "instagram.com" in result: result = result.replace(igRegex, prefs.replaceInstagram) if absolute.len > 0 and "href" in result: result = result.replace("href=\"/", "href=\"" & absolute & "/") proc getM3u8Url*(content: string): string = - var m: RegexMatch - if content.find(m3u8Regex, m): - result = content[m.group(0)[0]] + var matches: array[1, string] + if re.find(content, m3u8Regex, matches) != -1: + result = matches[0] proc proxifyVideo*(manifest: string; proxy: bool): string = - proc cb(m: RegexMatch; s: string): string = - result = "https://video.twimg.com/" & s[m.group(0)[0]] - if proxy: result = getVidUrl(result) - result = manifest.replace(manifestRegex, cb) + var replacements: seq[(string, string)] + for line in manifest.splitLines: + let url = + if line.startsWith("#EXT-X-MAP:URI"): line[16 .. ^2] + else: line + if url[0] == '/': + let path = "https://video.twimg.com" & url + replacements.add (url, if proxy: path.getVidUrl else: path) + return manifest.multiReplace(replacements) proc getUserPic*(userPic: string; style=""): string = - let pic = userPic.replace(userPicRegex, "$2") - pic.replace(extRegex, style & "$1") + userPic.replacef(userPicRegex, "$2").replacef(extRegex, style & "$1") proc getUserPic*(profile: Profile; style=""): string = getUserPic(profile.userPic, style) diff --git a/src/parserutils.nim b/src/parserutils.nim index f7a5b17..aae3dfc 100644 --- a/src/parserutils.nim +++ b/src/parserutils.nim @@ -1,9 +1,10 @@ # SPDX-License-Identifier: AGPL-3.0-only import strutils, times, macros, htmlgen, unicode, options, algorithm -import regex, packedjson +import std/re +import packedjson import types, utils, formatters -const +let unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})" unReplace = "$1<a href=\"/$2\">@$2</a>" @@ -213,8 +214,8 @@ proc expandProfileEntities*(profile: var Profile; js: JsonNode) = replacements.sort(cmp) profile.bio = orig.replacedWith(replacements, 0 .. orig.len) - profile.bio = profile.bio.replace(unRegex, unReplace) - .replace(htRegex, htReplace) + profile.bio = profile.bio.replacef(unRegex, unReplace) + .replacef(htRegex, htReplace) proc expandTweetEntities*(tweet: Tweet; js: JsonNode) = let diff --git a/src/utils.nim b/src/utils.nim index 02d8288..9c8414d 100644 --- a/src/utils.nim +++ b/src/utils.nim @@ -1,6 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-only import strutils, strformat, uri, tables, base64 -import nimcrypto, regex +import nimcrypto var hmacKey: string