From 6c6386913ef603b4eddcf7224d82fdae231e5aef Mon Sep 17 00:00:00 2001
From: Zed <zedeus@pm.me>
Date: Tue, 11 Jan 2022 03:10:42 +0100
Subject: [PATCH] Remove nim-regex dependency, improve performance

---
 nitter.nimble       |  1 -
 src/formatters.nim  | 53 +++++++++++++++++++++++----------------------
 src/parserutils.nim |  9 ++++----
 src/utils.nim       |  2 +-
 4 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/nitter.nimble b/nitter.nimble
index 428c308..ce9d783 100644
--- a/nitter.nimble
+++ b/nitter.nimble
@@ -14,7 +14,6 @@ requires "nim >= 1.4.8"
 requires "jester >= 0.5.0"
 requires "karax#c71bc92"
 requires "sass#e683aa1"
-requires "regex#eeefb4f"
 requires "nimcrypto#a5742a9"
 requires "markdown#abdbe5e"
 requires "packedjson#d11d167"
diff --git a/src/formatters.nim b/src/formatters.nim
index 161505d..8ac61b7 100644
--- a/src/formatters.nim
+++ b/src/formatters.nim
@@ -1,10 +1,17 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 import strutils, strformat, times, uri, tables, xmltree, htmlparser, htmlgen
-import std/enumerate
-import regex
+import std/[enumerate, re]
 import types, utils, query
 
 const
+  cards = "cards.twitter.com/cards"
+  tco = "https://t.co"
+  twitter = parseUri("https://twitter.com")
+
+let
+  twRegex = re"(?<=(?<!\S)https:\/\/|(?<=\s))(www\.|mobile\.)?twitter\.com"
+  twLinkRegex = re"""<a href="https:\/\/twitter.com([^"]+)">twitter\.com(\S+)</a>"""
+
   ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)"
   igRegex = re"(www\.)?instagram\.com"
 
@@ -15,20 +22,11 @@ const
   # Images aren't supported due to errors from Teddit when the image
   # wasn't first displayed via a post on the Teddit instance.
 
-  twRegex = re"(?<=(?<!\S)https:\/\/|(?<=\s))(www\.|mobile\.)?twitter\.com"
-  twLinkRegex = re"""<a href="https:\/\/twitter.com([^"]+)">twitter\.com(\S+)</a>"""
-
-  cards = "cards.twitter.com/cards"
-  tco = "https://t.co"
-
   wwwRegex = re"https?://(www[0-9]?\.)?"
   m3u8Regex = re"""url="(.+.m3u8)""""
-  manifestRegex = re"\/(.+(.ts|.m4s|.m3u8|.vmap|.mp4))"
   userPicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$"
   extRegex = re"(\.[A-z]+)$"
-  illegalXmlRegex = re"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u10000-\u10FFFF]"
-
-  twitter = parseUri("https://twitter.com")
+  illegalXmlRegex = re"(*UTF8)[^\x09\x0A\x0D\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]"
 
 proc getUrlPrefix*(cfg: Config): string =
   if cfg.useHttps: https & cfg.hostname
@@ -54,45 +52,48 @@ proc shortLink*(text: string; length=28): string =
 proc replaceUrls*(body: string; prefs: Prefs; absolute=""): string =
   result = body
 
-  if prefs.replaceYouTube.len > 0 and ytRegex in result:
+  if prefs.replaceYouTube.len > 0 and "youtu" in result:  # substring pre-check instead of a regex search
     result = result.replace(ytRegex, prefs.replaceYouTube)
     if prefs.replaceYouTube in result:
       result = result.replace("/c/", "/")
 
-  if prefs.replaceTwitter.len > 0 and
-     (twRegex in result or twLinkRegex in result or tco in result):
+  if prefs.replaceTwitter.len > 0 and ("twitter.com" in body or tco in body):  # pre-check reads the unmodified input, not result
     result = result.replace(tco, https & prefs.replaceTwitter & "/t.co")
     result = result.replace(cards, prefs.replaceTwitter & "/cards")
     result = result.replace(twRegex, prefs.replaceTwitter)
     result = result.replace(twLinkRegex, a(
       prefs.replaceTwitter & "$2", href = https & prefs.replaceTwitter & "$1"))
 
-  if prefs.replaceReddit.len > 0 and (rdRegex in result or "redd.it" in result):
+  if prefs.replaceReddit.len > 0 and ("reddit.com" in result or "redd.it" in result):  # substring pre-check; the regexes only run inside
     result = result.replace(rdShortRegex, prefs.replaceReddit & "/comments/")
     result = result.replace(rdRegex, prefs.replaceReddit)
     if prefs.replaceReddit in result and "/gallery/" in result:
       result = result.replace("/gallery/", "/comments/")
 
-  if prefs.replaceInstagram.len > 0 and igRegex in result:
+  if prefs.replaceInstagram.len > 0 and "instagram.com" in result:  # substring pre-check instead of a regex search
     result = result.replace(igRegex, prefs.replaceInstagram)
 
   if absolute.len > 0 and "href" in result:
     result = result.replace("href=\"/", "href=\"" & absolute & "/")
 
 proc getM3u8Url*(content: string): string =
-  var m: RegexMatch
-  if content.find(m3u8Regex, m):
-    result = content[m.group(0)[0]]
+  var captures: array[1, string]  # receives the single capture group of m3u8Regex
+  if re.find(content, m3u8Regex, captures) >= 0:  # find yields -1 when nothing matches
+    result = captures[0]
 
 proc proxifyVideo*(manifest: string; proxy: bool): string =
-  proc cb(m: RegexMatch; s: string): string =
-    result = "https://video.twimg.com/" & s[m.group(0)[0]]
-    if proxy: result = getVidUrl(result)
-  result = manifest.replace(manifestRegex, cb)
+  var replacements: seq[(string, string)]  # (relative URL as written, absolute or proxied URL)
+  for line in manifest.splitLines:
+    let url =
+      if line.startsWith("#EXT-X-MAP:URI"): line[16 .. ^2]  # drop the 16-char tag prefix and the final character
+      else: line
+    if url.startsWith('/'):  # safe on blank lines, where url[0] would raise IndexDefect
+      let path = "https://video.twimg.com" & url
+      replacements.add (url, if proxy: path.getVidUrl else: path)
+  return manifest.multiReplace(replacements)
 
 proc getUserPic*(userPic: string; style=""): string =
-  let pic = userPic.replace(userPicRegex, "$2")
-  pic.replace(extRegex, style & "$1")
+  replacef(replacef(userPic, userPicRegex, "$2"), extRegex, style & "$1")  # inner call runs first, then style goes before the extension
 
 proc getUserPic*(profile: Profile; style=""): string =
   getUserPic(profile.userPic, style)
diff --git a/src/parserutils.nim b/src/parserutils.nim
index f7a5b17..aae3dfc 100644
--- a/src/parserutils.nim
+++ b/src/parserutils.nim
@@ -1,9 +1,10 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 import strutils, times, macros, htmlgen, unicode, options, algorithm
-import regex, packedjson
+import std/re
+import packedjson
 import types, utils, formatters
 
-const
+let
   unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})"
   unReplace = "$1<a href=\"/$2\">@$2</a>"
 
@@ -213,8 +214,8 @@ proc expandProfileEntities*(profile: var Profile; js: JsonNode) =
   replacements.sort(cmp)
 
   profile.bio = orig.replacedWith(replacements, 0 .. orig.len)
-  profile.bio = profile.bio.replace(unRegex, unReplace)
-                           .replace(htRegex, htReplace)
+  profile.bio = profile.bio.replacef(unRegex, unReplace)
+                           .replacef(htRegex, htReplace)
 
 proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
   let
diff --git a/src/utils.nim b/src/utils.nim
index 02d8288..9c8414d 100644
--- a/src/utils.nim
+++ b/src/utils.nim
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 import strutils, strformat, uri, tables, base64
-import nimcrypto, regex
+import nimcrypto
 
 var
   hmacKey: string