mirror of
https://github.com/zedeus/nitter.git
synced 2025-01-03 15:25:31 +00:00
Add experimental user parser
This commit is contained in:
parent
fcfc1ef497
commit
cdf49dcddd
|
@ -7,6 +7,7 @@
|
||||||
# disable annoying warnings
|
# disable annoying warnings
|
||||||
warning("GcUnsafe2", off)
|
warning("GcUnsafe2", off)
|
||||||
hint("XDeclaredButNotUsed", off)
|
hint("XDeclaredButNotUsed", off)
|
||||||
|
hint("XCannotRaiseY", off)
|
||||||
hint("User", off)
|
hint("User", off)
|
||||||
|
|
||||||
const
|
const
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
import asyncdispatch, httpclient, uri, strutils
|
import asyncdispatch, httpclient, uri, strutils
|
||||||
import packedjson
|
import packedjson
|
||||||
import types, query, formatters, consts, apiutils, parser
|
import types, query, formatters, consts, apiutils, parser
|
||||||
|
import experimental/parser/user
|
||||||
|
|
||||||
proc getGraphListBySlug*(name, list: string): Future[List] {.async.} =
|
proc getGraphListBySlug*(name, list: string): Future[List] {.async.} =
|
||||||
let
|
let
|
||||||
|
@ -32,14 +33,14 @@ proc getListMembers*(list: List; after=""): Future[Result[Profile]] {.async.} =
|
||||||
proc getProfile*(username: string): Future[Profile] {.async.} =
|
proc getProfile*(username: string): Future[Profile] {.async.} =
|
||||||
let
|
let
|
||||||
ps = genParams({"screen_name": username})
|
ps = genParams({"screen_name": username})
|
||||||
js = await fetch(userShow ? ps, Api.userShow)
|
json = await fetchRaw(userShow ? ps, Api.userShow)
|
||||||
result = parseUserShow(js, username=username)
|
result = parseUser(json)
|
||||||
|
|
||||||
proc getProfileById*(userId: string): Future[Profile] {.async.} =
|
proc getProfileById*(userId: string): Future[Profile] {.async.} =
|
||||||
let
|
let
|
||||||
ps = genParams({"user_id": userId})
|
ps = genParams({"user_id": userId})
|
||||||
js = await fetch(userShow ? ps, Api.userShow)
|
json = await fetchRaw(userShow ? ps, Api.userShow)
|
||||||
result = parseUserShow(js, id=userId)
|
result = parseUser(json)
|
||||||
|
|
||||||
proc getTimeline*(id: string; after=""; replies=false): Future[Timeline] {.async.} =
|
proc getTimeline*(id: string; after=""; replies=false): Future[Timeline] {.async.} =
|
||||||
let
|
let
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
import httpclient, asyncdispatch, options, times, strutils, uri
|
import httpclient, asyncdispatch, options, sequtils, strutils, uri
|
||||||
import packedjson, zippy
|
import jsony, packedjson, zippy
|
||||||
import types, tokens, consts, parserutils, http_pool
|
import types, tokens, consts, parserutils, http_pool
|
||||||
|
from experimental/types/common import Errors, ErrorObj
|
||||||
|
|
||||||
const
|
const
|
||||||
rlRemaining = "x-rate-limit-remaining"
|
rlRemaining = "x-rate-limit-remaining"
|
||||||
|
@ -40,7 +41,14 @@ proc genHeaders*(token: Token = nil): HttpHeaders =
|
||||||
"DNT": "1"
|
"DNT": "1"
|
||||||
})
|
})
|
||||||
|
|
||||||
proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
|
template updateToken() =
  ## Stores the rate-limit state reported by the response on the token.
  ##
  ## Expects `api`, `resp` and `token` to be visible where the template is
  ## expanded. Nothing is recorded for `Api.search`, or when the response has
  ## no `rlRemaining` header.
  if api != Api.search and resp.headers.hasKey(rlRemaining):
    token.setRateLimit(
      api,
      parseInt(resp.headers[rlRemaining]),
      parseInt(resp.headers[rlReset]))
|
||||||
|
|
||||||
|
template fetchImpl(result, fetchBody) {.dirty.} =
|
||||||
once:
|
once:
|
||||||
pool = HttpPool()
|
pool = HttpPool()
|
||||||
|
|
||||||
|
@ -48,37 +56,21 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
|
||||||
if token.tok.len == 0:
|
if token.tok.len == 0:
|
||||||
raise rateLimitError()
|
raise rateLimitError()
|
||||||
|
|
||||||
let headers = genHeaders(token)
|
|
||||||
try:
|
try:
|
||||||
var resp: AsyncResponse
|
var resp: AsyncResponse
|
||||||
var body = pool.use(headers):
|
result = pool.use(genHeaders(token)):
|
||||||
resp = await c.get($url)
|
resp = await c.get($url)
|
||||||
await resp.body
|
await resp.body
|
||||||
|
|
||||||
if body.len > 0:
|
if result.len > 0:
|
||||||
if resp.headers.getOrDefault("content-encoding") == "gzip":
|
if resp.headers.getOrDefault("content-encoding") == "gzip":
|
||||||
body = uncompress(body, dfGzip)
|
result = uncompress(result, dfGzip)
|
||||||
else:
|
else:
|
||||||
echo "non-gzip body, url: ", url, ", body: ", body
|
echo "non-gzip body, url: ", url, ", body: ", result
|
||||||
|
|
||||||
if body.startsWith('{') or body.startsWith('['):
|
fetchBody
|
||||||
result = parseJson(body)
|
|
||||||
else:
|
|
||||||
echo resp.status, ": ", body
|
|
||||||
result = newJNull()
|
|
||||||
|
|
||||||
if api != Api.search and resp.headers.hasKey(rlRemaining):
|
release(token, used=true)
|
||||||
let
|
|
||||||
remaining = parseInt(resp.headers[rlRemaining])
|
|
||||||
reset = parseInt(resp.headers[rlReset])
|
|
||||||
token.setRateLimit(api, remaining, reset)
|
|
||||||
|
|
||||||
if result.getError notin {invalidToken, forbidden, badToken}:
|
|
||||||
release(token, used=true)
|
|
||||||
else:
|
|
||||||
echo "fetch error: ", result.getError
|
|
||||||
release(token, invalid=true)
|
|
||||||
raise rateLimitError()
|
|
||||||
|
|
||||||
if resp.status == $Http400:
|
if resp.status == $Http400:
|
||||||
raise newException(InternalError, $url)
|
raise newException(InternalError, $url)
|
||||||
|
@ -89,3 +81,35 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
|
||||||
if "length" notin e.msg and "descriptor" notin e.msg:
|
if "length" notin e.msg and "descriptor" notin e.msg:
|
||||||
release(token, invalid=true)
|
release(token, invalid=true)
|
||||||
raise rateLimitError()
|
raise rateLimitError()
|
||||||
|
|
||||||
|
proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
  ## Requests `url` via `fetchImpl` and returns the response body as JSON.
  ##
  ## A body that does not start with `{` or `[` is echoed together with the
  ## response status and produces a JSON null node instead. When the parsed
  ## result reports `invalidToken`, `forbidden` or `badToken`, the token is
  ## released as invalid and a rate-limit error is raised.
  var body: string
  fetchImpl body:
    if not (body.startsWith('{') or body.startsWith('[')):
      # Not JSON: log what came back and hand callers a null node.
      echo resp.status, ": ", body
      result = newJNull()
    else:
      result = parseJson(body)

    updateToken()

    let errorCode = result.getError
    if errorCode in {invalidToken, forbidden, badToken}:
      echo "fetch error: ", result.getError
      release(token, invalid=true)
      raise rateLimitError()
|
||||||
|
|
||||||
|
proc fetchRaw*(url: Uri; api: Api): Future[string] {.async.} =
  ## Requests `url` via `fetchImpl` and returns the response body as a string.
  ##
  ## A body that does not start with `{` or `[` is echoed together with the
  ## response status and then emptied. When the body starts with `{"errors`
  ## and any listed error is `invalidToken`, `forbidden` or `badToken`, the
  ## token is released as invalid and a rate-limit error is raised.
  fetchImpl result:
    if not result.startsWith('{') and not result.startsWith('['):
      echo resp.status, ": ", result
      result.setLen(0)

    updateToken()

    if result.startsWith("{\"errors"):
      let errors = fromJson(result, Errors).errors
      if errors.anyIt(it.code in {invalidToken, forbidden, badToken}):
        echo "fetch error: ", errors
        release(token, invalid=true)
        raise rateLimitError()
|
||||||
|
|
67
src/experimental/parser/slices.nim
Normal file
67
src/experimental/parser/slices.nim
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
import std/[macros, htmlgen, unicode]
|
||||||
|
import ../types/common
|
||||||
|
import ".."/../[formatters, utils]
|
||||||
|
|
||||||
|
type
  # How the text covered by a `ReplaceSlice` is rewritten.
  ReplaceSliceKind = enum
    rkRemove, rkUrl, rkHashtag, rkMention

  ReplaceSlice* = object
    ## A span of a rune sequence scheduled for replacement.
    slice: Slice[int]       # inclusive index range into the rune sequence
    kind: ReplaceSliceKind  # which replacement to apply
    url, display: string    # link target and display text/title, when used

proc cmp*(x, y: ReplaceSlice): int =
  ## Orders replacement slices by their start index.
  ##
  ## Start is compared with start so that the ordering is consistent:
  ## `cmp(x, x)` is 0 and `cmp(x, y)` is the opposite sign of `cmp(y, x)`,
  ## which a sort comparator requires. For non-overlapping slices this gives
  ## the same order as comparing `x`'s start with `y`'s end.
  cmp(x.slice.a, y.slice.a)
|
||||||
|
|
||||||
|
proc dedupSlices*(s: var seq[ReplaceSlice]) =
  ## Drops every slice that starts at the same index as a slice stored at a
  ## lower position in `s`, in place.
  ##
  ## Removal uses `del`, which moves the last element into the freed
  ## position, so the order of the surviving elements is not preserved.
  var
    remaining = s.len
    keep = 0
  while keep < remaining:
    var probe = keep + 1
    while probe < remaining:
      if s[probe].slice.a != s[keep].slice.a:
        inc probe
      else:
        # The element moved into `probe` has not been checked yet, so the
        # index is deliberately not advanced here.
        s.del probe
        dec remaining
    inc keep
|
||||||
|
|
||||||
|
proc extractUrls*(result: var seq[ReplaceSlice]; url: Url;
                  textLen: int; hideTwitter = false) =
  ## Appends the replacement for one URL entity to `result`.
  ##
  ## The entity covers `url.indices[0] ..< url.indices[1]`. With
  ## `hideTwitter` set, a Twitter link whose end index reaches `textLen` is
  ## scheduled for removal if it starts before `textLen`, and is otherwise
  ## ignored. Every other URL becomes an `rkUrl` slice that links to the
  ## expanded URL and displays its shortened form.
  let
    target = url.expandedUrl
    stop = url.indices[1]
    span = url.indices[0] ..< stop
    hideTrailingTwitterLink =
      hideTwitter and stop >= textLen and target.isTwitterUrl

  if not hideTrailingTwitterLink:
    result.add ReplaceSlice(kind: rkUrl, url: target,
                            display: target.shortLink, slice: span)
  elif span.a < textLen:
    result.add ReplaceSlice(kind: rkRemove, slice: span)
|
||||||
|
|
||||||
|
proc replacedWith*(runes: seq[Rune]; repls: openArray[ReplaceSlice];
                   textSlice: Slice[int]): string =
  ## Renders `runes` as a string with each slice in `repls` replaced.
  ##
  ## Text between replacements is copied unchanged, starting at `textSlice.a`
  ## and stopping before `textSlice.b` (the upper bound is exclusive).
  ## `repls` is consumed in the order given; each slice is expected to start
  ## at or after the end of the previous one.
  result = newStringOfCap(runes.len)

  # Index of the first rune that has not been written to `result` yet.
  var cursor = textSlice.a

  for rep in repls:
    # Copy the untouched text in front of this replacement.
    result.add $runes[cursor ..< rep.slice.a]

    case rep.kind
    of rkRemove:
      discard
    of rkUrl:
      result.add a(rep.display, href = rep.url)
    of rkMention:
      result.add a($runes[rep.slice], href = rep.url, title = rep.display)
    of rkHashtag:
      # The first rune is the symbol itself; the remainder is the tag name.
      let
        name = $runes[rep.slice.a.succ .. rep.slice.b]
        symbol = $runes[rep.slice.a]
      result.add a(symbol & name, href = "/search?q=%23" & name)

    cursor = rep.slice.b.succ

  # Copy whatever follows the last replacement.
  let tail = cursor ..< textSlice.b
  if tail.a <= tail.b:
    result.add $runes[tail]
|
68
src/experimental/parser/user.nim
Normal file
68
src/experimental/parser/user.nim
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
import std/[algorithm, unicode, re, strutils]
|
||||||
|
import jsony
|
||||||
|
import utils, slices
|
||||||
|
import ../types/user as userType
|
||||||
|
from ../../types import Profile, Error
|
||||||
|
|
||||||
|
let
  # `@username` mentions: 1-15 ASCII letters, digits or underscores, at the
  # start of the text or after a character that is not a letter, digit or
  # one of `-_./?`. Letters are spelled `A-Za-z` because the range `A-z`
  # also covers the punctuation between `Z` and `a`, which made `@foo[bar`
  # link to a user named `foo[bar`.
  unRegex = re"(^|[^A-Za-z0-9-_./?])@([A-Za-z0-9_]{1,15})"
  unReplace = "$1<a href=\"/$2\">@$2</a>"

  # Hashtag/cashtag: a symbol followed by word characters, at the start of
  # the text or after a character that is not a word character or one of
  # `-_./?`. The link always searches for `#` followed by the tag text.
  htRegex = re"(^|[^\w-_./?])([##$])([\w_]+)"
  htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"
|
||||||
|
|
||||||
|
proc expandProfileEntities(profile: var Profile; user: User) =
  ## Fills in `profile.website` and rewrites `profile.bio` as HTML.
  ##
  ## The website is the expanded form of the first URL entity, if there is
  ## one. In the bio, URL entities from the description are replaced with
  ## links, after which `@mentions` and hashtags are linked by regex.
  let
    bioRunes = toRunes(profile.bio)
    websiteUrls = user.entities.url.urls

  if websiteUrls.len > 0:
    profile.website = websiteUrls[0].expandedUrl

  var repls: seq[ReplaceSlice]
  for link in user.entities.description.urls:
    extractUrls(repls, link, bioRunes.high)

  dedupSlices(repls)
  repls.sort(cmp)

  var bio = replacedWith(bioRunes, repls, 0 .. bioRunes.len)
  bio = replacef(bio, unRegex, unReplace)
  bio = replacef(bio, htRegex, htReplace)
  profile.bio = bio
|
||||||
|
|
||||||
|
proc getBanner(user: User): string =
  ## Picks the banner value for `user`: the banner image URL with
  ## `/1500x500` appended when present, otherwise the profile link colour
  ## prefixed with `#`, otherwise the fixed colour `#161616`.
  if user.profileBannerUrl.len > 0:
    user.profileBannerUrl & "/1500x500"
  elif user.profileLinkColor.len > 0:
    '#' & user.profileLinkColor
  else:
    "#161616"
|
||||||
|
|
||||||
|
proc parseUser*(json: string): Profile =
  ## Builds a `Profile` from the raw JSON of a user lookup response.
  ##
  ## An error payload is handled first. `suspended` yields a profile with
  ## only `suspended` set, and `userNotFound` yields the default `Profile`
  ## value. Any other error is echoed and parsing continues.
  handleErrors:
    case error
    of userNotFound: return
    of suspended: return Profile(suspended: true)
    else: echo "[error - parseUser]: ", error

  let
    parsed = fromJson(json, User)
    joined = parseTwitterDate(parsed.createdAt)
    # Removing the `_normal` suffix leaves the URL without that size marker.
    avatar = getImageUrl(parsed.profileImageUrlHttps).replace("_normal", "")

  result = Profile(
    id: parsed.idStr,
    username: parsed.screenName,
    fullname: parsed.name,
    location: parsed.location,
    bio: parsed.description,
    following: parsed.friendsCount,
    followers: parsed.followersCount,
    tweets: parsed.statusesCount,
    likes: parsed.favouritesCount,
    media: parsed.mediaCount,
    verified: parsed.verified,
    protected: parsed.protected,
    joinDate: joined,
    banner: getBanner(parsed),
    userPic: avatar
  )

  expandProfileEntities(result, parsed)
|
22
src/experimental/parser/utils.nim
Normal file
22
src/experimental/parser/utils.nim
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
import std/[sugar, strutils, times]
|
||||||
|
import ../types/common
|
||||||
|
import ../../utils as uutils
|
||||||
|
|
||||||
|
template parseTime(time: string; f: static string; flen: int): DateTime =
  ## Parses `time` as UTC using format `f`. If `time` is not exactly `flen`
  ## characters long, the *calling proc* returns immediately with its
  ## default result.
  if time.len == flen:
    parse(time, f, utc())
  else:
    return

proc parseIsoDate*(date: string): DateTime =
  ## Parses a 20-character timestamp such as `2018-10-10T20:19:24Z`.
  ## Input of any other length gives the default `DateTime`.
  parseTime(date, "yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)

proc parseTwitterDate*(date: string): DateTime =
  ## Parses a 30-character timestamp such as
  ## `Wed Oct 10 08:19:24 +0000 2018`.
  ## Input of any other length gives the default `DateTime`.
  parseTime(date, "ddd MMM dd hh:mm:ss \'+0000\' yyyy", 30)
|
||||||
|
|
||||||
|
proc getImageUrl*(url: string): string =
  ## Returns `url` with a leading `twimg` prefix removed, followed by a
  ## leading `https` prefix, when present.
  result = url
  result.removePrefix(twimg)
  result.removePrefix(https)
|
||||||
|
|
||||||
|
template handleErrors*(body) =
  ## Runs `body` when the `json` string in the caller's scope is an error
  ## payload, i.e. it starts with `{"errors` and lists at least one error.
  ##
  ## Inside `body` the code of the first listed error is available as
  ## `error`. An empty error list is skipped instead of indexing into it,
  ## which would raise an `IndexDefect`.
  if json.startsWith("{\"errors"):
    let errors = json.fromJson(Errors).errors
    if errors.len > 0:
      let error {.inject.} = errors[0].code
      body
|
30
src/experimental/types/common.nim
Normal file
30
src/experimental/types/common.nim
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from ../../types import Error
|
||||||
|
|
||||||
|
type
  Url* = object
    ## One URL entity from an API response.
    url*, expandedUrl*, displayUrl*: string
    indices*: array[2, int]  # start and end position of the entity

  # Numeric error codes. `ErrorObj.code` below is typed as `Error` (imported
  # from the main types module), not as this enum.
  ErrorCode* = enum
    null = 0
    noUserMatches = 17
    protectedUser = 22
    couldntAuth = 32
    doesntExist = 34
    userNotFound = 50
    suspended = 63
    rateLimited = 88
    invalidToken = 89
    listIdOrSlug = 112
    forbidden = 200
    badToken = 239
    noCsrf = 353

  ErrorObj* = object
    ## One entry of an error payload's `errors` array.
    code*: Error
    message*: string

  Errors* = object
    ## Top-level shape of an error payload: `{"errors": [...]}`.
    errors*: seq[ErrorObj]
|
28
src/experimental/types/user.nim
Normal file
28
src/experimental/types/user.nim
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
import common
|
||||||
|
|
||||||
|
type
  User* = object
    ## Raw user object, deserialised directly from JSON.
    idStr*, name*, screenName*, location*, description*: string
    entities*: Entities
    createdAt*: string  # kept as text; parsed into a date by the user parser
    followersCount*, friendsCount*, favouritesCount*: int
    statusesCount*, mediaCount*: int
    verified*, protected*: bool
    profileBannerUrl*, profileImageUrlHttps*, profileLinkColor*: string

  Entities* = object
    ## URL entities attached to a user.
    url*: Urls          # entities for the profile's website field
    description*: Urls  # entities found in the profile description

  Urls* = object
    ## Wrapper around a list of URL entities.
    urls*: seq[Url]
|
Loading…
Reference in a new issue