Out with the old

This commit is contained in:
Zed 2020-06-01 02:14:29 +02:00
parent 4bb92a5201
commit 4167ce458b
12 changed files with 0 additions and 1230 deletions

View file

@ -1,24 +0,0 @@
import uri
# Shared request constants: header values, base URIs and relative endpoint paths.
const
# Value used for the "accept-language" request header.
lang* = "en-US,en;q=0.9"
# Value sent in the "authorization" request header by API callers.
auth* = "Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw"
# "accept" header values for HTML and JSON requests respectively.
htmlAccept* = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3"
jsonAccept* = "application/json, text/javascript, */*; q=0.01"
# Base URIs that the relative paths below are joined onto with `/`.
base* = parseUri("https://twitter.com/")
apiBase* = parseUri("https://api.twitter.com/1.1/")
# Relative endpoint paths. `$1` / `$2` are placeholders that callers fill in
# with the `%` format operator.
timelineUrl* = "i/profiles/show/$1/timeline/tweets"
timelineMediaUrl* = "i/profiles/show/$1/media_timeline"
listUrl* = "$1/lists/$2/timeline"
listMembersUrl* = "$1/lists/$2/members"
profilePopupUrl* = "i/profiles/popup"
profileIntentUrl* = "intent/user"
searchUrl* = "i/search/timeline"
tweetUrl* = "status"
repliesUrl* = "i/$1/conversation/$2"
videoUrl* = "videos/tweet/config/$1.json"
tokenUrl* = "guest/activate.json"
cardUrl* = "i/cards/tfw/v1/$1"
# Card path plus the fixed query string used when requesting a poll card.
pollUrl* = cardUrl & "?cardname=poll2choice_text_only&lang=en"

View file

@ -1,65 +0,0 @@
import httpclient, asyncdispatch, htmlparser
import sequtils, strutils, json, uri
import ".."/[types, parser, parserutils, query]
import utils, consts, timeline, search
proc getListTimeline*(username, list, after, agent: string; media=true): Future[Timeline] {.async.} =
  ## Fetches one page of the tweet timeline of `list` owned by `username`.
  ##
  ## `after` is sent as `max_position` when non-empty. When the page has
  ## content, `minId` is overwritten with the id reported by `getLastId`.
  var params = @[
    ("include_available_features", "1"),
    ("include_entities", "1"),
    ("reset_error_state", "false")
  ]
  if after.len > 0:
    params.add(("max_position", after))

  let
    url = base / (listUrl % [username, list])
    json = await fetchJson(url ? params, genHeaders(agent, url))

  result = await finishTimeline(json, Query(), after, agent, media)
  if result.content.len > 0:
    result.minId = getLastId(result)
proc getListMembersSearch(username, list, after, agent: string): Future[Result[Profile]] {.async.} =
  ## Fetches a page of list members through the members "timeline" JSON
  ## endpoint and parses every `.account` element of its `items_html`.
  let
    referer = base / (listMembersUrl % [username, list])
    endpoint = referer / "timeline"
    headers = genHeaders({"x-push-with": "XMLHttpRequest"}, agent, referer, xml=true)

  var params = @[
    ("include_available_features", "1"),
    ("include_entities", "1"),
    ("reset_error_state", "false")
  ]
  if after.len > 0:
    params.add(("max_position", after))

  let json = await fetchJson(endpoint ? params, headers)
  result = getResult[Profile](json, Query(kind: userList), after)
  if json == nil or not json.hasKey("items_html"):
    return

  let itemsHtml = json["items_html"].to(string)
  # A lone newline is treated as "no more items".
  result.hasMore = itemsHtml != "\n"
  for account in parseHtml(itemsHtml).selectAll(".account"):
    result.content.add parseListProfile(account)
proc getListMembers*(username, list, after, agent: string): Future[Result[Profile]] {.async.} =
  ## Returns a page of members of `list` owned by `username`.
  ##
  ## A non-empty `after` cursor is delegated to `getListMembersSearch`;
  ## otherwise the members page is fetched as HTML and parsed directly.
  if after.len > 0:
    result = await getListMembersSearch(username, list, after, agent)
    return

  let
    pageUrl = base / (listMembersUrl % [username, list])
    page = await fetchHtml(pageUrl, genHeaders(agent, pageUrl))

  result = Result[Profile](
    minId: page.selectAttr(".stream-container", "data-min-position"),
    hasMore: page.select(".has-more-items") != nil,
    beginning: true,
    query: Query(kind: userList),
    content: page.selectAll(".account").mapIt(parseListProfile(it))
  )

View file

@ -1,159 +0,0 @@
import httpclient, asyncdispatch, times, sequtils, strutils, json, uri
import macros, options
import ".."/[types, parser, formatters, cache]
import utils, consts
# Module-level guest-token state, read and updated by getGuestToken and
# getVideoFetch below.
var
# Most recently obtained guest token ("" until one has been fetched).
guestToken = ""
# Incremented by getVideoFetch after a successful video fetch; reset to 0 on refresh.
tokenUses = 0
# getGuestToken refreshes the token once tokenUses reaches this value.
tokenMaxUses = 230
# Time of the last refresh attempt.
tokenUpdated: Time
# getGuestToken refreshes the token once it is older than this.
tokenLifetime = initDuration(minutes=20)
macro genMediaGet(media: untyped; token=false) =
  ## Generates three exported async procs for the media kind `media`
  ## (e.g. `video` -> `getVideos`, `getRepliesVideos`,
  ## `getConversationVideos`), each of which fans out to the single-item
  ## fetcher `get<Media>` (e.g. `getVideo`) and awaits all results.
  ##
  ## With `token=true` the generated procs obtain a guest token (unless one
  ## is passed in) and forward it to the single-item fetcher.
  let
    mediaName = capitalizeAscii($media)
    multi = ident("get" & mediaName & "s")
    convo = ident("getConversation" & mediaName & "s")
    replies = ident("getReplies" & mediaName & "s")
    single = ident("get" & mediaName)

  quote do:
    proc `multi`*(thread: Chain | Timeline; agent: string; token="") {.async.} =
      if thread == nil: return
      # Only tweets that actually carry this media kind need a fetch.
      var `media` = thread.content.filterIt(it.`media`.isSome)
      when `token`:
        var gToken = token
        if gToken.len == 0: gToken = await getGuestToken(agent)
        # Fixed: pass (tweet, agent, token) in the order the single-item
        # fetcher declares, and use the resolved `gToken` rather than the
        # possibly empty `token` parameter.
        await all(`media`.mapIt(`single`(it, agent, gToken)))
      else:
        await all(`media`.mapIt(`single`(it, agent)))

    proc `replies`*(replies: Result[Chain]; agent: string; token="") {.async.} =
      when `token`:
        var gToken = token
        if gToken.len == 0: gToken = await getGuestToken(agent)
        await all(replies.content.mapIt(`multi`(it, agent, token=gToken)))
      else:
        await all(replies.content.mapIt(`multi`(it, agent)))

    proc `convo`*(convo: Conversation; agent: string) {.async.} =
      var futs: seq[Future[void]]
      when `token`:
        # One token is fetched up front and shared by every sub-request.
        var token = await getGuestToken(agent)
        futs.add `single`(convo.tweet, agent, token)
        futs.add `multi`(convo.before, agent, token=token)
        futs.add `multi`(convo.after, agent, token=token)
        if convo.replies != nil:
          futs.add `replies`(convo.replies, agent, token=token)
      else:
        futs.add `single`(convo.tweet, agent)
        futs.add `multi`(convo.before, agent)
        futs.add `multi`(convo.after, agent)
        if convo.replies != nil:
          futs.add `replies`(convo.replies, agent)
      await all(futs)
proc getGuestToken(agent: string; force=false): Future[string] {.async.} =
  ## Returns a guest token, reusing the cached one while it is younger than
  ## `tokenLifetime`, has fewer than `tokenMaxUses` uses, and `force` is
  ## false. Otherwise requests a new token from `tokenUrl`.
  ##
  ## Returns "" when the request fails or the response carries no usable
  ## `guest_token`; the previously cached token is left untouched then.
  if getTime() - tokenUpdated < tokenLifetime and
     not force and tokenUses < tokenMaxUses:
    return guestToken

  # Record the attempt before the request is sent.
  tokenUpdated = getTime()
  tokenUses = 0

  let headers = genHeaders({"authorization": auth}, agent, base, lang=false)
  newClient()

  try:
    let res = await client.postContent($(apiBase / tokenUrl))
    # Parsing now happens inside the try, and `{}` + getStr tolerate a
    # missing key or non-string value, so a malformed response yields ""
    # instead of raising.
    result = parseJson(res){"guest_token"}.getStr
  except:
    # Best-effort, as before: any failure yields an empty token.
    return

  if result.len > 0:
    guestToken = result
proc getVideoVar(tweet: Tweet): var Option[Video] =
  ## Returns the mutable video slot of `tweet`: the card's `video` field when
  ## the tweet has a card, otherwise the tweet's own `video` field.
  if tweet.card.isNone():
    return tweet.video
  else:
    return get(tweet.card).video
proc getVideoFetch(tweet: Tweet; agent, token: string; retry=true): Future[Option[Video]] {.async.} =
  ## Fetches and parses the video config for `tweet`, caches it, and returns
  ## it. Returns `none` when the tweet has no video or the fetch fails twice.
  ##
  ## On the first failed fetch the guest token is force-refreshed (at most
  ## once per second) and the request is retried once with `guestToken`.
  if tweet.video.isNone():
    return

  let
    referer = base / getLink(tweet, focus=false)
    headers = genHeaders({"authorization": auth, "x-guest-token": token},
                         agent, referer, lang=false)
    endpoint = apiBase / (videoUrl % $tweet.id)
    json = await fetchJson(endpoint, headers)

  if json == nil:
    if not retry:
      return
    if getTime() - tokenUpdated > initDuration(seconds=1):
      tokenUpdated = getTime()
      discard await getGuestToken(agent, force=true)
    return await getVideoFetch(tweet, agent, guestToken, retry=false)

  # Title and description come from the already parsed tweet.
  var fetched = parseVideo(json, tweet.id)
  fetched.title = get(tweet.video).title
  fetched.description = get(tweet.video).description
  cache(fetched)

  result = some fetched
  inc tokenUses
proc videoIsInvalid(video: Video): bool =
  ## True when `video` is marked unavailable and has no URL.
  result = not (video.available or video.url.len != 0)
proc getVideo*(tweet: Tweet; agent, token: string; force=false) {.async.} =
  ## Populates the video of `tweet` from the cache, or by fetching it when
  ## nothing is cached. An empty `token` falls back to the module-level
  ## `guestToken`. `force` is accepted but not used by this proc.
  let activeToken = if token.len > 0: token else: guestToken

  var found = getCachedVideo(tweet.id)
  if found.isNone:
    found = await getVideoFetch(tweet, agent, activeToken)
  elif videoIsInvalid(get(found)) and tweet.gif.isSome:
    # gif was mistakenly parsed as a video: drop the cache entry and stop
    uncache(tweet.id)
    return

  getVideoVar(tweet) = found
  # For card tweets the video now lives on the card, so clear the tweet's own.
  if tweet.card.isSome:
    tweet.video = none Video
proc getPoll*(tweet: Tweet; agent: string) {.async.} =
  ## Fetches the poll card for `tweet` and replaces `tweet.poll` with the
  ## parsed result. Does nothing when the tweet has no poll or the fetch
  ## yields nil.
  if tweet.poll.isNone():
    return

  let
    referer = base / getLink(tweet, focus=false)
    headers = genHeaders(agent, referer, auth=true)
    page = await fetchHtml(base / (pollUrl % $tweet.id), headers)

  if page != nil:
    tweet.poll = some parsePoll(page)
proc getCard*(tweet: Tweet; agent: string) {.async.} =
  ## Fetches the card page for `tweet` and fills the existing card in place
  ## via `parseCard`. Does nothing when the tweet has no card or the fetch
  ## yields nil.
  if tweet.card.isNone():
    return

  let
    referer = base / getLink(tweet, focus=false)
    headers = genHeaders(agent, referer, auth=true)
    # The card query is requested with sensitive=false.
    cardQuery = get(tweet.card).query.replace("sensitive=true", "sensitive=false")
    page = await fetchHtml(base / cardQuery, headers)

  if page != nil:
    parseCard(get(tweet.card), page)
proc getPhotoRail*(username, agent: string; skip=false): Future[seq[GalleryPhoto]] {.async.} =
  ## Fetches the media timeline of `username` in photo-rail mode and returns
  ## the parsed photos. Returns an empty seq immediately when `skip` is true.
  if not skip:
    let
      headers = genHeaders(agent, base / username, xml=true)
      params = {"for_photo_rail": "true", "oldest_unread_id": "0"}
      endpoint = (base / (timelineMediaUrl % username)) ? params
      page = await fetchHtml(endpoint, headers, jsonKey="items_html")
    result = parsePhotoRail(page)
# Instantiate the batch fetchers (get<Media>s, getReplies<Media>s,
# getConversation<Media>s) for each media kind. Only videos use a guest token.
genMediaGet(video, token=true)
genMediaGet(poll)
genMediaGet(card)

View file

@ -1,41 +0,0 @@
import httpclient, asyncdispatch, times, strutils, uri
import ".."/[types, parser, parserutils]
import utils, consts
proc getProfileFallback(username: string; headers: HttpHeaders): Future[Profile] {.async.} =
  ## Fetches the profile of `username` from the intent page instead of the
  ## popup. Returns an empty `Profile()` when the fetch yields nil.
  let
    endpoint = base / profileIntentUrl ? {"screen_name": username}
    page = await fetchHtml(endpoint, headers)
  result =
    if page == nil: Profile()
    else: parseIntentProfile(page)
proc getProfile*(username, agent: string): Future[Profile] {.async.} =
  ## Fetches the profile of `username` from the popup (hovercard) endpoint.
  ##
  ## Returns an empty `Profile()` when the fetch yields nil, and switches to
  ## `getProfileFallback` when the popup contains a sensitive-content warning.
  let
    headers = genHeaders(agent, base / username, xml=true)
    params = {
      "screen_name": username,
      "wants_hovercard": "true",
      "_": $(epochTime().int)
    }
    endpoint = base / profilePopupUrl ? params
    page = await fetchHtml(endpoint, headers, jsonKey="html")

  if page == nil:
    result = Profile()
  elif page.select(".ProfileCard-sensitiveWarningContainer") != nil:
    result = await getProfileFallback(username, headers)
  else:
    result = parsePopupProfile(page)
proc getProfileFull*(username, agent: string): Future[Profile] {.async.} =
  ## Fetches the full profile page of `username` and parses it with
  ## `parseTimelineProfile`. Returns the default `Profile` value when the
  ## fetch yields nil.
  let
    pageUrl = base / username
    page = await fetchHtml(pageUrl, genHeaders(agent, pageUrl, auth=true))
  if page != nil:
    result = parseTimelineProfile(page)

View file

@ -1,13 +0,0 @@
import asyncdispatch, httpclient
import ".."/[formatters, types]
proc resolve*(url: string; prefs: Prefs): Future[string] {.async.} =
  ## Sends a HEAD request to `url` without following redirects and returns
  ## the `location` header passed through `replaceUrl`. Returns "" on any
  ## failure (including a missing header).
  let http = newAsyncHttpClient(maxRedirects=0)
  defer: http.close()

  try:
    let response = await http.request(url, $HttpHead)
    result = response.headers["location"].replaceUrl(prefs)
  except:
    # Best-effort: an unresolvable URL simply yields an empty string.
    discard

View file

@ -1,53 +0,0 @@
import httpclient, asyncdispatch, htmlparser
import strutils, json, xmltree, uri
import ".."/[types, parser, parserutils, query]
import utils, consts, timeline
proc getResult*[T](json: JsonNode; query: Query; after: string): Result[T] =
  ## Builds a `Result[T]` with no content from the paging fields of `json`.
  ##
  ## A nil `json` yields a result flagged as `beginning` carrying only
  ## `query`; otherwise `beginning` is true exactly when `after` is empty.
  if json == nil:
    result = Result[T](beginning: true, query: query)
  else:
    result = Result[T](
      hasMore: json{"has_more_items"}.getBool(false),
      maxId: json{"max_position"}.getStr,
      minId: json{"min_position"}.getStr,
      query: query,
      beginning: after.len == 0
    )
proc getSearch*[T](query: Query; after, agent: string;
                   media=true): Future[Result[T]] {.async.} =
  ## Runs a search for `query` and returns one page of `T` results
  ## (`Tweet` or `Profile`).
  ##
  ## Queries whose generated parameter is empty or consists only of a
  ## native-retweet filter return an empty first page without a request.
  let
    kind = if query.kind == users: "users" else: "tweets"
    param = genQueryParam(query)

  if param in ["include:nativeretweets", "-filter:nativeretweets", ""]:
    return Result[T](query: query, beginning: true)

  let
    encoded = encodeUrl(param, usePlus=false)
    referer = base / ("search?f=$1&q=$2&src=typd" % [kind, encoded])
    headers = genHeaders(agent, referer, auth=true, xml=true)
    params = {
      "f": kind,
      "vertical": "default",
      "q": param,
      "src": "typd",
      "include_available_features": "1",
      "include_entities": "1",
      "max_position": if after.len > 0: after else: "0",
      "reset_error_state": "false"
    }
    json = await fetchJson(base / searchUrl ? params, headers)

  result = getResult[T](json, query, after)
  if json == nil or not json.hasKey("items_html"):
    return

  when T is Tweet:
    result = await finishTimeline(json, query, after, agent, media)
  elif T is Profile:
    let itemsHtml = json["items_html"].to(string)
    # A lone newline is treated as "no more items".
    result.hasMore = itemsHtml != "\n"
    for item in parseHtml(itemsHtml).selectAll(".js-stream-item"):
      result.content.add parsePopupProfile(item, ".ProfileCard")

View file

@ -1,75 +0,0 @@
import httpclient, asyncdispatch, htmlparser, strformat
import sequtils, strutils, json, uri
import ".."/[types, parser, parserutils, formatters, query]
import utils, consts, media, search
proc getMedia(thread: Chain | Timeline; agent: string) {.async.} =
  ## Starts the video, card and poll fetches for every tweet in `thread`
  ## and waits for all of them.
  let
    videos = getVideos(thread, agent)
    cards = getCards(thread, agent)
    polls = getPolls(thread, agent)
  await all(videos, cards, polls)
proc finishTimeline*(json: JsonNode; query: Query; after, agent: string;
                     media=true): Future[Timeline] {.async.} =
  ## Turns a timeline JSON response into a `Timeline`.
  ##
  ## The paging fields come from `getResult`. Content is only parsed when
  ## `json` is non-nil, `new_latent_count` is non-zero and `items_html` is
  ## present; with `media` set, media for the parsed tweets is fetched too.
  result = getResult[Tweet](json, query, after)
  if json == nil or
     json["new_latent_count"].to(int) == 0 or
     not json.hasKey("items_html"):
    return

  let chain = parseChain(parseHtml(json["items_html"].to(string)))
  if media:
    await getMedia(chain, agent)
  result.content = chain.content
proc getProfileAndTimeline*(username, after, agent: string;
                            media=true): Future[(Profile, Timeline)] {.async.} =
  ## Fetches the profile page of `username` once and parses both the profile
  ## and its timeline from it. Media is only fetched when `media` is set and
  ## the parsed profile has a username.
  let referer = base / username
  let pageUrl =
    if after.len > 0: referer ? {"max_position": after}
    else: referer

  let
    headers = genHeaders(agent, referer, auth=true)
    page = await fetchHtml(pageUrl, headers)
    tweets = parseTimeline(page.select("#timeline > .stream-container"), after)
    owner = parseTimelineProfile(page)

  if media and owner.username.len > 0:
    await getMedia(tweets, agent)
  result = (owner, tweets)
proc getTimeline*(username, after, agent: string;
                  media=true): Future[Timeline] {.async.} =
  ## Fetches one page of the tweet timeline of `username` from the JSON
  ## timeline endpoint. `after` is sent as `max_position` when non-empty.
  var params = @[
    ("include_available_features", "1"),
    ("include_entities", "1"),
    ("include_new_items_bar", "false"),
    ("reset_error_state", "false")
  ]
  if after.len > 0:
    params.add(("max_position", after))

  let
    headers = genHeaders(agent, base / username, xml=true)
    endpoint = base / (timelineUrl % username) ? params
    json = await fetchJson(endpoint, headers)
  result = await finishTimeline(json, Query(), after, agent, media)
proc getMediaTimeline*(username, after, agent: string;
                       media=true): Future[Timeline] {.async.} =
  ## Fetches one page of the media timeline of `username`. `minId` of the
  ## result is always overwritten with the id reported by `getLastId`.
  var params = @[
    ("include_available_features", "1"),
    ("include_entities", "1"),
    ("reset_error_state", "false")
  ]
  if after.len > 0:
    params.add(("max_position", after))

  let
    headers = genHeaders(agent, base / username, xml=true)
    endpoint = base / (timelineMediaUrl % username) ? params
    json = await fetchJson(endpoint, headers)
    mediaQuery = Query(kind: QueryKind.media)

  result = await finishTimeline(json, mediaQuery, after, agent, media)
  result.minId = getLastId(result)

View file

@ -1,61 +0,0 @@
import asyncdispatch, strutils, uri, httpclient, json, xmltree, htmlparser
import ".."/[types, parser]
import utils, consts, media
proc getTweet*(username, id, after, agent: string): Future[Conversation] {.async.} =
  ## Fetches the permalink page of tweet `id` by `username` and parses it
  ## into a `Conversation`, then fetches videos, cards and polls for it.
  ##
  ## A 403 response whose body mentions "suspended" yields a tombstone
  ## conversation; any request or parse failure yields nil.
  let
    headers = genHeaders({
      "pragma": "no-cache",
      "x-previous-page-name": "profile",
      "accept": htmlAccept
    }, agent, base, xml=true)
    pageUrl = base / username / tweetUrl / id ? {"max_position": after}

  newClient()

  var page: XmlNode
  try:
    let
      response = await client.get($pageUrl)
      body = await response.body
    if response.code == Http403 and "suspended" in body:
      return Conversation(tweet: Tweet(tombstone: "User has been suspended"))
    page = parseHtml(body)
  except:
    # Best-effort: a failed request leaves `page` nil.
    discard

  if page == nil:
    return

  result = parseConversation(page, after)
  await all(getConversationVideos(result, agent),
            getConversationCards(result, agent),
            getConversationPolls(result, agent))
proc getReplies*(username, id, after, agent: string): Future[Result[Chain]] {.async.} =
  ## Fetches one page of replies to tweet `id` by `username`, then fetches
  ## videos, cards and polls for them. Returns nil when the response is
  ## missing or has no `items_html`.
  let
    headers = genHeaders({
      "pragma": "no-cache",
      "x-previous-page-name": "permalink",
      "accept": htmlAccept
    }, agent, base, xml=true)
    params = {
      "include_available_features": "1",
      "include_entities": "1",
      "max_position": after,
    }
    endpoint = base / (repliesUrl % [username, id]) ? params
    json = await fetchJson(endpoint, headers)

  if json == nil or not json.hasKey("items_html"):
    return

  result = parseReplies(parseHtml(json{"items_html"}.getStr))
  # Prefer the cursor from the JSON; keep the parsed one as the default.
  result.minId = json{"min_position"}.getStr(result.minId)
  if result.minId.len > 0:
    result.hasMore = true

  await all(getRepliesVideos(result, agent),
            getRepliesCards(result, agent),
            getRepliesPolls(result, agent))

View file

@ -1,64 +0,0 @@
import httpclient, asyncdispatch, htmlparser, options
import strutils, json, xmltree, uri
import ../types
import consts
proc genHeaders*(headers: openArray[tuple[key: string, val: string]];
                 agent: string; referer: Uri; lang=true;
                 auth=false; xml=false): HttpHeaders =
  ## Builds the request headers: referer, user agent and the active-user
  ## flag, plus the optional `authority`, `accept-language` and
  ## `x-requested-with` headers selected by the flags. Entries of `headers`
  ## are applied last and therefore override anything set before.
  result = newHttpHeaders({
    "referer": $referer,
    "user-agent": agent,
    "x-twitter-active-user": "yes",
  })

  if auth:
    result["authority"] = "twitter.com"
  if lang:
    # Qualified because the `lang` parameter shadows the constant.
    result["accept-language"] = consts.lang
  if xml:
    result["x-requested-with"] = "XMLHttpRequest"

  for extra in headers:
    result[extra.key] = extra.val
proc genHeaders*(agent: string; referer: Uri; lang=true;
                 auth=false; xml=false): HttpHeaders =
  ## Convenience overload of `genHeaders` without extra header pairs.
  result = genHeaders([], agent, referer, lang=lang, auth=auth, xml=xml)
# Dirty template: declares `client` directly in the calling scope and
# schedules it to be closed when that scope exits.
# `headers` is not a parameter; it must already be defined at the call site.
template newClient*() {.dirty.} =
var client = newAsyncHttpClient()
# Close on scope exit; errors raised by close() are deliberately ignored.
defer:
try: client.close()
except: discard
client.headers = headers
proc fetchHtml*(url: Uri; headers: HttpHeaders; jsonKey = ""): Future[XmlNode] {.async.} =
  ## Fetches `url` and returns the parsed HTML document.
  ##
  ## When `jsonKey` is non-empty the response is treated as JSON and the
  ## HTML is taken from the string stored under that key.
  ##
  ## Returns nil when the request fails, and also when `jsonKey` is given
  ## but the response is not valid JSON or has no string under that key.
  ## Note that `headers` is modified: its "accept" entry is overwritten.
  headers["accept"] = htmlAccept
  newClient()

  var resp = ""
  try:
    resp = await client.getContent($url)
  except:
    return nil

  if jsonKey.len > 0:
    # Previously `parseJson(resp)[jsonKey].str` ran unguarded, so a
    # malformed payload raised instead of producing the nil that callers
    # already check for.
    try:
      let node = parseJson(resp){jsonKey}
      if node == nil or node.kind != JString:
        return nil
      resp = node.getStr
    except JsonParsingError:
      return nil

  return parseHtml(resp)
proc fetchJson*(url: Uri; headers: HttpHeaders): Future[JsonNode] {.async.} =
  ## Fetches `url` and returns the parsed JSON, or nil when the request or
  ## the parsing fails. Note that `headers` is modified: its "accept" entry
  ## is overwritten.
  headers["accept"] = jsonAccept
  newClient()

  try:
    let body = await client.getContent($url)
    result = parseJson(body)
  except:
    # Best-effort: any failure is reported as nil.
    result = nil
proc getLastId*(tweets: Result[Tweet]): string =
  ## Returns the id of the last tweet in `tweets` as a string, using the
  ## retweet's id when that tweet is a retweet. Returns "" for empty content.
  if tweets.content.len > 0:
    let tail = tweets.content[^1]
    result =
      if tail.retweet.isSome: $(get(tail.retweet).id)
      else: $tail.id

View file

@ -1,91 +0,0 @@
import asyncdispatch, times, strutils
import norm/sqlite
import types, api/profile
template safeAddColumn(field: typedesc): untyped =
  ## Calls `addColumn` for `field` and ignores a `DbError` raised by it.
  try:
    field.addColumn
  except DbError:
    discard
# Declare the database ("cache.db") and the model types stored in it.
dbFromTypes("cache.db", "", "", "", [Profile, Video])
# Schema setup, executed when this module is loaded.
withDb:
# The Video table is created with force=true.
Video.createTable(force=true)
# The Profile table is created without force; a DbError here is ignored.
try: Profile.createTable()
except DbError: discard
# Add these columns if possible; DbError is ignored by safeAddColumn.
safeAddColumn Profile.lowername
safeAddColumn Profile.suspended
# Profiles older than this count as outdated (see isOutdated); it is also
# the sleep interval of cacheCleaner. Adjustable via setProfileCacheTime.
var profileCacheTime = initDuration(minutes=10)
proc isOutdated*(profile: Profile): bool =
  ## True when `profile.updated` lies further back than `profileCacheTime`.
  let age = getTime() - profile.updated
  result = age > profileCacheTime
proc cache*(profile: var Profile) =
  ## Stores `profile` in the database: updates the row with the same
  ## `lowername` when one exists, otherwise inserts it, provided `username`
  ## is non-empty.
  withDb:
    try:
      let existing = Profile.getOne("lowername = ?", profile.lowername)
      # Reuse the stored row id so update() targets that row.
      profile.id = existing.id
      update(profile)
    except KeyError:
      if profile.username.len != 0:
        insert(profile)
proc hasCachedProfile*(username: string): Option[Profile] =
  ## Returns the cached profile of `username` (matched on the lowercased
  ## name) when it exists and is not outdated, otherwise `none`.
  withDb:
    try:
      let p = Profile.getOne("lowername = ?", toLower(username))
      # Explicit freshness check. The previous doAssert + `except
      # AssertionError` used an assertion failure (a defect) as ordinary
      # control flow.
      if not p.isOutdated:
        result = some p
    except KeyError:
      result = none Profile
proc getCachedProfile*(username, agent: string;
                       force=false): Future[Profile] {.async.} =
  ## Returns the profile of `username`, served from the cache when a
  ## non-outdated entry exists; otherwise fetched with `getProfileFull` and
  ## written back to the cache.
  ##
  ## NOTE(review): `force` is accepted but unused, as in the original.
  var fresh = false
  withDb:
    try:
      result.getOne("lowername = ?", toLower(username))
      # Explicit freshness check instead of doAssert + catching
      # AssertionError (a defect) for control flow.
      fresh = not result.isOutdated
    except KeyError:
      discard

  # The network fetch now runs after the `withDb` block has finished rather
  # than inside it.
  if not fresh:
    result = await getProfileFull(username, agent)
    cache(result)
proc setProfileCacheTime*(minutes: int) =
  ## Sets `profileCacheTime` to a duration of `minutes` minutes.
  let lifetime = initDuration(minutes = minutes)
  profileCacheTime = lifetime
proc cache*(video: var Video) =
  ## Stores `video` in the database: updates the row with the same `videoId`
  ## when one exists, otherwise inserts it, provided `videoId` is non-empty.
  withDb:
    try:
      let existing = Video.getOne("videoId = ?", video.videoId)
      # Reuse the stored row id so update() targets that row.
      video.id = existing.id
      update(video)
    except KeyError:
      if video.videoId.len != 0:
        insert(video)
proc uncache*(id: int64) =
  ## Deletes the cached video whose `videoId` equals `$id`; any error
  ## (including a missing row) is ignored.
  withDb:
    try:
      var stale = Video.getOne("videoId = ?", $id)
      delete(stale)
    except:
      # Best-effort removal.
      discard
proc getCachedVideo*(id: int64): Option[Video] =
  ## Returns the cached video whose `videoId` equals `$id`, or `none` when
  ## the lookup raises `KeyError`.
  withDb:
    try:
      result = some Video.getOne("videoId = ?", $id)
    except KeyError:
      result = none Video
proc cacheCleaner*() {.async.} =
  ## Endless loop: sleeps for `profileCacheTime`, then deletes up to 10000
  ## profiles and up to 10000 videos whose `updated` value is older than
  ## `profileCacheTime`, inside one transaction.
  while true:
    await sleepAsync(profileCacheTime.inMilliseconds.int)

    withDb:
      let
        cutoff = toUnix(getTime() - profileCacheTime)
        expired = "updated<" & $cutoff
      var
        oldProfiles = Profile.getMany(10000, cond=expired)
        oldVideos = Video.getMany(10000, cond=expired)

      transaction:
        for profile in oldProfiles.mitems:
          delete(profile)
        for video in oldVideos.mitems:
          delete(video)

View file

@ -1,290 +0,0 @@
import xmltree, sequtils, strutils, json, options
import types, parserutils, formatters
proc parseJsonData*(node: XmlNode): JsonNode =
  ## Parses the JSON stored in the `value` attribute of `input.json-data`.
  ## Returns nil when that attribute is missing or empty.
  let raw = node.selectAttr("input.json-data", "value")
  result =
    if raw.len > 0: parseJson(raw)
    else: nil
proc parseTimelineProfile*(node: XmlNode): Profile =
  ## Parses a full profile page.
  ##
  ## Without a `.ProfileHeaderCard` the embedded JSON data is checked: a
  ## "suspended" section yields a profile flagged as suspended, anything
  ## else yields the default `Profile`.
  let card = node.select(".ProfileHeaderCard")
  if card == nil:
    let data = parseJsonData(node)
    if data != nil and data{"sectionName"}.getStr == "suspended":
      let name = data{"internalReferer"}.getStr.strip(chars={'/'})
      result = Profile(username: name, suspended: true)
    return

  const prefix = ".ProfileHeaderCard-"
  let handle = card.getUsername(prefix & "screenname")

  result = Profile(
    fullname: card.getName(prefix & "nameLink"),
    username: handle,
    lowername: toLower(handle),
    joinDate: card.getDate(prefix & "joinDateText"),
    website: card.selectAttr(prefix & "urlText a", "title"),
    bio: card.getBio(prefix & "bio"),
    location: getLocation(card),
    userpic: node.getAvatar(".profile-picture img"),
    verified: isVerified(card),
    protected: isProtected(card),
    banner: getTimelineBanner(node),
    media: getMediaCount(node)
  )
  getProfileStats(result, node.select(".ProfileNav-list"))
proc parsePopupProfile*(node: XmlNode; selector=".profile-card"): Profile =
  ## Parses a profile from the element matched by `selector` inside `node`.
  ## Returns the default `Profile` when nothing matches.
  let card = node.select(selector)
  if card != nil:
    let handle = card.getUsername(".username")
    result = Profile(
      fullname: card.getName(".fullname"),
      username: handle,
      lowername: toLower(handle),
      bio: card.getBio(".bio", fallback=".ProfileCard-bio"),
      userpic: card.getAvatar(".ProfileCard-avatarImage"),
      verified: isVerified(card),
      protected: isProtected(card),
      banner: getBanner(card)
    )
    getPopupStats(result, card)
proc parseListProfile*(profile: XmlNode): Profile =
  ## Parses one account entry of a list members page.
  Profile(
    fullname: profile.getName(".fullname"),
    username: profile.getUsername(".username"),
    bio: stripText(profile.getBio(".bio")),
    userpic: profile.getAvatar(".avatar"),
    verified: isVerified(profile),
    protected: isProtected(profile),
  )
proc parseIntentProfile*(profile: XmlNode): Profile =
  ## Parses a profile from the intent page markup and fills in its stats
  ## with `getIntentStats`.
  let summary = profile.select(".profile.summary")
  result = Profile(
    fullname: profile.getName("a.fn.url.alternate-context"),
    username: profile.getUsername(".nickname"),
    bio: profile.getBio("p.note"),
    userpic: summary.getAvatar("img.photo"),
    verified: profile.select("li.verified") != nil,
    protected: profile.select("li.protected") != nil,
    banner: getBanner(profile)
  )
  getIntentStats(result, profile)
proc parseTweetProfile*(profile: XmlNode): Profile =
  ## Builds the author profile of a tweet from the tweet element's
  ## `data-name` / `data-screen-name` attributes and its avatar.
  Profile(
    fullname: stripText(profile.attr("data-name")),
    username: profile.attr("data-screen-name"),
    userpic: profile.getAvatar(".avatar"),
    verified: isVerified(profile)
  )
proc parseQuote*(quote: XmlNode): Quote =
  ## Parses a quoted tweet element, including its author and media.
  let author = Profile(
    fullname: stripText(quote.selectText(".QuoteTweet-fullname")),
    username: quote.attr("data-screen-name"),
    verified: isVerified(quote)
  )

  result = Quote(
    id: parseBiggestInt(quote.attr("data-item-id")),
    text: getQuoteText(quote),
    reply: parseTweetReply(quote),
    hasThread: quote.select(".self-thread-context") != nil,
    available: true
  )
  result.profile = author
  getQuoteMedia(result, quote)
proc parseTweet*(node: XmlNode): Tweet =
  ## Parses a stream item into a `Tweet`.
  ##
  ## A nil node, or a node without a `.tweet` child, yields an empty
  ## `Tweet()`; a "withheld" node yields a tombstone tweet. Retweet and
  ## quote information is attached when present.
  if node == nil:
    return Tweet()

  if "withheld" in node.attr("class"):
    return Tweet(tombstone: getTombstone(node.selectText(".Tombstone-label")))

  let body = node.select(".tweet")
  if body == nil:
    return Tweet()

  result = Tweet(
    id: parseBiggestInt(body.attr("data-item-id")),
    threadId: parseBiggestInt(body.attr("data-conversation-id")),
    text: getTweetText(body),
    time: getTimestamp(body),
    shortTime: getShortTime(body),
    profile: parseTweetProfile(body),
    stats: parseTweetStats(body),
    reply: parseTweetReply(body),
    mediaTags: getMediaTags(body),
    location: getTweetLocation(body),
    hasThread: body.select(".content > .self-thread-context") != nil,
    pinned: "pinned" in body.attr("class"),
    available: true
  )
  getTweetMedia(result, body)
  getTweetCard(result, body)

  let retweeter = body.selectText(".js-retweet-text > a > b")
  if retweeter.len > 0:
    result.retweet = some Retweet(
      by: stripText(retweeter),
      id: parseBiggestInt(body.attr("data-retweet-id"))
    )

  let quoted = body.select(".QuoteTweet-innerContainer")
  if quoted != nil:
    result.quote = some parseQuote(quoted)

  # A tombstone mentioning "unavailable" replaces the quote with a tombstone
  # quote whose label is read from the outer `node`.
  let tombstone = body.select(".Tombstone")
  if tombstone != nil and "unavailable" in tombstone.innerText():
    let label = getTombstone(node.selectText(".Tombstone-label"))
    result.quote = some Quote(tombstone: label)
proc parseChain*(nodes: XmlNode): Chain =
  ## Parses the element children of `nodes` into a `Chain`; returns nil for
  ## a nil input.
  ##
  ## Tombstone / unavailable / withheld items become empty tweets, a
  ## "morereplies" item sets `more`, and everything else goes through
  ## `parseTweet`.
  if nodes == nil:
    return

  result = Chain()
  for item in nodes:
    if item.kind == xnText:
      continue

    let cls = item.attr("class").toLower()
    if "tombstone" in cls or "unavailable" in cls or "withheld" in cls:
      result.content.add Tweet()
    elif "morereplies" in cls:
      result.more = getMoreReplies(item)
    else:
      result.content.add parseTweet(item)
proc parseReplies*(replies: XmlNode; skipFirst=false): Result[Chain] =
  ## Parses the element children of `replies` into reply chains.
  ##
  ## With `skipFirst` the first element child is ignored. A "showmore" item
  ## supplies the paging cursor (`minId`) and sets `hasMore`.
  new(result)

  let items = replies.filterIt(it.kind != xnText)
  for idx, item in items:
    if skipFirst and idx == 0:
      continue

    let cls = item.attr("class").toLower()
    if "lone" in cls:
      result.content.add parseChain(item)
    elif "showmore" in cls:
      result.minId = item.selectAttr("button", "data-cursor")
      result.hasMore = true
    else:
      result.content.add parseChain(item.select(".stream-items"))
# Parses a tweet permalink page into a Conversation: the focused tweet, the
# chain before it, the author's own follow-up chain, and the replies.
# `after` only determines `replies.beginning` (true when `after` is empty).
proc parseConversation*(node: XmlNode; after: string): Conversation =
let tweet = node.select(".permalink-tweet-container")
# No regular tweet container: parse the withheld variant instead (parseTweet
# accepts a nil node).
if tweet == nil:
return Conversation(tweet: parseTweet(node.select(".permalink-tweet-withheld")))
result = Conversation(
tweet: parseTweet(tweet),
before: parseChain(node.select(".in-reply-to .stream-items")),
)
# parseChain returns nil for a nil node, hence the guard.
if result.before != nil:
let maxId = node.selectAttr(".in-reply-to .stream-container", "data-max-position")
# A max-position cursor sets `more` to -1.
if maxId.len > 0:
result.before.more = -1
let replies = node.select(".replies-to .stream-items")
if replies == nil: return
# Element children whose class contains "self"; the first one, if any,
# becomes the `after` chain.
let nodes = replies.filterIt(it.kind != xnText and "self" in it.attr("class"))
if nodes.len > 0 and "self" in nodes[0].attr("class"):
result.after = parseChain(nodes[0].select(".stream-items"))
# When an `after` chain was found, the first reply item is skipped.
result.replies = parseReplies(replies, result.after != nil)
result.replies.beginning = after.len == 0
# Fall back to the container's cursor when parseReplies found none.
if result.replies.minId.len == 0:
result.replies.minId = node.selectAttr(".replies-to .stream-container", "data-min-position")
result.replies.hasMore = node.select(".stream-footer .has-more-items") != nil
proc parseTimeline*(node: XmlNode; after: string): Timeline =
  ## Parses a stream container into a `Timeline`; a nil node yields an
  ## empty `Timeline()`. `beginning` is true exactly when `after` is empty.
  if node == nil:
    return Timeline()

  let chain = parseChain(node.select(".stream > .stream-items"))
  result = Timeline(
    content: chain.content,
    minId: node.attr("data-min-position"),
    maxId: node.attr("data-max-position"),
    hasMore: node.select(".has-more-items") != nil,
    beginning: after.len == 0
  )
proc parseVideo*(node: JsonNode; tweetId: int64): Video =
  ## Builds a `Video` from a video config JSON document.
  ##
  ## `track.contentType` selects the shape: "media_entity" or "vmap". Any
  ## other type is reported with `echo` and leaves the default `Video`.
  ## `videoId` and `thumb` are set in every case.
  let
    track = node{"track"}
    contentType = track["contentType"].to(string)
    playback = track["playbackType"].to(string)

  if contentType == "media_entity":
    let availability = track{"mediaAvailability"}
    result = Video(
      playbackType: if "mp4" in playback: mp4 else: m3u8,
      contentId: track["contentId"].to(string),
      durationMs: track["durationMs"].to(int),
      views: track["viewCount"].to(string),
      url: track["playbackUrl"].to(string),
      available: availability["status"].to(string) == "available",
      reason: availability["reason"].to(string))
  elif contentType == "vmap":
    result = Video(
      playbackType: vmap,
      durationMs: track.getOrDefault("durationMs").getInt(0),
      url: track["vmapUrl"].to(string),
      available: true)
  else:
    echo "Can't parse video of type ", contentType, " ", tweetId

  result.videoId = $tweetId
  result.thumb = node["posterImage"].to(string)
proc parsePoll*(node: XmlNode): Poll =
  ## Parses a poll card: total votes, status text, option labels and their
  ## values, plus the index of the leading option.
  let total = node.selectText(".PollXChoice-footer--total")
  result.votes = total.strip().split(" ")[0]
  result.status = node.selectText(".PollXChoice-footer--time")

  for choice in node.selectAll(".PollXChoice-choice"):
    for span in choice.select(".PollXChoice-choice--text"):
      if span.kind == xnText:
        continue
      let cls = span.attr("class")
      if cls.len == 0:
        # A span without a class holds the option label.
        result.options.add span.innerText()
      elif "progress" in cls:
        # The last character of the text is dropped before parsing.
        let label = span.innerText()
        result.values.add parseInt(label[0 .. ^2])

  # The leader is the first option with the strictly highest value.
  var best = 0
  for idx, value in result.values:
    if value > best:
      best = value
      result.leader = idx
proc parsePhotoRail*(node: XmlNode): seq[GalleryPhoto] =
  ## Collects one `GalleryPhoto` per `.tweet-media-img-placeholder` element.
  for placeholder in node.selectAll(".tweet-media-img-placeholder"):
    # NOTE(review): the colour is read from an attribute literally named
    # "background-color" whose value is prefixed with "style: " - confirm
    # against the markup before "correcting" this.
    let colour = placeholder.attr("background-color").replace("style: ", "")
    result.add GalleryPhoto(
      url: placeholder.attr("data-image-url"),
      tweetId: placeholder.attr("data-tweet-id"),
      color: colour
    )
proc parseCard*(card: var Card; node: XmlNode) =
  ## Fills `card` in place from a card page: title, text, destination, URL
  ## (only when not already set) and image.
  card.title = node.selectText("h2.TwitterCard-title")
  card.text = node.selectText("p.tcu-resetMargin")
  card.dest = node.selectText("span.SummaryCard-destination")

  # Keep an existing URL; otherwise try the first link, then the thank-you URL.
  if card.url.len == 0:
    card.url = node.selectAttr("a", "href")
    if card.url.len == 0:
      card.url = node.selectAttr(".ConvoCard-thankYouContent", "data-thank-you-url")

  let img = node.select(".tcu-imageWrapper img")
  if img != nil:
    # workaround for issue 11713
    card.image = some img.attr("data-src").replace("gname", "g&name")

  # Live events show the title as text and the category as title.
  if card.kind == liveEvent:
    card.text = card.title
    card.title = node.selectText(".TwitterCard-attribution--category")

View file

@ -1,294 +0,0 @@
import xmltree, times, uri, options, json
import strtabs, strformat, strutils, sequtils
import regex
import types, formatters
from q import nil
from htmlgen import a
# Regular expressions used by the helpers below.
const
# Captures the URL inside a `...:url('...')` style value (used by getGif).
thumbRegex = re".+:url\('([^']+)'\)"
# Captures the path segment after "thumb/" up to the extension dot (used by getGif).
gifRegex = re".+thumb/([^\.']+)\.[jpng].*"
# Captures the colour of an `a:active` CSS rule (used by getTimelineBanner).
reColor = re"a:active \{\n +color: (#[A-Z0-9]+)"
proc selectAll*(node: XmlNode; selector: string): seq[XmlNode] =
  ## Nil-safe wrapper around `q.select`: returns every element under `node`
  ## matching `selector`, or an empty seq for a nil node.
  if node != nil:
    result = q.select(node, selector)
proc select*(node: XmlNode; selector: string): XmlNode =
  ## Returns the first element under `node` matching `selector`, or nil
  ## when `node` is nil or nothing matches.
  if node != nil:
    let matches = node.selectAll(selector)
    if matches.len > 0:
      result = matches[0]
proc selectAttr*(node: XmlNode; selector: string; attr: string): string =
  ## Returns attribute `attr` of the first element matching `selector`, or
  ## "" when nothing matches.
  let match = node.select(selector)
  if match != nil:
    result = match.attr(attr)
proc selectText*(node: XmlNode; selector: string): string =
  ## Returns the inner text of the first element matching `selector`, or ""
  ## when nothing matches.
  let match = node.select(selector)
  if match != nil:
    result = match.innerText()
proc getHeader(profile: XmlNode): XmlNode =
  ## Returns the first header element found under `profile`, trying the
  ## known header selectors in order, or `profile` itself when none match.
  const candidates = [
    ".permalink-header",
    ".stream-item-header",
    ".ProfileCard-userFields"
  ]
  for selector in candidates:
    result = profile.select(selector)
    if result != nil:
      return
  result = profile
proc isVerified*(profile: XmlNode): bool =
  ## True when the header of `profile` contains a verified icon.
  let icon = getHeader(profile).select(".Icon.Icon--verified")
  result = icon != nil
proc isProtected*(profile: XmlNode): bool =
  ## True when the header of `profile` contains a protected icon.
  let icon = getHeader(profile).select(".Icon.Icon--protected")
  result = icon != nil
proc parseText*(text: XmlNode; skipLink=""): string =
  ## Renders the children of `text` into an HTML string.
  ##
  ## Text nodes are appended as-is; links, hashtags, mentions and emoji are
  ## rewritten. A link whose expanded URL equals `skipLink` is omitted.
  ## Returns "" for a nil node.
  if text == nil:
    return

  for child in text:
    if child.kind == xnText:
      result.add child
      continue
    if child.kind != xnElement:
      continue

    # Attribute-less elements: only <strong> is kept, rendered verbatim.
    if child.attrs == nil:
      if child.tag == "strong":
        result.add $child
      continue

    let cls = child.attr("class")
    if "data-expanded-url" in child.attrs:
      let target = child.attr("data-expanded-url")
      if target == skipLink:
        continue
      # Hidden links go on their own line unless they open the text.
      if "u-hidden" in cls and result.len > 0:
        result.add "\n"
      result.add a(shortLink(target), href=target)
    elif "ashtag" in cls or "hashflag" in cls:
      let tag = child.innerText()
      result.add a(tag, href=("/search?q=" & encodeUrl(tag)))
    elif "atreply" in cls:
      result.add a(child.innerText(), href=child.attr("href"))
    elif "Emoji" in cls:
      result.add child.attr("alt")
proc getQuoteText*(tweet: XmlNode): string =
  ## Renders the `.QuoteTweet-text` element of `tweet` with `parseText`.
  let textNode = tweet.select(".QuoteTweet-text")
  result = parseText(textNode)
proc getTweetText*(tweet: XmlNode): string =
  ## Renders the `.tweet-text` element of `tweet`. When the tweet contains
  ## a quote, the hidden timeline link is left out of the text.
  let
    body = tweet.select(".tweet-text")
    hiddenLink = body.selectAttr("a.twitter-timeline-link.u-hidden", "data-expanded-url")
    skip = if tweet.select(".QuoteTweet") == nil: "" else: hiddenLink
  result = parseText(body, skip)
proc getTimestamp*(tweet: XmlNode): Time =
  ## Reads the `data-time` attribute of `.js-short-timestamp` as a Unix
  ## timestamp; a missing or empty attribute yields `fromUnix(0)`.
  let raw = tweet.selectAttr(".js-short-timestamp", "data-time")
  var seconds: int64 = 0
  if raw.len > 0:
    seconds = parseBiggestInt(raw)
  result = fromUnix(seconds)
proc getShortTime*(tweet: XmlNode): string =
  ## Returns the text of the `.js-short-timestamp` element.
  result = selectText(tweet, ".js-short-timestamp")
proc getDate*(node: XmlNode; selector: string): Time =
  ## Parses the `title` attribute of the element matching `selector` as a
  ## UTC time in the format "h:mm tt - d MMM YYYY". Returns the default
  ## `Time` when nothing matches.
  let element = node.select(selector)
  if element != nil:
    result = parseTime(element.attr("title"), "h:mm tt - d MMM YYYY", utc())
proc getName*(profile: XmlNode; selector: string): string =
  ## Returns the text of the element matching `selector`, passed through
  ## `stripText`.
  result = stripText(profile.selectText(selector))
proc getUsername*(profile: XmlNode; selector: string): string =
  ## Returns the text of the element matching `selector` with surrounding
  ## '@', spaces and newlines removed.
  let raw = profile.selectText(selector)
  result = raw.strip(chars={'@', ' ', '\n'})
proc getBio*(profile: XmlNode; selector: string; fallback=""): string =
  ## Renders the bio element matching `selector`; when it is missing and a
  ## `fallback` selector is given, that one is tried instead.
  var bioNode = profile.select(selector)
  if bioNode.isNil and fallback.len > 0:
    bioNode = profile.select(fallback)
  result = parseText(bioNode)
proc getLocation*(profile: XmlNode): string =
  ## Returns the profile's location text. When the location link carries a
  ## `data-place-id`, it is appended after a ':'.
  const locationSel = ".ProfileHeaderCard-locationText"
  result = stripText(profile.selectText(locationSel))

  let placeId = profile.selectAttr(locationSel & " a", "data-place-id")
  if placeId.len > 0:
    result.add ":" & placeId
proc getAvatar*(profile: XmlNode; selector: string): string =
  ## Returns the `src` of the element matching `selector`, passed through
  ## `getUserpic`.
  let src = profile.selectAttr(selector, "src")
  result = getUserpic(src)
proc getBanner*(node: XmlNode): string =
  ## Returns the banner of a profile card: the SVG image URL (with
  ## "600x200" replaced by "1500x500") when present, otherwise the card's
  ## background colour, otherwise "#161616".
  let imageUrl = node.selectAttr("svg > image", "xlink:href")
  if imageUrl.len > 0:
    result = imageUrl.replace("600x200", "1500x500")
  else:
    let style = node.selectAttr(".ProfileCard-bg", "style")
    result = style.replace("background-color: ", "")

  if result.len == 0:
    result = "#161616"
proc getTimelineBanner*(node: XmlNode): string =
  ## Returns the banner of a profile page: the header image URL when one is
  ## present, otherwise the colour captured by `reColor` from the first
  ## `<style>` element, otherwise "".
  ##
  ## Uses the nil-safe `selectAttr` / `selectText` helpers so that a page
  ## without a header image or `<style>` element no longer dereferences nil.
  let img = node.selectAttr(".ProfileCanopy-headerBg img", "src")
  if img.len > 0:
    return img

  let style = node.selectText("style")
  var m: RegexMatch
  if style.find(reColor, m):
    # First capture group of the match: the colour value.
    return style[m.group(0)[0]]
proc getMediaCount*(node: XmlNode): string =
  ## Returns the first space-separated word of the photo rail heading.
  let heading = stripText(node.selectText(".PhotoRail-headingWithCount"))
  result = heading.split(" ")[0]
proc getProfileStats*(profile: var Profile; node: XmlNode) =
  ## Copies the counters found in the `.ProfileNav-stat` elements of `node`
  ## into `profile`.
  ##
  ## Each counter is the first space-separated word of the element's `title`;
  ## the `data-nav` attribute decides which field receives it. Elements with
  ## any other `data-nav` value are ignored.
  for stat in node.selectAll(".ProfileNav-stat"):
    let
      count = stat.attr("title").split(" ")[0]
      kind = stat.attr("data-nav")
    if kind == "followers":
      profile.followers = count
    elif kind == "following":
      profile.following = count
    elif kind == "favorites":
      profile.likes = count
    elif kind == "tweets":
      profile.tweets = count
proc getPopupStats*(profile: var Profile; node: XmlNode) =
  ## Copies the counters found in the `.ProfileCardStats-statLink` elements
  ## of `node` into `profile`.
  ##
  ## Each counter is the first space-separated word of the link's `title`.
  ## The last path segment of `href` selects the field: "followers" or
  ## "following"; every other link is stored as the tweet count.
  for link in node.selectAll(".ProfileCardStats-statLink"):
    let
      count = link.attr("title").split(" ")[0]
      target = link.attr("href").split("/")[^1]
    if target == "followers":
      profile.followers = count
    elif target == "following":
      profile.following = count
    else:
      profile.tweets = count
proc getIntentStats*(profile: var Profile; node: XmlNode) =
  ## Fills the follower/following counters of `profile` from the
  ## `dd.count > a` links of `node`.
  ##
  ## The tweet count is always set to "?". For each link, the inner text is
  ## the counter and the last path segment of `href` selects the field;
  ## links pointing elsewhere are ignored.
  profile.tweets = "?"
  for link in node.selectAll("dd.count > a"):
    let
      count = link.innerText()
      target = link.attr("href").split("/")[^1]
    if target == "followers":
      profile.followers = count
    elif target == "following":
      profile.following = count
proc parseTweetStats*(node: XmlNode): TweetStats =
  ## Collects the retweet, reply and like counts of a tweet from its
  ## `.ProfileTweet-actionCountForAria` elements.
  ##
  ## Each element's text is split on whitespace: the first word is the count
  ## and the first three characters of the second word pick the field
  ## ("ret", "rep" or "lik"). Elements whose text has fewer than two words,
  ## or whose second word is shorter than three characters, are skipped
  ## instead of triggering an index error.
  ##
  ## `parseInt` raises `ValueError` if a matched count is not an integer.
  result = TweetStats()
  for action in node.selectAll(".ProfileTweet-actionCountForAria"):
    let words = action.innerText.split()
    # Guard both indexing operations below against malformed text.
    if words.len < 2 or words[1].len < 3:
      continue
    case words[1][0 .. 2]
    of "ret": result.retweets = words[0].parseInt
    of "rep": result.replies = words[0].parseInt
    of "lik": result.likes = words[0].parseInt
proc parseTweetReply*(node: XmlNode): seq[string] =
  ## Returns the inner text of every name element found inside the
  ## `.ReplyingToContextBelowAuthor` element of `node`.
  ##
  ## The names are selected with "b" when the `class` of `node` contains
  ## "Quote", and with "a b" otherwise. The result is empty when there is
  ## no reply context.
  let context = node.select(".ReplyingToContextBelowAuthor")
  if context != nil:
    let nameSelector =
      if "Quote" notin node.attr("class"): "a b"
      else: "b"
    for name in context.selectAll(nameSelector):
      result.add name.innerText
proc getGif(player: XmlNode): Gif =
  ## Builds a `Gif` from the `style` attribute of `player`.
  ##
  ## The thumbnail is the style with `thumbRegex` replaced by its first
  ## capture; the id is the thumbnail with `gifRegex` replaced by its first
  ## capture, and is interpolated into the video.twimg.com mp4 URL.
  let thumb = player.attr("style").replace(thumbRegex, "$1")
  let id = thumb.replace(gifRegex, "$1")
  result = Gif(
    url: &"https://video.twimg.com/tweet_video/{id}.mp4",
    thumb: thumb
  )
proc getTweetMedia*(tweet: Tweet; node: XmlNode) =
  ## Fills `tweet` with the media found in `node`: photo URLs, an optional
  ## attribution profile, and either a GIF or a video.
  ##
  ## Photo containers without a `data-image-url` value are skipped, and a
  ## GIF is only recorded when its `.PlayableMedia-player` element exists.
  for photo in node.selectAll(".AdaptiveMedia-photoContainer"):
    # `attr` yields "" for a missing attribute, whereas indexing `attrs`
    # directly raises when the key (or the whole attribute table) is absent.
    let url = photo.attr("data-image-url")
    if url.len > 0:
      tweet.photos.add url

  let player = node.select(".PlayableMedia")
  if player == nil: return

  let attrib = player.select(".PlayableMedia-attribution")
  if attrib != nil:
    tweet.attribution = some Profile(
      username: attrib.attr("href").strip(chars={'/'}),
      fullname: attrib.selectText(".fullname"),
      userpic: attrib.selectAttr(".avatar", "src")
    )

  let playerClass = player.attr("class")
  if "gif" in playerClass:
    # `getGif` reads attributes of the node it is given, so never pass nil.
    let gifPlayer = player.select(".PlayableMedia-player")
    if gifPlayer != nil:
      tweet.gif = some getGif(gifPlayer)
  elif "video" in playerClass:
    let
      thumb = player.selectAttr(".PlayableMedia-player", "style").split("'")
      desc = player.selectText(".PlayableMedia-description")
      title = player.selectText(".PlayableMedia-title")
    var video = Video(title: title, description: desc)
    # The thumbnail is the second-to-last piece of the style split on `'`.
    if thumb.len > 1:
      video.thumb = thumb[^2]
    tweet.video = some video
proc getQuoteMedia*(quote: var Quote; node: XmlNode) =
  ## Fills the media-related fields of `quote` from `node`.
  ##
  ## A quote containing `.QuoteTweet--sensitive` is only flagged as
  ## sensitive. Otherwise the thumbnail is taken from the `img` inside
  ## `.QuoteMedia`, and the badge is the text of `.AdaptiveMedia-badgeText`
  ## or, failing that, "GIF" when `.Icon--gifBadge` is present.
  let isSensitive = node.select(".QuoteTweet--sensitive") != nil
  if isSensitive:
    quote.sensitive = true
    return

  let media = node.select(".QuoteMedia")
  if media != nil:
    quote.thumb = media.selectAttr("img", "src")

  let textBadge = node.select(".AdaptiveMedia-badgeText")
  if textBadge != nil:
    quote.badge = textBadge.innerText()
  elif node.select(".Icon--gifBadge") != nil:
    quote.badge = "GIF"
proc getTweetCard*(tweet: Tweet; node: XmlNode) =
  ## Attaches the card (or poll placeholder) described by `node` to `tweet`.
  ##
  ## Nothing happens when `data-has-cards` is "false", when the card type
  ## contains "message_me", or when the card container is missing. A card
  ## type containing "poll" only sets an empty `Poll`. Otherwise a `Card` is
  ## built from the container's `data-src`; its kind is parsed from the card
  ## type (falling back to `summary`), and its URL is the
  ## `data-expanded-url` of the tweet-text link whose `href` equals the
  ## container's `data-card-url`.
  if node.attr("data-has-cards") == "false":
    return

  # Only the part after the last ':' matters; without a ':' the split
  # returns the whole value unchanged.
  let cardType = node.attr("data-card2-type").split(":")[^1]

  if "poll" in cardType:
    tweet.poll = some Poll()
    return
  if "message_me" in cardType:
    return

  let container = node.select(".card2 > .js-macaw-cards-iframe-container")
  if container == nil:
    return

  var card = Card(id: $tweet.id, query: container.attr("data-src"))
  card.kind =
    try: parseEnum[CardKind](cardType)
    except ValueError: summary

  let target = container.attr("data-card-url")
  for link in node.selectAll(".tweet-text a"):
    # No early exit: the last matching link wins.
    if link.attr("href") == target:
      card.url = link.attr("data-expanded-url")

  tweet.card = some card
proc getMoreReplies*(node: XmlNode): int64 =
  ## Returns the number at the start of the node's inner text.
  ##
  ## The text is stripped, and its first space-separated word is parsed as
  ## an integer. When that word is not a valid integer (including empty
  ## text), -1 is returned.
  let text = node.innerText().strip()
  try:
    result = parseBiggestInt(text.split(" ")[0])
  except ValueError:
    # Only the parse failure is expected here; a bare `except:` would also
    # hide unrelated errors and defects.
    result = -1
proc getMediaTags*(node: XmlNode): seq[Profile] =
  ## Returns a `Profile` (username and full name) for every user listed in
  ## the `data-reply-to-users-json` attribute whose screen name also appears
  ## in the `data-tagged` attribute.
  ##
  ## Screen names are matched as whole tokens, so a user named "al" is no
  ## longer reported as tagged just because "alice" is. The result is empty
  ## when `data-tagged` is empty.
  ##
  ## `parseJson` raises if `data-reply-to-users-json` is not valid JSON.
  # NOTE(review): the delimiter used inside `data-tagged` is not visible
  # here, so the value is split on every character that cannot be part of a
  # handle (assumed to be letters, digits and '_' - confirm).
  const separators = {'\0'..'\xFF'} - {'a'..'z', 'A'..'Z', '0'..'9', '_'}

  let usernames = node.attr("data-tagged")
  if usernames.len == 0: return

  let tagged = usernames.split(separators)
  let users = parseJson(node.attr("data-reply-to-users-json"))
  for user in users:
    let un = user["screen_name"].getStr
    # An empty name could match the empty tokens produced by the split.
    if un.len == 0 or un notin tagged: continue
    result.add Profile(username: un, fullname: user["name"].getStr)
proc getTweetLocation*(node: XmlNode): string =
  ## Returns the stripped text of the `.js-geo-pivot-link` element followed
  ## by ":" and the element's `data-place-id` attribute.
  ##
  ## The result is empty when `node` has no such element.
  let pivot = node.select(".js-geo-pivot-link")
  if pivot != nil:
    result = stripText(pivot.innerText()) & ":" & pivot.attr("data-place-id")