WIP tweets/timeline parser

This commit is contained in:
Zed 2022-01-30 23:38:39 +01:00
parent 266e0a0082
commit c9b261a793
13 changed files with 359 additions and 50 deletions

View file

@ -89,8 +89,8 @@ proc getSearch*[T](query: Query; after=""): Future[Result[T]] {.async.} =
else: else:
const const
searchMode = ("tweet_search_mode", "live") searchMode = ("tweet_search_mode", "live")
parse = parseTimeline parse = parseTweets
fetchFunc = fetch fetchFunc = fetchRaw
let q = genQueryParam(query) let q = genQueryParam(query)
if q.len == 0 or q == emptyQuery: if q.len == 0 or q == emptyQuery:

View file

@ -0,0 +1,44 @@
import std/[json, strutils, times, math]
import utils
import ".."/types/[media, tweet]
from ../../types import Poll, Gif, Video, VideoVariant, VideoType
proc parseVideo*(entity: Entity): Video =
  ## Converts a video media entity into a `Video`.
  ##
  ## The title is taken from `additionalMediaInfo.title` when that is
  ## non-empty and falls back to the entity's alt text otherwise.
  let
    info = entity.additionalMediaInfo
    title =
      if info.title.len > 0: info.title
      else: entity.extAltText

  Video(
    thumb: getImageUrl(entity.mediaUrlHttps),
    views: getStr(entity.ext.mediaStats{"r", "ok", "viewCount"}),
    available: entity.extMediaAvailability.status == "available",
    title: title,
    durationMs: entity.videoInfo.durationMillis,
    description: info.description,
    variants: entity.videoInfo.variants
    # playbackType: mp4
  )
proc parseGif*(entity: Entity): Gif =
  ## Converts an animated-gif media entity into a `Gif`.
  ##
  ## `thumb` is the entity's image URL and `url` is the URL of the first
  ## video variant. When the entity carries no variants, `url` is left empty
  ## instead of indexing into an empty seq (which would raise IndexDefect).
  result = Gif(thumb: entity.getImageUrl)

  let variants = entity.videoInfo.variants
  if variants.len > 0:
    result.url = variants[0].url.getImageUrl
proc parsePoll*(card: Card): Poll =
  ## Builds a `Poll` from a poll card's `bindingValues`.
  ##
  ## The last choice number is read from the fifth character of `card.name`
  ## (name format is `pollNchoice_*`). For every choice the vote count and the
  ## label are read from `choiceN_count` / `choiceN_label`; a missing count is
  ## parsed as "0". `status` is the first unit of the time remaining until
  ## `end_datetime_utc`, or "Final results" once that time is not in the
  ## future. `leader` is the index of the first choice holding the highest
  ## count and `votes` is the sum of all counts.
  ##
  ## Raises `ValueError` if a count is present but is not an integer.
  let vals = card.bindingValues

  # name format is pollNchoice_*; a name too short to carry the choice count
  # yields a poll without options instead of an IndexDefect.
  if card.name.len > 4:
    for i in '1' .. card.name[4]:
      let choice = "choice" & i
      result.values.add parseInt(
        vals{choice & "_count", "string_value"}.getStr("0"))
      result.options.add vals{choice & "_label", "string_value"}.getStr

  let
    time = vals{"end_datetime_utc", "string_value"}.getStr.parseIsoDate
    current = now()  # sampled once so the comparison and the difference agree
  if time > current:
    let
      timeLeft = $(time - current)
      unitEnd = timeLeft.find(",")
    # Keep only the text before the first comma. When the remaining time
    # prints without a comma, `find` returns -1 and the whole string is used
    # rather than an empty slice.
    result.status =
      if unitEnd < 0: timeLeft
      else: timeLeft[0 ..< unitEnd]
  else:
    result.status = "Final results"

  # `max` on an empty seq raises, so a leader is only looked up when there is
  # at least one choice; otherwise `leader` keeps its default value.
  if result.values.len > 0:
    result.leader = result.values.find(max(result.values))
  result.votes = result.values.sum

View file

@ -1,15 +1,14 @@
import std/[macros, htmlgen, unicode] import std/[macros, htmlgen, unicode]
import ../types/common
import ".."/../[formatters, utils] import ".."/../[formatters, utils]
type type
ReplaceSliceKind = enum ReplaceSliceKind* = enum
rkRemove, rkUrl, rkHashtag, rkMention rkRemove, rkUrl, rkHashtag, rkMention
ReplaceSlice* = object ReplaceSlice* = object
slice: Slice[int] slice*: Slice[int]
kind: ReplaceSliceKind kind*: ReplaceSliceKind
url, display: string url*, display*: string
proc cmp*(x, y: ReplaceSlice): int = cmp(x.slice.a, y.slice.b) proc cmp*(x, y: ReplaceSlice): int = cmp(x.slice.a, y.slice.b)
@ -27,11 +26,14 @@ proc dedupSlices*(s: var seq[ReplaceSlice]) =
inc j inc j
inc i inc i
proc extractUrls*(result: var seq[ReplaceSlice]; url: Url; proc extractHashtags*(result: var seq[ReplaceSlice]; slice: Slice[int]) =
textLen: int; hideTwitter = false) = result.add ReplaceSlice(kind: rkHashtag, slice: slice)
proc extractUrls*[T](result: var seq[ReplaceSlice]; entity: T;
textLen: int; hideTwitter = false) =
let let
link = url.expandedUrl link = entity.expandedUrl
slice = url.indices[0] ..< url.indices[1] slice = entity.indices
if hideTwitter and slice.b.succ >= textLen and link.isTwitterUrl: if hideTwitter and slice.b.succ >= textLen and link.isTwitterUrl:
if slice.a < textLen: if slice.a < textLen:

View file

@ -1,13 +1,50 @@
import std/[strutils, tables] import std/[strutils, tables, options]
import jsony import jsony
import user, ../types/timeline import user, tweet, utils, ../types/timeline
from ../../types import Result, User from ../../types import Result, User, Tweet
proc parseHook(s: string; i: var int; v: var Slice[int]) =
  ## Parses a two-element integer array `[start, stop]` and stores it in `v`
  ## as the half-open range `start ..< stop`.
  var bounds: array[2, int]
  parseHook(s, i, bounds)
  v = bounds[0] .. pred(bounds[1])
proc getId(id: string): string {.inline.} = proc getId(id: string): string {.inline.} =
let start = id.rfind("-") let start = id.rfind("-")
if start < 0: return id if start < 0: return id
id[start + 1 ..< id.len] id[start + 1 ..< id.len]
proc processTweet(id: string; objects: GlobalObjects;
                  userCache: var Table[string, User]): Tweet =
  ## Converts the raw tweet stored under `id` in `objects.tweets` and links
  ## it to its author, retweet and quote.
  ##
  ## Authors are converted at most once per `userCache`. A retweeted or
  ## quoted tweet that is present in `objects.tweets` is processed
  ## recursively; one that is missing is represented by a `Tweet` carrying
  ## only its id. Raises `KeyError` when `id` is not in `objects.tweets`.
  let raw = objects.tweets[id]
  result = toTweet(raw)

  let authorId = result.user.id
  if authorId.len > 0 and authorId in objects.users:
    if not userCache.hasKey(authorId):
      userCache[authorId] = toUser(objects.users[authorId])
    result.user = userCache[authorId]

  let retweetId = raw.retweetedStatusIdStr
  if retweetId.len > 0:
    let retweeted =
      if retweetId in objects.tweets:
        processTweet(retweetId, objects, userCache)
      else:
        Tweet(id: retweetId.toId)
    result.retweet = some(retweeted)

  let quoteId = raw.quotedStatusIdStr
  if quoteId.len > 0:
    let quoted =
      if quoteId in objects.tweets:
        processTweet(quoteId, objects, userCache)
      else:
        Tweet(id: quoteId.toId)
    result.quote = some(quoted)
proc parseCursor[T](e: Entry; result: var Result[T]) =
  ## Copies the cursor value of entry `e` into `result.top` or
  ## `result.bottom`, depending on the cursor type. Any other cursor type is
  ## ignored.
  let cursor = e.content.operation.cursor
  case cursor.cursorType
  of "Top":
    result.top = cursor.value
  of "Bottom":
    result.bottom = cursor.value
  else:
    discard
proc parseUsers*(json: string; after=""): Result[User] = proc parseUsers*(json: string; after=""): Result[User] =
result = Result[User](beginning: after.len == 0) result = Result[User](beginning: after.len == 0)
@ -16,13 +53,32 @@ proc parseUsers*(json: string; after=""): Result[User] =
return return
for e in raw.timeline.instructions[0].addEntries.entries: for e in raw.timeline.instructions[0].addEntries.entries:
let id = e.entryId.getId let
if e.entryId.startsWith("user"): eId = e.entryId
id = eId.getId
if eId.startsWith("user") or eId.startsWith("sq-U"):
if id in raw.globalObjects.users: if id in raw.globalObjects.users:
result.content.add toUser raw.globalObjects.users[id] result.content.add toUser raw.globalObjects.users[id]
elif e.entryId.startsWith("cursor"): elif eId.startsWith("cursor") or eId.startsWith("sq-C"):
let cursor = e.content.operation.cursor parseCursor(e, result)
if cursor.cursorType == "Top":
result.top = cursor.value proc parseTweets*(json: string; after=""): Result[Tweet] =
elif cursor.cursorType == "Bottom": result = Result[Tweet](beginning: after.len == 0)
result.bottom = cursor.value
let raw = json.fromJson(Search)
if raw.timeline.instructions.len == 0:
return
var userCache: Table[string, User]
for e in raw.timeline.instructions[0].addEntries.entries:
let
eId = e.entryId
id = eId.getId
if eId.startsWith("tweet") or eId.startsWith("sq-I-t"):
if id in raw.globalObjects.tweets:
result.content.add processTweet(id, raw.globalObjects, userCache)
elif eId.startsWith("cursor") or eId.startsWith("sq-C"):
parseCursor(e, result)

View file

@ -0,0 +1,97 @@
import std/[strutils, options, algorithm, json]
import std/unicode except strip
import utils, slices, media, user
import ../types/tweet
from ../types/media as mediaTypes import MediaType
from ../../types import Tweet, User, TweetStats
# Rewrites `tweet.text` from `raw.fullText`: collects a ReplaceSlice for every
# url, media link, hashtag, symbol and in-range mention, applies them over the
# display text range, and strips trailing whitespace. Also maintains
# `tweet.reply`, the list of screen names the tweet replies to.
proc expandTweetEntities(tweet: var Tweet; raw: RawTweet) =
let
# The text is handled as runes; the replacement slices index into this seq.
orig = raw.fullText.toRunes
textRange = raw.displayTextRange
# NOTE(review): this slice includes textRange[1], whereas entity indices are
# parsed as `a ..< b` by the Slice[int] parseHook — confirm the inclusive
# upper bound is intended.
textSlice = textRange[0] .. textRange[1]
# Only referenced by the commented-out card handling below.
hasCard = raw.card.isSome
var replyTo = ""
# NOTE(review): replies are detected with `> 0` here and `!= 0` further down.
if tweet.replyId > 0:
tweet.reply.add raw.inReplyToScreenName
replyTo = raw.inReplyToScreenName
var replacements = newSeq[ReplaceSlice]()
for u in raw.entities.urls:
# Skip url entities that are empty or do not occur in the full text.
if u.url.len == 0 or u.url notin raw.fullText:
continue
replacements.extractUrls(u, textSlice.b, hideTwitter=raw.isQuoteStatus)
# if hasCard and u.url == get(tweet.card).url:
# get(tweet.card).url = u.expandedUrl
# Media links always go through extractUrls with hideTwitter enabled.
for m in raw.entities.media:
replacements.extractUrls(m, textSlice.b, hideTwitter=true)
for hashtag in raw.entities.hashtags:
replacements.extractHashtags(hashtag.indices)
# Symbols are registered through the same helper as hashtags.
for symbol in raw.entities.symbols:
replacements.extractHashtags(symbol.indices)
for mention in raw.entities.userMentions:
let
name = mention.screenName
# Position of this screen name in the reply list, or -1 when absent.
idx = tweet.reply.find(name)
# Mentions starting at or after the display range become profile links.
if mention.indices.a >= textSlice.a:
replacements.add ReplaceSlice(kind: rkMention, slice: mention.indices,
url: "/" & name, display: mention.name)
# A listed name other than the direct reply target is removed from the list.
if idx > -1 and name != replyTo:
tweet.reply.delete idx
# An unlisted name is added to the reply list when the tweet is a reply.
elif idx == -1 and tweet.replyId != 0:
tweet.reply.add name
replacements.dedupSlices
replacements.sort(cmp)
tweet.text = orig.replacedWith(replacements, textSlice)
.strip(leading=false)
# Converts a RawTweet into a Tweet: ids, timestamp, stats, expanded text,
# poll/card image, and attached photos, video or gif.
proc toTweet*(raw: RawTweet): Tweet =
result = Tweet(
id: raw.idStr.toId,
threadId: raw.conversationIdStr.toId,
replyId: raw.inReplyToStatusIdStr.toId,
time: parseTwitterDate(raw.createdAt),
# A non-empty self-thread id marks the tweet as having a thread.
hasThread: raw.selfThread.idStr.len > 0,
available: true,
# Only the author's id is set here; no other user fields are filled in.
user: User(id: raw.userIdStr),
stats: TweetStats(
replies: raw.replyCount,
retweets: raw.retweetCount,
likes: raw.favoriteCount,
quotes: raw.quoteCount
)
)
# Fills in tweet.text and tweet.reply from the raw entities.
result.expandTweetEntities(raw)
if raw.card.isSome:
let card = raw.card.get
# Cards whose name contains "poll" are parsed into result.poll.
if "poll" in card.name:
result.poll = some parsePoll(card)
# Cards whose name contains "image" contribute their large image as a photo.
if "image" in card.name:
result.photos.add card.bindingValues{"image_large", "image_value", "url"}
.getStr.getImageUrl
# elif card.name == "amplify":
# discard
# # result.video = some(parsePromoVideo(jsCard{"binding_values"}))
# else:
# result.card = some parseCard(card, raw.entities.urls)
# Media comes from extendedEntities, dispatched on the media kind.
for m in raw.extendedEntities.media:
case m.kind
of photo: result.photos.add m.getImageUrl
of video:
result.video = some parseVideo(m)
# The video's source user, when present, is recorded as the attribution.
if m.additionalMediaInfo.sourceUser.isSome:
result.attribution = some toUser get(m.additionalMediaInfo.sourceUser)
of animatedGif: result.gif = some parseGif(m)

View file

@ -1,11 +1,8 @@
import std/[options, tables, strutils, strformat, sugar] import std/[options, tables, strformat]
import jsony import jsony
import ../types/unifiedcard import utils
import ".."/types/[unifiedcard, media]
from ../../types import Card, CardKind, Video from ../../types import Card, CardKind, Video
from ../../utils import twimg, https
proc getImageUrl(entity: MediaEntity): string =
entity.mediaUrlHttps.dup(removePrefix(twimg), removePrefix(https))
proc parseDestination(id: string; card: UnifiedCard; result: var Card) = proc parseDestination(id: string; card: UnifiedCard; result: var Card) =
let destination = card.destinationObjects[id].data let destination = card.destinationObjects[id].data
@ -66,6 +63,7 @@ proc parseMedia(component: Component; card: UnifiedCard; result: var Card) =
durationMs: videoInfo.durationMillis, durationMs: videoInfo.durationMillis,
variants: videoInfo.variants variants: videoInfo.variants
) )
of animatedGif: discard
proc parseUnifiedCard*(json: string): Card = proc parseUnifiedCard*(json: string): Card =
let card = json.fromJson(UnifiedCard) let card = json.fromJson(UnifiedCard)
@ -78,7 +76,7 @@ proc parseUnifiedCard*(json: string): Card =
component.data.parseAppDetails(card, result) component.data.parseAppDetails(card, result)
of mediaWithDetailsHorizontal: of mediaWithDetailsHorizontal:
component.data.parseMediaDetails(card, result) component.data.parseMediaDetails(card, result)
of media, swipeableMedia: of ComponentType.media, swipeableMedia:
component.parseMedia(card, result) component.parseMedia(card, result)
of buttonGroup: of buttonGroup:
discard discard

View file

@ -1,12 +1,16 @@
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
import std/[sugar, strutils, times] import std/[sugar, strutils, times]
import ../types/common import ".."/types/[common, media, tweet]
import ../../utils as uutils import ../../utils as uutils
template parseTime(time: string; f: static string; flen: int): DateTime = template parseTime(time: string; f: static string; flen: int): DateTime =
if time.len != flen: return if time.len != flen: return
parse(time, f, utc()) parse(time, f, utc())
proc toId*(id: string): int64 =
  ## Parses a decimal id string into an `int64`.
  ##
  ## An empty string yields 0. Raises `ValueError` when a non-empty `id` is
  ## not a valid integer.
  if id.len > 0:
    result = parseBiggestInt(id)
proc parseIsoDate*(date: string): DateTime = proc parseIsoDate*(date: string): DateTime =
date.parseTime("yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20) date.parseTime("yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)
@ -16,6 +20,9 @@ proc parseTwitterDate*(date: string): DateTime =
proc getImageUrl*(url: string): string = proc getImageUrl*(url: string): string =
url.dup(removePrefix(twimg), removePrefix(https)) url.dup(removePrefix(twimg), removePrefix(https))
proc getImageUrl*(entity: MediaEntity | Entity): string =
  ## Returns the image URL for a media entity by passing its
  ## `mediaUrlHttps` to the string overload of `getImageUrl`.
  result = getImageUrl(entity.mediaUrlHttps)
template handleErrors*(body) = template handleErrors*(body) =
if json.startsWith("{\"errors"): if json.startsWith("{\"errors"):
for error {.inject.} in json.fromJson(Errors).errors: for error {.inject.} in json.fromJson(Errors).errors:

View file

@ -1,3 +1,4 @@
import jsony
from ../../types import Error from ../../types import Error
type type
@ -5,7 +6,7 @@ type
url*: string url*: string
expandedUrl*: string expandedUrl*: string
displayUrl*: string displayUrl*: string
indices*: array[2, int] indices*: Slice[int]
ErrorObj* = object ErrorObj* = object
code*: Error code*: Error
@ -18,3 +19,8 @@ proc contains*(codes: set[Error]; errors: Errors): bool =
for e in errors.errors: for e in errors.errors:
if e.code in codes: if e.code in codes:
return true return true
proc parseHook*(s: string; i: var int; v: var Slice[int]) =
  ## Parses a two-element integer array `[start, stop]` and stores it in `v`
  ## as the half-open range `start ..< stop`.
  var bounds: array[2, int]
  parseHook(s, i, bounds)
  v = bounds[0] .. pred(bounds[1])

View file

@ -0,0 +1,15 @@
import options
from ../../types import VideoType, VideoVariant
type
# Kinds of media an entity can carry.
MediaType* = enum
photo, video, animatedGif
# A media item: its kind, its HTTPS URL and, when present, video details.
MediaEntity* = object
kind*: MediaType
mediaUrlHttps*: string
videoInfo*: Option[VideoInfo]
# Video duration in milliseconds plus the list of available variants.
VideoInfo* = object
durationMillis*: int
variants*: seq[VideoVariant]

View file

@ -1,13 +1,14 @@
import std/tables import std/tables
import user import user, tweet
type type
Search* = object Search* = object
globalObjects*: GlobalObjects globalObjects*: GlobalObjects
timeline*: Timeline timeline*: Timeline
GlobalObjects = object GlobalObjects* = object
users*: Table[string, RawUser] users*: Table[string, RawUser]
tweets*: Table[string, RawTweet]
Timeline = object Timeline = object
instructions*: seq[Instructions] instructions*: seq[Instructions]
@ -15,9 +16,13 @@ type
Instructions = object Instructions = object
addEntries*: tuple[entries: seq[Entry]] addEntries*: tuple[entries: seq[Entry]]
Entry = object Entry* = object
entryId*: string entryId*: string
content*: tuple[operation: Operation] content*: tuple[operation: Operation]
Operation = object Operation = object
cursor*: tuple[value, cursorType: string] cursor*: tuple[value, cursorType: string]
proc renameHook*(v: var Entity; fieldName: var string) =
  ## Field-rename hook for `Entity`: the incoming field name `type` is
  ## rewritten to `kind`; every other name is left unchanged.
  case fieldName
  of "type":
    fieldName = "kind"
  else:
    discard

View file

@ -0,0 +1,85 @@
import options
import jsony
from json import JsonNode
import user, media, common
type
# Tweet as it is deserialized, before conversion into the app's Tweet type.
# presumably field names mirror the API's JSON keys in camelCase — TODO confirm
RawTweet* = object
createdAt*: string
idStr*: string
fullText*: string
# Two-element [start, end] range of the displayed part of fullText.
displayTextRange*: array[2, int]
entities*: Entities
extendedEntities*: ExtendedEntities
inReplyToStatusIdStr*: string
inReplyToScreenName*: string
userIdStr*: string
isQuoteStatus*: bool
replyCount*: int
retweetCount*: int
favoriteCount*: int
quoteCount*: int
conversationIdStr*: string
favorited*: bool
retweeted*: bool
selfThread*: tuple[idStr: string]
card*: Option[Card]
quotedStatusIdStr*: string
retweetedStatusIdStr*: string
# Card attached to a tweet; bindingValues is kept as an untyped JSON tree.
Card* = object
name*: string
url*: string
bindingValues*: JsonNode
# Text entities of a tweet; symbols reuse the Hashtag shape.
Entities* = object
hashtags*: seq[Hashtag]
symbols*: seq[Hashtag]
userMentions*: seq[UserMention]
urls*: seq[Url]
media*: seq[Entity]
# Only the position of a hashtag in the text is kept.
Hashtag* = object
indices*: Slice[int]
UserMention* = object
screenName*: string
name*: string
indices*: Slice[int]
ExtendedEntities* = object
media*: seq[Entity]
# Media entity of a tweet. `kind` is filled from the JSON key "type" via the
# renameHook defined in this file.
Entity* = object
kind*: MediaType
indices*: Slice[int]
mediaUrlHttps*: string
url*: string
expandedUrl*: string
videoInfo*: VideoInfo
ext*: Ext
extMediaAvailability*: tuple[status: string]
extAltText*: string
additionalMediaInfo*: AdditionalMediaInfo
sourceStatusIdStr*: string
sourceUserIdStr*: string
AdditionalMediaInfo* = object
sourceUser*: Option[RawUser]
title*: string
description*: string
# mediaStats is kept as an untyped JSON tree.
Ext* = object
mediaStats*: JsonNode
# NOTE(review): not referenced by any field in this type section.
MediaStats* = object
ok*: tuple[viewCount: string]
proc renameHook*(v: var Entity; fieldName: var string) =
  ## Field-rename hook for `Entity`: the incoming field name `type` is
  ## rewritten to `kind`; every other name is left unchanged.
  case fieldName
  of "type":
    fieldName = "kind"
  else:
    discard
proc parseHook*(s: string; i: var int; v: var Slice[int]) =
  ## Parses a two-element integer array `[start, stop]` and stores it in `v`
  ## as the half-open range `start ..< stop`.
  var bounds: array[2, int]
  parseHook(s, i, bounds)
  v = bounds[0] .. pred(bounds[1])

View file

@ -1,5 +1,5 @@
import options, tables import options, tables
from ../../types import VideoType, VideoVariant import media as mediaTypes
type type
UnifiedCard* = object UnifiedCard* = object
@ -38,25 +38,13 @@ type
id*: string id*: string
destination*: string destination*: string
Destination* = object
kind*: string
data*: tuple[urlData: UrlData]
UrlData* = object UrlData* = object
url*: string url*: string
vanity*: string vanity*: string
MediaType* = enum Destination* = object
photo, video kind*: string
data*: tuple[urlData: UrlData]
MediaEntity* = object
kind*: MediaType
mediaUrlHttps*: string
videoInfo*: Option[VideoInfo]
VideoInfo* = object
durationMillis*: int
variants*: seq[VideoVariant]
AppType* = enum AppType* = enum
androidApp, iPhoneApp, iPadApp androidApp, iPhoneApp, iPadApp

View file

@ -1,4 +1,5 @@
import options import options
import jsony
import common import common
type type
@ -41,3 +42,8 @@ type
Color* = object Color* = object
red*, green*, blue*: int red*, green*, blue*: int
proc parseHook*(s: string; i: var int; v: var Slice[int]) =
  ## Parses a two-element integer array `[start, stop]` and stores it in `v`
  ## as the half-open range `start ..< stop`.
  var bounds: array[2, int]
  parseHook(s, i, bounds)
  v = bounds[0] .. pred(bounds[1])