mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-12-22 10:35:29 +00:00
Open-sourcing Representation Manager
Representation Manager (RMS) serves as a centralized embedding management system, providing SimClusters or other embeddings as facade of the underlying storage or services.
This commit is contained in:
parent
197bf2c563
commit
43cdcf2ed6
1
representation-manager/BUILD.bazel
Normal file
1
representation-manager/BUILD.bazel
Normal file
|
@ -0,0 +1 @@
|
|||
# This prevents SQ query from grabbing //:all since it traverses up once to find a BUILD
|
4
representation-manager/README.md
Normal file
4
representation-manager/README.md
Normal file
|
@ -0,0 +1,4 @@
|
|||
# Representation Manager #
|
||||
|
||||
**Representation Manager** (RMS) serves as a centralized embedding management system. It provides SimClusters and other embeddings through a single facade over the underlying storage and services.
|
||||
|
4
representation-manager/bin/deploy.sh
Executable file
4
representation-manager/bin/deploy.sh
Executable file
|
@ -0,0 +1,4 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
JOB=representation-manager bazel run --ui_event_filters=-info,-stdout,-stderr --noshow_progress \
|
||||
//relevance-platform/src/main/python/deploy -- "$@"
|
|
@ -0,0 +1,17 @@
|
|||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"finatra/inject/inject-thrift-client",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore",
|
||||
"representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
|
||||
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"stitch/stitch-storehaus",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,208 @@
|
|||
package com.twitter.representation_manager
|
||||
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.representation_manager.config.ClientConfig
|
||||
import com.twitter.representation_manager.config.DisabledInMemoryCacheParams
|
||||
import com.twitter.representation_manager.config.EnabledInMemoryCacheParams
|
||||
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.strato.thrift.ScroogeConvImplicits._
|
||||
|
||||
/**
 * Builds ReadableStores of SimClusters embeddings for a given SimClustersEmbeddingView
 * (i.e. embeddingType and modelVersion).
 *
 * Every store reads from a Strato column under `recommendations/representation_manager/` and is
 * then wrapped by [[addCacheLayer]], which applies the in-memory cache settings that
 * `clientConfig` holds for the view's (embeddingType, modelVersion).
 *
 * @param clientConfig    per-service config; only its in-memory cache setup is consulted here
 * @param stratoClient    client used to fetch from the Strato columns
 * @param memCachedClient not referenced anywhere in this class; kept so existing callers compile
 * @param globalStats     root stats receiver; scoped under "representation_manager_client" and
 *                        this class's simple name
 */
class StoreBuilder(
  clientConfig: ClientConfig,
  stratoClient: StratoClient,
  memCachedClient: MemcachedClient,
  globalStats: StatsReceiver,
) {
  private val stats =
    globalStats.scope("representation_manager_client").scope(this.getClass.getSimpleName)

  // Column consts
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"
  private val SimclustersTopicIdColPath = ColPathPrefix + "simClustersEmbedding.TopicId"
  private val SimclustersLocaleEntityIdColPath =
    ColPathPrefix + "simClustersEmbedding.LocaleEntityId"

  // windowSize handed to ObservedCachedReadableStore.from for every cached store.
  private val InMemCacheWindowSize = 10000L

  /** Tweet embedding store keyed by tweet id (Long). */
  def buildSimclustersTweetEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    addCacheLayer(rawTweetStore(embeddingColumnView), embeddingColumnView)

  /** User embedding store keyed by user id (Long). */
  def buildSimclustersUserEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    addCacheLayer(rawUserStore(embeddingColumnView), embeddingColumnView)

  /** Topic embedding store keyed by TopicId. */
  def buildSimclustersTopicIdEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[TopicId, SimClustersEmbedding] =
    addCacheLayer(rawTopicIdStore(embeddingColumnView), embeddingColumnView)

  /** Locale-entity embedding store keyed by LocaleEntityId. */
  def buildSimclustersLocaleEntityIdEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[LocaleEntityId, SimClustersEmbedding] =
    addCacheLayer(rawLocaleEntityIdStore(embeddingColumnView), embeddingColumnView)

  /**
   * Tweet embedding store keyed by SimClustersEmbeddingId.
   *
   * The key mapping only handles ids whose internalId is an InternalId.TweetId; any other key
   * fails the pattern match (scala.MatchError) when the mapping is applied. The cache layer is
   * added after the key mapping, so it is keyed by SimClustersEmbeddingId.
   */
  def buildSimclustersTweetEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      rawTweetStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.TweetId(tweetId)) =>
          tweetId
      }

    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * User embedding store keyed by SimClustersEmbeddingId.
   *
   * The key mapping only handles ids whose internalId is an InternalId.UserId; any other key
   * fails the pattern match (scala.MatchError) when the mapping is applied.
   */
  def buildSimclustersUserEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      rawUserStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) =>
          userId
      }

    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * Topic embedding store keyed by SimClustersEmbeddingId.
   *
   * The key mapping only handles ids whose internalId is an InternalId.TopicId; any other key
   * fails the pattern match (scala.MatchError) when the mapping is applied.
   */
  def buildSimclustersTopicEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      rawTopicIdStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.TopicId(topicId)) =>
          topicId
      }

    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * Same store as [[buildSimclustersTopicEmbeddingStoreWithEmbeddingIdAsKey]]; both names are
   * kept for existing callers. Each call builds a new store instance.
   */
  def buildSimclustersTopicIdEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    buildSimclustersTopicEmbeddingStoreWithEmbeddingIdAsKey(embeddingColumnView)

  /**
   * Locale-entity embedding store keyed by SimClustersEmbeddingId.
   *
   * The key mapping only handles ids whose internalId is an InternalId.LocaleEntityId; any other
   * key fails the pattern match (scala.MatchError) when the mapping is applied.
   */
  def buildSimclustersLocaleEntityIdEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      rawLocaleEntityIdStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(localeEntityId)) =>
          localeEntityId
      }

    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  // Uncached Strato-backed store for the Tweet column, with thrift values converted to
  // SimClustersEmbedding.
  private def rawTweetStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersTweetColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  // Uncached Strato-backed store for the User column.
  private def rawUserStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersUserColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  // Uncached Strato-backed store for the TopicId column.
  private def rawTopicIdStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[TopicId, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[TopicId, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersTopicIdColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  // Uncached Strato-backed store for the LocaleEntityId column.
  private def rawLocaleEntityIdStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[LocaleEntityId, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[LocaleEntityId, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersLocaleEntityIdColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  /**
   * Wraps `rawStore` according to the in-memory cache setup that `clientConfig` returns for the
   * view's embeddingType and modelVersion:
   *  - DisabledInMemoryCacheParams: ObservedReadableStore only
   *  - EnabledInMemoryCacheParams: ObservedCachedReadableStore with the configured ttl, maxKeys
   *    and cacheName
   *
   * Stats for the wrapped store are scoped by embeddingType name, then modelVersion name.
   */
  private def addCacheLayer[K](
    rawStore: ReadableStore[K, SimClustersEmbedding],
    embeddingColumnView: SimClustersEmbeddingView,
  ): ReadableStore[K, SimClustersEmbedding] = {
    // Add in-memory caching based on ClientConfig
    val inMemCacheParams = clientConfig.inMemoryCacheConfig
      .getCacheSetup(embeddingColumnView.embeddingType, embeddingColumnView.modelVersion)

    val statsPerStore = stats
      .scope(embeddingColumnView.embeddingType.name).scope(embeddingColumnView.modelVersion.name)

    inMemCacheParams match {
      case DisabledInMemoryCacheParams =>
        ObservedReadableStore(
          store = rawStore
        )(statsPerStore)
      case EnabledInMemoryCacheParams(ttl, maxKeys, cacheName) =>
        ObservedCachedReadableStore.from[K, SimClustersEmbedding](
          rawStore,
          ttl = ttl,
          maxKeys = maxKeys,
          cacheName = cacheName,
          windowSize = InMemCacheWindowSize
        )(statsPerStore)
    }
  }

}
|
|
@ -0,0 +1,12 @@
|
|||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"finatra/inject/inject-thrift-client",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
|
||||
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,25 @@
|
|||
package com.twitter.representation_manager.config
|
||||
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
|
||||
/**
 * RMS client config.
 *
 * Only in-memory cache params can be configured for now; other customisations (e.g. request
 * timeout) are expected to be added later.
 *
 * --------------------------------------------
 * PLEASE NOTE:
 * Having an in-memory cache is not necessarily a free performance win. Anyone considering it
 * should investigate rather than blindly enabling it.
 *
 * @param inMemCacheParamsOverrides per-(EmbeddingType, ModelVersion) cache params; entries here
 *                                  take precedence over DefaultInMemoryCacheConfig.cacheParamsMap
 */
class ClientConfig(
  inMemCacheParamsOverrides: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] = Map.empty) {
  // In memory cache config per embedding: defaults first, then overrides (right side of ++ wins).
  val inMemCacheParams: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] =
    DefaultInMemoryCacheConfig.cacheParamsMap ++ inMemCacheParamsOverrides
  val inMemoryCacheConfig: InMemoryCacheConfig = new InMemoryCacheConfig(inMemCacheParams)
}
|
||||
|
||||
/** ClientConfig built with no in-memory cache overrides (the constructor's empty default). */
object DefaultClientConfig extends ClientConfig
|
|
@ -0,0 +1,53 @@
|
|||
package com.twitter.representation_manager.config
|
||||
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
import com.twitter.util.Duration
|
||||
|
||||
/*
|
||||
* --------------------------------------------
|
||||
* PLEASE NOTE:
|
||||
* Having in-memory cache is not necessarily a free performance win, anyone considering it should
|
||||
* investigate rather than blindly enabling it
|
||||
* --------------------------------------------
|
||||
* */
|
||||
|
||||
/** In-memory cache setting for a single embedding store: either enabled with params or disabled. */
sealed trait InMemoryCacheParams
|
||||
|
||||
/**
 * Params required to set up an in-memory cache for a single embedding store.
 *
 * @param ttl       time-to-live applied to cached entries
 * @param maxKeys   maximum number of keys held by the cache
 * @param cacheName name given to the cache
 */
final case class EnabledInMemoryCacheParams(
  ttl: Duration,
  maxKeys: Int,
  cacheName: String)
    extends InMemoryCacheParams
|
||||
/** Marker meaning "no in-memory cache" for an embedding store. */
case object DisabledInMemoryCacheParams extends InMemoryCacheParams
|
||||
|
||||
/**
 * In-memory cache config, keyed by (EmbeddingType, ModelVersion).
 *
 * Clients may construct their own instance with a custom cacheParamsMap instead of relying on
 * the DefaultInMemoryCacheConfig object.
 */
class InMemoryCacheConfig(
  cacheParamsMap: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] = Map.empty) {

  /**
   * Looks up the cache params registered for the given embedding type and model version.
   *
   * @return the registered params, or DisabledInMemoryCacheParams when the pair has no entry
   */
  def getCacheSetup(
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): InMemoryCacheParams = {
    val lookupKey = (embeddingType, modelVersion)
    cacheParamsMap.get(lookupKey) match {
      case Some(configured) => configured
      // No entry for the requested pair: caching stays off.
      case None => DisabledInMemoryCacheParams
    }
  }
}
|
||||
|
||||
/**
 * Default config for the in-memory cache.
 * Clients can directly import and use this one if they don't want to set up a customised config.
 *
 * This object extends InMemoryCacheConfig without constructor arguments, so lookups through
 * getCacheSetup use the superclass's empty default map. The `cacheParamsMap` member below is a
 * separate public value that exposes the (empty) defaults for callers to read.
 */
object DefaultInMemoryCacheConfig extends InMemoryCacheConfig {
  // set default to no in-memory caching
  // Explicit type so the value is not inferred as Map[Nothing, Nothing].
  val cacheParamsMap: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] = Map.empty
}
|
21
representation-manager/server/BUILD
Normal file
21
representation-manager/server/BUILD
Normal file
|
@ -0,0 +1,21 @@
|
|||
jvm_binary(
|
||||
name = "bin",
|
||||
basename = "representation-manager",
|
||||
main = "com.twitter.representation_manager.RepresentationManagerFedServerMain",
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"finatra/inject/inject-logback/src/main/scala",
|
||||
"loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback",
|
||||
"representation-manager/server/src/main/resources",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager",
|
||||
"twitter-server/logback-classic/src/main/scala",
|
||||
],
|
||||
)
|
||||
|
||||
# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app
|
||||
jvm_app(
|
||||
name = "representation-manager-app",
|
||||
archive = "zip",
|
||||
binary = ":bin",
|
||||
)
|
7
representation-manager/server/src/main/resources/BUILD
Normal file
7
representation-manager/server/src/main/resources/BUILD
Normal file
|
@ -0,0 +1,7 @@
|
|||
resources(
|
||||
sources = [
|
||||
"*.xml",
|
||||
"config/*.yml",
|
||||
],
|
||||
tags = ["bazel-compatible"],
|
||||
)
|
|
@ -0,0 +1,219 @@
|
|||
# ---------- traffic percentage by embedding type and model version ----------
|
||||
# Decider strings are built dynamically following the rule below
|
||||
# i.e. s"enable_${embeddingType.name}_${modelVersion.name}"
|
||||
# Hence this should be updated accordingly if usage is changed in the embedding stores
|
||||
|
||||
# Tweet embeddings
|
||||
"enable_LogFavBasedTweet_Model20m145k2020":
|
||||
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavBasedTweet - Model20m145k2020. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedTweet_Model20m145kUpdated":
|
||||
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavBasedTweet - Model20m145kUpdated. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavLongestL2EmbeddingTweet_Model20m145k2020":
|
||||
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavLongestL2EmbeddingTweet - Model20m145k2020. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavLongestL2EmbeddingTweet_Model20m145kUpdated":
|
||||
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavLongestL2EmbeddingTweet - Model20m145kUpdated. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
# Topic embeddings
|
||||
"enable_FavTfgTopic_Model20m145k2020":
|
||||
comment: "Enable the read traffic to FavTfgTopic - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedKgoApeTopic_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedKgoApeTopic - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
# User embeddings - KnownFor
|
||||
"enable_FavBasedProducer_Model20m145kUpdated":
|
||||
comment: "Enable the read traffic to FavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FavBasedProducer_Model20m145k2020":
|
||||
comment: "Enable the read traffic to FavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FollowBasedProducer_Model20m145k2020":
|
||||
comment: "Enable the read traffic to FollowBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_AggregatableFavBasedProducer_Model20m145kUpdated":
|
||||
comment: "Enable the read traffic to AggregatableFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_AggregatableFavBasedProducer_Model20m145k2020":
|
||||
comment: "Enable the read traffic to AggregatableFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_AggregatableLogFavBasedProducer_Model20m145kUpdated":
|
||||
comment: "Enable the read traffic to AggregatableLogFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_AggregatableLogFavBasedProducer_Model20m145k2020":
|
||||
comment: "Enable the read traffic to AggregatableLogFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
enable_RelaxedAggregatableLogFavBasedProducer_Model20m145kUpdated:
|
||||
comment: "Enable the read traffic to RelaxedAggregatableLogFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
enable_RelaxedAggregatableLogFavBasedProducer_Model20m145k2020:
|
||||
comment: "Enable the read traffic to RelaxedAggregatableLogFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
# User embeddings - InterestedIn
|
||||
"enable_LogFavBasedUserInterestedInFromAPE_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedUserInterestedInFromAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FollowBasedUserInterestedInFromAPE_Model20m145k2020":
|
||||
comment: "Enable the read traffic to FollowBasedUserInterestedInFromAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FavBasedUserInterestedIn_Model20m145kUpdated":
|
||||
comment: "Enable the read traffic to FavBasedUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FavBasedUserInterestedIn_Model20m145k2020":
|
||||
comment: "Enable the read traffic to FavBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FollowBasedUserInterestedIn_Model20m145k2020":
|
||||
comment: "Enable the read traffic to FollowBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedUserInterestedIn_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FavBasedUserInterestedInFromPE_Model20m145kUpdated":
|
||||
comment: "Enable the read traffic to FavBasedUserInterestedInFromPE - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FilteredUserInterestedIn_Model20m145kUpdated":
|
||||
comment: "Enable the read traffic to FilteredUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FilteredUserInterestedIn_Model20m145k2020":
|
||||
comment: "Enable the read traffic to FilteredUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_FilteredUserInterestedInFromPE_Model20m145kUpdated":
|
||||
comment: "Enable the read traffic to FilteredUserInterestedInFromPE - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_UnfilteredUserInterestedIn_Model20m145kUpdated":
|
||||
comment: "Enable the read traffic to UnfilteredUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_UnfilteredUserInterestedIn_Model20m145k2020":
|
||||
comment: "Enable the read traffic to UnfilteredUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_UserNextInterestedIn_Model20m145k2020":
|
||||
comment: "Enable the read traffic to UserNextInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedUserInterestedAverageAddressBookFromIIAPE_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedUserInterestedAverageAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
"enable_LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||
comment: "Enable the read traffic to LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||
default_availability: 10000
|
||||
|
||||
# ---------- load shedding by caller id ----------
|
||||
# To create a new decider, add here with the same format and caller's details :
|
||||
# "representation-manager_load_shed_by_caller_id_twtr:{{role}}:{{name}}:{{environment}}:{{cluster}}"
|
||||
# All the deciders below are generated by this script:
|
||||
# ./strato/bin/fed deciders representation-manager --service-role=representation-manager --service-name=representation-manager
|
||||
# If you need to run the script and paste the output, add ONLY the prod deciders here.
|
||||
"representation-manager_load_shed_by_caller_id_all":
|
||||
comment: "Reject all traffic from caller id: all"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:cr-mixer:cr-mixer:prod:atla":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:cr-mixer:cr-mixer:prod:atla"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:cr-mixer:cr-mixer:prod:pdxa":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:cr-mixer:cr-mixer:prod:pdxa"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-1:prod:atla":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-1:prod:atla"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-1:prod:pdxa":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-1:prod:pdxa"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-3:prod:atla":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-3:prod:atla"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-3:prod:pdxa":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-3:prod:pdxa"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-4:prod:atla":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-4:prod:atla"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-4:prod:pdxa":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-4:prod:pdxa"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:atla":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:atla"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:pdxa":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:pdxa"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann:prod:atla":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann:prod:atla"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann:prod:pdxa":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann:prod:pdxa"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoapi:prod:atla":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoapi:prod:atla"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:atla":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:atla"
|
||||
default_availability: 0
|
||||
|
||||
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:pdxa":
|
||||
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:pdxa"
|
||||
default_availability: 0
|
||||
|
||||
# ---------- Dark Traffic Proxy ----------
|
||||
representation-manager_forward_dark_traffic:
|
||||
comment: "Defines the percentage of traffic to forward to diffy-proxy. Set to 0 to disable dark traffic forwarding"
|
||||
default_availability: 0
|
165
representation-manager/server/src/main/resources/logback.xml
Normal file
165
representation-manager/server/src/main/resources/logback.xml
Normal file
|
@ -0,0 +1,165 @@
|
|||
<configuration>
|
||||
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>
|
||||
|
||||
<!-- ===================================================== -->
|
||||
<!-- Service Config -->
|
||||
<!-- ===================================================== -->
|
||||
<property name="DEFAULT_SERVICE_PATTERN"
|
||||
value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>
|
||||
|
||||
<property name="DEFAULT_ACCESS_PATTERN"
|
||||
value="%msg"/>
|
||||
|
||||
<!-- ===================================================== -->
|
||||
<!-- Common Config -->
|
||||
<!-- ===================================================== -->
|
||||
|
||||
<!-- JUL/JDK14 to Logback bridge -->
|
||||
<contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
|
||||
<resetJUL>true</resetJUL>
|
||||
</contextListener>
|
||||
|
||||
<!-- ====================================================================================== -->
|
||||
<!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration. -->
|
||||
<!-- You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy. -->
|
||||
<!-- See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy -->
|
||||
<!-- ====================================================================================== -->
|
||||
|
||||
<!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
|
||||
<appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<file>${log.service.output}</file>
|
||||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||||
<!-- daily rollover -->
|
||||
<fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
|
||||
<!-- the maximum total size of all the log files -->
|
||||
<totalSizeCap>3GB</totalSizeCap>
|
||||
<!-- keep maximum 21 days' worth of history -->
|
||||
<maxHistory>21</maxHistory>
|
||||
<cleanHistoryOnStart>true</cleanHistoryOnStart>
|
||||
</rollingPolicy>
|
||||
<encoder>
|
||||
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<!-- Access Log (rollover daily, keep maximum of 7 days of gzip compressed logs) -->
|
||||
<appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<file>${log.access.output}</file>
|
||||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||||
<!-- daily rollover -->
|
||||
<fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
|
||||
<!-- the maximum total size of all the log files -->
|
||||
<totalSizeCap>100MB</totalSizeCap>
|
||||
<!-- keep maximum 7 days' worth of history -->
|
||||
<maxHistory>7</maxHistory>
|
||||
<cleanHistoryOnStart>true</cleanHistoryOnStart>
|
||||
</rollingPolicy>
|
||||
<encoder>
|
||||
<pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<!--LogLens -->
|
||||
<appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
|
||||
<mdcAdditionalContext>true</mdcAdditionalContext>
|
||||
<category>${log.lens.category}</category>
|
||||
<index>${log.lens.index}</index>
|
||||
<tag>${log.lens.tag}/service</tag>
|
||||
<encoder>
|
||||
<pattern>%msg</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<!-- LogLens Access -->
|
||||
<appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
|
||||
<mdcAdditionalContext>true</mdcAdditionalContext>
|
||||
<category>${log.lens.category}</category>
|
||||
<index>${log.lens.index}</index>
|
||||
<tag>${log.lens.tag}/access</tag>
|
||||
<encoder>
|
||||
<pattern>%msg</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<!-- Pipeline Execution Logs -->
|
||||
<appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<file>allow_listed_pipeline_executions.log</file>
|
||||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||||
<!-- daily rollover -->
|
||||
<fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
|
||||
<!-- the maximum total size of all the log files -->
|
||||
<totalSizeCap>100MB</totalSizeCap>
|
||||
<!-- keep maximum 7 days' worth of history -->
|
||||
<maxHistory>7</maxHistory>
|
||||
<cleanHistoryOnStart>true</cleanHistoryOnStart>
|
||||
</rollingPolicy>
|
||||
<encoder>
|
||||
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<!-- ===================================================== -->
|
||||
<!-- Primary Async Appenders -->
|
||||
<!-- ===================================================== -->
|
||||
|
||||
<property name="async_queue_size" value="${queue.size:-50000}"/>
|
||||
<property name="async_max_flush_time" value="${max.flush.time:-0}"/>
|
||||
|
||||
<appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
|
||||
<queueSize>${async_queue_size}</queueSize>
|
||||
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||
<appender-ref ref="SERVICE"/>
|
||||
</appender>
|
||||
|
||||
<appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
|
||||
<queueSize>${async_queue_size}</queueSize>
|
||||
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||
<appender-ref ref="ACCESS"/>
|
||||
</appender>
|
||||
|
||||
<appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
|
||||
<queueSize>${async_queue_size}</queueSize>
|
||||
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||
<appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
|
||||
</appender>
|
||||
|
||||
<appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
|
||||
<queueSize>${async_queue_size}</queueSize>
|
||||
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||
<appender-ref ref="LOGLENS"/>
|
||||
</appender>
|
||||
|
||||
<appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
|
||||
<queueSize>${async_queue_size}</queueSize>
|
||||
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||
<appender-ref ref="LOGLENS-ACCESS"/>
|
||||
</appender>
|
||||
|
||||
<!-- ===================================================== -->
|
||||
<!-- Package Config -->
|
||||
<!-- ===================================================== -->
|
||||
|
||||
<!-- Per-Package Config -->
|
||||
<logger name="com.twitter" level="INHERITED"/>
|
||||
<logger name="com.twitter.wilyns" level="INHERITED"/>
|
||||
<logger name="com.twitter.configbus.client.file" level="INHERITED"/>
|
||||
<logger name="com.twitter.finagle.mux" level="INHERITED"/>
|
||||
<logger name="com.twitter.finagle.serverset2" level="INHERITED"/>
|
||||
<logger name="com.twitter.logging.ScribeHandler" level="INHERITED"/>
|
||||
<logger name="com.twitter.zookeeper.client.internal" level="INHERITED"/>
|
||||
|
||||
<!-- Root Config -->
|
||||
<!-- For all logs except access logs, disable logging below the log_level level by default. This can be overridden in the per-package loggers, and dynamically in the admin panel of individual instances. -->
|
||||
<root level="${log_level:-INFO}">
|
||||
<appender-ref ref="ASYNC-SERVICE"/>
|
||||
<appender-ref ref="ASYNC-LOGLENS"/>
|
||||
</root>
|
||||
|
||||
<!-- Access Logging -->
|
||||
<!-- Access logs are turned off by default -->
|
||||
<logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter" level="OFF" additivity="false">
|
||||
<appender-ref ref="ASYNC-ACCESS"/>
|
||||
<appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
|
||||
</logger>
|
||||
|
||||
</configuration>
|
|
@ -0,0 +1,13 @@
|
|||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"finatra/inject/inject-thrift-client",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user",
|
||||
"strato/src/main/scala/com/twitter/strato/fed",
|
||||
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,40 @@
|
|||
package com.twitter.representation_manager
|
||||
|
||||
import com.google.inject.Module
|
||||
import com.twitter.inject.thrift.modules.ThriftClientIdModule
|
||||
import com.twitter.representation_manager.columns.topic.LocaleEntityIdSimClustersEmbeddingCol
|
||||
import com.twitter.representation_manager.columns.topic.TopicIdSimClustersEmbeddingCol
|
||||
import com.twitter.representation_manager.columns.tweet.TweetSimClustersEmbeddingCol
|
||||
import com.twitter.representation_manager.columns.user.UserSimClustersEmbeddingCol
|
||||
import com.twitter.representation_manager.modules.CacheModule
|
||||
import com.twitter.representation_manager.modules.InterestsThriftClientModule
|
||||
import com.twitter.representation_manager.modules.LegacyRMSConfigModule
|
||||
import com.twitter.representation_manager.modules.StoreModule
|
||||
import com.twitter.representation_manager.modules.TimerModule
|
||||
import com.twitter.representation_manager.modules.UttClientModule
|
||||
import com.twitter.strato.fed._
|
||||
import com.twitter.strato.fed.server._
|
||||
|
||||
/** Concrete singleton of [[RepresentationManagerFedServer]]; named as the server's main object. */
object RepresentationManagerFedServerMain extends RepresentationManagerFedServer

/**
 * Strato federated server definition for Representation Manager.
 *
 * Declares the `dest` path, the injection modules to install and the Strato
 * columns that this server registers.
 */
trait RepresentationManagerFedServer extends StratoFedServer {

  override def dest: String = "/s/representation-manager/representation-manager"

  // Injection modules, kept in the same (alphabetical) order as before.
  override val modules: Seq[Module] = Seq(
    CacheModule,
    InterestsThriftClientModule,
    LegacyRMSConfigModule,
    StoreModule,
    ThriftClientIdModule,
    TimerModule,
    UttClientModule
  )

  // SimClusters embedding columns: tweet and user keyed first, then the two topic keyed ones.
  override def columns: Seq[Class[_ <: StratoFed.Column]] = {
    val tweetAndUserColumns: Seq[Class[_ <: StratoFed.Column]] = Seq(
      classOf[TweetSimClustersEmbeddingCol],
      classOf[UserSimClustersEmbeddingCol]
    )
    val topicColumns: Seq[Class[_ <: StratoFed.Column]] = Seq(
      classOf[TopicIdSimClustersEmbeddingCol],
      classOf[LocaleEntityIdSimClustersEmbeddingCol]
    )
    tweetAndUserColumns ++ topicColumns
  }
}
|
|
@ -0,0 +1,9 @@
|
|||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"strato/src/main/scala/com/twitter/strato/fed",
|
||||
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,26 @@
|
|||
package com.twitter.representation_manager.columns
|
||||
|
||||
import com.twitter.strato.access.Access.LdapGroup
|
||||
import com.twitter.strato.config.ContactInfo
|
||||
import com.twitter.strato.config.FromColumns
|
||||
import com.twitter.strato.config.Has
|
||||
import com.twitter.strato.config.Prefix
|
||||
import com.twitter.strato.config.ServiceIdentifierPattern
|
||||
|
||||
/**
 * Shared configuration values for the Representation Manager Strato columns:
 * two base policy lists and the contact information.
 */
object ColumnConfigBase {

  /** Internal permissions. The list is empty in this source. */
  val recosPermissions: Seq[com.twitter.strato.config.Policy] = Seq.empty

  /**
   * External permissions, intended for granting limited access to members outside of the
   * RP team. The list is empty in this source.
   */
  val externalPermissions: Seq[com.twitter.strato.config.Policy] = Seq.empty

  /** Contact details for the columns that reference this object. */
  val contactInfo: ContactInfo =
    ContactInfo(
      description = "Please contact Relevance Platform for more details",
      contactEmail = "no-reply@twitter.com",
      ldapGroup = "ldap",
      jiraProject = "JIRA",
      links = Seq("http://go/rms-runbook")
    )
}
|
|
@ -0,0 +1,14 @@
|
|||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
|
||||
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||
"strato/src/main/scala/com/twitter/strato/fed",
|
||||
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,77 @@
|
|||
package com.twitter.representation_manager.columns.topic
|
||||
|
||||
import com.twitter.representation_manager.columns.ColumnConfigBase
|
||||
import com.twitter.representation_manager.store.TopicSimClustersEmbeddingStore
|
||||
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
|
||||
import com.twitter.stitch
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.stitch.storehaus.StitchOfReadableStore
|
||||
import com.twitter.strato.catalog.OpMetadata
|
||||
import com.twitter.strato.config.AnyOf
|
||||
import com.twitter.strato.config.ContactInfo
|
||||
import com.twitter.strato.config.FromColumns
|
||||
import com.twitter.strato.config.Policy
|
||||
import com.twitter.strato.config.Prefix
|
||||
import com.twitter.strato.data.Conv
|
||||
import com.twitter.strato.data.Description.PlainText
|
||||
import com.twitter.strato.data.Lifecycle
|
||||
import com.twitter.strato.fed._
|
||||
import com.twitter.strato.thrift.ScroogeConv
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Strato column that fetches a SimClusters embedding keyed by [[LocaleEntityId]].
 *
 * The embedding type and model version are read from the request view; combined with the
 * key they form the [[SimClustersEmbeddingId]] looked up in the topic embedding store.
 */
class LocaleEntityIdSimClustersEmbeddingCol @Inject() (
  embeddingStore: TopicSimClustersEmbeddingStore)
    extends StratoFed.Column(
      "recommendations/representation_manager/simClustersEmbedding.LocaleEntityId")
    with StratoFed.Fetch.Stitch {

  override type Key = LocaleEntityId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  // Store lookup exposed as a Stitch function; stored values are converted with `toThrift`.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] =
    StitchOfReadableStore(embeddingStore.topicSimClustersEmbeddingStore.mapValues(_.toThrift))

  // Shared base permissions plus access from columns under the SimClusters feature store prefix.
  val colPermissions: Seq[Policy] = {
    val featureStoreColumns = FromColumns(Set(Prefix("ml/featureStore/simClusters")))
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+
      featureStoreColumns
  }

  override val policy: Policy = AnyOf(colPermissions)

  override val keyConv: Conv[Key] = ScroogeConv.fromStruct[LocaleEntityId]
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = {
    val summary = PlainText(
      "The Topic SimClusters Embedding Endpoint in Representation Management Service with LocaleEntityId." +
        " TDD: http://go/rms-tdd")
    OpMetadata(lifecycle = Some(Lifecycle.Production), description = Some(summary))
  }

  /**
   * Looks up the embedding identified by the view's embedding type and model version and
   * the given key. Returns `found` on success; a `stitch.NotFound` failure becomes `missing`.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val lookupId =
      SimClustersEmbeddingId(view.embeddingType, view.modelVersion, InternalId.LocaleEntityId(key))

    storeStitch(lookupId)
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}
|
|
@ -0,0 +1,74 @@
|
|||
package com.twitter.representation_manager.columns.topic
|
||||
|
||||
import com.twitter.representation_manager.columns.ColumnConfigBase
|
||||
import com.twitter.representation_manager.store.TopicSimClustersEmbeddingStore
|
||||
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.stitch
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.stitch.storehaus.StitchOfReadableStore
|
||||
import com.twitter.strato.catalog.OpMetadata
|
||||
import com.twitter.strato.config.AnyOf
|
||||
import com.twitter.strato.config.ContactInfo
|
||||
import com.twitter.strato.config.FromColumns
|
||||
import com.twitter.strato.config.Policy
|
||||
import com.twitter.strato.config.Prefix
|
||||
import com.twitter.strato.data.Conv
|
||||
import com.twitter.strato.data.Description.PlainText
|
||||
import com.twitter.strato.data.Lifecycle
|
||||
import com.twitter.strato.fed._
|
||||
import com.twitter.strato.thrift.ScroogeConv
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Strato column that fetches a SimClusters embedding keyed by [[TopicId]].
 *
 * The embedding type and model version are read from the request view; combined with the
 * key they form the [[SimClustersEmbeddingId]] looked up in the topic embedding store.
 */
class TopicIdSimClustersEmbeddingCol @Inject() (embeddingStore: TopicSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.TopicId")
    with StratoFed.Fetch.Stitch {

  override type Key = TopicId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  // Store lookup exposed as a Stitch function; stored values are converted with `toThrift`.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] =
    StitchOfReadableStore(embeddingStore.topicSimClustersEmbeddingStore.mapValues(_.toThrift))

  // Shared base permissions plus access from columns under the SimClusters feature store prefix.
  val colPermissions: Seq[Policy] = {
    val featureStoreColumns = FromColumns(Set(Prefix("ml/featureStore/simClusters")))
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+
      featureStoreColumns
  }

  override val policy: Policy = AnyOf(colPermissions)

  override val keyConv: Conv[Key] = ScroogeConv.fromStruct[TopicId]
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = {
    val summary = PlainText(
      "The Topic SimClusters Embedding Endpoint in Representation Management Service with TopicId." +
        " TDD: http://go/rms-tdd")
    OpMetadata(lifecycle = Some(Lifecycle.Production), description = Some(summary))
  }

  /**
   * Looks up the embedding identified by the view's embedding type and model version and
   * the given key. Returns `found` on success; a `stitch.NotFound` failure becomes `missing`.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val lookupId =
      SimClustersEmbeddingId(view.embeddingType, view.modelVersion, InternalId.TopicId(key))

    storeStitch(lookupId)
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}
|
|
@ -0,0 +1,14 @@
|
|||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
|
||||
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||
"strato/src/main/scala/com/twitter/strato/fed",
|
||||
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,73 @@
|
|||
package com.twitter.representation_manager.columns.tweet
|
||||
|
||||
import com.twitter.representation_manager.columns.ColumnConfigBase
|
||||
import com.twitter.representation_manager.store.TweetSimClustersEmbeddingStore
|
||||
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.stitch
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.stitch.storehaus.StitchOfReadableStore
|
||||
import com.twitter.strato.catalog.OpMetadata
|
||||
import com.twitter.strato.config.AnyOf
|
||||
import com.twitter.strato.config.ContactInfo
|
||||
import com.twitter.strato.config.FromColumns
|
||||
import com.twitter.strato.config.Policy
|
||||
import com.twitter.strato.config.Prefix
|
||||
import com.twitter.strato.data.Conv
|
||||
import com.twitter.strato.data.Description.PlainText
|
||||
import com.twitter.strato.data.Lifecycle
|
||||
import com.twitter.strato.fed._
|
||||
import com.twitter.strato.thrift.ScroogeConv
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Strato column that fetches a SimClusters embedding keyed by a tweet id (`Long`).
 *
 * The embedding type and model version are read from the request view; combined with the
 * key they form the [[SimClustersEmbeddingId]] looked up in the tweet embedding store.
 */
class TweetSimClustersEmbeddingCol @Inject() (embeddingStore: TweetSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.Tweet")
    with StratoFed.Fetch.Stitch {

  override type Key = Long // TweetId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  // Store lookup exposed as a Stitch function; stored values are converted with `toThrift`.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] =
    StitchOfReadableStore(embeddingStore.tweetSimClustersEmbeddingStore.mapValues(_.toThrift))

  // Shared base permissions plus access from columns under the SimClusters feature store prefix.
  val colPermissions: Seq[Policy] = {
    val featureStoreColumns = FromColumns(Set(Prefix("ml/featureStore/simClusters")))
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+
      featureStoreColumns
  }

  override val policy: Policy = AnyOf(colPermissions)

  override val keyConv: Conv[Key] = Conv.long
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = {
    val summary = PlainText(
      "The Tweet SimClusters Embedding Endpoint in Representation Management Service." +
        " TDD: http://go/rms-tdd")
    OpMetadata(lifecycle = Some(Lifecycle.Production), description = Some(summary))
  }

  /**
   * Looks up the embedding identified by the view's embedding type and model version and
   * the given tweet id. Returns `found` on success; a `stitch.NotFound` failure becomes
   * `missing`.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val lookupId =
      SimClustersEmbeddingId(view.embeddingType, view.modelVersion, InternalId.TweetId(key))

    storeStitch(lookupId)
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}
|
|
@ -0,0 +1,14 @@
|
|||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
|
||||
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||
"strato/src/main/scala/com/twitter/strato/fed",
|
||||
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,73 @@
|
|||
package com.twitter.representation_manager.columns.user
|
||||
|
||||
import com.twitter.representation_manager.columns.ColumnConfigBase
|
||||
import com.twitter.representation_manager.store.UserSimClustersEmbeddingStore
|
||||
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.stitch
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.stitch.storehaus.StitchOfReadableStore
|
||||
import com.twitter.strato.catalog.OpMetadata
|
||||
import com.twitter.strato.config.AnyOf
|
||||
import com.twitter.strato.config.ContactInfo
|
||||
import com.twitter.strato.config.FromColumns
|
||||
import com.twitter.strato.config.Policy
|
||||
import com.twitter.strato.config.Prefix
|
||||
import com.twitter.strato.data.Conv
|
||||
import com.twitter.strato.data.Description.PlainText
|
||||
import com.twitter.strato.data.Lifecycle
|
||||
import com.twitter.strato.fed._
|
||||
import com.twitter.strato.thrift.ScroogeConv
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Strato column that fetches a SimClusters embedding keyed by a user id (`Long`).
 *
 * The embedding type and model version are read from the request view; combined with the
 * key they form the [[SimClustersEmbeddingId]] looked up in the user embedding store.
 */
class UserSimClustersEmbeddingCol @Inject() (embeddingStore: UserSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.User")
    with StratoFed.Fetch.Stitch {

  override type Key = Long // UserId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  // Store lookup exposed as a Stitch function; stored values are converted with `toThrift`.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] =
    StitchOfReadableStore(embeddingStore.userSimClustersEmbeddingStore.mapValues(_.toThrift))

  // Shared base permissions plus access from columns under the SimClusters feature store prefix.
  val colPermissions: Seq[Policy] = {
    val featureStoreColumns = FromColumns(Set(Prefix("ml/featureStore/simClusters")))
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+
      featureStoreColumns
  }

  override val policy: Policy = AnyOf(colPermissions)

  override val keyConv: Conv[Key] = Conv.long
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = {
    val summary = PlainText(
      "The User SimClusters Embedding Endpoint in Representation Management Service." +
        " TDD: http://go/rms-tdd")
    OpMetadata(lifecycle = Some(Lifecycle.Production), description = Some(summary))
  }

  /**
   * Looks up the embedding identified by the view's embedding type and model version and
   * the given user id. Returns `found` on success; a `stitch.NotFound` failure becomes
   * `missing`.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val lookupId =
      SimClustersEmbeddingId(view.embeddingType, view.modelVersion, InternalId.UserId(key))

    storeStitch(lookupId)
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}
|
|
@ -0,0 +1,13 @@
|
|||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"decider/src/main/scala",
|
||||
"finagle/finagle-memcached",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,153 @@
|
|||
package com.twitter.representation_manager.common
|
||||
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hashing.KeyHasher
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbeddingIdCacheKeyBuilder
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Duration
|
||||
|
||||
/*
|
||||
* NOTE - ALL the cache configs here are just placeholders, NONE of them is used anywhere in RMS yet
|
||||
* */
|
||||
/** Memcache settings for a single embedding store: either enabled with parameters, or disabled. */
sealed trait MemCacheParams

// NOTE(review): no members or subtypes of this trait are visible in this part of the file;
// it shares its name with the `MemCacheConfig` object declared further down.
sealed trait MemCacheConfig

/**
 * Holds the params that are required to set up a memcache cache for a single embedding store.
 *
 * Declared `final` so this ADT member cannot be subclassed.
 *
 * @param ttl presumably the time-to-live applied to cached entries - confirm against the
 *            code that consumes these params
 */
final case class EnabledMemCacheParams(ttl: Duration) extends MemCacheParams

/**
 * Marks an embedding store as having no memcache.
 *
 * A `case object` gives this ADT member a readable `toString`, matching its case-class sibling.
 */
case object DisabledMemCacheParams extends MemCacheParams
|
||||
|
||||
/**
 * We use this MemCacheConfig as the single source to set up the memcache for all RMS use cases.
 * NO OVERRIDE FROM CLIENT.
 */
object MemCacheConfig {
  val keyHasher: KeyHasher = KeyHasher.FNV1A_64
  val hashKeyPrefix: String = "RMS"

  // NOTE(review): this key builder is declared here but is not used by
  // buildMemCacheStoreForSimClustersEmbedding below, which derives keys from
  // getCacheKeyPrefix plus the id's toString.
  val simclustersEmbeddingCacheKeyBuilder =
    SimClustersEmbeddingIdCacheKeyBuilder(keyHasher.hashKey, hashKeyPrefix)

  /**
   * Memcache params per (embedding type, model version). Pairs that are absent from this map
   * are treated as "cache disabled" by [[getCacheSetup]].
   */
  val cacheParamsMap: Map[(EmbeddingType, ModelVersion), MemCacheParams] = Map(
    // Tweet Embeddings
    (LogFavBasedTweet, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 10.minutes),
    (LogFavBasedTweet, Model20m145k2020) -> EnabledMemCacheParams(ttl = 10.minutes),
    (LogFavLongestL2EmbeddingTweet, Model20m145kUpdated) ->
      EnabledMemCacheParams(ttl = 10.minutes),
    (LogFavLongestL2EmbeddingTweet, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 10.minutes),
    // User - KnownFor Embeddings
    (FavBasedProducer, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours),
    (FavBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (FollowBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (AggregatableLogFavBasedProducer, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    // User - InterestedIn Embeddings
    (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (FollowBasedUserInterestedInFromAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (FavBasedUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours),
    (FavBasedUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (FollowBasedUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (FavBasedUserInterestedInFromPE, Model20m145kUpdated) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (FilteredUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours),
    (FilteredUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (FilteredUserInterestedInFromPE, Model20m145kUpdated) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (UnfilteredUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours),
    (UnfilteredUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    // embedding is updated every 2 hours, keeping it lower to avoid staleness
    (UserNextInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 30.minutes),
    (LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedAverageAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    // Topic Embeddings
    (FavTfgTopic, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedKgoApeTopic, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
  )

  /**
   * Looks up the memcache params for the given embedding type and model version.
   *
   * @return the configured params, or DisabledMemCacheParams when the requested
   *         (embeddingType, modelVersion) pair is not present in [[cacheParamsMap]]
   */
  def getCacheSetup(
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): MemCacheParams =
    cacheParamsMap.getOrElse((embeddingType, modelVersion), DisabledMemCacheParams)

  /** Cache key prefix of the form `<embeddingType.value>_<modelVersion.value>_`. */
  def getCacheKeyPrefix(embeddingType: EmbeddingType, modelVersion: ModelVersion): String =
    s"${embeddingType.value}_${modelVersion.value}_"

  /** Stats scope name of the form `<embeddingType.name>_<modelVersion.name>_mem_cache`. */
  def getStatsName(embeddingType: EmbeddingType, modelVersion: ModelVersion): String =
    s"${embeddingType.name}_${modelVersion.name}_mem_cache"

  /**
   * Build a ReadableStore based on MemCacheConfig.
   *
   * If memcache is disabled for the (embeddingType, modelVersion) pair, the rawStore is used
   * directly; if it is enabled, the rawStore is wrapped by
   * ObservedMemcachedReadableStore.fromCacheClient with the configured TTL, LZ4-compressed
   * binary Thrift values, and keys made of [[getCacheKeyPrefix]] followed by the id's toString.
   * In both cases the Thrift values are converted to SimClustersEmbedding.
   *
   * @param rawStore      backing store returning Thrift embeddings
   * @param cacheClient   memcache client used when caching is enabled
   * @param embeddingType embedding type used to select the cache params, key prefix and stats name
   * @param modelVersion  model version used to select the cache params, key prefix and stats name
   * @param stats         parent stats receiver; scoped with [[getStatsName]] when caching is enabled
   */
  def buildMemCacheStoreForSimClustersEmbedding(
    rawStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding],
    cacheClient: Client,
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion,
    stats: StatsReceiver
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val store = getCacheSetup(embeddingType, modelVersion) match {
      case DisabledMemCacheParams => rawStore
      case EnabledMemCacheParams(ttl) =>
        val memCacheKeyPrefix = getCacheKeyPrefix(embeddingType, modelVersion)
        val statsName = getStatsName(embeddingType, modelVersion)
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = rawStore,
          cacheClient = cacheClient,
          ttl = ttl
        )(
          valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
          statsReceiver = stats.scope(statsName),
          keyToString = { k => memCacheKeyPrefix + k.toString }
        )
    }
    store.mapValues(SimClustersEmbedding(_))
  }

}
|
|
@ -0,0 +1,25 @@
|
|||
package com.twitter.representation_manager.common
|
||||
|
||||
import com.twitter.decider.Decider
|
||||
import com.twitter.decider.RandomRecipient
|
||||
import com.twitter.decider.Recipient
|
||||
import com.twitter.simclusters_v2.common.DeciderGateBuilderWithIdHashing
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Wrapper around an injected Decider that exposes availability checks and a
 * DeciderGateBuilderWithIdHashing built on the same decider.
 */
case class RepresentationManagerDecider @Inject() (decider: Decider) {

  val deciderGateBuilder = new DeciderGateBuilderWithIdHashing(decider)

  /** Delegates to the wrapped decider for the given feature and optional recipient. */
  def isAvailable(feature: String, recipient: Option[Recipient]): Boolean =
    decider.isAvailable(feature, recipient)

  /**
   * When useRandomRecipient is set to false, the decider is either completely on or off.
   * When useRandomRecipient is set to true, the decider is on for the specified % of traffic.
   */
  def isAvailable(feature: String, useRandomRecipient: Boolean = true): Boolean = {
    // Translate the flag into the recipient form accepted by the overload above.
    val recipient: Option[Recipient] =
      if (useRandomRecipient) Some(RandomRecipient) else None
    isAvailable(feature, recipient)
  }
}
|
|
@ -0,0 +1,25 @@
|
|||
# Scala library target; presumably the one that builds the
# com.twitter.representation_manager.migration sources that follow it in this commit.
scala_library(
    tags = ["bazel-compatible"],
    platform = "java8",
    compiler_option_sets = ["fatal_warnings"],
    dependencies = [
        "content-recommender/server/src/main/scala/com/twitter/contentrecommender:representation-manager-deps",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
        "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore",
        "representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
        "representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
        "src/scala/com/twitter/ml/api/embedding",
        "src/scala/com/twitter/simclusters_v2/common",
        "src/scala/com/twitter/simclusters_v2/score",
        "src/scala/com/twitter/simclusters_v2/summingbird/stores",
        "src/scala/com/twitter/storehaus_internal/manhattan",
        "src/scala/com/twitter/storehaus_internal/util",
        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
        "src/thrift/com/twitter/socialgraph:thrift-scala",
        "storage/clients/manhattan/client/src/main/scala",
        "tweetypie/src/scala/com/twitter/tweetypie/util",
    ],
)
|
|
@ -0,0 +1,846 @@
|
|||
package com.twitter.representation_manager.migration
|
||||
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.contentrecommender.store.ApeEntityEmbeddingStore
|
||||
import com.twitter.contentrecommender.store.InterestsOptOutStore
|
||||
import com.twitter.contentrecommender.store.SemanticCoreTopicSeedStore
|
||||
import com.twitter.contentrecommender.twistly
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.decider.Decider
|
||||
import com.twitter.escherbird.util.uttclient.CacheConfigV2
|
||||
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
|
||||
import com.twitter.escherbird.util.uttclient.UttClientCacheConfigsV2
|
||||
import com.twitter.escherbird.utt.strato.thriftscala.Environment
|
||||
import com.twitter.finagle.ThriftMux
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax
|
||||
import com.twitter.finagle.mux.ClientDiscardedRequestException
|
||||
import com.twitter.finagle.service.ReqRep
|
||||
import com.twitter.finagle.service.ResponseClass
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.thrift.ClientId
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.frigate.common.util.SeqLongInjection
|
||||
import com.twitter.hashing.KeyHasher
|
||||
import com.twitter.hermit.store.common.DeciderableReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.interests.thriftscala.InterestsThriftService
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.relevance_platform.common.readablestore.ReadableStoreWithTimeout
|
||||
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||
import com.twitter.representation_manager.store.DeciderConstants
|
||||
import com.twitter.representation_manager.store.DeciderKey
|
||||
import com.twitter.simclusters_v2.common.ModelVersions
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbeddingIdCacheKeyBuilder
|
||||
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
|
||||
import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore
|
||||
import com.twitter.simclusters_v2.summingbird.stores.ProducerClusterEmbeddingReadableStores
|
||||
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore
|
||||
import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145k2020
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145kUpdated
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersMultiEmbedding
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersMultiEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Athena
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import com.twitter.strato.client.Strato
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.strato.thrift.ScroogeConvImplicits._
|
||||
import com.twitter.tweetypie.util.UserId
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Throw
|
||||
import com.twitter.util.Timer
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import scala.reflect.ClassTag
|
||||
|
||||
class LegacyRMS @Inject() (
|
||||
serviceIdentifier: ServiceIdentifier,
|
||||
cacheClient: Client,
|
||||
stats: StatsReceiver,
|
||||
decider: Decider,
|
||||
clientId: ClientId,
|
||||
timer: Timer,
|
||||
@Named("cacheHashKeyPrefix") val cacheHashKeyPrefix: String = "RMS",
|
||||
@Named("useContentRecommenderConfiguration") val useContentRecommenderConfiguration: Boolean =
|
||||
false) {
|
||||
|
||||
// mTLS params derived from the injected service identifier; passed to the Manhattan-backed
// stores built in this class.
private val mhMtlsParams: ManhattanKVClientMtlsParams =
  ManhattanKVClientMtlsParams(serviceIdentifier)

// Decider wrapper; its gate builder guards decider-controlled stores.
private val rmsDecider = RepresentationManagerDecider(decider)

val keyHasher: KeyHasher = KeyHasher.FNV1A_64

// Key builder constructed from keyHasher and the injected cacheHashKeyPrefix; used as the
// keyToString function of several memcached stores in this class.
private val embeddingCacheKeyBuilder =
  SimClustersEmbeddingIdCacheKeyBuilder(keyHasher.hashKey, cacheHashKeyPrefix)

// Every stat recorded through this class is scoped under "representation_management".
private val statsReceiver = stats.scope("representation_management")
|
||||
|
||||
// Strato client secured with mutual TLS for this service's identifier.
// The original note states the default timeout is 280ms; no timeout is set explicitly here.
val stratoClient: StratoClient = {
  val mtlsClientBuilder = Strato.client.withMutualTls(serviceIdentifier)
  mtlsClientBuilder.build()
}
|
||||
|
||||
/**
 * Builds the ThriftMux client builder shared by the Thrift clients of this class
 * (the original note describes it as being for the Content-Recommender service).
 *
 * The client carries this service's client id and mTLS identity, the given request timeout,
 * stats under the "clnt" scope, and a response classifier that marks requests failing with
 * ClientDiscardedRequestException as Ignorable.
 *
 * @param requestTimeout per-request timeout applied to the client
 */
private def makeThriftClientBuilder(
  requestTimeout: Duration
): ThriftMux.Client = {
  val identifiedClient = ThriftMux.client
    .withClientId(clientId)
    .withMutualTls(serviceIdentifier)

  val instrumentedClient = identifiedClient
    .withRequestTimeout(requestTimeout)
    .withStatsReceiver(statsReceiver.scope("clnt"))

  instrumentedClient.withResponseClassifier {
    case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable
  }
}
|
||||
|
||||
/**
 * Builds a Thrift client of type `ThriftServiceType` on top of makeThriftClientBuilder.
 *
 * @param dest           destination handed to the builder's `build`
 * @param label          label handed to the builder's `build`
 * @param requestTimeout per-request timeout; defaults to 450 milliseconds
 */
private def makeThriftClient[ThriftServiceType: ClassTag](
  dest: String,
  label: String,
  requestTimeout: Duration = 450.milliseconds
): ThriftServiceType = {
  val clientBuilder = makeThriftClientBuilder(requestTimeout)
  clientBuilder.build[ThriftServiceType](dest, label)
}
|
||||
|
||||
/** *** SimCluster Embedding Stores ******/
|
||||
// Binary Thrift codecs exposed as implicit Injections to Array[Byte].
// NOTE(review): presumably resolved implicitly by the store builders in this class — confirm.
implicit val simClustersEmbeddingIdInjection: Injection[
  SimClustersEmbeddingId,
  Array[Byte]
] = BinaryScalaCodec(SimClustersEmbeddingId)

implicit val simClustersEmbeddingInjection: Injection[
  ThriftSimClustersEmbedding,
  Array[Byte]
] = BinaryScalaCodec(ThriftSimClustersEmbedding)

implicit val simClustersMultiEmbeddingInjection: Injection[
  SimClustersMultiEmbedding,
  Array[Byte]
] = BinaryScalaCodec(SimClustersMultiEmbedding)

implicit val simClustersMultiEmbeddingIdInjection: Injection[
  SimClustersMultiEmbeddingId,
  Array[Byte]
] = BinaryScalaCodec(SimClustersMultiEmbeddingId)
|
||||
|
||||
/**
 * Opens a read-only Manhattan store of Thrift SimClusters embeddings keyed by
 * SimClustersEmbeddingId, on the Athena cluster under the "content_recommender_athena"
 * application id.
 *
 * @param mhMtlsParams mTLS params for the Manhattan client (shadows the class-level field)
 * @param datasetName  name of the Manhattan dataset to read
 */
def getEmbeddingsDataset(
  mhMtlsParams: ManhattanKVClientMtlsParams,
  datasetName: String
): ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] = {
  val readOnlyConfig = ManhattanROConfig(
    HDFSPath(""), // not needed
    ApplicationID("content_recommender_athena"),
    DatasetName(datasetName), // this should be correct
    Athena
  )

  ManhattanRO.getReadableStoreWithMtls[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
    readOnlyConfig,
    mhMtlsParams
  )
}
|
||||
|
||||
/**
 * Longest-L2-norm tweet embeddings (LogFavLongestL2EmbeddingTweet, Model20m145k2020).
 *
 * Layers, innermost first: Manhattan store keyed by tweet id -> memcache (15 minute TTL,
 * LZ4-compressed Thrift) -> key mapping from SimClustersEmbeddingId -> local
 * ObservedCachedReadableStore (12 minute TTL).
 * The key mapping is a partial function: ids with any other embedding type, model version or
 * internal id kind are not matched.
 */
lazy val logFavBasedLongestL2Tweet20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val manhattanStore = PersistentTweetEmbeddingStore
    .longestL2NormTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
      statsReceiver,
      maxLength = 10
    )
  val thriftStore = manhattanStore.mapValues(_.toThrift)

  val tweetIdKeyedMemcache = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = thriftStore,
    cacheClient = cacheClient,
    ttl = 15.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver =
      statsReceiver.scope("log_fav_based_longest_l2_tweet_embedding_20m145k2020_mem_cache"),
    keyToString = { tweetId =>
      s"scez_l2:${LogFavBasedTweet}_${ModelVersions.Model20M145K2020}_$tweetId"
    }
  )

  val embeddingIdKeyedStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    tweetIdKeyedMemcache
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(
              LogFavLongestL2EmbeddingTweet,
              Model20m145k2020,
              InternalId.TweetId(id)) =>
          id
      }
      .mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingIdKeyedStore,
    ttl = 12.minutes,
    maxKeys = 1048575,
    cacheName = "log_fav_based_longest_l2_tweet_embedding_20m145k2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_longest_l2_tweet_embedding_20m145k2020_store"))
}
|
||||
|
||||
/**
 * Most-recent tweet embeddings (LogFavBasedTweet, Model20m145kUpdated).
 *
 * Layers, innermost first: Manhattan store keyed by tweet id -> memcache (10 minute TTL,
 * LZ4-compressed Thrift) -> key mapping from SimClustersEmbeddingId -> local
 * ObservedCachedReadableStore (5 minute TTL).
 * The key mapping is a partial function: ids with any other embedding type, model version or
 * internal id kind are not matched.
 */
lazy val logFavBased20M145KUpdatedTweetEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val manhattanStore = PersistentTweetEmbeddingStore
    .mostRecentTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
      statsReceiver
    )
  val thriftStore = manhattanStore.mapValues(_.toThrift)

  val tweetIdKeyedMemcache = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = thriftStore,
    cacheClient = cacheClient,
    ttl = 10.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver = statsReceiver.scope("log_fav_based_tweet_embedding_mem_cache"),
    // Key layout: SimClusters_embedding_LZ4/embeddingType_modelVersion_tweetId
    keyToString = { tweetId =>
      s"scez:${LogFavBasedTweet}_${ModelVersions.Model20M145KUpdated}_$tweetId"
    }
  )

  val embeddingIdKeyedStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    tweetIdKeyedMemcache
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(
              LogFavBasedTweet,
              Model20m145kUpdated,
              InternalId.TweetId(id)) =>
          id
      }
      .mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingIdKeyedStore,
    ttl = 5.minutes,
    maxKeys = 1048575, // 200MB
    cacheName = "log_fav_based_tweet_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_tweet_embedding_store"))
}
|
||||
|
||||
/**
 * Most-recent tweet embeddings (LogFavBasedTweet, Model20m145k2020).
 *
 * Layers, innermost first: Manhattan store keyed by tweet id -> memcache (15 minute TTL,
 * LZ4-compressed Thrift) -> key mapping from SimClustersEmbeddingId -> local
 * ObservedCachedReadableStore (12 minute TTL).
 * The key mapping is a partial function: ids with any other embedding type, model version or
 * internal id kind are not matched.
 */
lazy val logFavBased20M145K2020TweetEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val manhattanStore = PersistentTweetEmbeddingStore
    .mostRecentTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
      statsReceiver,
      maxLength = 10
    )
  val thriftStore = manhattanStore.mapValues(_.toThrift)

  val tweetIdKeyedMemcache = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = thriftStore,
    cacheClient = cacheClient,
    ttl = 15.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver = statsReceiver.scope("log_fav_based_tweet_embedding_20m145k2020_mem_cache"),
    // Key layout: SimClusters_embedding_LZ4/embeddingType_modelVersion_tweetId
    keyToString = { tweetId =>
      s"scez:${LogFavBasedTweet}_${ModelVersions.Model20M145K2020}_$tweetId"
    }
  )

  val embeddingIdKeyedStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    tweetIdKeyedMemcache
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(
              LogFavBasedTweet,
              Model20m145k2020,
              InternalId.TweetId(id)) =>
          id
      }
      .mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingIdKeyedStore,
    ttl = 12.minutes,
    maxKeys = 16777215,
    cacheName = "log_fav_based_tweet_embedding_20m145k2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_tweet_embedding_20m145k2020_store"))
}
|
||||
|
||||
/**
 * Topic embeddings fetched from the Strato column
 * "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020", truncated to 50,
 * observed, and fronted by a local ObservedCachedReadableStore with a 12 hour TTL.
 */
lazy val favBasedTfgTopicEmbedding2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020")

  val top50Store = thriftStore.mapValues { thriftEmbedding =>
    SimClustersEmbedding(thriftEmbedding, truncate = 50)
  }

  val observedBackingStore = ObservedReadableStore(top50Store)(
    statsReceiver.scope("fav_tfg_topic_embedding_2020_cache_backing_store"))

  ObservedCachedReadableStore.from(
    observedBackingStore,
    ttl = 12.hours,
    maxKeys = 262143, // 200MB
    cacheName = "fav_tfg_topic_embedding_2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_tfg_topic_embedding_2020_cache"))
}
|
||||
|
||||
/**
 * Aggregatable producer embeddings fetched from the Strato column
 * "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020".
 *
 * The key mapping only matches (AggregatableLogFavBasedProducer, Model20m145k2020) ids and
 * forwards them with the same fields; values are converted with a truncation argument of 50.
 */
lazy val logFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020")

  val apeOnlyStore = thriftStore
    .composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(
            AggregatableLogFavBasedProducer,
            Model20m145k2020,
            internalId) =>
        SimClustersEmbeddingId(AggregatableLogFavBasedProducer, Model20m145k2020, internalId)
    }
    .mapValues(thriftEmbedding => SimClustersEmbedding(thriftEmbedding, 50))

  ObservedReadableStore(apeOnlyStore)(
    statsReceiver.scope("aggregatable_producer_embeddings_by_logfav_score_2020"))
}
|
||||
|
||||
// Thrift client for the interests service, built with makeThriftClient's default timeout.
val interestService: InterestsThriftService.MethodPerEndpoint = {
  val dest = "/s/interests-thrift-service/interests-thrift-service"
  val label = "interests_thrift_service"
  makeThriftClient[InterestsThriftService.MethodPerEndpoint](dest, label)
}

// Opt-out store constructed from the interests service client above.
val interestsOptOutStore: InterestsOptOutStore = InterestsOptOutStore(interestService)
|
||||
|
||||
// Cache config of 262143 (2^18 - 1); per the original note this is sized to save ~2^18 UTTs
// with the aim of a 100% cache rate.
lazy val defaultCacheConfigV2: CacheConfigV2 = CacheConfigV2(262143)

// All four UTT client caches share the same default config.
lazy val uttClientCacheConfigsV2: UttClientCacheConfigsV2 = {
  val sharedConfig = defaultCacheConfigV2
  UttClientCacheConfigsV2(
    getTaxonomyConfig = sharedConfig,
    getUttTaxonomyConfig = sharedConfig,
    getLeafIds = sharedConfig,
    getLeafUttEntities = sharedConfig
  )
}
|
||||
|
||||
// Cached UTT client backed by the Strato client, using the Prod environment and the shared
// cache configs; stats go under the "cached_utt_client" scope.
lazy val cachedUttClientV2: CachedUttClientV2 = {
  val uttStats = statsReceiver.scope("cached_utt_client")
  new CachedUttClientV2(
    stratoClient = stratoClient,
    env = Environment.Prod,
    cacheConfigs = uttClientCacheConfigsV2,
    statsReceiver = uttStats
  )
}
|
||||
|
||||
/**
 * Seed user ids per SemanticCoreTopicSeedStore.Key.
 *
 * Layers, innermost first: SemanticCoreTopicSeedStore (UTT client + interests opt-out store)
 * -> memcache (12 hour TTL, keys "tpss:<entityId>_<languageCode>") -> local
 * ObservedCachedReadableStore (6 hour TTL, 20e3 keys).
 */
lazy val semanticCoreTopicSeedStore: ReadableStore[
  SemanticCoreTopicSeedStore.Key,
  Seq[UserId]
] = {
  /*
    Sizing estimate carried over from the original author:
    up to 1000 Long seeds per topic/language = 62.5kb per topic/language (worst case);
    assume ~10k active topic/languages ~= 650MB (worst case).
   */
  val seedStore = new SemanticCoreTopicSeedStore(cachedUttClientV2, interestsOptOutStore)(
    statsReceiver.scope("semantic_core_topic_seed_store"))

  val memcachedSeedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = seedStore,
    cacheClient = cacheClient,
    ttl = 12.hours
  )(
    valueInjection = SeqLongInjection,
    statsReceiver = statsReceiver.scope("topic_producer_seed_store_mem_cache"),
    keyToString = { key => s"tpss:${key.entityId}_${key.languageCode}" }
  )

  ObservedCachedReadableStore.from[SemanticCoreTopicSeedStore.Key, Seq[UserId]](
    store = memcachedSeedStore,
    ttl = 6.hours,
    maxKeys = 20e3.toInt,
    cacheName = "topic_producer_seed_store_cache",
    windowSize = 5000
  )(statsReceiver.scope("topic_producer_seed_store_cache"))
}
|
||||
|
||||
/**
 * ApeEntityEmbeddingStore wired with the topic seed store and with the aggregatable producer
 * embedding store re-keyed by user id.
 */
lazy val logFavBasedApeEntity20M145K2020EmbeddingStore: ApeEntityEmbeddingStore = {
  // Lets callers look up producer embeddings by plain user id.
  val userIdKeyedApeStore =
    logFavBasedApe20M145K2020EmbeddingStore.composeKeyMapping[UserId] { userId =>
      SimClustersEmbeddingId(
        AggregatableLogFavBasedProducer,
        Model20m145k2020,
        InternalId.UserId(userId))
    }

  val entityStats = statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_store")

  new ApeEntityEmbeddingStore(
    semanticCoreSeedStore = semanticCoreTopicSeedStore,
    aggregatableProducerEmbeddingStore = userIdKeyedApeStore,
    statsReceiver = entityStats)
}
|
||||
|
||||
/**
 * Cached, decider-gated view of logFavBasedApeEntity20M145K2020EmbeddingStore.
 *
 * Layers, innermost first: entity store truncated to 50 -> memcache (12 hour TTL,
 * LZ4-compressed Thrift, keys from embeddingCacheKeyBuilder) -> local
 * ObservedCachedReadableStore (6 hour TTL) -> DeciderableReadableStore gated on
 * DeciderKey.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore.
 */
lazy val logFavBasedApeEntity20M145K2020EmbeddingCachedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val top50ThriftStore =
    logFavBasedApeEntity20M145K2020EmbeddingStore.mapValues(_.truncate(50).toThrift)

  val memcachedEmbeddingStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = top50ThriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_mem_cache"),
      keyToString = { id => embeddingCacheKeyBuilder.apply(id) }
    ).mapValues(thrift => SimClustersEmbedding(thrift))

  val locallyCachedStore =
    ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
      memcachedEmbeddingStore,
      ttl = 6.hours,
      maxKeys = 262143,
      cacheName = "log_fav_based_ape_entity_2020_embedding_cache",
      windowSize = 10000L
    )(statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_cached_store"))

  val enabledGate = rmsDecider.deciderGateBuilder.idGateWithHashing[SimClustersEmbeddingId](
    DeciderKey.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore)

  DeciderableReadableStore(
    locallyCachedStore,
    enabledGate,
    statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_deciderable_store")
  )
}
|
||||
|
||||
/**
 * Relaxed aggregatable producer embeddings fetched from the Strato column
 * "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020".
 *
 * The key mapping only matches (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) ids
 * and forwards them with the same fields; values are truncated to 50.
 */
lazy val relaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020")

  val relaxedApeOnlyStore = thriftStore
    .composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(
            RelaxedAggregatableLogFavBasedProducer,
            Model20m145k2020,
            internalId) =>
        SimClustersEmbeddingId(
          RelaxedAggregatableLogFavBasedProducer,
          Model20m145k2020,
          internalId)
    }
    .mapValues(thriftEmbedding => SimClustersEmbedding(thriftEmbedding).truncate(50))

  ObservedReadableStore(relaxedApeOnlyStore)(
    statsReceiver.scope(
      "aggregatable_producer_embeddings_by_logfav_score_relaxed_fav_engagement_threshold_2020"))
}
|
||||
|
||||
/**
 * Cached view of relaxedLogFavBasedApe20M145K2020EmbeddingStore.
 *
 * Layers, innermost first: relaxed store truncated to 50 -> memcache (12 hour TTL,
 * LZ4-compressed Thrift, keys from embeddingCacheKeyBuilder) -> local
 * ObservedCachedReadableStore (6 hour TTL).
 */
lazy val relaxedLogFavBasedApe20M145K2020EmbeddingCachedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val top50ThriftStore =
    relaxedLogFavBasedApe20M145K2020EmbeddingStore.mapValues(_.truncate(50).toThrift)

  val memcachedEmbeddingStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = top50ThriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver =
        statsReceiver.scope("relaxed_log_fav_based_ape_entity_2020_embedding_mem_cache"),
      keyToString = { id: SimClustersEmbeddingId => embeddingCacheKeyBuilder.apply(id) }
    ).mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    memcachedEmbeddingStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "relaxed_log_fav_based_ape_entity_2020_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("relaxed_log_fav_based_ape_entity_2020_embedding_cache_store"))
}
|
||||
|
||||
/**
 * Producer embeddings (FavBasedProducer, Model20m145k2020), limited to the first 10 clusters.
 *
 * Layers, innermost first: producer top-K Manhattan store keyed by user id, re-keyed by
 * SimClustersEmbeddingId -> memcache (24 hour TTL, LZ4-compressed Thrift, keys from
 * embeddingCacheKeyBuilder) -> local ObservedCachedReadableStore (12 hour TTL).
 * The key mapping is a partial function: ids with any other embedding type, model version or
 * internal id kind are not matched.
 */
lazy val favBasedProducer20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val top10ThriftStore = ProducerClusterEmbeddingReadableStores
    .getProducerTopKSimClusters2020EmbeddingsStore(mhMtlsParams)
    .composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(
            FavBasedProducer,
            Model20m145k2020,
            InternalId.UserId(producerId)) =>
        producerId
    }.mapValues { topClustersWithScore =>
      ThriftSimClustersEmbedding(topClustersWithScore.topClusters.take(10))
    }

  // same memcache config as for favBasedUserInterestedIn20M145K2020Store
  val memcachedEmbeddingStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = top10ThriftStore,
      cacheClient = cacheClient,
      ttl = 24.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_producer_embedding_20M_145K_2020_mem_cache"),
      keyToString = { id => embeddingCacheKeyBuilder.apply(id) }
    ).mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    memcachedEmbeddingStore,
    ttl = 12.hours,
    maxKeys = 16777215,
    cacheName = "fav_based_producer_embedding_20M_145K_2020_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_based_producer_embedding_20M_145K_2020_embedding_store"))
}
|
||||
|
||||
// Production
|
||||
/** Default mTLS InterestedIn store for model version Model20M145KUpdated, keyed by user id. */
lazy val interestedIn20M145KUpdatedStore: ReadableStore[UserId, ClustersUserIsInterestedIn] =
  UserInterestedInReadableStore.defaultStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145KUpdated
  )
|
||||
|
||||
// Production
|
||||
/** Default mTLS InterestedIn store for model version Model20M145K2020, keyed by user id. */
lazy val interestedIn20M145K2020Store: ReadableStore[UserId, ClustersUserIsInterestedIn] =
  UserInterestedInReadableStore.defaultStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145K2020
  )
|
||||
|
||||
// Production
|
||||
/** Default mTLS IIPE store for model version Model20M145KUpdated, keyed by user id. */
lazy val InterestedInFromPE20M145KUpdatedStore: ReadableStore[
  UserId,
  ClustersUserIsInterestedIn
] =
  UserInterestedInReadableStore.defaultIIPEStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145KUpdated
  )
|
||||
|
||||
/**
 * Routes (userId, modelVersion) lookups to the InterestedIn store of that model version.
 * Model versions other than Model20m145kUpdated and Model20m145k2020 resolve to Future.None.
 */
lazy val simClustersInterestedInStore: ReadableStore[
  (UserId, ModelVersion),
  ClustersUserIsInterestedIn
] =
  new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
    override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] =
      k match {
        case (userId, Model20m145kUpdated) => interestedIn20M145KUpdatedStore.get(userId)
        case (userId, Model20m145k2020) => interestedIn20M145K2020Store.get(userId)
        case _ => Future.None
      }
  }
|
||||
|
||||
/**
 * Routes (userId, modelVersion) lookups to the InterestedIn-from-producer-embeddings store.
 * Only Model20m145kUpdated is served; every other model version resolves to Future.None.
 */
lazy val simClustersInterestedInFromProducerEmbeddingsStore: ReadableStore[
  (UserId, ModelVersion),
  ClustersUserIsInterestedIn
] =
  new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
    override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] =
      k match {
        case (userId, Model20m145kUpdated) => InterestedInFromPE20M145KUpdatedStore.get(userId)
        case _ => Future.None
      }
  }
|
||||
|
||||
/**
 * twistly InterestedIn embedding store built from the two model-version routing stores
 * (plain InterestedIn and InterestedIn-from-producer-embeddings) and the class-level stats
 * receiver.
 */
// Explicit type: this is a public member, so its type should not depend on inference.
lazy val userInterestedInStore: twistly.interestedin.EmbeddingStore =
  new twistly.interestedin.EmbeddingStore(
    interestedInStore = simClustersInterestedInStore,
    interestedInFromProducerEmbeddingStore = simClustersInterestedInFromProducerEmbeddingsStore,
    statsReceiver = statsReceiver
  )
|
||||
|
||||
// Production
|
||||
/**
 * FavBasedUserInterestedIn embeddings for model 20M145KUpdated.
 *
 * Layering, innermost first: the source store (values converted to thrift), an
 * `ObservedMemcachedReadableStore` with a 12 hour TTL, and an `ObservedCachedReadableStore`
 * with a 6 hour TTL holding at most 262143 keys.
 */
lazy val favBasedUserInterestedIn20M145KUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Values are kept as thrift so the memcache layer can serialize them.
  val thriftSource =
    UserInterestedInReadableStore
      .defaultSimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FavBasedUserInterestedIn,
        ModelVersion.Model20m145kUpdated
      )
      .mapValues(embedding => embedding.toThrift)

  val memcacheBacked = ObservedMemcachedReadableStore
    .fromCacheClient(backingStore = thriftSource, cacheClient = cacheClient, ttl = 12.hours)(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_user_interested_in_mem_cache"),
      keyToString = embeddingId => embeddingCacheKeyBuilder.apply(embeddingId)
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    memcacheBacked,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "fav_based_user_interested_in_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_based_user_interested_in_store"))
}
|
||||
|
||||
// Production
|
||||
/**
 * LogFavBasedUserInterestedInFromAPE embeddings for model 20M145K2020.
 *
 * Layering, innermost first: the IIAPE source store (values converted to thrift), an
 * `ObservedMemcachedReadableStore` with a 12 hour TTL, and an `ObservedCachedReadableStore`
 * with a 6 hour TTL holding at most 262143 keys.
 */
lazy val LogFavBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Values are kept as thrift so the memcache layer can serialize them.
  val thriftSource =
    UserInterestedInReadableStore
      .defaultIIAPESimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.LogFavBasedUserInterestedInFromAPE,
        ModelVersion.Model20m145k2020
      )
      .mapValues(embedding => embedding.toThrift)

  val memcacheBacked = ObservedMemcachedReadableStore
    .fromCacheClient(backingStore = thriftSource, cacheClient = cacheClient, ttl = 12.hours)(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("log_fav_based_user_interested_in_from_ape_mem_cache"),
      keyToString = embeddingId => embeddingCacheKeyBuilder.apply(embeddingId)
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    memcacheBacked,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "log_fav_based_user_interested_in_from_ape_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_user_interested_in_from_ape_store"))
}
|
||||
|
||||
// Production
|
||||
/**
 * FollowBasedUserInterestedInFromAPE embeddings for model 20M145K2020.
 *
 * Layering, innermost first: the IIAPE source store (values converted to thrift), an
 * `ObservedMemcachedReadableStore` with a 12 hour TTL, and an `ObservedCachedReadableStore`
 * with a 6 hour TTL holding at most 262143 keys.
 */
lazy val FollowBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Values are kept as thrift so the memcache layer can serialize them.
  val thriftSource =
    UserInterestedInReadableStore
      .defaultIIAPESimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FollowBasedUserInterestedInFromAPE,
        ModelVersion.Model20m145k2020
      )
      .mapValues(embedding => embedding.toThrift)

  val memcacheBacked = ObservedMemcachedReadableStore
    .fromCacheClient(backingStore = thriftSource, cacheClient = cacheClient, ttl = 12.hours)(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("follow_based_user_interested_in_from_ape_mem_cache"),
      keyToString = embeddingId => embeddingCacheKeyBuilder.apply(embeddingId)
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    memcacheBacked,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "follow_based_user_interested_in_from_ape_cache",
    windowSize = 10000L
  )(statsReceiver.scope("follow_based_user_interested_in_from_ape_store"))
}
|
||||
|
||||
// Production
|
||||
/**
 * FavBasedUserInterestedIn embeddings for model 20M145K2020.
 *
 * The source store (values converted to thrift) is wrapped only by an
 * `ObservedMemcachedReadableStore` with a 12 hour TTL; unlike the sibling stores in this
 * class, no `ObservedCachedReadableStore` layer is added on top.
 */
lazy val favBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftSource: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] =
    UserInterestedInReadableStore
      .defaultSimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FavBasedUserInterestedIn,
        ModelVersion.Model20m145k2020
      )
      .mapValues(embedding => embedding.toThrift)

  val memcacheBacked = ObservedMemcachedReadableStore
    .fromCacheClient(backingStore = thriftSource, cacheClient = cacheClient, ttl = 12.hours)(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_user_interested_in_2020_mem_cache"),
      keyToString = embeddingId => embeddingCacheKeyBuilder.apply(embeddingId)
    )

  memcacheBacked.mapValues(thrift => SimClustersEmbedding(thrift))
}
|
||||
|
||||
// Production
|
||||
/**
 * LogFavBasedUserInterestedIn embeddings for model 20M145K2020.
 *
 * Layering, innermost first: the source store (values converted to thrift), an
 * `ObservedMemcachedReadableStore` with a 12 hour TTL, and an `ObservedCachedReadableStore`
 * with a 6 hour TTL holding at most 262143 keys.
 */
lazy val logFavBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val underlyingStore =
    UserInterestedInReadableStore
      .defaultSimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.LogFavBasedUserInterestedIn,
        ModelVersion.Model20m145k2020)

  val memcachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      // Values are converted to thrift so the memcache layer can serialize them.
      backingStore = underlyingStore.mapValues(_.toThrift),
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      // The memcache layer previously reported under the same "..._2020_store" scope as the
      // outer cache layer below, so the two layers' metrics collided. It now uses a
      // "_mem_cache" scope like the sibling stores in this class.
      // NOTE(review): dashboards or alerts keyed to the old memcache metric path need updating.
      statsReceiver = statsReceiver.scope("log_fav_based_user_interested_in_2020_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    memcachedStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "log_fav_based_user_interested_in_2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_user_interested_in_2020_store"))
}
|
||||
|
||||
// Production
|
||||
/**
 * FavBasedUserInterestedInFromPE embeddings for model 20M145KUpdated.
 *
 * Layering, innermost first: the IIPE source store (values converted to thrift), an
 * `ObservedMemcachedReadableStore` with a 12 hour TTL, and an `ObservedCachedReadableStore`
 * with a 6 hour TTL holding at most 262143 keys.
 */
lazy val favBasedUserInterestedInFromPE20M145KUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Values are kept as thrift so the memcache layer can serialize them.
  val thriftSource =
    UserInterestedInReadableStore
      .defaultIIPESimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FavBasedUserInterestedInFromPE,
        ModelVersion.Model20m145kUpdated
      )
      .mapValues(embedding => embedding.toThrift)

  val memcacheBacked = ObservedMemcachedReadableStore
    .fromCacheClient(backingStore = thriftSource, cacheClient = cacheClient, ttl = 12.hours)(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_user_interested_in_from_pe_mem_cache"),
      keyToString = embeddingId => embeddingCacheKeyBuilder.apply(embeddingId)
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    memcacheBacked,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "fav_based_user_interested_in_from_pe_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_based_user_interested_in_from_pe_cache"))
}
|
||||
|
||||
/**
 * Registry of every store served by this class, keyed by (embedding type, model version).
 * This is a strict `val`, so every `lazy val` store referenced here is forced when it is built.
 */
private val underlyingStores: Map[
  (EmbeddingType, ModelVersion),
  ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
] = {
  type StoreEntry =
    ((EmbeddingType, ModelVersion), ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding])

  val tweetEmbeddings: Seq[StoreEntry] = Seq(
    (LogFavBasedTweet, Model20m145kUpdated) -> logFavBased20M145KUpdatedTweetEmbeddingStore,
    (LogFavBasedTweet, Model20m145k2020) -> logFavBased20M145K2020TweetEmbeddingStore,
    (LogFavLongestL2EmbeddingTweet, Model20m145k2020) ->
      logFavBasedLongestL2Tweet20M145K2020EmbeddingStore
  )

  val entityEmbeddings: Seq[StoreEntry] = Seq(
    (FavTfgTopic, Model20m145k2020) -> favBasedTfgTopicEmbedding2020Store,
    (LogFavBasedKgoApeTopic, Model20m145k2020) ->
      logFavBasedApeEntity20M145K2020EmbeddingCachedStore
  )

  val knownForEmbeddings: Seq[StoreEntry] = Seq(
    (FavBasedProducer, Model20m145k2020) -> favBasedProducer20M145K2020EmbeddingStore,
    (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) ->
      relaxedLogFavBasedApe20M145K2020EmbeddingCachedStore
  )

  val interestedInEmbeddings: Seq[StoreEntry] = Seq(
    (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) ->
      LogFavBasedInterestedInFromAPE20M145K2020Store,
    (FollowBasedUserInterestedInFromAPE, Model20m145k2020) ->
      FollowBasedInterestedInFromAPE20M145K2020Store,
    (FavBasedUserInterestedIn, Model20m145kUpdated) ->
      favBasedUserInterestedIn20M145KUpdatedStore,
    (FavBasedUserInterestedIn, Model20m145k2020) -> favBasedUserInterestedIn20M145K2020Store,
    (LogFavBasedUserInterestedIn, Model20m145k2020) ->
      logFavBasedUserInterestedIn20M145K2020Store,
    (FavBasedUserInterestedInFromPE, Model20m145kUpdated) ->
      favBasedUserInterestedInFromPE20M145KUpdatedStore,
    // The filtered / unfiltered variants are all served by the same composite store.
    (FilteredUserInterestedIn, Model20m145kUpdated) -> userInterestedInStore,
    (FilteredUserInterestedIn, Model20m145k2020) -> userInterestedInStore,
    (FilteredUserInterestedInFromPE, Model20m145kUpdated) -> userInterestedInStore,
    (UnfilteredUserInterestedIn, Model20m145kUpdated) -> userInterestedInStore,
    (UnfilteredUserInterestedIn, Model20m145k2020) -> userInterestedInStore
  )

  (tweetEmbeddings ++ entityEmbeddings ++ knownForEmbeddings ++ interestedInEmbeddings).toMap
}
|
||||
|
||||
/**
 * Single entry point over all registered embedding stores.
 *
 * Built in three steps: a decider-aware store over `underlyingStores`, a
 * `ReadableStoreWithTimeout` whose enablement and timeout value are read from decider keys,
 * and an `ObservedReadableStore` wrapper for stats.
 */
val simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
  val deciderGated: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = statsReceiver.scope("simClusters_embeddings_store_deciderable")
    )

  val timeBounded: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    new ReadableStoreWithTimeout(
      rs = deciderGated,
      decider = rmsDecider.decider,
      enableTimeoutDeciderKey = DeciderConstants.enableSimClustersEmbeddingStoreTimeouts,
      timeoutValueKey = DeciderConstants.simClustersEmbeddingStoreTimeoutValueMillis,
      timer = timer,
      statsReceiver = statsReceiver.scope("simClusters_embedding_store_timeouts")
    )

  ObservedReadableStore(store = timeBounded)(statsReceiver.scope("simClusters_embeddings_store"))
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
# Scala library target: compiler warnings are fatal and the target platform is Java 8.
scala_library(
    compiler_option_sets = ["fatal_warnings"],
    dependencies = [
        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
        "finagle/finagle-stats",
        "finatra/inject/inject-core/src/main/scala",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
        "interests-service/thrift/src/main/thrift:thrift-scala",
        "representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
        "servo/util",
        "src/scala/com/twitter/storehaus_internal/manhattan",
        "src/scala/com/twitter/storehaus_internal/memcache",
        "src/scala/com/twitter/storehaus_internal/util",
        "strato/src/main/scala/com/twitter/strato/client",
    ],
    platform = "java8",
    tags = ["bazel-compatible"],
)
|
|
@ -0,0 +1,34 @@
|
|||
package com.twitter.representation_manager.modules
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import javax.inject.Singleton
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.storehaus_internal.memcache.MemcacheStore
|
||||
import com.twitter.storehaus_internal.util.ClientName
|
||||
import com.twitter.storehaus_internal.util.ZkEndPoint
|
||||
|
||||
/**
 * Provides the singleton memcached `Client` used by Representation Manager.
 *
 * The destination, timeout (milliseconds) and retry count are read from the flags
 * `cache_module.dest`, `memcache.timeout` and `memcache.retries`. None of them declares a
 * default value here.
 */
object CacheModule extends TwitterModule {

  private val destFlag = flag[String]("cache_module.dest", "Path to memcache service")
  private val timeoutFlag = flag[Int]("memcache.timeout", "Memcache client timeout")
  private val retriesFlag = flag[Int]("memcache.retries", "Memcache timeout retries")

  @Singleton
  @Provides
  def providesCache(
    serviceIdentifier: ServiceIdentifier,
    stats: StatsReceiver
  ): Client = {
    val clientName = ClientName("memcache_representation_manager")
    val endpoint = ZkEndPoint(destFlag())
    val requestTimeout = timeoutFlag().milliseconds
    val maxRetries = retriesFlag()

    MemcacheStore.memcachedClient(
      name = clientName,
      dest = endpoint,
      timeout = requestTimeout,
      retries = maxRetries,
      statsReceiver = stats.scope("cache_client"),
      serviceIdentifier = serviceIdentifier
    )
  }
}
|
|
@ -0,0 +1,40 @@
|
|||
package com.twitter.representation_manager.modules
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.ThriftMux
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax
|
||||
import com.twitter.finagle.mux.ClientDiscardedRequestException
|
||||
import com.twitter.finagle.service.ReqRep
|
||||
import com.twitter.finagle.service.ResponseClass
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.thrift.ClientId
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.interests.thriftscala.InterestsThriftService
|
||||
import com.twitter.util.Throw
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Provides the singleton ThriftMux client for the interests thrift service.
 *
 * The client uses mutual TLS, a 450 millisecond request timeout, and classifies
 * `ClientDiscardedRequestException` failures as `ResponseClass.Ignorable`.
 */
object InterestsThriftClientModule extends TwitterModule {

  @Singleton
  @Provides
  def providesInterestsThriftClient(
    clientId: ClientId,
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver
  ): InterestsThriftService.MethodPerEndpoint = {
    val configuredClient =
      ThriftMux.client
        .withClientId(clientId)
        .withMutualTls(serviceIdentifier)
        .withRequestTimeout(450.milliseconds)
        .withStatsReceiver(statsReceiver.scope("InterestsThriftClient"))
        .withResponseClassifier {
          // Requests discarded by this client are marked ignorable.
          case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable
        }

    configuredClient.build[InterestsThriftService.MethodPerEndpoint](
      dest = "/s/interests-thrift-service/interests-thrift-service",
      label = "interests_thrift_service"
    )
  }
}
|
|
@ -0,0 +1,18 @@
|
|||
package com.twitter.representation_manager.modules
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.inject.TwitterModule
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Provides the two named configuration values consumed by the legacy RMS wiring:
 * the `cacheHashKeyPrefix` string and the `useContentRecommenderConfiguration` switch.
 */
object LegacyRMSConfigModule extends TwitterModule {

  private val CacheHashKeyPrefix: String = "RMS"
  private val UseContentRecommenderConfiguration: Boolean = false

  /** Value bound to `@Named("cacheHashKeyPrefix")`. */
  @Singleton
  @Provides
  @Named("cacheHashKeyPrefix")
  def providesCacheHashKeyPrefix: String = CacheHashKeyPrefix

  /** Value bound to `@Named("useContentRecommenderConfiguration")`. */
  @Singleton
  @Provides
  @Named("useContentRecommenderConfiguration")
  def providesUseContentRecommenderConfiguration: Boolean = UseContentRecommenderConfiguration
}
|
|
@ -0,0 +1,24 @@
|
|||
package com.twitter.representation_manager.modules
|
||||
|
||||
import com.google.inject.Provides
|
||||
import javax.inject.Singleton
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.decider.Decider
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
|
||||
/**
 * Provides the singleton dependencies shared by the embedding stores: the Manhattan mTLS
 * parameters and the `RepresentationManagerDecider` wrapper.
 */
object StoreModule extends TwitterModule {

  /** Builds the Manhattan KV mTLS parameters from the injected service identifier. */
  @Singleton
  @Provides
  def providesMhMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams = {
    ManhattanKVClientMtlsParams(serviceIdentifier)
  }

  /** Wraps the injected `Decider` in a `RepresentationManagerDecider`. */
  @Singleton
  @Provides
  def providesRmsDecider(
    decider: Decider
  ): RepresentationManagerDecider = {
    RepresentationManagerDecider(decider)
  }

}
|
|
@ -0,0 +1,13 @@
|
|||
package com.twitter.representation_manager.modules
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.finagle.util.DefaultTimer
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.util.Timer
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Binds `com.twitter.util.Timer` to Finagle's `DefaultTimer` as a singleton. */
object TimerModule extends TwitterModule {

  @Singleton
  @Provides
  def providesTimer: Timer = {
    DefaultTimer
  }
}
|
|
@ -0,0 +1,39 @@
|
|||
package com.twitter.representation_manager.modules
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.escherbird.util.uttclient.CacheConfigV2
|
||||
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
|
||||
import com.twitter.escherbird.util.uttclient.UttClientCacheConfigsV2
|
||||
import com.twitter.escherbird.utt.strato.thriftscala.Environment
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Provides the singleton `CachedUttClientV2`, backed by the Strato client and pointed at the
 * `Environment.Prod` environment.
 */
object UttClientModule extends TwitterModule {

  @Singleton
  @Provides
  def providesUttClient(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): CachedUttClientV2 = {
    // 262143 = 2^18 - 1. The original note says this saves 2^18 UTTs and promises a 100%
    // cache rate.
    val sharedCacheConfig: CacheConfigV2 = CacheConfigV2(262143)

    // Every UTT client cache uses the same configuration.
    val cacheConfigs: UttClientCacheConfigsV2 = UttClientCacheConfigsV2(
      getTaxonomyConfig = sharedCacheConfig,
      getUttTaxonomyConfig = sharedCacheConfig,
      getLeafIds = sharedCacheConfig,
      getLeafUttEntities = sharedCacheConfig
    )

    val scopedStats = statsReceiver.scope("cached_utt_client")

    new CachedUttClientV2(
      stratoClient = stratoClient,
      env = Environment.Prod,
      cacheConfigs = cacheConfigs,
      statsReceiver = scopedStats
    )
  }
}
|
|
@ -0,0 +1,16 @@
|
|||
# Scala library target: compiler warnings are fatal and the target platform is Java 8.
scala_library(
    compiler_option_sets = ["fatal_warnings"],
    dependencies = [
        "content-recommender/server/src/main/scala/com/twitter/contentrecommender:representation-manager-deps",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
        "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
        "representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
        "src/scala/com/twitter/simclusters_v2/stores",
        "src/scala/com/twitter/simclusters_v2/summingbird/stores",
        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
        "storage/clients/manhattan/client/src/main/scala",
        "tweetypie/src/scala/com/twitter/tweetypie/util",
    ],
    platform = "java8",
    tags = ["bazel-compatible"],
)
|
|
@ -0,0 +1,39 @@
|
|||
package com.twitter.representation_manager.store
|
||||
|
||||
import com.twitter.servo.decider.DeciderKeyEnum
|
||||
|
||||
/** Decider key names used by the representation-manager stores. */
object DeciderConstants {
  // These deciders were inherited from CR and RSX and are only used in LegacyRMS.
  // Their values are controlled by the CR and RSX yml files and decider dashboards.
  // They will be removed once the migration is complete.
  val enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore: String =
    "enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore"
  val enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore: String =
    "enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore"

  // LogFav-based tweet embedding store, model 20M145K2020: timeout switch and value.
  val enablelogFavBased20M145K2020TweetEmbeddingStoreTimeouts: String =
    "enable_log_fav_based_tweet_embedding_20m145k2020_timeouts"
  val logFavBased20M145K2020TweetEmbeddingStoreTimeoutValueMillis: String =
    "log_fav_based_tweet_embedding_20m145k2020_timeout_value_millis"

  // LogFav-based tweet embedding store, model 20M145KUpdated: timeout switch and value.
  val enablelogFavBased20M145KUpdatedTweetEmbeddingStoreTimeouts: String =
    "enable_log_fav_based_tweet_embedding_20m145kUpdated_timeouts"
  val logFavBased20M145KUpdatedTweetEmbeddingStoreTimeoutValueMillis: String =
    "log_fav_based_tweet_embedding_20m145kUpdated_timeout_value_millis"

  // Aggregate SimClusters embedding store: timeout switch and value.
  val enableSimClustersEmbeddingStoreTimeouts: String =
    "enable_sim_clusters_embedding_store_timeouts"
  val simClustersEmbeddingStoreTimeoutValueMillis: String =
    "sim_clusters_embedding_store_timeout_value_millis"
}
|
||||
|
||||
// Necessary for using servo Gates
|
||||
/**
 * Enumeration form of the two legacy APE-entity decider keys, each named after the matching
 * string in `DeciderConstants`.
 */
object DeciderKey extends DeciderKeyEnum {
  val enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore: Value =
    Value(DeciderConstants.enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore)

  val enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore: Value =
    Value(DeciderConstants.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore)
}
|
|
@ -0,0 +1,198 @@
|
|||
package com.twitter.representation_manager.store
|
||||
|
||||
import com.twitter.contentrecommender.store.ApeEntityEmbeddingStore
|
||||
import com.twitter.contentrecommender.store.InterestsOptOutStore
|
||||
import com.twitter.contentrecommender.store.SemanticCoreTopicSeedStore
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.frigate.common.util.SeqLongInjection
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.interests.thriftscala.InterestsThriftService
|
||||
import com.twitter.representation_manager.common.MemCacheConfig
|
||||
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
|
||||
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.tweetypie.util.UserId
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Serves topic SimClusters embeddings for model 20M145K2020: `FavTfgTopic` (read from
 * Strato) and `LogFavBasedKgoApeTopic` (computed by `ApeEntityEmbeddingStore`). Both are
 * exposed through the decider-aware `topicSimClustersEmbeddingStore`.
 *
 * NOTE(review): `mhMtlsParams` is injected but never referenced in this class body.
 */
class TopicSimClustersEmbeddingStore @Inject() (
  stratoClient: StratoClient,
  cacheClient: Client,
  globalStats: StatsReceiver,
  mhMtlsParams: ManhattanKVClientMtlsParams,
  rmsDecider: RepresentationManagerDecider,
  interestService: InterestsThriftService.MethodPerEndpoint,
  uttClient: CachedUttClientV2) {

  // All stats emitted by this class are scoped under its simple class name.
  private val stats = globalStats.scope(this.getClass.getSimpleName)
  private val interestsOptOutStore = InterestsOptOutStore(interestService)

  /**
   * This is NOT an embedding store: it maps a topic key to the list of author account ids
   * used to represent that topic.
   *
   * Sizing estimate carried over from the original note: up to 1000 Long seeds per
   * topic/language = 62.5kb per topic/language (worst case); assuming ~10k active
   * topic/languages that is ~= 650MB (worst case).
   */
  private val semanticCoreTopicSeedStore: ReadableStore[
    SemanticCoreTopicSeedStore.Key,
    Seq[UserId]
  ] = {
    val seedSource = new SemanticCoreTopicSeedStore(uttClient, interestsOptOutStore)(
      stats.scope("semantic_core_topic_seed_store"))

    val memcachedSeeds = ObservedMemcachedReadableStore
      .fromCacheClient(backingStore = seedSource, cacheClient = cacheClient, ttl = 12.hours)(
        valueInjection = SeqLongInjection,
        statsReceiver = stats.scope("topic_producer_seed_store_mem_cache"),
        keyToString = key => s"tpss:${key.entityId}_${key.languageCode}"
      )

    ObservedCachedReadableStore.from[SemanticCoreTopicSeedStore.Key, Seq[UserId]](
      store = memcachedSeeds,
      ttl = 6.hours,
      maxKeys = 20000,
      cacheName = "topic_producer_seed_store_cache",
      windowSize = 5000
    )(stats.scope("topic_producer_seed_store_cache"))
  }

  /**
   * FavTfgTopic embeddings for model 20M145K2020, fetched from Strato, truncated with
   * `truncate = 50`, keyed by `LocaleEntityId`, then wrapped by the memcache builder.
   */
  private val favBasedTfgTopicEmbedding20m145k2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val stratoStore = StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020")

    val truncatedStore =
      stratoStore.mapValues(thrift => SimClustersEmbedding(thrift, truncate = 50).toThrift)

    val localeKeyedStore = truncatedStore.composeKeyMapping[LocaleEntityId] { localeEntityId =>
      SimClustersEmbeddingId(
        FavTfgTopic,
        Model20m145k2020,
        InternalId.LocaleEntityId(localeEntityId))
    }

    buildLocaleEntityIdMemCacheStore(localeKeyedStore, FavTfgTopic, Model20m145k2020)
  }

  /**
   * LogFavBasedKgoApeTopic embeddings for model 20M145K2020. `ApeEntityEmbeddingStore`
   * combines the topic seed store with the aggregatable producer embeddings fetched from
   * Strato; the result is truncated with `truncate = 50`, keyed by `TopicId`, then wrapped
   * by the memcache builder.
   */
  private val logFavBasedApeEntity20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val producerEmbeddings = StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020")
      .mapValues(thrift => SimClustersEmbedding(thrift, truncate = 50))
      .composeKeyMapping[UserId] { userId =>
        SimClustersEmbeddingId(
          AggregatableLogFavBasedProducer,
          Model20m145k2020,
          InternalId.UserId(userId))
      }

    val entityEmbeddings = new ApeEntityEmbeddingStore(
      semanticCoreSeedStore = semanticCoreTopicSeedStore,
      aggregatableProducerEmbeddingStore = producerEmbeddings,
      statsReceiver = stats.scope("log_fav_based_ape_entity_2020_embedding_store"))

    val topicKeyedStore = entityEmbeddings
      .mapValues(entity => SimClustersEmbedding(entity.toThrift, truncate = 50).toThrift)
      .composeKeyMapping[TopicId] { topicId =>
        SimClustersEmbeddingId(
          LogFavBasedKgoApeTopic,
          Model20m145k2020,
          InternalId.TopicId(topicId))
      }

    buildTopicIdMemCacheStore(topicKeyedStore, LogFavBasedKgoApeTopic, Model20m145k2020)
  }

  /**
   * Observes a `TopicId`-keyed store, re-keys it by `SimClustersEmbeddingId`, and hands it to
   * `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
   *
   * The key mapping only matches ids whose internal id is `InternalId.TopicId`.
   */
  private def buildTopicIdMemCacheStore(
    rawStore: ReadableStore[TopicId, ThriftSimClustersEmbedding],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)

    val observed: ObservedReadableStore[TopicId, ThriftSimClustersEmbedding] =
      ObservedReadableStore(store = rawStore)(scopedStats)

    val keyedByEmbeddingId = observed.composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(_, _, InternalId.TopicId(topicId)) => topicId
    }

    MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
      keyedByEmbeddingId,
      cacheClient,
      embeddingType,
      modelVersion,
      stats
    )
  }

  /**
   * Observes a `LocaleEntityId`-keyed store, re-keys it by `SimClustersEmbeddingId`, and hands
   * it to `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
   *
   * The key mapping only matches ids whose internal id is `InternalId.LocaleEntityId`.
   */
  private def buildLocaleEntityIdMemCacheStore(
    rawStore: ReadableStore[LocaleEntityId, ThriftSimClustersEmbedding],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)

    val observed: ObservedReadableStore[LocaleEntityId, ThriftSimClustersEmbedding] =
      ObservedReadableStore(store = rawStore)(scopedStats)

    val keyedByEmbeddingId = observed.composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(localeEntityId)) =>
        localeEntityId
    }

    MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
      keyedByEmbeddingId,
      cacheClient,
      embeddingType,
      modelVersion,
      stats
    )
  }

  // Topic embeddings served by this class, keyed by (embedding type, model version).
  private val underlyingStores: Map[
    (EmbeddingType, ModelVersion),
    ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
  ] = Map(
    (FavTfgTopic, Model20m145k2020) -> favBasedTfgTopicEmbedding20m145k2020Store,
    (LogFavBasedKgoApeTopic, Model20m145k2020) -> logFavBasedApeEntity20M145K2020EmbeddingStore
  )

  /** Public entry point: the decider-aware store over `underlyingStores`. */
  val topicSimClustersEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = stats
    )

}
|
|
@ -0,0 +1,141 @@
|
|||
package com.twitter.representation_manager.store
|
||||
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.representation_manager.common.MemCacheConfig
|
||||
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
|
||||
import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Tweet-keyed SimClusters embedding stores.
 *
 * Each public store is produced by one of the `*Manhattan` factories on
 * `PersistentTweetEmbeddingStore`, converted to the thrift embedding type, and then run through
 * [[buildMemCacheStore]]. [[tweetSimClustersEmbeddingStore]] bundles all of them behind
 * `SimClustersEmbeddingStore.buildWithDecider`.
 *
 * @param cacheClient  memcached client handed to `MemCacheConfig`
 * @param globalStats  root stats receiver; everything here is scoped under this class' simple name
 * @param mhMtlsParams parameters forwarded to the Manhattan store factories
 * @param rmsDecider   supplies the decider passed to `buildWithDecider`
 */
class TweetSimClustersEmbeddingStore @Inject() (
  cacheClient: Client,
  globalStats: StatsReceiver,
  mhMtlsParams: ManhattanKVClientMtlsParams,
  rmsDecider: RepresentationManagerDecider) {

  // Must stay the first member: every store val below reads it while the class is constructed.
  private val stats = globalStats.scope(this.getClass.getSimpleName)

  /** Longest-L2-norm tweet embeddings, LogFavBased 20m145kUpdated dataset. */
  val logFavBasedLongestL2Tweet20M145KUpdatedEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildMemCacheStore(
      rawStore = PersistentTweetEmbeddingStore
        .longestL2NormTweetEmbeddingStoreManhattan(
          mhMtlsParams,
          PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
          stats
        ).mapValues(_.toThrift),
      embeddingType = LogFavLongestL2EmbeddingTweet,
      modelVersion = Model20m145kUpdated
    )

  /** Longest-L2-norm tweet embeddings, LogFavBased 20m145k2020 dataset. */
  val logFavBasedLongestL2Tweet20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildMemCacheStore(
      rawStore = PersistentTweetEmbeddingStore
        .longestL2NormTweetEmbeddingStoreManhattan(
          mhMtlsParams,
          PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
          stats
        ).mapValues(_.toThrift),
      embeddingType = LogFavLongestL2EmbeddingTweet,
      modelVersion = Model20m145k2020
    )

  /** Most-recent tweet embeddings, LogFavBased 20m145kUpdated dataset. */
  val logFavBased20M145KUpdatedTweetEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildMemCacheStore(
      rawStore = PersistentTweetEmbeddingStore
        .mostRecentTweetEmbeddingStoreManhattan(
          mhMtlsParams,
          PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
          stats
        ).mapValues(_.toThrift),
      embeddingType = LogFavBasedTweet,
      modelVersion = Model20m145kUpdated
    )

  /** Most-recent tweet embeddings, LogFavBased 20m145k2020 dataset. */
  val logFavBased20M145K2020TweetEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildMemCacheStore(
      rawStore = PersistentTweetEmbeddingStore
        .mostRecentTweetEmbeddingStoreManhattan(
          mhMtlsParams,
          PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
          stats
        ).mapValues(_.toThrift),
      embeddingType = LogFavBasedTweet,
      modelVersion = Model20m145k2020
    )

  /**
   * Wraps a tweet-id keyed thrift store so it can be served by `SimClustersEmbeddingId`.
   *
   * Steps, in order: observe the raw store under `stats/<embeddingType>/<modelVersion>`,
   * re-key it from `SimClustersEmbeddingId` to the tweet id, then hand it to
   * `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
   *
   * @param rawStore      tweet-id keyed store of thrift embeddings
   * @param embeddingType used for the stats scope and forwarded to `MemCacheConfig`
   * @param modelVersion  used for the stats scope and forwarded to `MemCacheConfig`
   */
  private def buildMemCacheStore(
    rawStore: ReadableStore[TweetId, ThriftSimClustersEmbedding],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)

    // NOTE(review): the key mapping is partial - only ids carrying InternalId.TweetId match.
    // Presumably callers never send any other InternalId variant here; confirm, since a
    // non-matching key would raise a MatchError.
    val embeddingIdKeyedStore = ObservedReadableStore(store = rawStore)(scopedStats)
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.TweetId(tweetId)) => tweetId
      }

    MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
      embeddingIdKeyedStore,
      cacheClient,
      embeddingType,
      modelVersion,
      stats
    )
  }

  // Lookup table consumed by buildWithDecider: one entry per supported
  // (embedding type, model version) pair.
  private val underlyingStores: Map[
    (EmbeddingType, ModelVersion),
    ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
  ] = Map(
    // Tweet Embeddings
    (LogFavBasedTweet, Model20m145kUpdated) ->
      logFavBased20M145KUpdatedTweetEmbeddingStore,
    (LogFavBasedTweet, Model20m145k2020) ->
      logFavBased20M145K2020TweetEmbeddingStore,
    (LogFavLongestL2EmbeddingTweet, Model20m145kUpdated) ->
      logFavBasedLongestL2Tweet20M145KUpdatedEmbeddingStore,
    (LogFavLongestL2EmbeddingTweet, Model20m145k2020) ->
      logFavBasedLongestL2Tweet20M145K2020EmbeddingStore
  )

  /**
   * Single entry point for all tweet embeddings: the table above plus the RMS decider and this
   * class' stats scope, combined by `SimClustersEmbeddingStore.buildWithDecider`.
   */
  val tweetSimClustersEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = stats
    )

}
|
|
@ -0,0 +1,602 @@
|
|||
package com.twitter.representation_manager.store
|
||||
|
||||
import com.twitter.contentrecommender.twistly
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.representation_manager.common.MemCacheConfig
|
||||
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||
import com.twitter.simclusters_v2.common.ModelVersions
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
|
||||
import com.twitter.simclusters_v2.summingbird.stores.ProducerClusterEmbeddingReadableStores
|
||||
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore
|
||||
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.getStore
|
||||
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.modelVersionToDatasetMap
|
||||
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.knownModelVersions
|
||||
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.toSimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
|
||||
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Apollo
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanCluster
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.strato.thrift.ScroogeConvImplicits._
|
||||
import com.twitter.tweetypie.util.UserId
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * User-keyed SimClusters embedding stores (producer embeddings and the InterestedIn families).
 *
 * Every private store below ends in [[buildMemCacheStore]], either directly or through
 * [[buildUserInterestedInStore]] / [[buildUserInterestedInStoreGeneric]]. That helper observes
 * the store under `stats/<embeddingType>/<modelVersion>` and passes it to
 * `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`. The only public member,
 * [[userSimClustersEmbeddingStore]], bundles them behind
 * `SimClustersEmbeddingStore.buildWithDecider`.
 *
 * Member order matters: the `val`s are initialised top to bottom, so a store must be declared
 * before any other `val` that reads it.
 *
 * @param stratoClient client used for the Strato-column backed stores
 * @param cacheClient  memcached client handed to `MemCacheConfig`
 * @param globalStats  root stats receiver; everything here is scoped under this class' simple name
 * @param mhMtlsParams parameters forwarded to the Manhattan-based store factories
 * @param rmsDecider   supplies the decider passed to `buildWithDecider`
 */
class UserSimClustersEmbeddingStore @Inject() (
  stratoClient: StratoClient,
  cacheClient: Client,
  globalStats: StatsReceiver,
  mhMtlsParams: ManhattanKVClientMtlsParams,
  rmsDecider: RepresentationManagerDecider) {

  // Must stay the first member: every store val below reads it during construction.
  private val stats = globalStats.scope(this.getClass.getSimpleName)

  // ---------------------------------------------------------------------------------------------
  // Producer embeddings
  //
  // NOTE(review): the key mappings below are partial - only ids carrying InternalId.UserId
  // match. Presumably callers never send any other InternalId variant to these stores; confirm,
  // since a non-matching key would raise a MatchError.
  // ---------------------------------------------------------------------------------------------

  private val favBasedProducer20M145KUpdatedEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val rawStore = ProducerClusterEmbeddingReadableStores
      .getProducerTopKSimClustersEmbeddingsStore(
        mhMtlsParams
      ).mapValues { topSimClustersWithScore =>
        ThriftSimClustersEmbedding(topSimClustersWithScore.topClusters)
      }.composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) =>
          userId
      }

    buildMemCacheStore(rawStore, FavBasedProducer, Model20m145kUpdated)
  }

  private val favBasedProducer20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val rawStore = ProducerClusterEmbeddingReadableStores
      .getProducerTopKSimClusters2020EmbeddingsStore(
        mhMtlsParams
      ).mapValues { topSimClustersWithScore =>
        ThriftSimClustersEmbedding(topSimClustersWithScore.topClusters)
      }.composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) =>
          userId
      }

    buildMemCacheStore(rawStore, FavBasedProducer, Model20m145k2020)
  }

  private val followBasedProducer20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val rawStore = ProducerClusterEmbeddingReadableStores
      .getProducerTopKSimClustersEmbeddingsByFollowStore(
        mhMtlsParams
      ).mapValues { topSimClustersWithScore =>
        ThriftSimClustersEmbedding(topSimClustersWithScore.topClusters)
      }.composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) =>
          userId
      }

    buildMemCacheStore(rawStore, FollowBasedProducer, Model20m145k2020)
  }

  // Strato-column backed store. Values are re-wrapped with `truncate = 50` before being turned
  // back into thrift (presumably capping the embedding at 50 clusters - confirm against
  // SimClustersEmbedding.apply).
  private val logFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val rawStore = StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020")
      .mapValues(embedding => SimClustersEmbedding(embedding, truncate = 50).toThrift)

    buildMemCacheStore(rawStore, AggregatableLogFavBasedProducer, Model20m145k2020)
  }

  // Shared raw (unobserved, uncached) store behind both "relaxed" APE entries below.
  private val rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    ThriftSimClustersEmbedding
  ] = {
    StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020")
      .mapValues(embedding => SimClustersEmbedding(embedding, truncate = 50).toThrift)
  }

  private val relaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildMemCacheStore(
      rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore,
      RelaxedAggregatableLogFavBasedProducer,
      Model20m145k2020)
  }

  // Serves Model20m145kUpdated requests from the 2020 column by rewriting the model version in
  // the key before it reaches the raw store. The mapping is partial: only
  // (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) ids match.
  private val relaxedLogFavBasedApe20m145kUpdatedEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val rawStore = rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(
              RelaxedAggregatableLogFavBasedProducer,
              Model20m145kUpdated,
              internalId) =>
          SimClustersEmbeddingId(
            RelaxedAggregatableLogFavBasedProducer,
            Model20m145k2020,
            internalId)
      }

    buildMemCacheStore(rawStore, RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated)
  }

  // ---------------------------------------------------------------------------------------------
  // InterestedIn embeddings built from UserInterestedInReadableStore factories
  // ---------------------------------------------------------------------------------------------

  private val logFavBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls,
      LogFavBasedUserInterestedInFromAPE,
      Model20m145k2020)
  }

  private val followBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls,
      FollowBasedUserInterestedInFromAPE,
      Model20m145k2020)
  }

  private val favBasedUserInterestedIn20M145KUpdatedStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
      FavBasedUserInterestedIn,
      Model20m145kUpdated)
  }

  private val favBasedUserInterestedIn20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
      FavBasedUserInterestedIn,
      Model20m145k2020)
  }

  private val followBasedUserInterestedIn20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
      FollowBasedUserInterestedIn,
      Model20m145k2020)
  }

  private val logFavBasedUserInterestedIn20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
      LogFavBasedUserInterestedIn,
      Model20m145k2020)
  }

  private val favBasedUserInterestedInFromPE20M145KUpdatedStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultIIPESimClustersEmbeddingStoreWithMtls,
      FavBasedUserInterestedInFromPE,
      Model20m145kUpdated)
  }

  // Raw store behind the Filtered*/Unfiltered* entries. It adapts the per-model InterestedIn
  // stores to the (UserId, ModelVersion)-keyed stores that twistly's EmbeddingStore is
  // constructed with; model versions without a backing store resolve to Future.None.
  private val twistlyUserInterestedInStore: ReadableStore[
    SimClustersEmbeddingId,
    ThriftSimClustersEmbedding
  ] = {
    val interestedIn20M145KUpdatedStore = {
      UserInterestedInReadableStore.defaultStoreWithMtls(
        mhMtlsParams,
        modelVersion = ModelVersions.Model20M145KUpdated
      )
    }
    val interestedIn20M145K2020Store = {
      UserInterestedInReadableStore.defaultStoreWithMtls(
        mhMtlsParams,
        modelVersion = ModelVersions.Model20M145K2020
      )
    }
    val interestedInFromPE20M145KUpdatedStore = {
      UserInterestedInReadableStore.defaultIIPEStoreWithMtls(
        mhMtlsParams,
        modelVersion = ModelVersions.Model20M145KUpdated)
    }
    val simClustersInterestedInStore: ReadableStore[
      (UserId, ModelVersion),
      ClustersUserIsInterestedIn
    ] = {
      new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
        override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = {
          k match {
            case (userId, Model20m145kUpdated) =>
              interestedIn20M145KUpdatedStore.get(userId)
            case (userId, Model20m145k2020) =>
              interestedIn20M145K2020Store.get(userId)
            case _ =>
              Future.None
          }
        }
      }
    }
    val simClustersInterestedInFromProducerEmbeddingsStore: ReadableStore[
      (UserId, ModelVersion),
      ClustersUserIsInterestedIn
    ] = {
      new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
        override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = {
          k match {
            // Same unqualified form as the sibling store above (ModelVersion._ is imported).
            case (userId, Model20m145kUpdated) =>
              interestedInFromPE20M145KUpdatedStore.get(userId)
            case _ =>
              Future.None
          }
        }
      }
    }
    new twistly.interestedin.EmbeddingStore(
      interestedInStore = simClustersInterestedInStore,
      interestedInFromProducerEmbeddingStore = simClustersInterestedInFromProducerEmbeddingsStore,
      statsReceiver = stats
    ).mapValues(_.toThrift)
  }

  private val userNextInterestedIn20m145k2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultNextInterestedInStoreWithMtls,
      UserNextInterestedIn,
      Model20m145k2020)
  }

  private val filteredUserInterestedIn20m145kUpdatedStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildMemCacheStore(twistlyUserInterestedInStore, FilteredUserInterestedIn, Model20m145kUpdated)
  }

  private val filteredUserInterestedIn20m145k2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildMemCacheStore(twistlyUserInterestedInStore, FilteredUserInterestedIn, Model20m145k2020)
  }

  private val filteredUserInterestedInFromPE20m145kUpdatedStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildMemCacheStore(
      twistlyUserInterestedInStore,
      FilteredUserInterestedInFromPE,
      Model20m145kUpdated)
  }

  private val unfilteredUserInterestedIn20m145kUpdatedStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildMemCacheStore(
      twistlyUserInterestedInStore,
      UnfilteredUserInterestedIn,
      Model20m145kUpdated)
  }

  private val unfilteredUserInterestedIn20m145k2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    buildMemCacheStore(twistlyUserInterestedInStore, UnfilteredUserInterestedIn, Model20m145k2020)
  }

  // [Experimental] User InterestedIn, generated by aggregating IIAPE embedding from AddressBook

  private val logFavBasedInterestedMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildAddressBookInterestedInStore(
      LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE,
      datasetName = "addressbook_sims_embedding_iiape_maxpooling")

  private val logFavBasedInterestedAverageAddressBookFromIIAPE20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildAddressBookInterestedInStore(
      LogFavBasedUserInterestedAverageAddressBookFromIIAPE,
      datasetName = "addressbook_sims_embedding_iiape_average")

  private val logFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildAddressBookInterestedInStore(
      LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE,
      datasetName = "addressbook_sims_embedding_iiape_booktype_maxpooling")

  private val logFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildAddressBookInterestedInStore(
      LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE,
      datasetName = "addressbook_sims_embedding_iiape_largestdim_maxpooling")

  private val logFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildAddressBookInterestedInStore(
      LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE,
      datasetName = "addressbook_sims_embedding_iiape_louvain_maxpooling")

  private val logFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    buildAddressBookInterestedInStore(
      LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE,
      datasetName = "addressbook_sims_embedding_iiape_connected_maxpooling")

  /**
   * Builds one of the experimental address-book InterestedIn stores.
   *
   * All of them share the same app id, model version (`Model20m145k2020`), Manhattan cluster
   * (`Apollo`) and store function ([[simClustersEmbeddingStoreWithMtls]]); only the embedding
   * type and dataset name vary.
   *
   * @param embeddingType embedding type served by the store
   * @param datasetName   dataset name forwarded to the store function
   */
  private def buildAddressBookInterestedInStore(
    embeddingType: EmbeddingType,
    datasetName: String
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    buildUserInterestedInStoreGeneric(
      simClustersEmbeddingStoreWithMtls,
      embeddingType,
      Model20m145k2020,
      datasetName = datasetName,
      appId = "wtf_embedding_apollo",
      manhattanCluster = Apollo
    )

  /**
   * Helper func to build a readable store for some UserInterestedIn embeddings with
   * 1. A storeFunc from UserInterestedInReadableStore
   * 2. EmbeddingType
   * 3. ModelVersion
   * 4. MemCacheConfig
   *
   * The store returned by `storeFunc` is converted to thrift values and then goes through
   * [[buildMemCacheStore]], so it is observed and cached exactly like every other store here.
   * */
  private def buildUserInterestedInStore(
    storeFunc: (ManhattanKVClientMtlsParams, EmbeddingType, ModelVersion) => ReadableStore[
      SimClustersEmbeddingId,
      SimClustersEmbedding
    ],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val rawStore = storeFunc(mhMtlsParams, embeddingType, modelVersion)
      .mapValues(_.toThrift)

    buildMemCacheStore(rawStore, embeddingType, modelVersion)
  }

  /**
   * Same as [[buildUserInterestedInStore]], for store functions that additionally need the
   * dataset name, app id and Manhattan cluster to be supplied by the caller.
   */
  private def buildUserInterestedInStoreGeneric(
    storeFunc: (ManhattanKVClientMtlsParams, EmbeddingType, ModelVersion, String, String,
      ManhattanCluster) => ReadableStore[
      SimClustersEmbeddingId,
      SimClustersEmbedding
    ],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion,
    datasetName: String,
    appId: String,
    manhattanCluster: ManhattanCluster
  ): ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val rawStore =
      storeFunc(mhMtlsParams, embeddingType, modelVersion, datasetName, appId, manhattanCluster)
        .mapValues(_.toThrift)

    buildMemCacheStore(rawStore, embeddingType, modelVersion)
  }

  /**
   * Store function for datasets that are addressed explicitly by app id, dataset name and
   * Manhattan cluster.
   *
   * The returned store only accepts ids whose embedding type and model version equal the ones
   * given here and that carry an `InternalId.UserId`; the key mapping is partial, so any other
   * id does not match.
   *
   * @throws IllegalArgumentException if `modelVersionToDatasetMap` has no entry for
   *                                  `ModelVersions.toKnownForModelVersion(modelVersion)`
   */
  private def simClustersEmbeddingStoreWithMtls(
    mhMtlsParams: ManhattanKVClientMtlsParams,
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion,
    datasetName: String,
    appId: String,
    manhattanCluster: ManhattanCluster
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {

    if (!modelVersionToDatasetMap.contains(ModelVersions.toKnownForModelVersion(modelVersion))) {
      throw new IllegalArgumentException(
        s"Unknown model version: $modelVersion. Known model versions: $knownModelVersions")
    }
    getStore(appId, mhMtlsParams, datasetName, manhattanCluster)
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(theEmbeddingType, theModelVersion, InternalId.UserId(userId))
            if theEmbeddingType == embeddingType && theModelVersion == modelVersion =>
          userId
      }.mapValues(toSimClustersEmbedding(_, embeddingType))
  }

  /**
   * Common tail of every store in this class: observe `rawStore` under
   * `stats/<embeddingType>/<modelVersion>` and hand the observed store to
   * `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
   *
   * @param rawStore      store of thrift embeddings keyed by `SimClustersEmbeddingId`
   * @param embeddingType used for the stats scope and forwarded to `MemCacheConfig`
   * @param modelVersion  used for the stats scope and forwarded to `MemCacheConfig`
   */
  private def buildMemCacheStore(
    rawStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val observedStore = ObservedReadableStore(
      store = rawStore
    )(stats.scope(embeddingType.name).scope(modelVersion.name))

    MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
      observedStore,
      cacheClient,
      embeddingType,
      modelVersion,
      stats
    )
  }

  // Lookup table consumed by buildWithDecider: one entry per supported
  // (embedding type, model version) pair.
  private val underlyingStores: Map[
    (EmbeddingType, ModelVersion),
    ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
  ] = Map(
    // KnownFor Embeddings
    (FavBasedProducer, Model20m145kUpdated) -> favBasedProducer20M145KUpdatedEmbeddingStore,
    (FavBasedProducer, Model20m145k2020) -> favBasedProducer20M145K2020EmbeddingStore,
    (FollowBasedProducer, Model20m145k2020) -> followBasedProducer20M145K2020EmbeddingStore,
    (AggregatableLogFavBasedProducer, Model20m145k2020) -> logFavBasedApe20M145K2020EmbeddingStore,
    (
      RelaxedAggregatableLogFavBasedProducer,
      Model20m145kUpdated) -> relaxedLogFavBasedApe20m145kUpdatedEmbeddingStore,
    (
      RelaxedAggregatableLogFavBasedProducer,
      Model20m145k2020) -> relaxedLogFavBasedApe20M145K2020EmbeddingStore,
    // InterestedIn Embeddings
    (
      LogFavBasedUserInterestedInFromAPE,
      Model20m145k2020) -> logFavBasedInterestedInFromAPE20M145K2020Store,
    (
      FollowBasedUserInterestedInFromAPE,
      Model20m145k2020) -> followBasedInterestedInFromAPE20M145K2020Store,
    (FavBasedUserInterestedIn, Model20m145kUpdated) -> favBasedUserInterestedIn20M145KUpdatedStore,
    (FavBasedUserInterestedIn, Model20m145k2020) -> favBasedUserInterestedIn20M145K2020Store,
    (FollowBasedUserInterestedIn, Model20m145k2020) -> followBasedUserInterestedIn20M145K2020Store,
    (LogFavBasedUserInterestedIn, Model20m145k2020) -> logFavBasedUserInterestedIn20M145K2020Store,
    (
      FavBasedUserInterestedInFromPE,
      Model20m145kUpdated) -> favBasedUserInterestedInFromPE20M145KUpdatedStore,
    (FilteredUserInterestedIn, Model20m145kUpdated) -> filteredUserInterestedIn20m145kUpdatedStore,
    (FilteredUserInterestedIn, Model20m145k2020) -> filteredUserInterestedIn20m145k2020Store,
    (
      FilteredUserInterestedInFromPE,
      Model20m145kUpdated) -> filteredUserInterestedInFromPE20m145kUpdatedStore,
    (
      UnfilteredUserInterestedIn,
      Model20m145kUpdated) -> unfilteredUserInterestedIn20m145kUpdatedStore,
    (UnfilteredUserInterestedIn, Model20m145k2020) -> unfilteredUserInterestedIn20m145k2020Store,
    (UserNextInterestedIn, Model20m145k2020) -> userNextInterestedIn20m145k2020Store,
    (
      LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020) -> logFavBasedInterestedMaxpoolingAddressBookFromIIAPE20M145K2020Store,
    (
      LogFavBasedUserInterestedAverageAddressBookFromIIAPE,
      Model20m145k2020) -> logFavBasedInterestedAverageAddressBookFromIIAPE20M145K2020Store,
    (
      LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020) -> logFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE20M145K2020Store,
    (
      LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020) -> logFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE20M145K2020Store,
    (
      LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020) -> logFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE20M145K2020Store,
    (
      LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE,
      Model20m145k2020) -> logFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE20M145K2020Store,
  )

  /**
   * Single entry point for all user embeddings: the table above plus the RMS decider and this
   * class' stats scope, combined by `SimClustersEmbeddingStore.buildWithDecider`.
   */
  val userSimClustersEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = stats
    )
  }

}
|
18
representation-manager/server/src/main/thrift/BUILD
Normal file
18
representation-manager/server/src/main/thrift/BUILD
Normal file
|
@ -0,0 +1,18 @@
|
|||
# Thrift libraries for the Representation Manager service definition (service.thrift),
# generated for Java, Scala and Strato.
create_thrift_libraries(
    base_name = "thrift",
    sources = ["com/twitter/representation_manager/service.thrift"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependency_roots = ["src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift"],
    generate_languages = [
        "java",
        "scala",
        "strato",
    ],
)
|
|
@ -0,0 +1,14 @@
|
|||
namespace java com.twitter.representation_manager.thriftjava
|
||||
#@namespace scala com.twitter.representation_manager.thriftscala
|
||||
#@namespace strato com.twitter.representation_manager
|
||||
|
||||
include "com/twitter/simclusters_v2/online_store.thrift"
|
||||
include "com/twitter/simclusters_v2/identifier.thrift"
|
||||
|
||||
/**
 * A uniform column view for all kinds of SimClusters based embeddings.
 * A view is the pair of an embedding type and a model version.
 */
struct SimClustersEmbeddingView {
  // Embedding type, as declared in identifier.thrift.
  1: required identifier.EmbeddingType embeddingType
  // Model version, as declared in online_store.thrift.
  2: required online_store.ModelVersion modelVersion
}(persisted = 'false', hasPersonalData = 'false')
|
Loading…
Reference in a new issue