mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-12-22 10:25:29 +00:00
Open-sourcing Representation Manager
Representation Manager (RMS) serves as a centralized embedding management system, providing SimClusters or other embeddings as facade of the underlying storage or services.
This commit is contained in:
parent
197bf2c563
commit
43cdcf2ed6
1
representation-manager/BUILD.bazel
Normal file
1
representation-manager/BUILD.bazel
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# This prevents SQ query from grabbing //:all since it traverses up once to find a BUILD
|
4
representation-manager/README.md
Normal file
4
representation-manager/README.md
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
# Representation Manager #
|
||||||
|
|
||||||
|
**Representation Manager** (RMS) serves as a centralized embedding management system, providing SimClusters or other embeddings as a facade over the underlying storage or services.
|
||||||
|
|
4
representation-manager/bin/deploy.sh
Executable file
4
representation-manager/bin/deploy.sh
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
JOB=representation-manager bazel run --ui_event_filters=-info,-stdout,-stderr --noshow_progress \
|
||||||
|
//relevance-platform/src/main/python/deploy -- "$@"
|
|
@ -0,0 +1,17 @@
|
||||||
|
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"finatra/inject/inject-thrift-client",
|
||||||
|
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
|
||||||
|
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||||
|
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore",
|
||||||
|
"representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
|
||||||
|
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||||
|
"src/scala/com/twitter/simclusters_v2/common",
|
||||||
|
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||||
|
"stitch/stitch-storehaus",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/client",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,208 @@
|
||||||
|
package com.twitter.representation_manager
|
||||||
|
|
||||||
|
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||||
|
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||||
|
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||||
|
import com.twitter.representation_manager.config.ClientConfig
|
||||||
|
import com.twitter.representation_manager.config.DisabledInMemoryCacheParams
|
||||||
|
import com.twitter.representation_manager.config.EnabledInMemoryCacheParams
|
||||||
|
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||||
|
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||||
|
import com.twitter.storehaus.ReadableStore
|
||||||
|
import com.twitter.strato.client.{Client => StratoClient}
|
||||||
|
import com.twitter.strato.thrift.ScroogeConvImplicits._
|
||||||
|
|
||||||
|
/**
 * Builds ReadableStores of SimClusters embeddings for a given SimClustersEmbeddingView
 * (i.e. embeddingType and modelVersion). It applies the ClientConfig of a particular service
 * and builds ReadableStores which implement that config.
 *
 * Every store is a view-parameterised Strato fetch against one of the
 * "recommendations/representation_manager/simClustersEmbedding.*" columns whose thrift values
 * are converted with `SimClustersEmbedding(_)`, and is then passed through `addCacheLayer`.
 *
 * @param clientConfig    supplies the in-memory cache setup per (embeddingType, modelVersion)
 * @param stratoClient    client used for every Strato column fetch
 * @param memCachedClient accepted by the constructor but not referenced anywhere in this class
 * @param globalStats     root stats receiver; this class records under the scopes
 *                        "representation_manager_client" and its own simple class name
 */
class StoreBuilder(
  clientConfig: ClientConfig,
  stratoClient: StratoClient,
  memCachedClient: MemcachedClient,
  globalStats: StatsReceiver,
) {
  private val stats =
    globalStats.scope("representation_manager_client").scope(this.getClass.getSimpleName)

  // Column consts
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"
  private val SimclustersTopicIdColPath = ColPathPrefix + "simClustersEmbedding.TopicId"
  private val SimclustersLocaleEntityIdColPath =
    ColPathPrefix + "simClustersEmbedding.LocaleEntityId"

  // windowSize handed to ObservedCachedReadableStore whenever in-memory caching is enabled
  private val InMemoryCacheWindowSize = 10000L

  // ---------- uncached per-column stores (shared by the public builders below) ----------

  /** Uncached store over the Tweet column, keyed by Long. */
  private def tweetColumnStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersTweetColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  /** Uncached store over the User column, keyed by Long. */
  private def userColumnStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersUserColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  /** Uncached store over the TopicId column, keyed by TopicId. */
  private def topicIdColumnStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[TopicId, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[TopicId, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersTopicIdColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  /** Uncached store over the LocaleEntityId column, keyed by LocaleEntityId. */
  private def localeEntityIdColumnStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[LocaleEntityId, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[LocaleEntityId, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersLocaleEntityIdColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  // ---------- stores keyed by the entity's own id ----------

  /** Tweet embedding store keyed by Long, wrapped by `addCacheLayer`. */
  def buildSimclustersTweetEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    addCacheLayer(tweetColumnStore(embeddingColumnView), embeddingColumnView)

  /** User embedding store keyed by Long, wrapped by `addCacheLayer`. */
  def buildSimclustersUserEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    addCacheLayer(userColumnStore(embeddingColumnView), embeddingColumnView)

  /** Topic embedding store keyed by TopicId, wrapped by `addCacheLayer`. */
  def buildSimclustersTopicIdEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[TopicId, SimClustersEmbedding] =
    addCacheLayer(topicIdColumnStore(embeddingColumnView), embeddingColumnView)

  /** Locale-entity embedding store keyed by LocaleEntityId, wrapped by `addCacheLayer`. */
  def buildSimclustersLocaleEntityIdEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[LocaleEntityId, SimClustersEmbedding] =
    addCacheLayer(localeEntityIdColumnStore(embeddingColumnView), embeddingColumnView)

  // ---------- stores keyed by SimClustersEmbeddingId ----------
  //
  // Each key mapping below has a single case for the InternalId variant of its column. A
  // SimClustersEmbeddingId carrying any other variant is not matched and fails with a
  // scala.MatchError when the mapping is applied. The cache layer is added after the key
  // mapping, so it is keyed by SimClustersEmbeddingId.

  /** Tweet embedding store keyed by SimClustersEmbeddingId; expects InternalId.TweetId keys. */
  def buildSimclustersTweetEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      tweetColumnStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.TweetId(tweetId)) =>
          tweetId
      }

    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /** User embedding store keyed by SimClustersEmbeddingId; expects InternalId.UserId keys. */
  def buildSimclustersUserEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      userColumnStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) =>
          userId
      }

    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * Same store as [[buildSimclustersTopicIdEmbeddingStoreWithEmbeddingIdAsKey]]; this name is
   * kept so existing callers continue to work.
   */
  def buildSimclustersTopicEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    buildSimclustersTopicIdEmbeddingStoreWithEmbeddingIdAsKey(embeddingColumnView)

  /** Topic embedding store keyed by SimClustersEmbeddingId; expects InternalId.TopicId keys. */
  def buildSimclustersTopicIdEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      topicIdColumnStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.TopicId(topicId)) =>
          topicId
      }

    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * Locale-entity embedding store keyed by SimClustersEmbeddingId; expects
   * InternalId.LocaleEntityId keys.
   */
  def buildSimclustersLocaleEntityIdEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      localeEntityIdColumnStore(embeddingColumnView)
        .composeKeyMapping[SimClustersEmbeddingId] {
          case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(localeEntityId)) =>
            localeEntityId
        }

    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * Wraps `rawStore` according to the in-memory cache setup that ClientConfig holds for the
   * view's (embeddingType, modelVersion).
   *
   * With DisabledInMemoryCacheParams the store is only wrapped in ObservedReadableStore; with
   * EnabledInMemoryCacheParams it is wrapped via ObservedCachedReadableStore.from using the
   * configured ttl, maxKeys and cacheName. In both cases stats are scoped by the embedding
   * type name and then the model version name.
   */
  private def addCacheLayer[K](
    rawStore: ReadableStore[K, SimClustersEmbedding],
    embeddingColumnView: SimClustersEmbeddingView,
  ): ReadableStore[K, SimClustersEmbedding] = {
    // Add in-memory caching based on ClientConfig
    val inMemCacheParams = clientConfig.inMemoryCacheConfig
      .getCacheSetup(embeddingColumnView.embeddingType, embeddingColumnView.modelVersion)

    val statsPerStore = stats
      .scope(embeddingColumnView.embeddingType.name)
      .scope(embeddingColumnView.modelVersion.name)

    inMemCacheParams match {
      case DisabledInMemoryCacheParams =>
        ObservedReadableStore(
          store = rawStore
        )(statsPerStore)
      case EnabledInMemoryCacheParams(ttl, maxKeys, cacheName) =>
        ObservedCachedReadableStore.from[K, SimClustersEmbedding](
          rawStore,
          ttl = ttl,
          maxKeys = maxKeys,
          cacheName = cacheName,
          windowSize = InMemoryCacheWindowSize
        )(statsPerStore)
    }
  }

}
|
|
@ -0,0 +1,12 @@
|
||||||
|
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"finatra/inject/inject-thrift-client",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
|
||||||
|
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||||
|
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/client",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,25 @@
|
||||||
|
package com.twitter.representation_manager.config
|
||||||
|
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||||
|
|
||||||
|
/**
 * This is RMS client config class.
 * We only support setting up in memory cache params for now, but we expect to enable other
 * customisations in the near future e.g. request timeout
 *
 * --------------------------------------------
 * PLEASE NOTE:
 * Having in-memory cache is not necessarily a free performance win, anyone considering it should
 * investigate rather than blindly enabling it
 *
 * @param inMemCacheParamsOverrides cache params per (embeddingType, modelVersion), layered on
 *                                  top of DefaultInMemoryCacheConfig.cacheParamsMap; an
 *                                  override wins over a default entry with the same key
 */
class ClientConfig(
  inMemCacheParamsOverrides: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] = Map.empty
) {
  // In memory cache config per embedding: defaults first, then overrides.
  // The explicit type pins the result instead of relying on what `++` infers.
  val inMemCacheParams: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] =
    DefaultInMemoryCacheConfig.cacheParamsMap ++ inMemCacheParamsOverrides

  // Lookup facade built over the merged params above.
  val inMemoryCacheConfig: InMemoryCacheConfig = new InMemoryCacheConfig(inMemCacheParams)
}

/** ClientConfig with no overrides, i.e. only the default in-memory cache params. */
object DefaultClientConfig extends ClientConfig
|
|
@ -0,0 +1,53 @@
|
||||||
|
package com.twitter.representation_manager.config
|
||||||
|
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||||
|
import com.twitter.util.Duration
|
||||||
|
|
||||||
|
/*
|
||||||
|
* --------------------------------------------
|
||||||
|
* PLEASE NOTE:
|
||||||
|
* Having in-memory cache is not necessarily a free performance win, anyone considering it should
|
||||||
|
* investigate rather than blindly enabling it
|
||||||
|
* --------------------------------------------
|
||||||
|
* */
|
||||||
|
|
||||||
|
/** In-memory cache setup for a single embedding store: either enabled (with params) or disabled. */
sealed trait InMemoryCacheParams

/**
 * This holds the params that are required to set up an in-mem cache for a single embedding store.
 *
 * @param ttl       forwarded as the `ttl` of the cached store
 * @param maxKeys   forwarded as the `maxKeys` of the cached store
 * @param cacheName forwarded as the `cacheName` of the cached store
 */
case class EnabledInMemoryCacheParams(
  ttl: Duration,
  maxKeys: Int,
  cacheName: String)
    extends InMemoryCacheParams

/**
 * Marks an embedding store as having no in-memory cache. Declared as a `case object` so the
 * singleton gets a readable `toString` and is Serializable like its case-class sibling.
 */
case object DisabledInMemoryCacheParams extends InMemoryCacheParams
|
||||||
|
|
||||||
|
/**
 * In-memory cache config, keyed by (embeddingType, modelVersion).
 *
 * Clients may pass their own cacheParamsMap to create a new InMemoryCacheConfig instead of
 * using the DefaultInMemoryCacheConfig object below.
 *
 * @param cacheParamsMap cache params per (embeddingType, modelVersion); empty by default
 */
class InMemoryCacheConfig(
  cacheParamsMap: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] = Map.empty
) {

  /**
   * Looks up the cache setup for one embedding.
   *
   * @return the params registered for (embeddingType, modelVersion), or
   *         DisabledInMemoryCacheParams when nothing is registered for that pair
   */
  def getCacheSetup(
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): InMemoryCacheParams = {
    val lookupKey = (embeddingType, modelVersion)
    cacheParamsMap.get(lookupKey) match {
      case Some(configuredParams) => configuredParams
      // When requested embedding type doesn't exist, we return DisabledInMemoryCacheParams
      case None => DisabledInMemoryCacheParams
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Default config for the in-memory cache.
 * Clients can directly import and use this one if they don't want to set up a customised config.
 *
 * Note: `getCacheSetup` on this object reads the superclass constructor argument (left at its
 * default, an empty map, because no argument is passed here), not the `cacheParamsMap` val
 * declared below. The val is the public view of the defaults that ClientConfig merges
 * overrides into.
 */
object DefaultInMemoryCacheConfig extends InMemoryCacheConfig {
  // set default to no in-memory caching
  // Explicitly typed: a bare `Map.empty` would be inferred as Map[Nothing, Nothing].
  val cacheParamsMap: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] = Map.empty
}
|
21
representation-manager/server/BUILD
Normal file
21
representation-manager/server/BUILD
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
jvm_binary(
|
||||||
|
name = "bin",
|
||||||
|
basename = "representation-manager",
|
||||||
|
main = "com.twitter.representation_manager.RepresentationManagerFedServerMain",
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"finatra/inject/inject-logback/src/main/scala",
|
||||||
|
"loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback",
|
||||||
|
"representation-manager/server/src/main/resources",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager",
|
||||||
|
"twitter-server/logback-classic/src/main/scala",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app
|
||||||
|
jvm_app(
|
||||||
|
name = "representation-manager-app",
|
||||||
|
archive = "zip",
|
||||||
|
binary = ":bin",
|
||||||
|
)
|
7
representation-manager/server/src/main/resources/BUILD
Normal file
7
representation-manager/server/src/main/resources/BUILD
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
resources(
|
||||||
|
sources = [
|
||||||
|
"*.xml",
|
||||||
|
"config/*.yml",
|
||||||
|
],
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
)
|
|
@ -0,0 +1,219 @@
|
||||||
|
# ---------- traffic percentage by embedding type and model version ----------
|
||||||
|
# Decider strings are built dynamically following the rule below,
|
||||||
|
# i.e. s"enable_${embeddingType.name}_${modelVersion.name}"
|
||||||
|
# Hence this should be updated accordingly if usage is changed in the embedding stores
|
||||||
|
|
||||||
|
# Tweet embeddings
|
||||||
|
"enable_LogFavBasedTweet_Model20m145k2020":
|
||||||
|
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavBasedTweet - Model20m145k2020. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedTweet_Model20m145kUpdated":
|
||||||
|
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavBasedTweet - Model20m145kUpdated. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavLongestL2EmbeddingTweet_Model20m145k2020":
|
||||||
|
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavLongestL2EmbeddingTweet - Model20m145k2020. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavLongestL2EmbeddingTweet_Model20m145kUpdated":
|
||||||
|
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavLongestL2EmbeddingTweet - Model20m145kUpdated. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
# Topic embeddings
|
||||||
|
"enable_FavTfgTopic_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to FavTfgTopic - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedKgoApeTopic_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedKgoApeTopic - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
# User embeddings - KnownFor
|
||||||
|
"enable_FavBasedProducer_Model20m145kUpdated":
|
||||||
|
comment: "Enable the read traffic to FavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FavBasedProducer_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to FavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FollowBasedProducer_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to FollowBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_AggregatableFavBasedProducer_Model20m145kUpdated":
|
||||||
|
comment: "Enable the read traffic to AggregatableFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_AggregatableFavBasedProducer_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to AggregatableFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_AggregatableLogFavBasedProducer_Model20m145kUpdated":
|
||||||
|
comment: "Enable the read traffic to AggregatableLogFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_AggregatableLogFavBasedProducer_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to AggregatableLogFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
enable_RelaxedAggregatableLogFavBasedProducer_Model20m145kUpdated:
|
||||||
|
comment: "Enable the read traffic to RelaxedAggregatableLogFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
enable_RelaxedAggregatableLogFavBasedProducer_Model20m145k2020:
|
||||||
|
comment: "Enable the read traffic to RelaxedAggregatableLogFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
# User embeddings - InterestedIn
|
||||||
|
"enable_LogFavBasedUserInterestedInFromAPE_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedUserInterestedInFromAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FollowBasedUserInterestedInFromAPE_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to FollowBasedUserInterestedInFromAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FavBasedUserInterestedIn_Model20m145kUpdated":
|
||||||
|
comment: "Enable the read traffic to FavBasedUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FavBasedUserInterestedIn_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to FavBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FollowBasedUserInterestedIn_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to FollowBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedUserInterestedIn_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FavBasedUserInterestedInFromPE_Model20m145kUpdated":
|
||||||
|
comment: "Enable the read traffic to FavBasedUserInterestedInFromPE - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FilteredUserInterestedIn_Model20m145kUpdated":
|
||||||
|
comment: "Enable the read traffic to FilteredUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FilteredUserInterestedIn_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to FilteredUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_FilteredUserInterestedInFromPE_Model20m145kUpdated":
|
||||||
|
comment: "Enable the read traffic to FilteredUserInterestedInFromPE - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_UnfilteredUserInterestedIn_Model20m145kUpdated":
|
||||||
|
comment: "Enable the read traffic to UnfilteredUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_UnfilteredUserInterestedIn_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to UnfilteredUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_UserNextInterestedIn_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to UserNextInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedUserInterestedAverageAddressBookFromIIAPE_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedUserInterestedAverageAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
"enable_LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
|
||||||
|
comment: "Enable the read traffic to LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
|
||||||
|
default_availability: 10000
|
||||||
|
|
||||||
|
# ---------- load shedding by caller id ----------
|
||||||
|
# To create a new decider, add an entry here in the same format with the caller's details:
|
||||||
|
# "representation-manager_load_shed_by_caller_id_twtr:{{role}}:{{name}}:{{environment}}:{{cluster}}"
|
||||||
|
# All the deciders below are generated by this script:
|
||||||
|
# ./strato/bin/fed deciders representation-manager --service-role=representation-manager --service-name=representation-manager
|
||||||
|
# If you need to run the script and paste the output, add ONLY the prod deciders here.
|
||||||
|
"representation-manager_load_shed_by_caller_id_all":
|
||||||
|
comment: "Reject all traffic from caller id: all"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:cr-mixer:cr-mixer:prod:atla":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:cr-mixer:cr-mixer:prod:atla"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:cr-mixer:cr-mixer:prod:pdxa":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:cr-mixer:cr-mixer:prod:pdxa"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-1:prod:atla":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-1:prod:atla"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-1:prod:pdxa":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-1:prod:pdxa"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-3:prod:atla":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-3:prod:atla"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-3:prod:pdxa":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-3:prod:pdxa"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-4:prod:atla":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-4:prod:atla"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-4:prod:pdxa":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-4:prod:pdxa"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:atla":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:atla"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:pdxa":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:pdxa"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann:prod:atla":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann:prod:atla"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann:prod:pdxa":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann:prod:pdxa"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoapi:prod:atla":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoapi:prod:atla"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:atla":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:atla"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:pdxa":
|
||||||
|
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:pdxa"
|
||||||
|
default_availability: 0
|
||||||
|
|
||||||
|
# ---------- Dark Traffic Proxy ----------
|
||||||
|
representation-manager_forward_dark_traffic:
|
||||||
|
comment: "Defines the percentage of traffic to forward to diffy-proxy. Set to 0 to disable dark traffic forwarding"
|
||||||
|
default_availability: 0
|
165
representation-manager/server/src/main/resources/logback.xml
Normal file
165
representation-manager/server/src/main/resources/logback.xml
Normal file
|
@ -0,0 +1,165 @@
|
||||||
|
<configuration>
|
||||||
|
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>
|
||||||
|
|
||||||
|
<!-- ===================================================== -->
|
||||||
|
<!-- Service Config -->
|
||||||
|
<!-- ===================================================== -->
|
||||||
|
<property name="DEFAULT_SERVICE_PATTERN"
|
||||||
|
value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>
|
||||||
|
|
||||||
|
<property name="DEFAULT_ACCESS_PATTERN"
|
||||||
|
value="%msg"/>
|
||||||
|
|
||||||
|
<!-- ===================================================== -->
|
||||||
|
<!-- Common Config -->
|
||||||
|
<!-- ===================================================== -->
|
||||||
|
|
||||||
|
<!-- JUL/JDK14 to Logback bridge -->
|
||||||
|
<contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
|
||||||
|
<resetJUL>true</resetJUL>
|
||||||
|
</contextListener>
|
||||||
|
|
||||||
|
<!-- ====================================================================================== -->
|
||||||
|
<!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration. -->
|
||||||
|
<!-- You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy. -->
|
||||||
|
<!-- See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy -->
|
||||||
|
<!-- ====================================================================================== -->
|
||||||
|
|
||||||
|
<!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
|
||||||
|
<appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||||
|
<file>${log.service.output}</file>
|
||||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||||||
|
<!-- daily rollover -->
|
||||||
|
<fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
|
||||||
|
<!-- the maximum total size of all the log files -->
|
||||||
|
<totalSizeCap>3GB</totalSizeCap>
|
||||||
|
<!-- keep maximum 21 days' worth of history -->
|
||||||
|
<maxHistory>21</maxHistory>
|
||||||
|
<cleanHistoryOnStart>true</cleanHistoryOnStart>
|
||||||
|
</rollingPolicy>
|
||||||
|
<encoder>
|
||||||
|
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
|
||||||
|
</encoder>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<!-- Access Log (rollover daily, keep maximum of 7 days of gzip compressed logs) -->
|
||||||
|
<appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||||
|
<file>${log.access.output}</file>
|
||||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||||||
|
<!-- daily rollover -->
|
||||||
|
<fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
|
||||||
|
<!-- the maximum total size of all the log files -->
|
||||||
|
<totalSizeCap>100MB</totalSizeCap>
|
||||||
|
<!-- keep maximum 7 days' worth of history -->
|
||||||
|
<maxHistory>7</maxHistory>
|
||||||
|
<cleanHistoryOnStart>true</cleanHistoryOnStart>
|
||||||
|
</rollingPolicy>
|
||||||
|
<encoder>
|
||||||
|
<pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
|
||||||
|
</encoder>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<!--LogLens -->
|
||||||
|
<appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
|
||||||
|
<mdcAdditionalContext>true</mdcAdditionalContext>
|
||||||
|
<category>${log.lens.category}</category>
|
||||||
|
<index>${log.lens.index}</index>
|
||||||
|
<tag>${log.lens.tag}/service</tag>
|
||||||
|
<encoder>
|
||||||
|
<pattern>%msg</pattern>
|
||||||
|
</encoder>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<!-- LogLens Access -->
|
||||||
|
<appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
|
||||||
|
<mdcAdditionalContext>true</mdcAdditionalContext>
|
||||||
|
<category>${log.lens.category}</category>
|
||||||
|
<index>${log.lens.index}</index>
|
||||||
|
<tag>${log.lens.tag}/access</tag>
|
||||||
|
<encoder>
|
||||||
|
<pattern>%msg</pattern>
|
||||||
|
</encoder>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<!-- Pipeline Execution Logs -->
|
||||||
|
<appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||||
|
<file>allow_listed_pipeline_executions.log</file>
|
||||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||||||
|
<!-- daily rollover -->
|
||||||
|
<fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
|
||||||
|
<!-- the maximum total size of all the log files -->
|
||||||
|
<totalSizeCap>100MB</totalSizeCap>
|
||||||
|
<!-- keep maximum 7 days' worth of history -->
|
||||||
|
<maxHistory>7</maxHistory>
|
||||||
|
<cleanHistoryOnStart>true</cleanHistoryOnStart>
|
||||||
|
</rollingPolicy>
|
||||||
|
<encoder>
|
||||||
|
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
|
||||||
|
</encoder>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<!-- ===================================================== -->
|
||||||
|
<!-- Primary Async Appenders -->
|
||||||
|
<!-- ===================================================== -->
|
||||||
|
|
||||||
|
<property name="async_queue_size" value="${queue.size:-50000}"/>
|
||||||
|
<property name="async_max_flush_time" value="${max.flush.time:-0}"/>
|
||||||
|
|
||||||
|
<appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
|
||||||
|
<queueSize>${async_queue_size}</queueSize>
|
||||||
|
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||||
|
<appender-ref ref="SERVICE"/>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
|
||||||
|
<queueSize>${async_queue_size}</queueSize>
|
||||||
|
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||||
|
<appender-ref ref="ACCESS"/>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
|
||||||
|
<queueSize>${async_queue_size}</queueSize>
|
||||||
|
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||||
|
<appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
|
||||||
|
<queueSize>${async_queue_size}</queueSize>
|
||||||
|
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||||
|
<appender-ref ref="LOGLENS"/>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
|
||||||
|
<queueSize>${async_queue_size}</queueSize>
|
||||||
|
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
|
||||||
|
<appender-ref ref="LOGLENS-ACCESS"/>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<!-- ===================================================== -->
|
||||||
|
<!-- Package Config -->
|
||||||
|
<!-- ===================================================== -->
|
||||||
|
|
||||||
|
<!-- Per-Package Config -->
|
||||||
|
<logger name="com.twitter" level="INHERITED"/>
|
||||||
|
<logger name="com.twitter.wilyns" level="INHERITED"/>
|
||||||
|
<logger name="com.twitter.configbus.client.file" level="INHERITED"/>
|
||||||
|
<logger name="com.twitter.finagle.mux" level="INHERITED"/>
|
||||||
|
<logger name="com.twitter.finagle.serverset2" level="INHERITED"/>
|
||||||
|
<logger name="com.twitter.logging.ScribeHandler" level="INHERITED"/>
|
||||||
|
<logger name="com.twitter.zookeeper.client.internal" level="INHERITED"/>
|
||||||
|
|
||||||
|
<!-- Root Config -->
|
||||||
|
<!-- For all logs except access logs, disable logging below the configured log_level by default. This can be overridden in the per-package loggers, and dynamically in the admin panel of individual instances. -->
|
||||||
|
<root level="${log_level:-INFO}">
|
||||||
|
<appender-ref ref="ASYNC-SERVICE"/>
|
||||||
|
<appender-ref ref="ASYNC-LOGLENS"/>
|
||||||
|
</root>
|
||||||
|
|
||||||
|
<!-- Access Logging -->
|
||||||
|
<!-- Access logs are turned off by default -->
|
||||||
|
<logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter" level="OFF" additivity="false">
|
||||||
|
<appender-ref ref="ASYNC-ACCESS"/>
|
||||||
|
<appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
|
||||||
|
</logger>
|
||||||
|
|
||||||
|
</configuration>
|
|
@ -0,0 +1,13 @@
|
||||||
|
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"finatra/inject/inject-thrift-client",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,40 @@
|
||||||
|
package com.twitter.representation_manager
|
||||||
|
|
||||||
|
import com.google.inject.Module
|
||||||
|
import com.twitter.inject.thrift.modules.ThriftClientIdModule
|
||||||
|
import com.twitter.representation_manager.columns.topic.LocaleEntityIdSimClustersEmbeddingCol
|
||||||
|
import com.twitter.representation_manager.columns.topic.TopicIdSimClustersEmbeddingCol
|
||||||
|
import com.twitter.representation_manager.columns.tweet.TweetSimClustersEmbeddingCol
|
||||||
|
import com.twitter.representation_manager.columns.user.UserSimClustersEmbeddingCol
|
||||||
|
import com.twitter.representation_manager.modules.CacheModule
|
||||||
|
import com.twitter.representation_manager.modules.InterestsThriftClientModule
|
||||||
|
import com.twitter.representation_manager.modules.LegacyRMSConfigModule
|
||||||
|
import com.twitter.representation_manager.modules.StoreModule
|
||||||
|
import com.twitter.representation_manager.modules.TimerModule
|
||||||
|
import com.twitter.representation_manager.modules.UttClientModule
|
||||||
|
import com.twitter.strato.fed._
|
||||||
|
import com.twitter.strato.fed.server._
|
||||||
|
|
||||||
|
/**
 * Concrete singleton that instantiates [[RepresentationManagerFedServer]] with no overrides.
 * NOTE(review): presumably the process entry point (per the `Main` suffix) - confirm against
 * the deployment config.
 */
object RepresentationManagerFedServerMain extends RepresentationManagerFedServer
|
||||||
|
|
||||||
|
/**
 * Strato federated server definition for Representation Manager.
 *
 * Declares the server's `dest` path, the Guice modules to install, and the
 * SimClusters embedding columns (tweet, user, topic id, locale entity id) to serve.
 */
trait RepresentationManagerFedServer extends StratoFedServer {

  override def dest: String = "/s/representation-manager/representation-manager"

  // Guice modules installed for this server.
  override val modules: Seq[Module] = Seq(
    CacheModule,
    InterestsThriftClientModule,
    LegacyRMSConfigModule,
    StoreModule,
    ThriftClientIdModule,
    TimerModule,
    UttClientModule
  )

  // Column classes exposed by this server; each is constructed via injection.
  override def columns: Seq[Class[_ <: StratoFed.Column]] = Seq(
    classOf[TweetSimClustersEmbeddingCol],
    classOf[UserSimClustersEmbeddingCol],
    classOf[TopicIdSimClustersEmbeddingCol],
    classOf[LocaleEntityIdSimClustersEmbeddingCol]
  )
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,26 @@
|
||||||
|
package com.twitter.representation_manager.columns
|
||||||
|
|
||||||
|
import com.twitter.strato.access.Access.LdapGroup
|
||||||
|
import com.twitter.strato.config.ContactInfo
|
||||||
|
import com.twitter.strato.config.FromColumns
|
||||||
|
import com.twitter.strato.config.Has
|
||||||
|
import com.twitter.strato.config.Prefix
|
||||||
|
import com.twitter.strato.config.ServiceIdentifierPattern
|
||||||
|
|
||||||
|
/**
 * Shared Strato column configuration reused by the Representation Manager columns:
 * two (currently empty) permission lists and the common contact information.
 */
object ColumnConfigBase {
  import com.twitter.strato.config.Policy

  /****************** Internal permissions *******************/
  // No internal policies are listed here.
  val recosPermissions: Seq[Policy] = Seq.empty

  /****************** External permissions *******************/
  // Used to grant limited access to members outside of the RP team; currently empty.
  val externalPermissions: Seq[Policy] = Seq.empty

  // Contact details attached to every column that references this object.
  val contactInfo: ContactInfo =
    ContactInfo(
      description = "Please contact Relevance Platform for more details",
      contactEmail = "no-reply@twitter.com",
      ldapGroup = "ldap",
      jiraProject = "JIRA",
      links = Seq("http://go/rms-runbook")
    )
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"finatra/inject/inject-core/src/main/scala",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
|
||||||
|
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,77 @@
|
||||||
|
package com.twitter.representation_manager.columns.topic
|
||||||
|
|
||||||
|
import com.twitter.representation_manager.columns.ColumnConfigBase
|
||||||
|
import com.twitter.representation_manager.store.TopicSimClustersEmbeddingStore
|
||||||
|
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
|
||||||
|
import com.twitter.stitch
|
||||||
|
import com.twitter.stitch.Stitch
|
||||||
|
import com.twitter.stitch.storehaus.StitchOfReadableStore
|
||||||
|
import com.twitter.strato.catalog.OpMetadata
|
||||||
|
import com.twitter.strato.config.AnyOf
|
||||||
|
import com.twitter.strato.config.ContactInfo
|
||||||
|
import com.twitter.strato.config.FromColumns
|
||||||
|
import com.twitter.strato.config.Policy
|
||||||
|
import com.twitter.strato.config.Prefix
|
||||||
|
import com.twitter.strato.data.Conv
|
||||||
|
import com.twitter.strato.data.Description.PlainText
|
||||||
|
import com.twitter.strato.data.Lifecycle
|
||||||
|
import com.twitter.strato.fed._
|
||||||
|
import com.twitter.strato.thrift.ScroogeConv
|
||||||
|
import javax.inject.Inject
|
||||||
|
|
||||||
|
/**
 * Strato column `recommendations/representation_manager/simClustersEmbedding.LocaleEntityId`.
 *
 * Looks up a topic SimClusters embedding keyed by a [[LocaleEntityId]]; the embedding type and
 * model version come from the [[SimClustersEmbeddingView]] supplied with each fetch.
 *
 * @param embeddingStore provider of the topic SimClusters embedding store backing this column
 */
class LocaleEntityIdSimClustersEmbeddingCol @Inject() (
  embeddingStore: TopicSimClustersEmbeddingStore)
    extends StratoFed.Column(
      "recommendations/representation_manager/simClustersEmbedding.LocaleEntityId")
    with StratoFed.Fetch.Stitch {

  // Stitch adapter over the readable store, with stored values converted to thrift.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] =
    StitchOfReadableStore(
      embeddingStore.topicSimClustersEmbeddingStore.mapValues(_.toThrift)
    )

  // Shared internal + external permissions, plus access from the feature-store column prefix.
  val colPermissions: Seq[Policy] =
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+
      FromColumns(Set(Prefix("ml/featureStore/simClusters")))

  override val policy: Policy = AnyOf(colPermissions)

  override type Key = LocaleEntityId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  override val keyConv: Conv[Key] = ScroogeConv.fromStruct[LocaleEntityId]
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Lifecycle.Production),
    description = Some(
      PlainText(
        "The Topic SimClusters Embedding Endpoint in Representation Management Service with LocaleEntityId." +
          " TDD: http://go/rms-tdd"
      )
    )
  )

  /**
   * Fetches the embedding identified by (view.embeddingType, view.modelVersion, key).
   *
   * @return `found(embedding)` on success, or `missing` when the lookup fails with
   *         `stitch.NotFound`; no other failure is handled here.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val internalId = InternalId.LocaleEntityId(key)
    val lookupId = SimClustersEmbeddingId(view.embeddingType, view.modelVersion, internalId)

    storeStitch(lookupId)
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}
|
|
@ -0,0 +1,74 @@
|
||||||
|
package com.twitter.representation_manager.columns.topic
|
||||||
|
|
||||||
|
import com.twitter.representation_manager.columns.ColumnConfigBase
|
||||||
|
import com.twitter.representation_manager.store.TopicSimClustersEmbeddingStore
|
||||||
|
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||||
|
import com.twitter.stitch
|
||||||
|
import com.twitter.stitch.Stitch
|
||||||
|
import com.twitter.stitch.storehaus.StitchOfReadableStore
|
||||||
|
import com.twitter.strato.catalog.OpMetadata
|
||||||
|
import com.twitter.strato.config.AnyOf
|
||||||
|
import com.twitter.strato.config.ContactInfo
|
||||||
|
import com.twitter.strato.config.FromColumns
|
||||||
|
import com.twitter.strato.config.Policy
|
||||||
|
import com.twitter.strato.config.Prefix
|
||||||
|
import com.twitter.strato.data.Conv
|
||||||
|
import com.twitter.strato.data.Description.PlainText
|
||||||
|
import com.twitter.strato.data.Lifecycle
|
||||||
|
import com.twitter.strato.fed._
|
||||||
|
import com.twitter.strato.thrift.ScroogeConv
|
||||||
|
import javax.inject.Inject
|
||||||
|
|
||||||
|
/**
 * Strato column `recommendations/representation_manager/simClustersEmbedding.TopicId`.
 *
 * Looks up a topic SimClusters embedding keyed by a [[TopicId]]; the embedding type and
 * model version come from the [[SimClustersEmbeddingView]] supplied with each fetch.
 *
 * @param embeddingStore provider of the topic SimClusters embedding store backing this column
 */
class TopicIdSimClustersEmbeddingCol @Inject() (embeddingStore: TopicSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.TopicId")
    with StratoFed.Fetch.Stitch {

  // Stitch adapter over the readable store, with stored values converted to thrift.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] =
    StitchOfReadableStore(
      embeddingStore.topicSimClustersEmbeddingStore.mapValues(_.toThrift)
    )

  // Shared internal + external permissions, plus access from the feature-store column prefix.
  val colPermissions: Seq[Policy] =
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+
      FromColumns(Set(Prefix("ml/featureStore/simClusters")))

  override val policy: Policy = AnyOf(colPermissions)

  override type Key = TopicId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  override val keyConv: Conv[Key] = ScroogeConv.fromStruct[TopicId]
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Lifecycle.Production),
    description = Some(
      PlainText(
        "The Topic SimClusters Embedding Endpoint in Representation Management Service with TopicId." +
          " TDD: http://go/rms-tdd"
      )
    )
  )

  /**
   * Fetches the embedding identified by (view.embeddingType, view.modelVersion, key).
   *
   * @return `found(embedding)` on success, or `missing` when the lookup fails with
   *         `stitch.NotFound`; no other failure is handled here.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val internalId = InternalId.TopicId(key)
    val lookupId = SimClustersEmbeddingId(view.embeddingType, view.modelVersion, internalId)

    storeStitch(lookupId)
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"finatra/inject/inject-core/src/main/scala",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
|
||||||
|
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,73 @@
|
||||||
|
package com.twitter.representation_manager.columns.tweet
|
||||||
|
|
||||||
|
import com.twitter.representation_manager.columns.ColumnConfigBase
|
||||||
|
import com.twitter.representation_manager.store.TweetSimClustersEmbeddingStore
|
||||||
|
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.stitch
|
||||||
|
import com.twitter.stitch.Stitch
|
||||||
|
import com.twitter.stitch.storehaus.StitchOfReadableStore
|
||||||
|
import com.twitter.strato.catalog.OpMetadata
|
||||||
|
import com.twitter.strato.config.AnyOf
|
||||||
|
import com.twitter.strato.config.ContactInfo
|
||||||
|
import com.twitter.strato.config.FromColumns
|
||||||
|
import com.twitter.strato.config.Policy
|
||||||
|
import com.twitter.strato.config.Prefix
|
||||||
|
import com.twitter.strato.data.Conv
|
||||||
|
import com.twitter.strato.data.Description.PlainText
|
||||||
|
import com.twitter.strato.data.Lifecycle
|
||||||
|
import com.twitter.strato.fed._
|
||||||
|
import com.twitter.strato.thrift.ScroogeConv
|
||||||
|
import javax.inject.Inject
|
||||||
|
|
||||||
|
/**
 * Strato column `recommendations/representation_manager/simClustersEmbedding.Tweet`.
 *
 * Looks up a tweet SimClusters embedding keyed by a tweet id (`Long`); the embedding type and
 * model version come from the [[SimClustersEmbeddingView]] supplied with each fetch.
 *
 * @param embeddingStore provider of the tweet SimClusters embedding store backing this column
 */
class TweetSimClustersEmbeddingCol @Inject() (embeddingStore: TweetSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.Tweet")
    with StratoFed.Fetch.Stitch {

  // Stitch adapter over the readable store, with stored values converted to thrift.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] =
    StitchOfReadableStore(
      embeddingStore.tweetSimClustersEmbeddingStore.mapValues(_.toThrift)
    )

  // Shared internal + external permissions, plus access from the feature-store column prefix.
  val colPermissions: Seq[Policy] =
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+
      FromColumns(Set(Prefix("ml/featureStore/simClusters")))

  override val policy: Policy = AnyOf(colPermissions)

  override type Key = Long // TweetId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  override val keyConv: Conv[Key] = Conv.long
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Lifecycle.Production),
    description = Some(
      PlainText(
        "The Tweet SimClusters Embedding Endpoint in Representation Management Service." +
          " TDD: http://go/rms-tdd"
      )
    )
  )

  /**
   * Fetches the embedding identified by (view.embeddingType, view.modelVersion, key).
   *
   * @return `found(embedding)` on success, or `missing` when the lookup fails with
   *         `stitch.NotFound`; no other failure is handled here.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val internalId = InternalId.TweetId(key)
    val lookupId = SimClustersEmbeddingId(view.embeddingType, view.modelVersion, internalId)

    storeStitch(lookupId)
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"finatra/inject/inject-core/src/main/scala",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
|
||||||
|
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed",
|
||||||
|
"strato/src/main/scala/com/twitter/strato/fed/server",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,73 @@
|
||||||
|
package com.twitter.representation_manager.columns.user
|
||||||
|
|
||||||
|
import com.twitter.representation_manager.columns.ColumnConfigBase
|
||||||
|
import com.twitter.representation_manager.store.UserSimClustersEmbeddingStore
|
||||||
|
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.stitch
|
||||||
|
import com.twitter.stitch.Stitch
|
||||||
|
import com.twitter.stitch.storehaus.StitchOfReadableStore
|
||||||
|
import com.twitter.strato.catalog.OpMetadata
|
||||||
|
import com.twitter.strato.config.AnyOf
|
||||||
|
import com.twitter.strato.config.ContactInfo
|
||||||
|
import com.twitter.strato.config.FromColumns
|
||||||
|
import com.twitter.strato.config.Policy
|
||||||
|
import com.twitter.strato.config.Prefix
|
||||||
|
import com.twitter.strato.data.Conv
|
||||||
|
import com.twitter.strato.data.Description.PlainText
|
||||||
|
import com.twitter.strato.data.Lifecycle
|
||||||
|
import com.twitter.strato.fed._
|
||||||
|
import com.twitter.strato.thrift.ScroogeConv
|
||||||
|
import javax.inject.Inject
|
||||||
|
|
||||||
|
/**
 * Strato column `recommendations/representation_manager/simClustersEmbedding.User`.
 *
 * Looks up a user SimClusters embedding keyed by a user id (`Long`); the embedding type and
 * model version come from the [[SimClustersEmbeddingView]] supplied with each fetch.
 *
 * @param embeddingStore provider of the user SimClusters embedding store backing this column
 */
class UserSimClustersEmbeddingCol @Inject() (embeddingStore: UserSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.User")
    with StratoFed.Fetch.Stitch {

  // Stitch adapter over the readable store, with stored values converted to thrift.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] =
    StitchOfReadableStore(
      embeddingStore.userSimClustersEmbeddingStore.mapValues(_.toThrift)
    )

  // Shared internal + external permissions, plus access from the feature-store column prefix.
  val colPermissions: Seq[Policy] =
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+
      FromColumns(Set(Prefix("ml/featureStore/simClusters")))

  override val policy: Policy = AnyOf(colPermissions)

  override type Key = Long // UserId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  override val keyConv: Conv[Key] = Conv.long
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Lifecycle.Production),
    description = Some(
      PlainText(
        "The User SimClusters Embedding Endpoint in Representation Management Service." +
          " TDD: http://go/rms-tdd"
      )
    )
  )

  /**
   * Fetches the embedding identified by (view.embeddingType, view.modelVersion, key).
   *
   * @return `found(embedding)` on success, or `missing` when the lookup fails with
   *         `stitch.NotFound`; no other failure is handled here.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val internalId = InternalId.UserId(key)
    val lookupId = SimClustersEmbeddingId(view.embeddingType, view.modelVersion, internalId)

    storeStitch(lookupId)
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"decider/src/main/scala",
|
||||||
|
"finagle/finagle-memcached",
|
||||||
|
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||||
|
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
|
||||||
|
"src/scala/com/twitter/simclusters_v2/common",
|
||||||
|
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,153 @@
|
||||||
|
package com.twitter.representation_manager.common
|
||||||
|
|
||||||
|
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||||
|
import com.twitter.conversions.DurationOps._
|
||||||
|
import com.twitter.finagle.memcached.Client
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.hashing.KeyHasher
|
||||||
|
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||||
|
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||||
|
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.common.SimClustersEmbeddingIdCacheKeyBuilder
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||||
|
import com.twitter.storehaus.ReadableStore
|
||||||
|
import com.twitter.util.Duration
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NOTE - ALL the cache configs here are just placeholders; NONE of them is used anywhere in RMS yet
|
||||||
|
* */
|
||||||
|
// Sealed base type for per-embedding memcache settings; the subtypes in this file are
// EnabledMemCacheParams and DisabledMemCacheParams.
sealed trait MemCacheParams
|
||||||
|
// Member-less marker trait sharing its name with the `object MemCacheConfig` below.
// NOTE(review): that object does not extend this trait and nothing visible here does either -
// confirm whether the trait is still needed.
sealed trait MemCacheConfig
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This holds the params that are required to set up a memcache cache for a single embedding store
|
||||||
|
* */
|
||||||
|
// Memcache settings for an embedding store that has caching turned on.
// `ttl` is the only setting carried; presumably the time-to-live of cached entries - confirm
// where it is consumed.
case class EnabledMemCacheParams(ttl: Duration) extends MemCacheParams
|
||||||
|
// Singleton member of the MemCacheParams ADT that carries no settings (the "disabled" case).
// Declared as a `case object` so it has a stable, readable toString, like its
// case-class sibling.
case object DisabledMemCacheParams extends MemCacheParams
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We use this MemcacheConfig as the single source to set up the memcache for all RMS use cases
|
||||||
|
* NO OVERRIDE FROM CLIENT
|
||||||
|
* */
|
||||||
|
object MemCacheConfig {
  val keyHasher: KeyHasher = KeyHasher.FNV1A_64
  val hashKeyPrefix: String = "RMS"

  // NOTE: buildMemCacheStoreForSimClustersEmbedding below does not use this builder; it derives
  // its keys from getCacheKeyPrefix + the key's toString instead.
  val simclustersEmbeddingCacheKeyBuilder =
    SimClustersEmbeddingIdCacheKeyBuilder(keyHasher.hashKey, hashKeyPrefix)

  /** Memcache params per (embeddingType, modelVersion); pairs that are absent get no memcache. */
  val cacheParamsMap: Map[(EmbeddingType, ModelVersion), MemCacheParams] = Map(
    // Tweet Embeddings
    (LogFavBasedTweet, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 10.minutes),
    (LogFavBasedTweet, Model20m145k2020) -> EnabledMemCacheParams(ttl = 10.minutes),
    (LogFavLongestL2EmbeddingTweet, Model20m145kUpdated) ->
      EnabledMemCacheParams(ttl = 10.minutes),
    (LogFavLongestL2EmbeddingTweet, Model20m145k2020) -> EnabledMemCacheParams(ttl = 10.minutes),
    // User - KnownFor Embeddings
    (FavBasedProducer, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours),
    (FavBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (FollowBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (AggregatableLogFavBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    // User - InterestedIn Embeddings
    (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (FollowBasedUserInterestedInFromAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (FavBasedUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours),
    (FavBasedUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (FollowBasedUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (FavBasedUserInterestedInFromPE, Model20m145kUpdated) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (FilteredUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours),
    (FilteredUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (FilteredUserInterestedInFromPE, Model20m145kUpdated) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (UnfilteredUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours),
    (UnfilteredUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    // embedding is updated every 2 hours, keeping it lower to avoid staleness
    (UserNextInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 30.minutes),
    (LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedAverageAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      EnabledMemCacheParams(ttl = 12.hours),
    // Topic Embeddings
    (FavTfgTopic, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours),
    (LogFavBasedKgoApeTopic, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours)
  )

  /**
   * Looks up the memcache params for an (embeddingType, modelVersion) pair.
   *
   * @return the configured params, or [[DisabledMemCacheParams]] when the pair is not in
   *         [[cacheParamsMap]]
   */
  def getCacheSetup(
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): MemCacheParams =
    cacheParamsMap.getOrElse((embeddingType, modelVersion), DisabledMemCacheParams)

  /** Memcache key prefix of the form `<embeddingType.value>_<modelVersion.value>_`. */
  def getCacheKeyPrefix(embeddingType: EmbeddingType, modelVersion: ModelVersion): String =
    s"${embeddingType.value}_${modelVersion.value}_"

  /** Stats scope name of the form `<embeddingType.name>_<modelVersion.name>_mem_cache`. */
  def getStatsName(embeddingType: EmbeddingType, modelVersion: ModelVersion): String =
    s"${embeddingType.name}_${modelVersion.name}_mem_cache"

  /**
   * Build a ReadableStore based on MemCacheConfig.
   *
   * If memcache is disabled for the (embeddingType, modelVersion) pair, rawStore is used directly.
   * If memcache is enabled, rawStore is wrapped in an ObservedMemcachedReadableStore that uses the
   * ttl from the EnabledMemCacheParams, LZ4-compressed thrift values, a stats scope named by
   * [[getStatsName]], and keys built as [[getCacheKeyPrefix]] + key.toString.
   * In both cases the thrift values are converted to SimClustersEmbedding.
   *
   * @param rawStore      backing store that serves thrift embeddings
   * @param cacheClient   memcache client used when caching is enabled
   * @param embeddingType embedding type used for the config lookup, key prefix and stats name
   * @param modelVersion  model version used for the config lookup, key prefix and stats name
   * @param stats         parent stats receiver for the memcache layer
   */
  def buildMemCacheStoreForSimClustersEmbedding(
    rawStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding],
    cacheClient: Client,
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion,
    stats: StatsReceiver
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val store = getCacheSetup(embeddingType, modelVersion) match {
      case DisabledMemCacheParams => rawStore
      case EnabledMemCacheParams(ttl) =>
        val memCacheKeyPrefix = getCacheKeyPrefix(embeddingType, modelVersion)
        val statsName = getStatsName(embeddingType, modelVersion)
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = rawStore,
          cacheClient = cacheClient,
          ttl = ttl
        )(
          valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
          statsReceiver = stats.scope(statsName),
          keyToString = { k => memCacheKeyPrefix + k.toString }
        )
    }
    store.mapValues(SimClustersEmbedding(_))
  }

}
|
|
@ -0,0 +1,25 @@
|
||||||
|
package com.twitter.representation_manager.common
|
||||||
|
|
||||||
|
import com.twitter.decider.Decider
|
||||||
|
import com.twitter.decider.RandomRecipient
|
||||||
|
import com.twitter.decider.Recipient
|
||||||
|
import com.twitter.simclusters_v2.common.DeciderGateBuilderWithIdHashing
|
||||||
|
import javax.inject.Inject
|
||||||
|
|
||||||
|
/**
 * Thin wrapper around the injected [[Decider]] that also exposes an id-hashing gate builder.
 */
case class RepresentationManagerDecider @Inject() (decider: Decider) {

  val deciderGateBuilder = new DeciderGateBuilderWithIdHashing(decider)

  /** Delegates directly to the underlying decider for the given feature and recipient. */
  def isAvailable(feature: String, recipient: Option[Recipient]): Boolean =
    decider.isAvailable(feature, recipient)

  /**
   * When useRandomRecipient is set to false, the decider is either completely on or off.
   * When useRandomRecipient is set to true, the decider is on for the specified % of traffic.
   */
  def isAvailable(feature: String, useRandomRecipient: Boolean = true): Boolean = {
    val recipient: Option[Recipient] =
      if (useRandomRecipient) Some(RandomRecipient) else None
    isAvailable(feature, recipient)
  }
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Scala library target: compiled with the "fatal_warnings" compiler option set for the java8
# platform, tagged bazel-compatible, with the dependencies listed below.
# NOTE(review): presumably the target for the representation_manager migration package
# (LegacyRMS) that follows in this change — confirm against the file path.
scala_library(
|
||||||
|
compiler_option_sets = ["fatal_warnings"],
|
||||||
|
platform = "java8",
|
||||||
|
tags = ["bazel-compatible"],
|
||||||
|
dependencies = [
|
||||||
|
"content-recommender/server/src/main/scala/com/twitter/contentrecommender:representation-manager-deps",
|
||||||
|
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
|
||||||
|
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
|
||||||
|
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||||
|
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
|
||||||
|
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
|
||||||
|
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
|
||||||
|
"src/scala/com/twitter/ml/api/embedding",
|
||||||
|
"src/scala/com/twitter/simclusters_v2/common",
|
||||||
|
"src/scala/com/twitter/simclusters_v2/score",
|
||||||
|
"src/scala/com/twitter/simclusters_v2/summingbird/stores",
|
||||||
|
"src/scala/com/twitter/storehaus_internal/manhattan",
|
||||||
|
"src/scala/com/twitter/storehaus_internal/util",
|
||||||
|
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||||
|
"src/thrift/com/twitter/socialgraph:thrift-scala",
|
||||||
|
"storage/clients/manhattan/client/src/main/scala",
|
||||||
|
"tweetypie/src/scala/com/twitter/tweetypie/util",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,846 @@
|
||||||
|
package com.twitter.representation_manager.migration
|
||||||
|
|
||||||
|
import com.twitter.bijection.Injection
|
||||||
|
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||||
|
import com.twitter.contentrecommender.store.ApeEntityEmbeddingStore
|
||||||
|
import com.twitter.contentrecommender.store.InterestsOptOutStore
|
||||||
|
import com.twitter.contentrecommender.store.SemanticCoreTopicSeedStore
|
||||||
|
import com.twitter.contentrecommender.twistly
|
||||||
|
import com.twitter.conversions.DurationOps._
|
||||||
|
import com.twitter.decider.Decider
|
||||||
|
import com.twitter.escherbird.util.uttclient.CacheConfigV2
|
||||||
|
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
|
||||||
|
import com.twitter.escherbird.util.uttclient.UttClientCacheConfigsV2
|
||||||
|
import com.twitter.escherbird.utt.strato.thriftscala.Environment
|
||||||
|
import com.twitter.finagle.ThriftMux
|
||||||
|
import com.twitter.finagle.memcached.Client
|
||||||
|
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||||
|
import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax
|
||||||
|
import com.twitter.finagle.mux.ClientDiscardedRequestException
|
||||||
|
import com.twitter.finagle.service.ReqRep
|
||||||
|
import com.twitter.finagle.service.ResponseClass
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.finagle.thrift.ClientId
|
||||||
|
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||||
|
import com.twitter.frigate.common.util.SeqLongInjection
|
||||||
|
import com.twitter.hashing.KeyHasher
|
||||||
|
import com.twitter.hermit.store.common.DeciderableReadableStore
|
||||||
|
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||||
|
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||||
|
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||||
|
import com.twitter.interests.thriftscala.InterestsThriftService
|
||||||
|
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||||
|
import com.twitter.relevance_platform.common.readablestore.ReadableStoreWithTimeout
|
||||||
|
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||||
|
import com.twitter.representation_manager.store.DeciderConstants
|
||||||
|
import com.twitter.representation_manager.store.DeciderKey
|
||||||
|
import com.twitter.simclusters_v2.common.ModelVersions
|
||||||
|
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.common.SimClustersEmbeddingIdCacheKeyBuilder
|
||||||
|
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.ProducerClusterEmbeddingReadableStores
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145k2020
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145kUpdated
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersMultiEmbedding
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersMultiEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||||
|
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||||
|
import com.twitter.storehaus.ReadableStore
|
||||||
|
import com.twitter.storehaus_internal.manhattan.Athena
|
||||||
|
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||||
|
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||||
|
import com.twitter.storehaus_internal.util.ApplicationID
|
||||||
|
import com.twitter.storehaus_internal.util.DatasetName
|
||||||
|
import com.twitter.storehaus_internal.util.HDFSPath
|
||||||
|
import com.twitter.strato.client.Strato
|
||||||
|
import com.twitter.strato.client.{Client => StratoClient}
|
||||||
|
import com.twitter.strato.thrift.ScroogeConvImplicits._
|
||||||
|
import com.twitter.tweetypie.util.UserId
|
||||||
|
import com.twitter.util.Duration
|
||||||
|
import com.twitter.util.Future
|
||||||
|
import com.twitter.util.Throw
|
||||||
|
import com.twitter.util.Timer
|
||||||
|
import javax.inject.Inject
|
||||||
|
import javax.inject.Named
|
||||||
|
import scala.reflect.ClassTag
|
||||||
|
|
||||||
|
class LegacyRMS @Inject() (
|
||||||
|
serviceIdentifier: ServiceIdentifier,
|
||||||
|
cacheClient: Client,
|
||||||
|
stats: StatsReceiver,
|
||||||
|
decider: Decider,
|
||||||
|
clientId: ClientId,
|
||||||
|
timer: Timer,
|
||||||
|
@Named("cacheHashKeyPrefix") val cacheHashKeyPrefix: String = "RMS",
|
||||||
|
@Named("useContentRecommenderConfiguration") val useContentRecommenderConfiguration: Boolean =
|
||||||
|
false) {
|
||||||
|
|
||||||
|
// mTLS parameters passed to the Manhattan-backed stores built in this class.
private val mhMtlsParams: ManhattanKVClientMtlsParams =
  ManhattanKVClientMtlsParams(serviceIdentifier)

// Wrapper around the injected Decider; its gate builder is used to gate stores by decider key.
private val rmsDecider: RepresentationManagerDecider = RepresentationManagerDecider(decider)

// Hash function handed to the embedding cache-key builder below.
val keyHasher: KeyHasher = KeyHasher.FNV1A_64

// Builds memcache keys for SimClustersEmbeddingId from the hasher and the injected key prefix.
private val embeddingCacheKeyBuilder =
  SimClustersEmbeddingIdCacheKeyBuilder(keyHasher.hashKey, cacheHashKeyPrefix)

// Root stats scope for everything created by this class.
private val statsReceiver: StatsReceiver = stats.scope("representation_management")
|
||||||
|
|
||||||
|
// Strato client (mutual TLS via the service identifier), default timeout = 280ms
val stratoClient: StratoClient = {
  val mtlsClientBuilder = Strato.client.withMutualTls(serviceIdentifier)
  mtlsClientBuilder.build()
}
|
||||||
|
|
||||||
|
/**
 * Builds ThriftMux client builder for Content-Recommender service.
 *
 * The client carries this service's client id, mutual TLS, the given request timeout and a
 * "clnt" stats scope.
 *
 * @param requestTimeout per-request timeout applied to the client
 */
private def makeThriftClientBuilder(
  requestTimeout: Duration
): ThriftMux.Client = {
  // Requests that failed with ClientDiscardedRequestException are classified as Ignorable.
  val discardedAsIgnorable: PartialFunction[ReqRep, ResponseClass] = {
    case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable
  }
  val clientStats = statsReceiver.scope("clnt")

  ThriftMux.client
    .withClientId(clientId)
    .withMutualTls(serviceIdentifier)
    .withRequestTimeout(requestTimeout)
    .withStatsReceiver(clientStats)
    .withResponseClassifier(discardedAsIgnorable)
}
|
||||||
|
|
||||||
|
/**
 * Builds a typed thrift client on top of makeThriftClientBuilder.
 *
 * @param dest           destination passed to the client's `build`
 * @param label          client label passed to the client's `build`
 * @param requestTimeout per-request timeout (450 ms unless overridden)
 */
private def makeThriftClient[ThriftServiceType: ClassTag](
  dest: String,
  label: String,
  requestTimeout: Duration = 450.milliseconds
): ThriftServiceType = {
  val clientBuilder = makeThriftClientBuilder(requestTimeout)
  clientBuilder.build[ThriftServiceType](dest, label)
}
|
||||||
|
|
||||||
|
// ===== SimCluster Embedding Stores =====

// Implicit thrift binary codecs (struct <-> Array[Byte]) for the SimClusters id/value types.
implicit val simClustersEmbeddingIdInjection: Injection[SimClustersEmbeddingId, Array[Byte]] =
  BinaryScalaCodec(SimClustersEmbeddingId)

implicit val simClustersEmbeddingInjection: Injection[ThriftSimClustersEmbedding, Array[Byte]] =
  BinaryScalaCodec(ThriftSimClustersEmbedding)

implicit val simClustersMultiEmbeddingInjection: Injection[
  SimClustersMultiEmbedding,
  Array[Byte]
] = BinaryScalaCodec(SimClustersMultiEmbedding)

implicit val simClustersMultiEmbeddingIdInjection: Injection[
  SimClustersMultiEmbeddingId,
  Array[Byte]
] = BinaryScalaCodec(SimClustersMultiEmbeddingId)
|
||||||
|
|
||||||
|
/**
 * Opens a read-only Manhattan store of thrift SimClusters embeddings for the given dataset,
 * using the "content_recommender_athena" application id on the Athena cluster.
 *
 * @param mhMtlsParams mTLS parameters for the Manhattan client
 * @param datasetName  name of the Manhattan dataset to read
 */
def getEmbeddingsDataset(
  mhMtlsParams: ManhattanKVClientMtlsParams,
  datasetName: String
): ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] = {
  val readOnlyConfig = ManhattanROConfig(
    HDFSPath(""), // not needed
    ApplicationID("content_recommender_athena"),
    DatasetName(datasetName), // this should be correct
    Athena
  )

  ManhattanRO.getReadableStoreWithMtls[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
    readOnlyConfig,
    mhMtlsParams
  )
}
|
||||||
|
|
||||||
|
/**
 * Longest-L2-norm tweet embeddings from the LogFavBased20m145k2020 Manhattan dataset, served
 * through memcache (15 minute ttl) and an in-process cache (12 minute ttl).
 *
 * The key mapping only matches ids of the form
 * (LogFavLongestL2EmbeddingTweet, Model20m145k2020, TweetId); any other id falls outside the
 * pattern match (scala.MatchError).
 */
lazy val logFavBasedLongestL2Tweet20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val manhattanStore = PersistentTweetEmbeddingStore
    .longestL2NormTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
      statsReceiver,
      maxLength = 10
    )
  val thriftStore = manhattanStore.mapValues(_.toThrift)

  // Memcache key layout: scez_l2:<LogFavBasedTweet>_<modelVersion>_<key>
  val memcacheKeyPrefix = s"scez_l2:${LogFavBasedTweet}_${ModelVersions.Model20M145K2020}_"

  val memcacheBackedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = thriftStore,
    cacheClient = cacheClient,
    ttl = 15.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver =
      statsReceiver.scope("log_fav_based_longest_l2_tweet_embedding_20m145k2020_mem_cache"),
    keyToString = { tweetKey => s"$memcacheKeyPrefix$tweetKey" }
  )

  // Re-key from SimClustersEmbeddingId down to the tweet id expected by the memcache layer.
  val rekeyedStore = memcacheBackedStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(
          LogFavLongestL2EmbeddingTweet,
          Model20m145k2020,
          InternalId.TweetId(tweetId)) =>
      tweetId
  }
  val embeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    rekeyedStore.mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingStore,
    ttl = 12.minute,
    maxKeys = 1048575,
    cacheName = "log_fav_based_longest_l2_tweet_embedding_20m145k2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_longest_l2_tweet_embedding_20m145k2020_store"))
}
|
||||||
|
|
||||||
|
/**
 * Most-recent tweet embeddings from the LogFavBased20m145kUpdated Manhattan dataset, served
 * through memcache (10 minute ttl) and an in-process cache (5 minute ttl).
 *
 * The key mapping only matches ids of the form
 * (LogFavBasedTweet, Model20m145kUpdated, TweetId); any other id falls outside the pattern
 * match (scala.MatchError).
 */
lazy val logFavBased20M145KUpdatedTweetEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val manhattanStore = PersistentTweetEmbeddingStore
    .mostRecentTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
      statsReceiver
    )
  val thriftStore = manhattanStore.mapValues(_.toThrift)

  // SimClusters_embedding_LZ4/embeddingType_modelVersion_tweetId
  val memcacheKeyPrefix = s"scez:${LogFavBasedTweet}_${ModelVersions.Model20M145KUpdated}_"

  val memcacheBackedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = thriftStore,
    cacheClient = cacheClient,
    ttl = 10.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver = statsReceiver.scope("log_fav_based_tweet_embedding_mem_cache"),
    keyToString = { tweetKey => s"$memcacheKeyPrefix$tweetKey" }
  )

  // Re-key from SimClustersEmbeddingId down to the tweet id expected by the memcache layer.
  val rekeyedStore = memcacheBackedStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(
          LogFavBasedTweet,
          Model20m145kUpdated,
          InternalId.TweetId(tweetId)) =>
      tweetId
  }
  val embeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    rekeyedStore.mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingStore,
    ttl = 5.minute,
    maxKeys = 1048575, // 200MB
    cacheName = "log_fav_based_tweet_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_tweet_embedding_store"))
}
|
||||||
|
|
||||||
|
/**
 * Most-recent tweet embeddings from the LogFavBased20m145k2020 Manhattan dataset, served
 * through memcache (15 minute ttl) and an in-process cache (12 minute ttl).
 *
 * The key mapping only matches ids of the form
 * (LogFavBasedTweet, Model20m145k2020, TweetId); any other id falls outside the pattern
 * match (scala.MatchError).
 */
lazy val logFavBased20M145K2020TweetEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val manhattanStore = PersistentTweetEmbeddingStore
    .mostRecentTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
      statsReceiver,
      maxLength = 10
    )
  val thriftStore = manhattanStore.mapValues(_.toThrift)

  // SimClusters_embedding_LZ4/embeddingType_modelVersion_tweetId
  val memcacheKeyPrefix = s"scez:${LogFavBasedTweet}_${ModelVersions.Model20M145K2020}_"

  val memcacheBackedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = thriftStore,
    cacheClient = cacheClient,
    ttl = 15.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver = statsReceiver.scope("log_fav_based_tweet_embedding_20m145k2020_mem_cache"),
    keyToString = { tweetKey => s"$memcacheKeyPrefix$tweetKey" }
  )

  // Re-key from SimClustersEmbeddingId down to the tweet id expected by the memcache layer.
  val rekeyedStore = memcacheBackedStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(
          LogFavBasedTweet,
          Model20m145k2020,
          InternalId.TweetId(tweetId)) =>
      tweetId
  }
  val embeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    rekeyedStore.mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingStore,
    ttl = 12.minute,
    maxKeys = 16777215,
    cacheName = "log_fav_based_tweet_embedding_20m145k2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_tweet_embedding_20m145k2020_store"))
}
|
||||||
|
|
||||||
|
/**
 * FavBased TFG topic embeddings fetched from the favBasedTFGTopic20M145K2020 Strato column,
 * truncated to 50, observed, and cached in-process for 12 hours.
 */
lazy val favBasedTfgTopicEmbedding2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val truncatedStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020")
    .mapValues { thriftEmbedding =>
      SimClustersEmbedding(thriftEmbedding, truncate = 50)
    }

  val observedBackingStore = ObservedReadableStore(truncatedStore)(
    statsReceiver.scope("fav_tfg_topic_embedding_2020_cache_backing_store"))

  ObservedCachedReadableStore.from(
    observedBackingStore,
    ttl = 12.hours,
    maxKeys = 262143, // 200MB
    cacheName = "fav_tfg_topic_embedding_2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_tfg_topic_embedding_2020_cache"))
}
|
||||||
|
|
||||||
|
/**
 * Aggregatable producer embeddings (LogFav based, model 2020) fetched from the
 * logFavBasedAPE20M145K2020 Strato column and built with a limit of 50.
 *
 * The key mapping passes ids through unchanged but only matches
 * (AggregatableLogFavBasedProducer, Model20m145k2020, _); other ids fall outside the pattern
 * match (scala.MatchError).
 */
lazy val logFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val stratoStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020")

  val guardedStore = stratoStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(
          AggregatableLogFavBasedProducer,
          Model20m145k2020,
          internalId) =>
      SimClustersEmbeddingId(AggregatableLogFavBasedProducer, Model20m145k2020, internalId)
  }

  val embeddingStore =
    guardedStore.mapValues(thriftEmbedding => SimClustersEmbedding(thriftEmbedding, 50))

  ObservedReadableStore(embeddingStore)(
    statsReceiver.scope("aggregatable_producer_embeddings_by_logfav_score_2020"))
}
|
||||||
|
|
||||||
|
// Thrift client for the interests service (uses makeThriftClient's default request timeout).
val interestService: InterestsThriftService.MethodPerEndpoint =
  makeThriftClient[InterestsThriftService.MethodPerEndpoint](
    dest = "/s/interests-thrift-service/interests-thrift-service",
    label = "interests_thrift_service"
  )

// Opt-out store built on top of the interests service client.
val interestsOptOutStore: InterestsOptOutStore =
  InterestsOptOutStore(interestService)
|
||||||
|
|
||||||
|
// Save 2 ^ 18 UTTs. Promising 100% cache rate
lazy val defaultCacheConfigV2: CacheConfigV2 =
  CacheConfigV2(262143)

// Every UTT client cache shares the same default cache config.
lazy val uttClientCacheConfigsV2: UttClientCacheConfigsV2 = {
  val sharedConfig = defaultCacheConfigV2
  UttClientCacheConfigsV2(
    getTaxonomyConfig = sharedConfig,
    getUttTaxonomyConfig = sharedConfig,
    getLeafIds = sharedConfig,
    getLeafUttEntities = sharedConfig
  )
}
|
||||||
|
|
||||||
|
// CachedUttClient to use StratoClient (Prod environment, cache configs from uttClientCacheConfigsV2)
lazy val cachedUttClientV2: CachedUttClientV2 = {
  val uttClientStats = statsReceiver.scope("cached_utt_client")
  new CachedUttClientV2(
    stratoClient = stratoClient,
    env = Environment.Prod,
    cacheConfigs = uttClientCacheConfigsV2,
    statsReceiver = uttClientStats
  )
}
|
||||||
|
|
||||||
|
/**
 * Seed user ids per topic key, computed by SemanticCoreTopicSeedStore and cached in memcache
 * (12 hour ttl) plus an in-process cache (6 hour ttl, 20e3 keys).
 *
 * Sizing note carried over from the original author:
 *   Up to 1000 Long seeds per topic/language = 62.5kb per topic/language (worst case)
 *   Assume ~10k active topic/languages ~= 650MB (worst case)
 * NOTE(review): the units in these figures are ambiguous (bits vs bytes) — confirm before relying
 * on them.
 */
lazy val semanticCoreTopicSeedStore: ReadableStore[
  SemanticCoreTopicSeedStore.Key,
  Seq[UserId]
] = {
  val seedStore = new SemanticCoreTopicSeedStore(cachedUttClientV2, interestsOptOutStore)(
    statsReceiver.scope("semantic_core_topic_seed_store"))

  // Memcache key layout: tpss:<entityId>_<languageCode>
  val memcacheBackedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = seedStore,
    cacheClient = cacheClient,
    ttl = 12.hours
  )(
    valueInjection = SeqLongInjection,
    statsReceiver = statsReceiver.scope("topic_producer_seed_store_mem_cache"),
    keyToString = { topicKey => s"tpss:${topicKey.entityId}_${topicKey.languageCode}" }
  )

  ObservedCachedReadableStore.from[SemanticCoreTopicSeedStore.Key, Seq[UserId]](
    store = memcacheBackedStore,
    ttl = 6.hours,
    maxKeys = 20e3.toInt,
    cacheName = "topic_producer_seed_store_cache",
    windowSize = 5000
  )(statsReceiver.scope("topic_producer_seed_store_cache"))
}
|
||||||
|
|
||||||
|
/**
 * Entity embedding store assembled from the topic seed store and the aggregatable producer
 * embedding store, with the latter re-keyed by user id.
 */
lazy val logFavBasedApeEntity20M145K2020EmbeddingStore: ApeEntityEmbeddingStore = {
  // Look producers up by plain user id: wrap the id into the
  // (AggregatableLogFavBasedProducer, Model20m145k2020) embedding id the APE store expects.
  val producerEmbeddingByUserId =
    logFavBasedApe20M145K2020EmbeddingStore.composeKeyMapping[UserId] { userId =>
      SimClustersEmbeddingId(
        AggregatableLogFavBasedProducer,
        Model20m145k2020,
        InternalId.UserId(userId))
    }

  new ApeEntityEmbeddingStore(
    semanticCoreSeedStore = semanticCoreTopicSeedStore,
    aggregatableProducerEmbeddingStore = producerEmbeddingByUserId,
    statsReceiver = statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_store")
  )
}
|
||||||
|
|
||||||
|
/**
 * Cached, decider-gated view of logFavBasedApeEntity20M145K2020EmbeddingStore: values are
 * truncated to 50, cached in memcache (12 hour ttl) and in-process (6 hour ttl), and the result
 * is wrapped in a DeciderableReadableStore gated by
 * DeciderKey.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore.
 */
lazy val logFavBasedApeEntity20M145K2020EmbeddingCachedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val truncatedThriftStore =
    logFavBasedApeEntity20M145K2020EmbeddingStore.mapValues(_.truncate(50).toThrift)

  val memcacheBackedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = truncatedThriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_mem_cache"),
      keyToString = { embeddingId => embeddingCacheKeyBuilder(embeddingId) }
    )
  val embeddingStore = memcacheBackedStore.mapValues(SimClustersEmbedding(_))

  val locallyCachedStore =
    ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
      embeddingStore,
      ttl = 6.hours,
      maxKeys = 262143,
      cacheName = "log_fav_based_ape_entity_2020_embedding_cache",
      windowSize = 10000L
    )(statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_cached_store"))

  val deciderGate = rmsDecider.deciderGateBuilder.idGateWithHashing[SimClustersEmbeddingId](
    DeciderKey.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore)

  DeciderableReadableStore(
    locallyCachedStore,
    deciderGate,
    statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_deciderable_store")
  )
}
|
||||||
|
|
||||||
|
/**
 * Relaxed-threshold aggregatable producer embeddings fetched from the
 * logFavBasedAPERelaxedFavEngagementThreshold20M145K2020 Strato column, truncated to 50.
 *
 * The key mapping passes ids through unchanged but only matches
 * (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020, _); other ids fall outside the
 * pattern match (scala.MatchError).
 */
lazy val relaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val stratoStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020")

  val guardedStore = stratoStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(
          RelaxedAggregatableLogFavBasedProducer,
          Model20m145k2020,
          internalId) =>
      SimClustersEmbeddingId(
        RelaxedAggregatableLogFavBasedProducer,
        Model20m145k2020,
        internalId)
  }

  val truncatedStore =
    guardedStore.mapValues(thriftEmbedding => SimClustersEmbedding(thriftEmbedding).truncate(50))

  ObservedReadableStore(truncatedStore)(
    statsReceiver.scope(
      "aggregatable_producer_embeddings_by_logfav_score_relaxed_fav_engagement_threshold_2020"))
}
|
||||||
|
|
||||||
|
/**
 * Cached view of relaxedLogFavBasedApe20M145K2020EmbeddingStore: values are truncated to 50,
 * cached in memcache (12 hour ttl) and in-process (6 hour ttl).
 */
lazy val relaxedLogFavBasedApe20M145K2020EmbeddingCachedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val truncatedThriftStore =
    relaxedLogFavBasedApe20M145K2020EmbeddingStore.mapValues(_.truncate(50).toThrift)

  val memcacheBackedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = truncatedThriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver =
        statsReceiver.scope("relaxed_log_fav_based_ape_entity_2020_embedding_mem_cache"),
      keyToString = { embeddingId: SimClustersEmbeddingId =>
        embeddingCacheKeyBuilder(embeddingId)
      }
    )
  val embeddingStore = memcacheBackedStore.mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "relaxed_log_fav_based_ape_entity_2020_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("relaxed_log_fav_based_ape_entity_2020_embedding_cache_store"))
}
|
||||||
|
|
||||||
|
/**
 * FavBased producer embeddings (model 2020): the first 10 top clusters per producer, cached in
 * memcache (24 hour ttl) and in-process (12 hour ttl).
 *
 * The key mapping only matches ids of the form
 * (FavBasedProducer, Model20m145k2020, UserId); any other id falls outside the pattern match
 * (scala.MatchError).
 */
lazy val favBasedProducer20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val topKStore = ProducerClusterEmbeddingReadableStores
    .getProducerTopKSimClusters2020EmbeddingsStore(mhMtlsParams)

  // Re-key from SimClustersEmbeddingId down to the producer's user id.
  val rekeyedStore = topKStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(
          FavBasedProducer,
          Model20m145k2020,
          InternalId.UserId(userId)) =>
      userId
  }

  val thriftStore = rekeyedStore.mapValues { topClustersWithScore =>
    ThriftSimClustersEmbedding(topClustersWithScore.topClusters.take(10))
  }

  // same memcache config as for favBasedUserInterestedIn20M145K2020Store
  val memcacheBackedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 24.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_producer_embedding_20M_145K_2020_mem_cache"),
      keyToString = { embeddingId => embeddingCacheKeyBuilder(embeddingId) }
    )
  val embeddingStore = memcacheBackedStore.mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingStore,
    ttl = 12.hours,
    maxKeys = 16777215,
    cacheName = "fav_based_producer_embedding_20M_145K_2020_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_based_producer_embedding_20M_145K_2020_embedding_store"))
}
|
||||||
|
|
||||||
|
// Production
|
||||||
|
/** Default mTLS-backed InterestedIn store for the `Model20M145KUpdated` model version. */
lazy val interestedIn20M145KUpdatedStore: ReadableStore[UserId, ClustersUserIsInterestedIn] =
  UserInterestedInReadableStore.defaultStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145KUpdated
  )
|
||||||
|
|
||||||
|
// Production
|
||||||
|
/** Default mTLS-backed InterestedIn store for the `Model20M145K2020` model version. */
lazy val interestedIn20M145K2020Store: ReadableStore[UserId, ClustersUserIsInterestedIn] =
  UserInterestedInReadableStore.defaultStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145K2020
  )
|
||||||
|
|
||||||
|
// Production
|
||||||
|
/**
 * Default mTLS-backed IIPE store (`defaultIIPEStoreWithMtls`) for the
 * `Model20M145KUpdated` model version.
 */
lazy val InterestedInFromPE20M145KUpdatedStore: ReadableStore[
  UserId,
  ClustersUserIsInterestedIn
] =
  UserInterestedInReadableStore.defaultIIPEStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145KUpdated
  )
|
||||||
|
|
||||||
|
/**
 * Routes `(userId, modelVersion)` lookups to the InterestedIn store of the matching
 * model version. Model versions without a dedicated store resolve to `Future.None`.
 */
lazy val simClustersInterestedInStore: ReadableStore[
  (UserId, ModelVersion),
  ClustersUserIsInterestedIn
] = {
  new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
    override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = {
      val (userId, modelVersion) = k
      modelVersion match {
        case Model20m145kUpdated => interestedIn20M145KUpdatedStore.get(userId)
        case Model20m145k2020 => interestedIn20M145K2020Store.get(userId)
        case _ => Future.None
      }
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Routes `(userId, modelVersion)` lookups to `InterestedInFromPE20M145KUpdatedStore`.
 * Only `Model20m145kUpdated` is served; every other model version resolves to `Future.None`.
 */
lazy val simClustersInterestedInFromProducerEmbeddingsStore: ReadableStore[
  (UserId, ModelVersion),
  ClustersUserIsInterestedIn
] = {
  new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
    override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = {
      val (userId, modelVersion) = k
      modelVersion match {
        case ModelVersion.Model20m145kUpdated => InterestedInFromPE20M145KUpdatedStore.get(userId)
        case _ => Future.None
      }
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Twistly InterestedIn embedding store, wired with the two model-version routing stores
 * defined in this class (direct InterestedIn and InterestedIn-from-producer-embeddings).
 */
lazy val userInterestedInStore = {
  new twistly.interestedin.EmbeddingStore(
    interestedInStore = simClustersInterestedInStore,
    interestedInFromProducerEmbeddingStore = simClustersInterestedInFromProducerEmbeddingsStore,
    statsReceiver = statsReceiver
  )
}
|
||||||
|
|
||||||
|
// Production
|
||||||
|
/**
 * FavBasedUserInterestedIn embeddings for `Model20m145kUpdated`.
 *
 * Served through memcache (12 hour TTL, LZ4-compressed binary Thrift) and then an
 * `ObservedCachedReadableStore` layer (6 hour TTL, at most 262143 keys).
 */
lazy val favBasedUserInterestedIn20M145KUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Values are converted to Thrift so that they can be serialized into memcache.
  val thriftSource = UserInterestedInReadableStore
    .defaultSimClustersEmbeddingStoreWithMtls(
      mhMtlsParams,
      EmbeddingType.FavBasedUserInterestedIn,
      ModelVersion.Model20m145kUpdated
    )
    .mapValues(embedding => embedding.toThrift)

  val memcacheStats = statsReceiver.scope("fav_based_user_interested_in_mem_cache")

  val remoteCached = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftSource,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = memcacheStats,
      keyToString = { id => embeddingCacheKeyBuilder.apply(id) }
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))

  val localCacheStats = statsReceiver.scope("fav_based_user_interested_in_store")

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCached,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "fav_based_user_interested_in_cache",
    windowSize = 10000L
  )(localCacheStats)
}
|
||||||
|
|
||||||
|
// Production
|
||||||
|
/**
 * LogFavBasedUserInterestedInFromAPE embeddings for `Model20m145k2020`.
 *
 * Served through memcache (12 hour TTL, LZ4-compressed binary Thrift) and then an
 * `ObservedCachedReadableStore` layer (6 hour TTL, at most 262143 keys).
 */
lazy val LogFavBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Values are converted to Thrift so that they can be serialized into memcache.
  val thriftSource = UserInterestedInReadableStore
    .defaultIIAPESimClustersEmbeddingStoreWithMtls(
      mhMtlsParams,
      EmbeddingType.LogFavBasedUserInterestedInFromAPE,
      ModelVersion.Model20m145k2020
    )
    .mapValues(embedding => embedding.toThrift)

  val memcacheStats = statsReceiver.scope("log_fav_based_user_interested_in_from_ape_mem_cache")

  val remoteCached = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftSource,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = memcacheStats,
      keyToString = { id => embeddingCacheKeyBuilder.apply(id) }
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))

  val localCacheStats = statsReceiver.scope("log_fav_based_user_interested_in_from_ape_store")

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCached,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "log_fav_based_user_interested_in_from_ape_cache",
    windowSize = 10000L
  )(localCacheStats)
}
|
||||||
|
|
||||||
|
// Production
|
||||||
|
/**
 * FollowBasedUserInterestedInFromAPE embeddings for `Model20m145k2020`.
 *
 * Served through memcache (12 hour TTL, LZ4-compressed binary Thrift) and then an
 * `ObservedCachedReadableStore` layer (6 hour TTL, at most 262143 keys).
 */
lazy val FollowBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Values are converted to Thrift so that they can be serialized into memcache.
  val thriftSource = UserInterestedInReadableStore
    .defaultIIAPESimClustersEmbeddingStoreWithMtls(
      mhMtlsParams,
      EmbeddingType.FollowBasedUserInterestedInFromAPE,
      ModelVersion.Model20m145k2020
    )
    .mapValues(embedding => embedding.toThrift)

  val memcacheStats = statsReceiver.scope("follow_based_user_interested_in_from_ape_mem_cache")

  val remoteCached = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftSource,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = memcacheStats,
      keyToString = { id => embeddingCacheKeyBuilder.apply(id) }
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))

  val localCacheStats = statsReceiver.scope("follow_based_user_interested_in_from_ape_store")

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCached,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "follow_based_user_interested_in_from_ape_cache",
    windowSize = 10000L
  )(localCacheStats)
}
|
||||||
|
|
||||||
|
// production
|
||||||
|
/**
 * FavBasedUserInterestedIn embeddings for `Model20m145k2020`.
 *
 * Served through memcache only (12 hour TTL, LZ4-compressed binary Thrift); unlike the
 * sibling stores in this class, no `ObservedCachedReadableStore` layer is added on top.
 */
lazy val favBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftSource: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] =
    UserInterestedInReadableStore
      .defaultSimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FavBasedUserInterestedIn,
        ModelVersion.Model20m145k2020
      )
      .mapValues(embedding => embedding.toThrift)

  val memcacheStats = statsReceiver.scope("fav_based_user_interested_in_2020_mem_cache")

  ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftSource,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = memcacheStats,
      keyToString = { id => embeddingCacheKeyBuilder.apply(id) }
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))
}
|
||||||
|
|
||||||
|
// Production
|
||||||
|
/**
 * LogFavBasedUserInterestedIn embeddings for `Model20m145k2020`.
 *
 * Served through memcache (12 hour TTL, LZ4-compressed binary Thrift) and then an
 * `ObservedCachedReadableStore` layer (6 hour TTL, at most 262143 keys).
 *
 * The memcache tier reports under the `..._mem_cache` stats scope, matching the sibling
 * stores in this class. It previously shared the `..._2020_store` scope with the local
 * cache tier, so metrics from the two tiers were reported under the same scope.
 * NOTE(review): dashboards or alerts keyed on the old memcache scope name need updating.
 */
lazy val logFavBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val underlyingStore =
    UserInterestedInReadableStore
      .defaultSimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.LogFavBasedUserInterestedIn,
        ModelVersion.Model20m145k2020)

  val memcachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      // Values are converted to Thrift so that they can be serialized into memcache.
      backingStore = underlyingStore.mapValues(_.toThrift),
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      // Distinct scope for the memcache tier; the local cache tier below keeps "_store".
      statsReceiver = statsReceiver.scope("log_fav_based_user_interested_in_2020_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    memcachedStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "log_fav_based_user_interested_in_2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_user_interested_in_2020_store"))
}
|
||||||
|
|
||||||
|
// Production
|
||||||
|
/**
 * FavBasedUserInterestedInFromPE embeddings for `Model20m145kUpdated`.
 *
 * Served through memcache (12 hour TTL, LZ4-compressed binary Thrift) and then an
 * `ObservedCachedReadableStore` layer (6 hour TTL, at most 262143 keys).
 */
lazy val favBasedUserInterestedInFromPE20M145KUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Values are converted to Thrift so that they can be serialized into memcache.
  val thriftSource = UserInterestedInReadableStore
    .defaultIIPESimClustersEmbeddingStoreWithMtls(
      mhMtlsParams,
      EmbeddingType.FavBasedUserInterestedInFromPE,
      ModelVersion.Model20m145kUpdated
    )
    .mapValues(embedding => embedding.toThrift)

  val memcacheStats = statsReceiver.scope("fav_based_user_interested_in_from_pe_mem_cache")

  val remoteCached = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftSource,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = memcacheStats,
      keyToString = { id => embeddingCacheKeyBuilder.apply(id) }
    )
    .mapValues(thrift => SimClustersEmbedding(thrift))

  // NOTE(review): this scope ends in "_cache" (same text as cacheName) whereas the sibling
  // stores use a "_store" suffix here; kept as-is to preserve the emitted metric names.
  val localCacheStats = statsReceiver.scope("fav_based_user_interested_in_from_pe_cache")

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCached,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "fav_based_user_interested_in_from_pe_cache",
    windowSize = 10000L
  )(localCacheStats)
}
|
||||||
|
|
||||||
|
/**
 * Lookup table from `(EmbeddingType, ModelVersion)` to the store serving that combination.
 * It is handed to `SimClustersEmbeddingStore.buildWithDecider` in `simClustersEmbeddingStore`.
 */
private val underlyingStores: Map[
  (EmbeddingType, ModelVersion),
  ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
] = {
  type StoreKey = (EmbeddingType, ModelVersion)
  type EmbeddingStore = ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]

  // Tweet Embeddings
  val tweetStores: Map[StoreKey, EmbeddingStore] = Map(
    (LogFavBasedTweet, Model20m145kUpdated) -> logFavBased20M145KUpdatedTweetEmbeddingStore,
    (LogFavBasedTweet, Model20m145k2020) -> logFavBased20M145K2020TweetEmbeddingStore,
    (LogFavLongestL2EmbeddingTweet, Model20m145k2020) ->
      logFavBasedLongestL2Tweet20M145K2020EmbeddingStore
  )

  // Entity Embeddings
  val entityStores: Map[StoreKey, EmbeddingStore] = Map(
    (FavTfgTopic, Model20m145k2020) -> favBasedTfgTopicEmbedding2020Store,
    (LogFavBasedKgoApeTopic, Model20m145k2020) ->
      logFavBasedApeEntity20M145K2020EmbeddingCachedStore
  )

  // KnownFor Embeddings
  val knownForStores: Map[StoreKey, EmbeddingStore] = Map(
    (FavBasedProducer, Model20m145k2020) -> favBasedProducer20M145K2020EmbeddingStore,
    (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) ->
      relaxedLogFavBasedApe20M145K2020EmbeddingCachedStore
  )

  // InterestedIn Embeddings
  val interestedInStores: Map[StoreKey, EmbeddingStore] = Map(
    (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) ->
      LogFavBasedInterestedInFromAPE20M145K2020Store,
    (FollowBasedUserInterestedInFromAPE, Model20m145k2020) ->
      FollowBasedInterestedInFromAPE20M145K2020Store,
    (FavBasedUserInterestedIn, Model20m145kUpdated) -> favBasedUserInterestedIn20M145KUpdatedStore,
    (FavBasedUserInterestedIn, Model20m145k2020) -> favBasedUserInterestedIn20M145K2020Store,
    (LogFavBasedUserInterestedIn, Model20m145k2020) -> logFavBasedUserInterestedIn20M145K2020Store,
    (FavBasedUserInterestedInFromPE, Model20m145kUpdated) ->
      favBasedUserInterestedInFromPE20M145KUpdatedStore,
    (FilteredUserInterestedIn, Model20m145kUpdated) -> userInterestedInStore,
    (FilteredUserInterestedIn, Model20m145k2020) -> userInterestedInStore,
    (FilteredUserInterestedInFromPE, Model20m145kUpdated) -> userInterestedInStore,
    (UnfilteredUserInterestedIn, Model20m145kUpdated) -> userInterestedInStore,
    (UnfilteredUserInterestedIn, Model20m145k2020) -> userInterestedInStore
  )

  tweetStores ++ entityStores ++ knownForStores ++ interestedInStores
}
|
||||||
|
|
||||||
|
/**
 * Entry-point store: decider-gated dispatch over `underlyingStores`, wrapped with a
 * decider-controlled timeout and finally with read observation stats.
 */
val simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
  val deciderGated: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = statsReceiver.scope("simClusters_embeddings_store_deciderable")
    )

  // Whether the timeout applies, and its value, are both read from decider keys.
  val timeBounded: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    new ReadableStoreWithTimeout(
      rs = deciderGated,
      decider = rmsDecider.decider,
      enableTimeoutDeciderKey = DeciderConstants.enableSimClustersEmbeddingStoreTimeouts,
      timeoutValueKey = DeciderConstants.simClustersEmbeddingStoreTimeoutValueMillis,
      timer = timer,
      statsReceiver = statsReceiver.scope("simClusters_embedding_store_timeouts")
    )

  val observedStats = statsReceiver.scope("simClusters_embeddings_store")
  ObservedReadableStore(store = timeBounded)(observedStats)
}
|
||||||
|
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Build target for the representation-manager Guice modules in this directory.
scala_library(
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
        "finagle/finagle-stats",
        "finatra/inject/inject-core/src/main/scala",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
        "interests-service/thrift/src/main/thrift:thrift-scala",
        "representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
        "servo/util",
        "src/scala/com/twitter/storehaus_internal/manhattan",
        "src/scala/com/twitter/storehaus_internal/memcache",
        "src/scala/com/twitter/storehaus_internal/util",
        "strato/src/main/scala/com/twitter/strato/client",
    ],
)
|
|
@ -0,0 +1,34 @@
|
||||||
|
package com.twitter.representation_manager.modules
|
||||||
|
|
||||||
|
import com.google.inject.Provides
|
||||||
|
import com.twitter.finagle.memcached.Client
|
||||||
|
import javax.inject.Singleton
|
||||||
|
import com.twitter.conversions.DurationOps._
|
||||||
|
import com.twitter.inject.TwitterModule
|
||||||
|
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.storehaus_internal.memcache.MemcacheStore
|
||||||
|
import com.twitter.storehaus_internal.util.ClientName
|
||||||
|
import com.twitter.storehaus_internal.util.ZkEndPoint
|
||||||
|
|
||||||
|
/**
 * Guice module that provides the memcache `Client` used by representation-manager.
 *
 * The destination, timeout and retry count are read from command-line flags, which are
 * declared here without default values.
 */
object CacheModule extends TwitterModule {

  private val destFlag = flag[String]("cache_module.dest", "Path to memcache service")
  // The value of this flag is interpreted as milliseconds (see `providesCache`).
  private val timeoutMillisFlag = flag[Int]("memcache.timeout", "Memcache client timeout")
  private val retriesFlag = flag[Int]("memcache.retries", "Memcache timeout retries")

  /**
   * Builds the singleton memcache client.
   *
   * @param serviceIdentifier service identity passed through to the memcache client
   * @param stats root stats receiver; client metrics are scoped under "cache_client"
   */
  @Provides
  @Singleton
  def providesCache(
    serviceIdentifier: ServiceIdentifier,
    stats: StatsReceiver
  ): Client = {
    MemcacheStore.memcachedClient(
      name = ClientName("memcache_representation_manager"),
      dest = ZkEndPoint(destFlag()),
      timeout = timeoutMillisFlag().milliseconds,
      retries = retriesFlag(),
      statsReceiver = stats.scope("cache_client"),
      serviceIdentifier = serviceIdentifier
    )
  }
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
package com.twitter.representation_manager.modules
|
||||||
|
|
||||||
|
import com.google.inject.Provides
|
||||||
|
import com.twitter.conversions.DurationOps._
|
||||||
|
import com.twitter.finagle.ThriftMux
|
||||||
|
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||||
|
import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax
|
||||||
|
import com.twitter.finagle.mux.ClientDiscardedRequestException
|
||||||
|
import com.twitter.finagle.service.ReqRep
|
||||||
|
import com.twitter.finagle.service.ResponseClass
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.finagle.thrift.ClientId
|
||||||
|
import com.twitter.inject.TwitterModule
|
||||||
|
import com.twitter.interests.thriftscala.InterestsThriftService
|
||||||
|
import com.twitter.util.Throw
|
||||||
|
import javax.inject.Singleton
|
||||||
|
|
||||||
|
/** Guice module that provides the ThriftMux client for the Interests thrift service. */
object InterestsThriftClientModule extends TwitterModule {

  /**
   * Builds the singleton Interests client: mutual TLS, a 450 ms request timeout, and
   * stats scoped under "InterestsThriftClient".
   *
   * @param clientId client id attached to outgoing requests
   * @param serviceIdentifier service identity used for mutual TLS
   * @param statsReceiver root stats receiver
   */
  @Provides
  @Singleton
  def providesInterestsThriftClient(
    clientId: ClientId,
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver
  ): InterestsThriftService.MethodPerEndpoint = {
    // Responses that failed with ClientDiscardedRequestException are classified as Ignorable.
    val discardedAsIgnorable: PartialFunction[ReqRep, ResponseClass] = {
      case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable
    }

    val configuredClient = ThriftMux.client
      .withClientId(clientId)
      .withMutualTls(serviceIdentifier)
      .withRequestTimeout(450.milliseconds)
      .withStatsReceiver(statsReceiver.scope("InterestsThriftClient"))
      .withResponseClassifier(discardedAsIgnorable)

    configuredClient.build[InterestsThriftService.MethodPerEndpoint](
      dest = "/s/interests-thrift-service/interests-thrift-service",
      label = "interests_thrift_service"
    )
  }
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
package com.twitter.representation_manager.modules
|
||||||
|
|
||||||
|
import com.google.inject.Provides
|
||||||
|
import com.twitter.inject.TwitterModule
|
||||||
|
import javax.inject.Named
|
||||||
|
import javax.inject.Singleton
|
||||||
|
|
||||||
|
/** Guice module that binds the fixed, named configuration values used by the legacy RMS. */
object LegacyRMSConfigModule extends TwitterModule {

  /** Value bound to the "cacheHashKeyPrefix" name. */
  @Provides
  @Singleton
  @Named("cacheHashKeyPrefix")
  def providesCacheHashKeyPrefix: String = {
    "RMS"
  }

  /** Value bound to the "useContentRecommenderConfiguration" name; always false here. */
  @Provides
  @Singleton
  @Named("useContentRecommenderConfiguration")
  def providesUseContentRecommenderConfiguration: Boolean = {
    false
  }
}
|
|
@ -0,0 +1,24 @@
|
||||||
|
package com.twitter.representation_manager.modules
|
||||||
|
|
||||||
|
import com.google.inject.Provides
|
||||||
|
import javax.inject.Singleton
|
||||||
|
import com.twitter.inject.TwitterModule
|
||||||
|
import com.twitter.decider.Decider
|
||||||
|
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||||
|
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||||
|
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||||
|
|
||||||
|
/** Guice module that provides the Manhattan mTLS parameters and the RMS decider wrapper. */
object StoreModule extends TwitterModule {

  /**
   * Wraps the service identity into Manhattan KV client mTLS parameters.
   *
   * @param serviceIdentifier identity of this service
   */
  @Provides
  @Singleton
  def providesMhMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams = {
    ManhattanKVClientMtlsParams(serviceIdentifier)
  }

  /**
   * Wraps the injected decider in a `RepresentationManagerDecider`.
   *
   * @param decider the injected decider instance
   */
  @Provides
  @Singleton
  def providesRmsDecider(
    decider: Decider
  ): RepresentationManagerDecider = {
    RepresentationManagerDecider(decider)
  }

}
|
|
@ -0,0 +1,13 @@
|
||||||
|
package com.twitter.representation_manager.modules
|
||||||
|
|
||||||
|
import com.google.inject.Provides
|
||||||
|
import com.twitter.finagle.util.DefaultTimer
|
||||||
|
import com.twitter.inject.TwitterModule
|
||||||
|
import com.twitter.util.Timer
|
||||||
|
import javax.inject.Singleton
|
||||||
|
|
||||||
|
/** Guice module that binds `Timer` to Finagle's `DefaultTimer`. */
object TimerModule extends TwitterModule {

  /** Provides the shared timer instance. */
  @Provides
  @Singleton
  def providesTimer: Timer = {
    DefaultTimer
  }
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
package com.twitter.representation_manager.modules
|
||||||
|
|
||||||
|
import com.google.inject.Provides
|
||||||
|
import com.twitter.escherbird.util.uttclient.CacheConfigV2
|
||||||
|
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
|
||||||
|
import com.twitter.escherbird.util.uttclient.UttClientCacheConfigsV2
|
||||||
|
import com.twitter.escherbird.utt.strato.thriftscala.Environment
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.inject.TwitterModule
|
||||||
|
import com.twitter.strato.client.{Client => StratoClient}
|
||||||
|
import javax.inject.Singleton
|
||||||
|
|
||||||
|
/** Guice module that provides the cached UTT client backed by Strato. */
object UttClientModule extends TwitterModule {

  /**
   * Builds the singleton `CachedUttClientV2` against the Prod environment, using the same
   * cache configuration for every cached call.
   *
   * @param stratoClient Strato client used by the UTT client
   * @param statsReceiver root stats receiver; metrics are scoped under "cached_utt_client"
   */
  @Provides
  @Singleton
  def providesUttClient(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): CachedUttClientV2 = {
    // 262143 = 2^18 - 1. The original note states this is intended to hold every UTT so
    // that the cache hit rate approaches 100%.
    val sharedCacheConfig: CacheConfigV2 = CacheConfigV2(262143)

    val perCallCacheConfigs: UttClientCacheConfigsV2 = UttClientCacheConfigsV2(
      getTaxonomyConfig = sharedCacheConfig,
      getUttTaxonomyConfig = sharedCacheConfig,
      getLeafIds = sharedCacheConfig,
      getLeafUttEntities = sharedCacheConfig
    )

    val clientStats = statsReceiver.scope("cached_utt_client")

    new CachedUttClientV2(
      stratoClient = stratoClient,
      env = Environment.Prod,
      cacheConfigs = perCallCacheConfigs,
      statsReceiver = clientStats
    )
  }
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Build target for the representation-manager store package in this directory.
scala_library(
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        "content-recommender/server/src/main/scala/com/twitter/contentrecommender:representation-manager-deps",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
        "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
        "representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
        "src/scala/com/twitter/simclusters_v2/stores",
        "src/scala/com/twitter/simclusters_v2/summingbird/stores",
        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
        "storage/clients/manhattan/client/src/main/scala",
        "tweetypie/src/scala/com/twitter/tweetypie/util",
    ],
)
|
|
@ -0,0 +1,39 @@
|
||||||
|
package com.twitter.representation_manager.store
|
||||||
|
|
||||||
|
import com.twitter.servo.decider.DeciderKeyEnum
|
||||||
|
|
||||||
|
/**
 * Decider key names used by the stores in this package.
 *
 * These deciders are inherited from CR and RSX and are only used in LegacyRMS. Their values
 * are controlled through CR's and RSX's yml files and decider dashboards, and they are
 * meant to be removed once the migration is complete.
 */
object DeciderConstants {
  // Gates for the APE entity cached stores.
  val enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore: String =
    "enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore"
  val enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore: String =
    "enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore"

  // Timeout switch and timeout value (milliseconds) for the 20M145K2020 tweet embedding store.
  val enablelogFavBased20M145K2020TweetEmbeddingStoreTimeouts: String =
    "enable_log_fav_based_tweet_embedding_20m145k2020_timeouts"
  val logFavBased20M145K2020TweetEmbeddingStoreTimeoutValueMillis: String =
    "log_fav_based_tweet_embedding_20m145k2020_timeout_value_millis"

  // Timeout switch and timeout value (milliseconds) for the 20M145KUpdated tweet embedding store.
  val enablelogFavBased20M145KUpdatedTweetEmbeddingStoreTimeouts: String =
    "enable_log_fav_based_tweet_embedding_20m145kUpdated_timeouts"
  val logFavBased20M145KUpdatedTweetEmbeddingStoreTimeoutValueMillis: String =
    "log_fav_based_tweet_embedding_20m145kUpdated_timeout_value_millis"

  // Timeout switch and timeout value (milliseconds) for the top-level SimClusters embedding store.
  val enableSimClustersEmbeddingStoreTimeouts: String =
    "enable_sim_clusters_embedding_store_timeouts"
  val simClustersEmbeddingStoreTimeoutValueMillis: String =
    "sim_clusters_embedding_store_timeout_value_millis"
}
|
||||||
|
|
||||||
|
// Necessary for using servo Gates
|
||||||
|
/**
 * Enumeration form of the APE entity cached-store decider keys. Each value is named after
 * the matching constant in `DeciderConstants`.
 */
object DeciderKey extends DeciderKeyEnum {
  // Declaration order is kept unchanged.
  val enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore: Value =
    Value(DeciderConstants.enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore)

  val enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore: Value =
    Value(DeciderConstants.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore)
}
|
|
@ -0,0 +1,198 @@
|
||||||
|
package com.twitter.representation_manager.store
|
||||||
|
|
||||||
|
import com.twitter.contentrecommender.store.ApeEntityEmbeddingStore
|
||||||
|
import com.twitter.contentrecommender.store.InterestsOptOutStore
|
||||||
|
import com.twitter.contentrecommender.store.SemanticCoreTopicSeedStore
|
||||||
|
import com.twitter.conversions.DurationOps._
|
||||||
|
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
|
||||||
|
import com.twitter.finagle.memcached.Client
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||||
|
import com.twitter.frigate.common.util.SeqLongInjection
|
||||||
|
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||||
|
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||||
|
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||||
|
import com.twitter.interests.thriftscala.InterestsThriftService
|
||||||
|
import com.twitter.representation_manager.common.MemCacheConfig
|
||||||
|
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||||
|
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||||
|
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||||
|
import com.twitter.storehaus.ReadableStore
|
||||||
|
import com.twitter.strato.client.{Client => StratoClient}
|
||||||
|
import com.twitter.tweetypie.util.UserId
|
||||||
|
import javax.inject.Inject
|
||||||
|
|
||||||
|
class TopicSimClustersEmbeddingStore @Inject() (
|
||||||
|
stratoClient: StratoClient,
|
||||||
|
cacheClient: Client,
|
||||||
|
globalStats: StatsReceiver,
|
||||||
|
mhMtlsParams: ManhattanKVClientMtlsParams,
|
||||||
|
rmsDecider: RepresentationManagerDecider,
|
||||||
|
interestService: InterestsThriftService.MethodPerEndpoint,
|
||||||
|
uttClient: CachedUttClientV2) {
|
||||||
|
|
||||||
|
// All metrics of this class are scoped under the class's simple name.
private val stats: StatsReceiver = globalStats.scope(getClass.getSimpleName)

// Opt-out store built on the injected interests service; consumed by the topic seed store.
private val interestsOptOutStore = InterestsOptOutStore(interestService)
|
||||||
|
|
||||||
|
/**
 * Seed store for topics. Note this is NOT an embedding store: each value is the list of
 * author account ids used to represent a topic.
 *
 * Reads go through memcache (12 hour TTL) and then an `ObservedCachedReadableStore` layer
 * (6 hour TTL, at most 20000 keys).
 */
private val semanticCoreTopicSeedStore: ReadableStore[
  SemanticCoreTopicSeedStore.Key,
  Seq[UserId]
] = {
  /*
    Sizing note carried over from the original code:
      Up to 1000 Long seeds per topic/language = 62.5kb per topic/language (worst case)
      Assume ~10k active topic/languages ~= 650MB (worst case)
    NOTE(review): 1000 Longs are 8000 bytes (about 62.5 kilobits), so the units of these
    figures look inconsistent -- confirm before relying on them for capacity planning.
   */
  val seedSource = new SemanticCoreTopicSeedStore(uttClient, interestsOptOutStore)(
    stats.scope("semantic_core_topic_seed_store")
  )

  val remoteCached = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = seedSource,
    cacheClient = cacheClient,
    ttl = 12.hours
  )(
    valueInjection = SeqLongInjection,
    statsReceiver = stats.scope("topic_producer_seed_store_mem_cache"),
    keyToString = { key => s"tpss:${key.entityId}_${key.languageCode}" }
  )

  val localCacheStats = stats.scope("topic_producer_seed_store_cache")

  ObservedCachedReadableStore.from[SemanticCoreTopicSeedStore.Key, Seq[UserId]](
    store = remoteCached,
    ttl = 6.hours,
    maxKeys = 20000,
    cacheName = "topic_producer_seed_store_cache",
    windowSize = 5000
  )(localCacheStats)
}
|
||||||
|
|
||||||
|
/**
 * FavTfgTopic embeddings for `Model20m145k2020`, fetched from the Strato column
 * "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020", truncated to 50
 * entries, re-keyed by `LocaleEntityId`, and wrapped by `buildLocaleEntityIdMemCacheStore`.
 */
private val favBasedTfgTopicEmbedding20m145k2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val stratoStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020"
    )

  // Truncate through the domain type, then convert back to Thrift for the cache builder.
  val truncatedStore = stratoStore.mapValues { thrift =>
    SimClustersEmbedding(thrift, truncate = 50).toThrift
  }

  val localeKeyedStore = truncatedStore.composeKeyMapping[LocaleEntityId] { entityId =>
    SimClustersEmbeddingId(FavTfgTopic, Model20m145k2020, InternalId.LocaleEntityId(entityId))
  }

  buildLocaleEntityIdMemCacheStore(localeKeyedStore, FavTfgTopic, Model20m145k2020)
}
|
||||||
|
|
||||||
|
/**
 * Store registered for (LogFavBasedKgoApeTopic, Model20m145k2020).
 *
 * An `ApeEntityEmbeddingStore` is assembled from `semanticCoreTopicSeedStore` and a
 * user-keyed view of the `logFavBasedAPE20M145K2020` Strato column; its output is rebuilt with
 * `truncate = 50`, re-keyed by `TopicId` and wrapped by `buildTopicIdMemCacheStore`.
 */
private val logFavBasedApeEntity20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Producer (APE) embeddings, looked up by plain UserId.
  val producerEmbeddingsByUser = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020")
    .mapValues { thriftEmbedding => SimClustersEmbedding(thriftEmbedding, truncate = 50) }
    .composeKeyMapping[UserId] { userId =>
      SimClustersEmbeddingId(
        AggregatableLogFavBasedProducer,
        Model20m145k2020,
        InternalId.UserId(userId))
    }

  val entityEmbeddingStore = new ApeEntityEmbeddingStore(
    semanticCoreSeedStore = semanticCoreTopicSeedStore,
    aggregatableProducerEmbeddingStore = producerEmbeddingsByUser,
    statsReceiver = stats.scope("log_fav_based_ape_entity_2020_embedding_store"))

  // Entity embeddings as thrift, looked up by plain TopicId.
  val topicKeyedStore = entityEmbeddingStore
    .mapValues { entityEmbedding =>
      SimClustersEmbedding(entityEmbedding.toThrift, truncate = 50).toThrift
    }
    .composeKeyMapping[TopicId] { topicId =>
      SimClustersEmbeddingId(
        LogFavBasedKgoApeTopic,
        Model20m145k2020,
        InternalId.TopicId(topicId))
    }

  buildTopicIdMemCacheStore(topicKeyedStore, LogFavBasedKgoApeTopic, Model20m145k2020)
}
|
||||||
|
|
||||||
|
/**
 * Wraps a `TopicId`-keyed thrift store so it can be queried by `SimClustersEmbeddingId`.
 *
 * The raw store is observed under `stats/<embeddingType>/<modelVersion>`, re-keyed, and then
 * passed to `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
 *
 * @param rawStore      store keyed by TopicId
 * @param embeddingType embedding type used for the stats scope and the cache configuration
 * @param modelVersion  model version used for the stats scope and the cache configuration
 */
private def buildTopicIdMemCacheStore(
  rawStore: ReadableStore[TopicId, ThriftSimClustersEmbedding],
  embeddingType: EmbeddingType,
  modelVersion: ModelVersion
): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
  val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)

  val observed: ObservedReadableStore[TopicId, ThriftSimClustersEmbedding] =
    ObservedReadableStore(store = rawStore)(scopedStats)

  // Only ids that carry an InternalId.TopicId are handled by this mapping.
  val embeddingIdKeyedStore = observed.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(_, _, InternalId.TopicId(id)) => id
  }

  MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
    embeddingIdKeyedStore,
    cacheClient,
    embeddingType,
    modelVersion,
    stats)
}
|
||||||
|
|
||||||
|
/**
 * Wraps a `LocaleEntityId`-keyed thrift store so it can be queried by `SimClustersEmbeddingId`.
 *
 * The raw store is observed under `stats/<embeddingType>/<modelVersion>`, re-keyed, and then
 * passed to `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
 *
 * @param rawStore      store keyed by LocaleEntityId
 * @param embeddingType embedding type used for the stats scope and the cache configuration
 * @param modelVersion  model version used for the stats scope and the cache configuration
 */
private def buildLocaleEntityIdMemCacheStore(
  rawStore: ReadableStore[LocaleEntityId, ThriftSimClustersEmbedding],
  embeddingType: EmbeddingType,
  modelVersion: ModelVersion
): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
  val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)

  val observed: ObservedReadableStore[LocaleEntityId, ThriftSimClustersEmbedding] =
    ObservedReadableStore(store = rawStore)(scopedStats)

  // Only ids that carry an InternalId.LocaleEntityId are handled by this mapping.
  val embeddingIdKeyedStore = observed.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(id)) => id
  }

  MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
    embeddingIdKeyedStore,
    cacheClient,
    embeddingType,
    modelVersion,
    stats)
}
|
||||||
|
|
||||||
|
/** Topic embedding stores, indexed by the (embedding type, model version) pair they serve. */
private val underlyingStores: Map[
  (EmbeddingType, ModelVersion),
  ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
] = Map(
  // Topic Embeddings
  ((FavTfgTopic, Model20m145k2020), favBasedTfgTopicEmbedding20m145k2020Store),
  ((LogFavBasedKgoApeTopic, Model20m145k2020), logFavBasedApeEntity20M145K2020EmbeddingStore)
)
|
||||||
|
|
||||||
|
/**
 * Single entry point over every store in `underlyingStores`, assembled by
 * `SimClustersEmbeddingStore.buildWithDecider` with this service's decider and stats.
 */
val topicSimClustersEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  SimClustersEmbeddingStore.buildWithDecider(
    underlyingStores = underlyingStores,
    decider = rmsDecider.decider,
    statsReceiver = stats)
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,141 @@
|
||||||
|
package com.twitter.representation_manager.store
|
||||||
|
|
||||||
|
import com.twitter.finagle.memcached.Client
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||||
|
import com.twitter.representation_manager.common.MemCacheConfig
|
||||||
|
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||||
|
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.common.TweetId
|
||||||
|
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||||
|
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||||
|
import com.twitter.storehaus.ReadableStore
|
||||||
|
import javax.inject.Inject
|
||||||
|
|
||||||
|
/**
 * Tweet SimClusters embedding stores.
 *
 * Each public store reads a `PersistentTweetEmbeddingStore` Manhattan dataset, converts the
 * values to thrift and is wrapped by `buildMemCacheStore`. `tweetSimClustersEmbeddingStore`
 * combines all of them through `SimClustersEmbeddingStore.buildWithDecider`.
 */
class TweetSimClustersEmbeddingStore @Inject() (
  cacheClient: Client,
  globalStats: StatsReceiver,
  mhMtlsParams: ManhattanKVClientMtlsParams,
  rmsDecider: RepresentationManagerDecider) {

  // All stats of this class live under a scope named after the class.
  private val stats = globalStats.scope(getClass.getSimpleName)

  /** (LogFavLongestL2EmbeddingTweet, Model20m145kUpdated): longest-L2-norm store, Updated dataset. */
  val logFavBasedLongestL2Tweet20M145KUpdatedEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val manhattanStore = PersistentTweetEmbeddingStore.longestL2NormTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
      stats)
    val thriftStore = manhattanStore.mapValues(_.toThrift)

    buildMemCacheStore(thriftStore, LogFavLongestL2EmbeddingTweet, Model20m145kUpdated)
  }

  /** (LogFavLongestL2EmbeddingTweet, Model20m145k2020): longest-L2-norm store, 2020 dataset. */
  val logFavBasedLongestL2Tweet20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val manhattanStore = PersistentTweetEmbeddingStore.longestL2NormTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
      stats)
    val thriftStore = manhattanStore.mapValues(_.toThrift)

    buildMemCacheStore(thriftStore, LogFavLongestL2EmbeddingTweet, Model20m145k2020)
  }

  /** (LogFavBasedTweet, Model20m145kUpdated): most-recent store, Updated dataset. */
  val logFavBased20M145KUpdatedTweetEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val manhattanStore = PersistentTweetEmbeddingStore.mostRecentTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
      stats)
    val thriftStore = manhattanStore.mapValues(_.toThrift)

    buildMemCacheStore(thriftStore, LogFavBasedTweet, Model20m145kUpdated)
  }

  /** (LogFavBasedTweet, Model20m145k2020): most-recent store, 2020 dataset. */
  val logFavBased20M145K2020TweetEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val manhattanStore = PersistentTweetEmbeddingStore.mostRecentTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
      stats)
    val thriftStore = manhattanStore.mapValues(_.toThrift)

    buildMemCacheStore(thriftStore, LogFavBasedTweet, Model20m145k2020)
  }

  /**
   * Wraps a `TweetId`-keyed thrift store so it can be queried by `SimClustersEmbeddingId`.
   *
   * The raw store is observed under `stats/<embeddingType>/<modelVersion>`, re-keyed, and then
   * passed to `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
   */
  private def buildMemCacheStore(
    rawStore: ReadableStore[TweetId, ThriftSimClustersEmbedding],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)

    val observed: ObservedReadableStore[TweetId, ThriftSimClustersEmbedding] =
      ObservedReadableStore(store = rawStore)(scopedStats)

    // Only ids that carry an InternalId.TweetId are handled by this mapping.
    val embeddingIdKeyedStore = observed.composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(_, _, InternalId.TweetId(id)) => id
    }

    MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
      embeddingIdKeyedStore,
      cacheClient,
      embeddingType,
      modelVersion,
      stats)
  }

  /** Tweet embedding stores, indexed by the (embedding type, model version) pair they serve. */
  private val underlyingStores: Map[
    (EmbeddingType, ModelVersion),
    ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
  ] = Map(
    // Tweet Embeddings
    ((LogFavBasedTweet, Model20m145kUpdated), logFavBased20M145KUpdatedTweetEmbeddingStore),
    ((LogFavBasedTweet, Model20m145k2020), logFavBased20M145K2020TweetEmbeddingStore),
    (
      (LogFavLongestL2EmbeddingTweet, Model20m145kUpdated),
      logFavBasedLongestL2Tweet20M145KUpdatedEmbeddingStore),
    (
      (LogFavLongestL2EmbeddingTweet, Model20m145k2020),
      logFavBasedLongestL2Tweet20M145K2020EmbeddingStore)
  )

  /** Single entry point over every store in `underlyingStores`, gated by the RMS decider. */
  val tweetSimClustersEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = stats)

}
|
|
@ -0,0 +1,602 @@
|
||||||
|
package com.twitter.representation_manager.store
|
||||||
|
|
||||||
|
import com.twitter.contentrecommender.twistly
|
||||||
|
import com.twitter.finagle.memcached.Client
|
||||||
|
import com.twitter.finagle.stats.StatsReceiver
|
||||||
|
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||||
|
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||||
|
import com.twitter.representation_manager.common.MemCacheConfig
|
||||||
|
import com.twitter.representation_manager.common.RepresentationManagerDecider
|
||||||
|
import com.twitter.simclusters_v2.common.ModelVersions
|
||||||
|
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.ProducerClusterEmbeddingReadableStores
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.getStore
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.modelVersionToDatasetMap
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.knownModelVersions
|
||||||
|
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.toSimClustersEmbedding
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
|
||||||
|
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||||
|
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||||
|
import com.twitter.storehaus.ReadableStore
|
||||||
|
import com.twitter.storehaus_internal.manhattan.Apollo
|
||||||
|
import com.twitter.storehaus_internal.manhattan.ManhattanCluster
|
||||||
|
import com.twitter.strato.client.{Client => StratoClient}
|
||||||
|
import com.twitter.strato.thrift.ScroogeConvImplicits._
|
||||||
|
import com.twitter.tweetypie.util.UserId
|
||||||
|
import com.twitter.util.Future
|
||||||
|
import javax.inject.Inject
|
||||||
|
|
||||||
|
class UserSimClustersEmbeddingStore @Inject() (
|
||||||
|
stratoClient: StratoClient,
|
||||||
|
cacheClient: Client,
|
||||||
|
globalStats: StatsReceiver,
|
||||||
|
mhMtlsParams: ManhattanKVClientMtlsParams,
|
||||||
|
rmsDecider: RepresentationManagerDecider) {
|
||||||
|
|
||||||
|
// All stats of this class live under a scope named after the class.
private val stats = globalStats.scope(getClass.getSimpleName)
|
||||||
|
|
||||||
|
/**
 * Store registered for (FavBasedProducer, Model20m145kUpdated).
 *
 * Reads `getProducerTopKSimClustersEmbeddingsStore`, turns each value's `topClusters` into a
 * thrift embedding, and accepts `SimClustersEmbeddingId` keys that carry a user id.
 */
private val favBasedProducer20M145KUpdatedEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val topKStore =
    ProducerClusterEmbeddingReadableStores.getProducerTopKSimClustersEmbeddingsStore(mhMtlsParams)

  val thriftStore = topKStore.mapValues { topK => ThriftSimClustersEmbedding(topK.topClusters) }

  // Only ids that carry an InternalId.UserId are handled by this mapping.
  val embeddingIdKeyedStore = thriftStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(_, _, InternalId.UserId(id)) => id
  }

  buildMemCacheStore(embeddingIdKeyedStore, FavBasedProducer, Model20m145kUpdated)
}
|
||||||
|
|
||||||
|
/**
 * Store registered for (FavBasedProducer, Model20m145k2020).
 *
 * Reads `getProducerTopKSimClusters2020EmbeddingsStore`, turns each value's `topClusters` into
 * a thrift embedding, and accepts `SimClustersEmbeddingId` keys that carry a user id.
 */
private val favBasedProducer20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val topKStore = ProducerClusterEmbeddingReadableStores
    .getProducerTopKSimClusters2020EmbeddingsStore(mhMtlsParams)

  val thriftStore = topKStore.mapValues { topK => ThriftSimClustersEmbedding(topK.topClusters) }

  // Only ids that carry an InternalId.UserId are handled by this mapping.
  val embeddingIdKeyedStore = thriftStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(_, _, InternalId.UserId(id)) => id
  }

  buildMemCacheStore(embeddingIdKeyedStore, FavBasedProducer, Model20m145k2020)
}
|
||||||
|
|
||||||
|
/**
 * Store registered for (FollowBasedProducer, Model20m145k2020).
 *
 * Reads `getProducerTopKSimClustersEmbeddingsByFollowStore`, turns each value's `topClusters`
 * into a thrift embedding, and accepts `SimClustersEmbeddingId` keys that carry a user id.
 */
private val followBasedProducer20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // NOTE(review): the factory name has no model-version suffix although this store is
  // registered as Model20m145k2020 -- confirm the underlying dataset is the 2020 one.
  val topKStore = ProducerClusterEmbeddingReadableStores
    .getProducerTopKSimClustersEmbeddingsByFollowStore(mhMtlsParams)

  val thriftStore = topKStore.mapValues { topK => ThriftSimClustersEmbedding(topK.topClusters) }

  // Only ids that carry an InternalId.UserId are handled by this mapping.
  val embeddingIdKeyedStore = thriftStore.composeKeyMapping[SimClustersEmbeddingId] {
    case SimClustersEmbeddingId(_, _, InternalId.UserId(id)) => id
  }

  buildMemCacheStore(embeddingIdKeyedStore, FollowBasedProducer, Model20m145k2020)
}
|
||||||
|
|
||||||
|
/**
 * Store registered for (AggregatableLogFavBasedProducer, Model20m145k2020).
 *
 * Values come from the `logFavBasedAPE20M145K2020` Strato column and are rebuilt through
 * `SimClustersEmbedding(_, truncate = 50)` before being wrapped by `buildMemCacheStore`.
 */
private val logFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val stratoColumn = "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020"

  val truncatedStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](stratoClient, stratoColumn)
    .mapValues { thriftEmbedding =>
      SimClustersEmbedding(thriftEmbedding, truncate = 50).toThrift
    }

  buildMemCacheStore(truncatedStore, AggregatableLogFavBasedProducer, Model20m145k2020)
}
|
||||||
|
|
||||||
|
/**
 * Uncached thrift store over the `logFavBasedAPERelaxedFavEngagementThreshold20M145K2020`
 * Strato column, with every value rebuilt through `SimClustersEmbedding(_, truncate = 50)`.
 * It is shared by the two relaxed-APE stores defined after it.
 */
private val rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  ThriftSimClustersEmbedding
] = {
  val stratoStore = StratoFetchableStore
    .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
      stratoClient,
      "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020")

  stratoStore.mapValues { thriftEmbedding =>
    SimClustersEmbedding(thriftEmbedding, truncate = 50).toThrift
  }
}
|
||||||
|
|
||||||
|
/**
 * Store registered for (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020): the raw
 * relaxed-APE store wrapped by `buildMemCacheStore`.
 */
private val relaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildMemCacheStore(
    rawStore = rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore,
    embeddingType = RelaxedAggregatableLogFavBasedProducer,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated).
 *
 * There is no separate dataset behind it: requests keyed with Model20m145kUpdated are
 * rewritten to the Model20m145k2020 key and served from the raw relaxed-APE store.
 */
private val relaxedLogFavBasedApe20m145kUpdatedEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  // Only (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) ids are handled by this
  // mapping; the internal id is carried over unchanged.
  val remappedStore =
    rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore.composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(
            RelaxedAggregatableLogFavBasedProducer,
            Model20m145kUpdated,
            id) =>
        SimClustersEmbeddingId(RelaxedAggregatableLogFavBasedProducer, Model20m145k2020, id)
    }

  buildMemCacheStore(remappedStore, RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated)
}
|
||||||
|
|
||||||
|
/**
 * Store registered for (LogFavBasedUserInterestedInFromAPE, Model20m145k2020), built by
 * `buildUserInterestedInStore` from
 * `UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls`.
 */
private val logFavBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStore(
    storeFunc = UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls,
    embeddingType = LogFavBasedUserInterestedInFromAPE,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (FollowBasedUserInterestedInFromAPE, Model20m145k2020), built by
 * `buildUserInterestedInStore` from
 * `UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls`.
 */
private val followBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStore(
    storeFunc = UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls,
    embeddingType = FollowBasedUserInterestedInFromAPE,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (FavBasedUserInterestedIn, Model20m145kUpdated), built by
 * `buildUserInterestedInStore` from
 * `UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls`.
 */
private val favBasedUserInterestedIn20M145KUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStore(
    storeFunc = UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
    embeddingType = FavBasedUserInterestedIn,
    modelVersion = Model20m145kUpdated
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (FavBasedUserInterestedIn, Model20m145k2020), built by
 * `buildUserInterestedInStore` from
 * `UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls`.
 */
private val favBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStore(
    storeFunc = UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
    embeddingType = FavBasedUserInterestedIn,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (FollowBasedUserInterestedIn, Model20m145k2020), built by
 * `buildUserInterestedInStore` from
 * `UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls`.
 */
private val followBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStore(
    storeFunc = UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
    embeddingType = FollowBasedUserInterestedIn,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (LogFavBasedUserInterestedIn, Model20m145k2020), built by
 * `buildUserInterestedInStore` from
 * `UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls`.
 */
private val logFavBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStore(
    storeFunc = UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
    embeddingType = LogFavBasedUserInterestedIn,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (FavBasedUserInterestedInFromPE, Model20m145kUpdated), built by
 * `buildUserInterestedInStore` from
 * `UserInterestedInReadableStore.defaultIIPESimClustersEmbeddingStoreWithMtls`.
 */
private val favBasedUserInterestedInFromPE20M145KUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStore(
    storeFunc = UserInterestedInReadableStore.defaultIIPESimClustersEmbeddingStoreWithMtls,
    embeddingType = FavBasedUserInterestedInFromPE,
    modelVersion = Model20m145kUpdated
  )
|
||||||
|
|
||||||
|
/**
 * Uncached thrift store produced by `twistly.interestedin.EmbeddingStore`.
 *
 * The twistly store is fed by two routing stores keyed by (UserId, ModelVersion): one over the
 * InterestedIn datasets (Updated and 2020) and one over the InterestedIn-from-producer-embedding
 * dataset (Updated only). Model versions without a backing store resolve to `Future.None`.
 */
private val twistlyUserInterestedInStore: ReadableStore[
  SimClustersEmbeddingId,
  ThriftSimClustersEmbedding
] = {
  val updatedInterestedIn = UserInterestedInReadableStore.defaultStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145KUpdated)

  val interestedIn2020 = UserInterestedInReadableStore.defaultStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145K2020)

  val updatedInterestedInFromPE = UserInterestedInReadableStore.defaultIIPEStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145KUpdated)

  // Picks the InterestedIn store that matches the requested model version.
  val interestedInByModel: ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] =
    new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
      override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] =
        k match {
          case (userId, Model20m145kUpdated) => updatedInterestedIn.get(userId)
          case (userId, Model20m145k2020) => interestedIn2020.get(userId)
          case _ => Future.None
        }
    }

  // Only the Updated model has an InterestedIn-from-producer-embedding dataset here.
  val interestedInFromPEByModel: ReadableStore[
    (UserId, ModelVersion),
    ClustersUserIsInterestedIn
  ] =
    new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
      override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] =
        k match {
          case (userId, Model20m145kUpdated) => updatedInterestedInFromPE.get(userId)
          case _ => Future.None
        }
    }

  val twistlyStore = new twistly.interestedin.EmbeddingStore(
    interestedInStore = interestedInByModel,
    interestedInFromProducerEmbeddingStore = interestedInFromPEByModel,
    statsReceiver = stats)

  twistlyStore.mapValues(_.toThrift)
}
|
||||||
|
|
||||||
|
/**
 * Store registered for (UserNextInterestedIn, Model20m145k2020), built by
 * `buildUserInterestedInStore` from
 * `UserInterestedInReadableStore.defaultNextInterestedInStoreWithMtls`.
 */
private val userNextInterestedIn20m145k2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStore(
    storeFunc = UserInterestedInReadableStore.defaultNextInterestedInStoreWithMtls,
    embeddingType = UserNextInterestedIn,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (FilteredUserInterestedIn, Model20m145kUpdated): the twistly store
 * wrapped by `buildMemCacheStore`.
 */
private val filteredUserInterestedIn20m145kUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildMemCacheStore(
    rawStore = twistlyUserInterestedInStore,
    embeddingType = FilteredUserInterestedIn,
    modelVersion = Model20m145kUpdated
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (FilteredUserInterestedIn, Model20m145k2020): the twistly store
 * wrapped by `buildMemCacheStore`.
 */
private val filteredUserInterestedIn20m145k2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildMemCacheStore(
    rawStore = twistlyUserInterestedInStore,
    embeddingType = FilteredUserInterestedIn,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (FilteredUserInterestedInFromPE, Model20m145kUpdated): the twistly
 * store wrapped by `buildMemCacheStore`.
 */
private val filteredUserInterestedInFromPE20m145kUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildMemCacheStore(
    rawStore = twistlyUserInterestedInStore,
    embeddingType = FilteredUserInterestedInFromPE,
    modelVersion = Model20m145kUpdated
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (UnfilteredUserInterestedIn, Model20m145kUpdated): the twistly store
 * wrapped by `buildMemCacheStore`.
 */
private val unfilteredUserInterestedIn20m145kUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildMemCacheStore(
    rawStore = twistlyUserInterestedInStore,
    embeddingType = UnfilteredUserInterestedIn,
    modelVersion = Model20m145kUpdated
  )
|
||||||
|
|
||||||
|
/**
 * Store registered for (UnfilteredUserInterestedIn, Model20m145k2020): the twistly store
 * wrapped by `buildMemCacheStore`.
 */
private val unfilteredUserInterestedIn20m145k2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildMemCacheStore(
    rawStore = twistlyUserInterestedInStore,
    embeddingType = UnfilteredUserInterestedIn,
    modelVersion = Model20m145k2020
  )
|
||||||
|
|
||||||
|
// [Experimental] User InterestedIn, generated by aggregating IIAPE embedding from AddressBook
|
||||||
|
|
||||||
|
/**
 * Store for (LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, Model20m145k2020), read
 * from the `addressbook_sims_embedding_iiape_maxpooling` dataset on the Apollo cluster.
 */
private val logFavBasedInterestedMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStoreGeneric(
    storeFunc = simClustersEmbeddingStoreWithMtls,
    embeddingType = LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE,
    modelVersion = Model20m145k2020,
    datasetName = "addressbook_sims_embedding_iiape_maxpooling",
    appId = "wtf_embedding_apollo",
    manhattanCluster = Apollo
  )
|
||||||
|
|
||||||
|
/**
 * Store for (LogFavBasedUserInterestedAverageAddressBookFromIIAPE, Model20m145k2020), read
 * from the `addressbook_sims_embedding_iiape_average` dataset on the Apollo cluster.
 */
private val logFavBasedInterestedAverageAddressBookFromIIAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStoreGeneric(
    storeFunc = simClustersEmbeddingStoreWithMtls,
    embeddingType = LogFavBasedUserInterestedAverageAddressBookFromIIAPE,
    modelVersion = Model20m145k2020,
    datasetName = "addressbook_sims_embedding_iiape_average",
    appId = "wtf_embedding_apollo",
    manhattanCluster = Apollo
  )
|
||||||
|
|
||||||
|
/**
 * Store for (LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE,
 * Model20m145k2020), read from the `addressbook_sims_embedding_iiape_booktype_maxpooling`
 * dataset on the Apollo cluster.
 */
private val logFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStoreGeneric(
    storeFunc = simClustersEmbeddingStoreWithMtls,
    embeddingType = LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE,
    modelVersion = Model20m145k2020,
    datasetName = "addressbook_sims_embedding_iiape_booktype_maxpooling",
    appId = "wtf_embedding_apollo",
    manhattanCluster = Apollo
  )
|
||||||
|
|
||||||
|
/**
 * Store for (LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE,
 * Model20m145k2020), read from the `addressbook_sims_embedding_iiape_largestdim_maxpooling`
 * dataset on the Apollo cluster.
 */
private val logFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStoreGeneric(
    storeFunc = simClustersEmbeddingStoreWithMtls,
    embeddingType = LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE,
    modelVersion = Model20m145k2020,
    datasetName = "addressbook_sims_embedding_iiape_largestdim_maxpooling",
    appId = "wtf_embedding_apollo",
    manhattanCluster = Apollo
  )
|
||||||
|
|
||||||
|
/**
 * Store for (LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE,
 * Model20m145k2020), read from the `addressbook_sims_embedding_iiape_louvain_maxpooling`
 * dataset on the Apollo cluster.
 */
private val logFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStoreGeneric(
    storeFunc = simClustersEmbeddingStoreWithMtls,
    embeddingType = LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE,
    modelVersion = Model20m145k2020,
    datasetName = "addressbook_sims_embedding_iiape_louvain_maxpooling",
    appId = "wtf_embedding_apollo",
    manhattanCluster = Apollo
  )
|
||||||
|
|
||||||
|
/**
 * Store for (LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE,
 * Model20m145k2020), read from the `addressbook_sims_embedding_iiape_connected_maxpooling`
 * dataset on the Apollo cluster.
 */
private val logFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] =
  buildUserInterestedInStoreGeneric(
    storeFunc = simClustersEmbeddingStoreWithMtls,
    embeddingType = LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE,
    modelVersion = Model20m145k2020,
    datasetName = "addressbook_sims_embedding_iiape_connected_maxpooling",
    appId = "wtf_embedding_apollo",
    manhattanCluster = Apollo
  )
|
||||||
|
|
||||||
|
/**
 * Builds a readable store for one kind of UserInterestedIn embedding.
 *
 * The store returned by `storeFunc` is converted to thrift values and handed to
 * `buildMemCacheStore`, which observes it under `stats/<embeddingType>/<modelVersion>` and
 * wraps it with `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
 *
 * @param storeFunc     factory for the underlying store (the call sites in this class pass
 *                      methods of `UserInterestedInReadableStore`); it is invoked with this
 *                      class's Manhattan mTLS params, `embeddingType` and `modelVersion`
 * @param embeddingType embedding type served by the resulting store
 * @param modelVersion  model version served by the resulting store
 * @return the wrapped store, keyed by `SimClustersEmbeddingId`
 */
private def buildUserInterestedInStore(
  storeFunc: (ManhattanKVClientMtlsParams, EmbeddingType, ModelVersion) => ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ],
  embeddingType: EmbeddingType,
  modelVersion: ModelVersion
): ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val rawStore = storeFunc(mhMtlsParams, embeddingType, modelVersion).mapValues(_.toThrift)

  // buildMemCacheStore already implements the observe-then-memcache wrapping that used to be
  // repeated here verbatim; delegating keeps the stats scoping and cache setup in one place.
  buildMemCacheStore(rawStore, embeddingType, modelVersion)
}
|
||||||
|
|
||||||
|
/**
 * Variant of `buildUserInterestedInStore` for factories that additionally need the Manhattan
 * dataset name, application id and cluster.
 *
 * The store returned by `storeFunc` is converted to thrift values and handed to
 * `buildMemCacheStore`, which observes it under `stats/<embeddingType>/<modelVersion>` and
 * wraps it with `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`.
 *
 * @param storeFunc        factory for the underlying store; invoked with this class's Manhattan
 *                         mTLS params followed by the remaining arguments of this method
 * @param embeddingType    embedding type served by the resulting store
 * @param modelVersion     model version served by the resulting store
 * @param datasetName      forwarded unchanged to `storeFunc`
 * @param appId            forwarded unchanged to `storeFunc`
 * @param manhattanCluster forwarded unchanged to `storeFunc`
 * @return the wrapped store, keyed by `SimClustersEmbeddingId`
 */
private def buildUserInterestedInStoreGeneric(
  storeFunc: (ManhattanKVClientMtlsParams, EmbeddingType, ModelVersion, String, String,
    ManhattanCluster) => ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ],
  embeddingType: EmbeddingType,
  modelVersion: ModelVersion,
  datasetName: String,
  appId: String,
  manhattanCluster: ManhattanCluster
): ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val rawStore =
    storeFunc(mhMtlsParams, embeddingType, modelVersion, datasetName, appId, manhattanCluster)
      .mapValues(_.toThrift)

  // buildMemCacheStore already implements the observe-then-memcache wrapping that used to be
  // repeated here verbatim; delegating keeps the stats scoping and cache setup in one place.
  buildMemCacheStore(rawStore, embeddingType, modelVersion)
}
|
||||||
|
|
||||||
|
/**
 * Opens the given Manhattan dataset through `getStore` and exposes it as a store keyed by
 * `SimClustersEmbeddingId`.
 *
 * Only ids whose embedding type and model version equal the arguments, and which carry a user
 * id, are handled by the key mapping. Values go through `toSimClustersEmbedding`.
 *
 * @throws IllegalArgumentException if the known-for version derived from `modelVersion` is not
 *                                  a key of `modelVersionToDatasetMap`
 */
private def simClustersEmbeddingStoreWithMtls(
  mhMtlsParams: ManhattanKVClientMtlsParams,
  embeddingType: EmbeddingType,
  modelVersion: ModelVersion,
  datasetName: String,
  appId: String,
  manhattanCluster: ManhattanCluster
): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
  val knownForVersion = ModelVersions.toKnownForModelVersion(modelVersion)
  val isKnownModel = modelVersionToDatasetMap.contains(knownForVersion)
  if (!isKnownModel) {
    throw new IllegalArgumentException(
      s"Unknown model version: ${modelVersion}. Known model versions: ${knownModelVersions}")
  }

  val userKeyedStore = getStore(appId, mhMtlsParams, datasetName, manhattanCluster)

  userKeyedStore
    .composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(requestedType, requestedVersion, InternalId.UserId(id))
          if requestedType == embeddingType && requestedVersion == modelVersion =>
        id
    }
    .mapValues(toSimClustersEmbedding(_, embeddingType))
}
|
||||||
|
|
||||||
|
/**
 * Wraps `rawStore` in an `ObservedReadableStore` whose stats are scoped first by the embedding
 * type name and then by the model version name, and passes the observed store on to
 * `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding` together with `cacheClient`.
 *
 * Note that the unscoped `stats` receiver (not the scoped one) is what is handed to
 * `MemCacheConfig`.
 */
private def buildMemCacheStore(
  rawStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding],
  embeddingType: EmbeddingType,
  modelVersion: ModelVersion
): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
  val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)
  val observedRawStore = ObservedReadableStore(store = rawStore)(scopedStats)

  MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
    observedRawStore,
    cacheClient,
    embeddingType,
    modelVersion,
    stats
  )
}
|
||||||
|
|
||||||
|
/**
 * Lookup table from an (embedding type, model version) pair to the store that serves it.
 * Each key appears once; the entries are grouped the same way as the section comments below.
 */
private val underlyingStores: Map[
  (EmbeddingType, ModelVersion),
  ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
] = Map(
  // KnownFor Embeddings
  (FavBasedProducer, Model20m145kUpdated) ->
    favBasedProducer20M145KUpdatedEmbeddingStore,
  (FavBasedProducer, Model20m145k2020) ->
    favBasedProducer20M145K2020EmbeddingStore,
  (FollowBasedProducer, Model20m145k2020) ->
    followBasedProducer20M145K2020EmbeddingStore,
  (AggregatableLogFavBasedProducer, Model20m145k2020) ->
    logFavBasedApe20M145K2020EmbeddingStore,
  (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) ->
    relaxedLogFavBasedApe20m145kUpdatedEmbeddingStore,
  (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) ->
    relaxedLogFavBasedApe20M145K2020EmbeddingStore,
  // InterestedIn Embeddings
  (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) ->
    logFavBasedInterestedInFromAPE20M145K2020Store,
  (FollowBasedUserInterestedInFromAPE, Model20m145k2020) ->
    followBasedInterestedInFromAPE20M145K2020Store,
  (FavBasedUserInterestedIn, Model20m145kUpdated) ->
    favBasedUserInterestedIn20M145KUpdatedStore,
  (FavBasedUserInterestedIn, Model20m145k2020) ->
    favBasedUserInterestedIn20M145K2020Store,
  (FollowBasedUserInterestedIn, Model20m145k2020) ->
    followBasedUserInterestedIn20M145K2020Store,
  (LogFavBasedUserInterestedIn, Model20m145k2020) ->
    logFavBasedUserInterestedIn20M145K2020Store,
  (FavBasedUserInterestedInFromPE, Model20m145kUpdated) ->
    favBasedUserInterestedInFromPE20M145KUpdatedStore,
  (FilteredUserInterestedIn, Model20m145kUpdated) ->
    filteredUserInterestedIn20m145kUpdatedStore,
  (FilteredUserInterestedIn, Model20m145k2020) ->
    filteredUserInterestedIn20m145k2020Store,
  (FilteredUserInterestedInFromPE, Model20m145kUpdated) ->
    filteredUserInterestedInFromPE20m145kUpdatedStore,
  (UnfilteredUserInterestedIn, Model20m145kUpdated) ->
    unfilteredUserInterestedIn20m145kUpdatedStore,
  (UnfilteredUserInterestedIn, Model20m145k2020) ->
    unfilteredUserInterestedIn20m145k2020Store,
  (UserNextInterestedIn, Model20m145k2020) ->
    userNextInterestedIn20m145k2020Store,
  (LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
    logFavBasedInterestedMaxpoolingAddressBookFromIIAPE20M145K2020Store,
  (LogFavBasedUserInterestedAverageAddressBookFromIIAPE, Model20m145k2020) ->
    logFavBasedInterestedAverageAddressBookFromIIAPE20M145K2020Store,
  (LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
    logFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE20M145K2020Store,
  (LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
    logFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE20M145K2020Store,
  (LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
    logFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE20M145K2020Store,
  (LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
    logFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE20M145K2020Store,
)
|
||||||
|
|
||||||
|
/**
 * Store for user SimClusters embeddings, produced by
 * `SimClustersEmbeddingStore.buildWithDecider` from the `underlyingStores` table, the decider
 * held by `rmsDecider`, and `stats`.
 */
val userSimClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
  SimClustersEmbeddingStore.buildWithDecider(
    underlyingStores = underlyingStores,
    decider = rmsDecider.decider,
    statsReceiver = stats
  )
|
||||||
|
|
||||||
|
}
|
18
representation-manager/server/src/main/thrift/BUILD
Normal file
18
representation-manager/server/src/main/thrift/BUILD
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# Thrift libraries for service.thrift, generated for java, scala and strato.
create_thrift_libraries(
    base_name = "thrift",
    sources = ["com/twitter/representation_manager/service.thrift"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependency_roots = ["src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift"],
    generate_languages = [
        "java",
        "scala",
        "strato",
    ],
)
|
|
@ -0,0 +1,14 @@
|
||||||
|
namespace java com.twitter.representation_manager.thriftjava
|
||||||
|
#@namespace scala com.twitter.representation_manager.thriftscala
|
||||||
|
#@namespace strato com.twitter.representation_manager
|
||||||
|
|
||||||
|
include "com/twitter/simclusters_v2/online_store.thrift"
|
||||||
|
include "com/twitter/simclusters_v2/identifier.thrift"
|
||||||
|
|
||||||
|
/**
 * Uniform column view shared by every kind of SimClusters-based embedding.
 * A view is identified by the embedding type together with the model version.
 */
struct SimClustersEmbeddingView {
  1: required identifier.EmbeddingType embeddingType
  2: required online_store.ModelVersion modelVersion
}(persisted = 'false', hasPersonalData = 'false')
|
Loading…
Reference in a new issue