Latest navi open source refresh

latest code change including the global thread pool

Closes twitter/the-algorithm#452
Closes twitter/the-algorithm#505
This commit is contained in:
twitter-team 2023-04-14 10:15:14 -07:00
parent 6e5c875a69
commit 4df87a278e
12 changed files with 112 additions and 68 deletions

View file

@ -31,6 +31,11 @@ In navi/navi, you can run the following commands:
- `scripts/run_onnx.sh` for [Onnx](https://onnx.ai/)
Do note that you need to create a models directory and create some versions, preferably using epoch time, e.g., `1679693908377`,
so the models structure looks like:
models/
-web_click
- 1809000
- 1809010
## Build ## Build
You can adapt the above scripts to build using Cargo. You can adapt the above scripts to build using Cargo.

View file

@ -3,7 +3,6 @@ name = "dr_transform"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
@ -12,7 +11,6 @@ bpr_thrift = { path = "../thrift_bpr_adapter/thrift/"}
segdense = { path = "../segdense/"} segdense = { path = "../segdense/"}
thrift = "0.17.0" thrift = "0.17.0"
ndarray = "0.15" ndarray = "0.15"
ort = {git ="https://github.com/pykeio/ort.git", tag="v1.14.2"}
base64 = "0.20.0" base64 = "0.20.0"
npyz = "0.7.2" npyz = "0.7.2"
log = "0.4.17" log = "0.4.17"
@ -21,6 +19,11 @@ prometheus = "0.13.1"
once_cell = "1.17.0" once_cell = "1.17.0"
rand = "0.8.5" rand = "0.8.5"
itertools = "0.10.5" itertools = "0.10.5"
anyhow = "1.0.70"
[target.'cfg(not(target_os="linux"))'.dependencies]
ort = {git ="https://github.com/pykeio/ort.git", features=["profiling"], tag="v1.14.6"}
[target.'cfg(target_os="linux")'.dependencies]
ort = {git ="https://github.com/pykeio/ort.git", features=["profiling", "tensorrt", "cuda", "copy-dylibs"], tag="v1.14.6"}
[dev-dependencies] [dev-dependencies]
criterion = "0.3.0" criterion = "0.3.0"

View file

@ -3,3 +3,4 @@ pub mod converter;
#[cfg(test)] #[cfg(test)]
mod test; mod test;
pub mod util; pub mod util;
pub extern crate ort;

View file

@ -1,8 +1,7 @@
[package] [package]
name = "navi" name = "navi"
version = "2.0.42" version = "2.0.45"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[[bin]] [[bin]]
name = "navi" name = "navi"
@ -16,12 +15,19 @@ required-features=["torch"]
name = "navi_onnx" name = "navi_onnx"
path = "src/bin/navi_onnx.rs" path = "src/bin/navi_onnx.rs"
required-features=["onnx"] required-features=["onnx"]
[[bin]]
name = "navi_onnx_test"
path = "src/bin/bin_tests/navi_onnx_test.rs"
[[bin]]
name = "navi_torch_test"
path = "src/bin/bin_tests/navi_torch_test.rs"
required-features=["torch"]
[features] [features]
default=[] default=[]
navi_console=[] navi_console=[]
torch=["tch"] torch=["tch"]
onnx=["ort"] onnx=[]
tf=["tensorflow"] tf=["tensorflow"]
[dependencies] [dependencies]
itertools = "0.10.5" itertools = "0.10.5"
@ -47,6 +53,7 @@ parking_lot = "0.12.1"
rand = "0.8.5" rand = "0.8.5"
rand_pcg = "0.3.1" rand_pcg = "0.3.1"
random = "0.12.2" random = "0.12.2"
x509-parser = "0.15.0"
sha256 = "1.0.3" sha256 = "1.0.3"
tonic = { version = "0.6.2", features=['compression', 'tls'] } tonic = { version = "0.6.2", features=['compression', 'tls'] }
tokio = { version = "1.17.0", features = ["macros", "rt-multi-thread", "fs", "process"] } tokio = { version = "1.17.0", features = ["macros", "rt-multi-thread", "fs", "process"] }
@ -55,16 +62,12 @@ npyz = "0.7.3"
base64 = "0.21.0" base64 = "0.21.0"
histogram = "0.6.9" histogram = "0.6.9"
tch = {version = "0.10.3", optional = true} tch = {version = "0.10.3", optional = true}
tensorflow = { version = "0.20.0", optional = true } tensorflow = { version = "0.18.0", optional = true }
once_cell = {version = "1.17.1"} once_cell = {version = "1.17.1"}
ndarray = "0.15" ndarray = "0.15"
serde = "1.0.154" serde = "1.0.154"
serde_json = "1.0.94" serde_json = "1.0.94"
dr_transform = { path = "../dr_transform"} dr_transform = { path = "../dr_transform"}
[target.'cfg(not(target_os="linux"))'.dependencies]
ort = {git ="https://github.com/pykeio/ort.git", features=["profiling"], optional = true, tag="v1.14.2"}
[target.'cfg(target_os="linux")'.dependencies]
ort = {git ="https://github.com/pykeio/ort.git", features=["profiling", "tensorrt", "cuda", "copy-dylibs"], optional = true, tag="v1.14.2"}
[build-dependencies] [build-dependencies]
tonic-build = {version = "0.6.2", features=['prost', "compression"] } tonic-build = {version = "0.6.2", features=['prost', "compression"] }
[profile.release] [profile.release]
@ -74,3 +77,5 @@ ndarray-rand = "0.14.0"
tokio-test = "*" tokio-test = "*"
assert_cmd = "2.0" assert_cmd = "2.0"
criterion = "0.4.0" criterion = "0.4.0"

View file

@ -1,10 +1,9 @@
#!/bin/sh #!/bin/sh
#RUST_LOG=debug LD_LIBRARY_PATH=so/onnx/lib target/release/navi_onnx --port 30 --num-worker-threads 8 --intra-op-parallelism 8 --inter-op-parallelism 8 \ #RUST_LOG=debug LD_LIBRARY_PATH=so/onnx/lib target/release/navi_onnx --port 30 --num-worker-threads 8 --intra-op-parallelism 8 --inter-op-parallelism 8 \
RUST_LOG=info LD_LIBRARY_PATH=so/onnx/lib cargo run --bin navi_onnx --features onnx -- \ RUST_LOG=info LD_LIBRARY_PATH=so/onnx/lib cargo run --bin navi_onnx --features onnx -- \
--port 30 --num-worker-threads 8 --intra-op-parallelism 8 --inter-op-parallelism 8 \ --port 8030 --num-worker-threads 8 \
--model-check-interval-secs 30 \ --model-check-interval-secs 30 \
--model-dir models/int8 \
--output caligrated_probabilities \
--input "" \
--modelsync-cli "echo" \ --modelsync-cli "echo" \
--onnx-ep-options use_arena=true --onnx-ep-options use_arena=true \
--model-dir models/prod_home --output caligrated_probabilities --input "" --intra-op-parallelism 8 --inter-op-parallelism 8 --max-batch-size 1 --batch-time-out-millis 1 \
--model-dir models/prod_home1 --output caligrated_probabilities --input "" --intra-op-parallelism 8 --inter-op-parallelism 8 --max-batch-size 1 --batch-time-out-millis 1 \

View file

@ -1,11 +1,24 @@
use anyhow::Result; use anyhow::Result;
use log::info;
use navi::cli_args::{ARGS, MODEL_SPECS}; use navi::cli_args::{ARGS, MODEL_SPECS};
use navi::onnx_model::onnx::OnnxModel; use navi::onnx_model::onnx::OnnxModel;
use navi::{bootstrap, metrics}; use navi::{bootstrap, metrics};
fn main() -> Result<()> { fn main() -> Result<()> {
env_logger::init(); env_logger::init();
assert_eq!(MODEL_SPECS.len(), ARGS.inter_op_parallelism.len()); info!("global: {:?}", ARGS.onnx_global_thread_pool_options);
let assert_session_params = if ARGS.onnx_global_thread_pool_options.is_empty() {
// std::env::set_var("OMP_NUM_THREADS", "1");
info!("now we use per session thread pool");
MODEL_SPECS.len()
}
else {
info!("now we use global thread pool");
0
};
assert_eq!(assert_session_params, ARGS.inter_op_parallelism.len());
assert_eq!(assert_session_params, ARGS.inter_op_parallelism.len());
metrics::register_custom_metrics(); metrics::register_custom_metrics();
bootstrap::bootstrap(OnnxModel::new) bootstrap::bootstrap(OnnxModel::new)
} }

View file

@ -207,6 +207,9 @@ impl<T: Model> PredictionService for PredictService<T> {
PredictResult::DropDueToOverload => Err(Status::resource_exhausted("")), PredictResult::DropDueToOverload => Err(Status::resource_exhausted("")),
PredictResult::ModelNotFound(idx) => { PredictResult::ModelNotFound(idx) => {
Err(Status::not_found(format!("model index {}", idx))) Err(Status::not_found(format!("model index {}", idx)))
},
PredictResult::ModelNotReady(idx) => {
Err(Status::unavailable(format!("model index {}", idx)))
} }
PredictResult::ModelVersionNotFound(idx, version) => Err( PredictResult::ModelVersionNotFound(idx, version) => Err(
Status::not_found(format!("model index:{}, version {}", idx, version)), Status::not_found(format!("model index:{}, version {}", idx, version)),

View file

@ -87,13 +87,11 @@ pub struct Args {
pub intra_op_parallelism: Vec<String>, pub intra_op_parallelism: Vec<String>,
#[clap( #[clap(
long, long,
default_value = "14",
help = "number of threads to parallelize computations of the graph" help = "number of threads to parallelize computations of the graph"
)] )]
pub inter_op_parallelism: Vec<String>, pub inter_op_parallelism: Vec<String>,
#[clap( #[clap(
long, long,
default_value = "serving_default",
help = "signature of a serving. only TF" help = "signature of a serving. only TF"
)] )]
pub serving_sig: Vec<String>, pub serving_sig: Vec<String>,
@ -107,10 +105,12 @@ pub struct Args {
help = "max warmup records to use. warmup only implemented for TF" help = "max warmup records to use. warmup only implemented for TF"
)] )]
pub max_warmup_records: usize, pub max_warmup_records: usize,
#[clap(long, value_parser = Args::parse_key_val::<String, String>, value_delimiter=',')]
pub onnx_global_thread_pool_options: Vec<(String, String)>,
#[clap( #[clap(
long, long,
default_value = "true", default_value = "true",
help = "when to use graph parallelization. only for ONNX" help = "when to use graph parallelization. only for ONNX"
)] )]
pub onnx_use_parallel_mode: String, pub onnx_use_parallel_mode: String,
// #[clap(long, default_value = "false")] // #[clap(long, default_value = "false")]

View file

@ -146,6 +146,7 @@ pub enum PredictResult {
Ok(Vec<TensorScores>, i64), Ok(Vec<TensorScores>, i64),
DropDueToOverload, DropDueToOverload,
ModelNotFound(usize), ModelNotFound(usize),
ModelNotReady(usize),
ModelVersionNotFound(usize, i64), ModelVersionNotFound(usize, i64),
} }

View file

@ -13,21 +13,22 @@ pub mod onnx {
use dr_transform::converter::{BatchPredictionRequestToTorchTensorConverter, Converter}; use dr_transform::converter::{BatchPredictionRequestToTorchTensorConverter, Converter};
use itertools::Itertools; use itertools::Itertools;
use log::{debug, info}; use log::{debug, info};
use ort::environment::Environment; use dr_transform::ort::environment::Environment;
use ort::session::Session; use dr_transform::ort::session::Session;
use ort::tensor::InputTensor; use dr_transform::ort::tensor::InputTensor;
use ort::{ExecutionProvider, GraphOptimizationLevel, SessionBuilder}; use dr_transform::ort::{ExecutionProvider, GraphOptimizationLevel, SessionBuilder};
use dr_transform::ort::LoggingLevel;
use serde_json::Value; use serde_json::Value;
use std::fmt::{Debug, Display}; use std::fmt::{Debug, Display};
use std::sync::Arc; use std::sync::Arc;
use std::{fmt, fs}; use std::{fmt, fs};
use tokio::time::Instant; use tokio::time::Instant;
lazy_static! { lazy_static! {
pub static ref ENVIRONMENT: Arc<Environment> = Arc::new( pub static ref ENVIRONMENT: Arc<Environment> = Arc::new(
Environment::builder() Environment::builder()
.with_name("onnx home") .with_name("onnx home")
.with_log_level(ort::LoggingLevel::Error) .with_log_level(LoggingLevel::Error)
.with_global_thread_pool(ARGS.onnx_global_thread_pool_options.clone())
.build() .build()
.unwrap() .unwrap()
); );
@ -101,23 +102,30 @@ pub mod onnx {
let meta_info = format!("{}/{}/{}", ARGS.model_dir[idx], version, META_INFO); let meta_info = format!("{}/{}/{}", ARGS.model_dir[idx], version, META_INFO);
let mut builder = SessionBuilder::new(&ENVIRONMENT)? let mut builder = SessionBuilder::new(&ENVIRONMENT)?
.with_optimization_level(GraphOptimizationLevel::Level3)? .with_optimization_level(GraphOptimizationLevel::Level3)?
.with_parallel_execution(ARGS.onnx_use_parallel_mode == "true")? .with_parallel_execution(ARGS.onnx_use_parallel_mode == "true")?;
.with_inter_threads( if ARGS.onnx_global_thread_pool_options.is_empty() {
utils::get_config_or( builder = builder
model_config, .with_inter_threads(
"inter_op_parallelism", utils::get_config_or(
&ARGS.inter_op_parallelism[idx], model_config,
) "inter_op_parallelism",
.parse()?, &ARGS.inter_op_parallelism[idx],
)? )
.with_intra_threads( .parse()?,
utils::get_config_or( )?
model_config, .with_intra_threads(
"intra_op_parallelism", utils::get_config_or(
&ARGS.intra_op_parallelism[idx], model_config,
) "intra_op_parallelism",
.parse()?, &ARGS.intra_op_parallelism[idx],
)? )
.parse()?,
)?;
}
else {
builder = builder.with_disable_per_session_threads()?;
}
builder = builder
.with_memory_pattern(ARGS.onnx_use_memory_pattern == "true")? .with_memory_pattern(ARGS.onnx_use_memory_pattern == "true")?
.with_execution_providers(&OnnxModel::ep_choices())?; .with_execution_providers(&OnnxModel::ep_choices())?;
match &ARGS.profiling { match &ARGS.profiling {

View file

@ -1,7 +1,7 @@
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use arrayvec::ArrayVec; use arrayvec::ArrayVec;
use itertools::Itertools; use itertools::Itertools;
use log::{error, info, warn}; use log::{error, info};
use std::fmt::{Debug, Display}; use std::fmt::{Debug, Display};
use std::string::String; use std::string::String;
use std::sync::Arc; use std::sync::Arc;
@ -179,17 +179,17 @@ impl<T: Model> PredictService<T> {
//initialize the latest version array //initialize the latest version array
let mut cur_versions = vec!["".to_owned(); MODEL_SPECS.len()]; let mut cur_versions = vec!["".to_owned(); MODEL_SPECS.len()];
loop { loop {
let config = utils::read_config(&meta_file).unwrap_or_else(|e| {
warn!("config file {} not found due to: {}", meta_file, e);
Value::Null
});
info!("***polling for models***"); //nice deliminter info!("***polling for models***"); //nice deliminter
info!("config:{}", config);
if let Some(ref cli) = ARGS.modelsync_cli { if let Some(ref cli) = ARGS.modelsync_cli {
if let Err(e) = call_external_modelsync(cli, &cur_versions).await { if let Err(e) = call_external_modelsync(cli, &cur_versions).await {
error!("model sync cli running error:{}", e) error!("model sync cli running error:{}", e)
} }
} }
let config = utils::read_config(&meta_file).unwrap_or_else(|e| {
info!("config file {} not found due to: {}", meta_file, e);
Value::Null
});
info!("config:{}", config);
for (idx, cur_version) in cur_versions.iter_mut().enumerate() { for (idx, cur_version) in cur_versions.iter_mut().enumerate() {
let model_dir = &ARGS.model_dir[idx]; let model_dir = &ARGS.model_dir[idx];
PredictService::scan_load_latest_model_from_model_dir( PredictService::scan_load_latest_model_from_model_dir(
@ -229,26 +229,32 @@ impl<T: Model> PredictService<T> {
let no_more_msg = match msg { let no_more_msg = match msg {
Ok(PredictMessage::Predict(model_spec_at, version, val, resp, ts)) => { Ok(PredictMessage::Predict(model_spec_at, version, val, resp, ts)) => {
if let Some(model_predictors) = all_model_predictors.get_mut(model_spec_at) { if let Some(model_predictors) = all_model_predictors.get_mut(model_spec_at) {
match version { if model_predictors.is_empty() {
None => model_predictors[0].push(val, resp, ts), resp.send(PredictResult::ModelNotReady(model_spec_at))
Some(the_version) => match model_predictors .unwrap_or_else(|e| error!("cannot send back model not ready error: {:?}", e));
.iter_mut() }
.find(|x| x.model.version() == the_version) else {
{ match version {
None => resp None => model_predictors[0].push(val, resp, ts),
.send(PredictResult::ModelVersionNotFound( Some(the_version) => match model_predictors
model_spec_at, .iter_mut()
the_version, .find(|x| x.model.version() == the_version)
)) {
.unwrap_or_else(|e| { None => resp
error!("cannot send back version error: {:?}", e) .send(PredictResult::ModelVersionNotFound(
}), model_spec_at,
Some(predictor) => predictor.push(val, resp, ts), the_version,
}, ))
.unwrap_or_else(|e| {
error!("cannot send back version error: {:?}", e)
}),
Some(predictor) => predictor.push(val, resp, ts),
},
}
} }
} else { } else {
resp.send(PredictResult::ModelNotFound(model_spec_at)) resp.send(PredictResult::ModelNotFound(model_spec_at))
.unwrap_or_else(|e| error!("cannot send back model error: {:?}", e)) .unwrap_or_else(|e| error!("cannot send back model not found error: {:?}", e))
} }
MPSC_CHANNEL_SIZE.dec(); MPSC_CHANNEL_SIZE.dec();
false false

View file

@ -3,9 +3,9 @@ name = "segdense"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
env_logger = "0.10.0"
serde = { version = "1.0.104", features = ["derive"] } serde = { version = "1.0.104", features = ["derive"] }
serde_json = "1.0.48" serde_json = "1.0.48"
log = "0.4.17" log = "0.4.17"