mirror of
https://gitlab.com/fabinfra/fabaccess/bffh.git
synced 2024-11-22 06:47:56 +01:00
Merge branch 'feature/runtime-improvements' into development
* feature/runtime-improvements: Runtime things furthermore Allow tracking cgroups with futures Oh whoops handle that Get started on supervision trees Attach a GroupID to all LightProcs Noting down improvement ideas for procs More ideas about how to record data A number of small updates batched into one commit Improve Drop guards Even more console shenanigans tracing more data Some bits work \o/ Console is attached and compiles More console features Use `ManuallyDrop` instead of `mem::forget` where appropriate More console implementation stuff Start on the runtime console subscriber
This commit is contained in:
commit
fac0a9ba94
994
Cargo.lock
generated
994
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -49,6 +49,7 @@ dirs = "4.0.0"
|
||||
|
||||
# Runtime
|
||||
executor = { path = "runtime/executor" }
|
||||
console = { path = "runtime/console" }
|
||||
|
||||
# Catch&Handle POSIX process signals
|
||||
signal-hook = "0.3.13"
|
||||
@ -72,9 +73,9 @@ rust-argon2 = "0.8.3"
|
||||
rand = "0.8.4"
|
||||
|
||||
# Async aware logging and tracing
|
||||
tracing = "0.1.28"
|
||||
tracing-subscriber = { version = "0.2.25", features = ["env-filter"] }
|
||||
tracing-futures = { version = "0.2.5", features = ["futures-03"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "registry", "std"] }
|
||||
tracing-futures = { version = "0.2", features = ["futures-03"] }
|
||||
|
||||
# API
|
||||
api = { path = "api" }
|
||||
@ -123,4 +124,4 @@ tempfile = "3.2"
|
||||
shadow-rs = "0.11"
|
||||
|
||||
[workspace]
|
||||
members = ["modules/*", "api"]
|
||||
members = ["runtime/*", "modules/*", "api"]
|
||||
|
@ -51,7 +51,7 @@ impl AuditLog {
|
||||
let mut ser = Serializer::new(&mut writer);
|
||||
line.serialize(&mut ser)
|
||||
.expect("failed to serialize audit log line");
|
||||
writer.write("\n".as_bytes())?;
|
||||
writer.write_all("\n".as_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ use async_net::TcpListener;
|
||||
use capnp_rpc::rpc_twoparty_capnp::Side;
|
||||
use capnp_rpc::twoparty::VatNetwork;
|
||||
use capnp_rpc::RpcSystem;
|
||||
use executor::prelude::Executor;
|
||||
use executor::prelude::{Executor, GroupId, SupervisionRegistry};
|
||||
use futures_rustls::server::TlsStream;
|
||||
use futures_rustls::TlsAcceptor;
|
||||
use futures_util::stream::FuturesUnordered;
|
||||
@ -167,6 +167,7 @@ impl APIServer {
|
||||
tracing::error!("Error during RPC handling: {}", e);
|
||||
}
|
||||
};
|
||||
self.executor.spawn_local(f);
|
||||
let cgroup = SupervisionRegistry::with(SupervisionRegistry::new_group);
|
||||
self.executor.spawn_local_cgroup(f, cgroup);
|
||||
}
|
||||
}
|
||||
|
35
bffhd/lib.rs
35
bffhd/lib.rs
@ -62,6 +62,7 @@ use crate::users::db::UserDB;
|
||||
use crate::users::Users;
|
||||
use executor::pool::Executor;
|
||||
use signal_hook::consts::signal::*;
|
||||
use tracing::Span;
|
||||
|
||||
pub struct Diflouroborane {
|
||||
config: Config,
|
||||
@ -70,20 +71,44 @@ pub struct Diflouroborane {
|
||||
pub users: Users,
|
||||
pub roles: Roles,
|
||||
pub resources: ResourcesHandle,
|
||||
span: Span,
|
||||
}
|
||||
|
||||
pub static RESOURCES: OnceCell<ResourcesHandle> = OnceCell::new();
|
||||
|
||||
impl Diflouroborane {
|
||||
pub fn new(config: Config) -> miette::Result<Self> {
|
||||
logging::init(&config.logging);
|
||||
let mut server = logging::init(&config.logging);
|
||||
let span = tracing::info_span!(
|
||||
target: "bffh",
|
||||
"bffh"
|
||||
);
|
||||
let span2 = span.clone();
|
||||
let _guard = span2.enter();
|
||||
tracing::info!(version = env::VERSION, "Starting BFFH");
|
||||
|
||||
let span = tracing::info_span!("setup");
|
||||
let _guard = span.enter();
|
||||
|
||||
let executor = Executor::new();
|
||||
|
||||
if let Some(aggregator) = server.aggregator.take() {
|
||||
executor.spawn(aggregator.run());
|
||||
}
|
||||
tracing::info!("Server is being spawned");
|
||||
let handle = executor.spawn(server.serve());
|
||||
std::thread::spawn(move || {
|
||||
let result = async_io::block_on(handle);
|
||||
match result {
|
||||
Some(Ok(())) => {
|
||||
tracing::info!("console server finished without error");
|
||||
}
|
||||
Some(Err(error)) => {
|
||||
tracing::info!(%error, "console server finished with error");
|
||||
}
|
||||
None => {
|
||||
tracing::info!("console server finished with panic");
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let env = StateDB::open_env(&config.db_path)?;
|
||||
|
||||
let statedb = StateDB::create_with_env(env.clone())?;
|
||||
@ -111,10 +136,12 @@ impl Diflouroborane {
|
||||
users,
|
||||
roles,
|
||||
resources,
|
||||
span,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn run(&mut self) -> miette::Result<()> {
|
||||
let _guard = self.span.enter();
|
||||
let mut signals = signal_hook_async_std::Signals::new(&[SIGINT, SIGQUIT, SIGTERM])
|
||||
.into_diagnostic()
|
||||
.wrap_err("Failed to construct signal handler")?;
|
||||
|
@ -1,6 +1,7 @@
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct LogConfig {
|
||||
@ -24,21 +25,25 @@ impl Default for LogConfig {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn init(config: &LogConfig) {
|
||||
pub fn init(config: &LogConfig) -> console::Server {
|
||||
let (console, server) = console::ConsoleLayer::new();
|
||||
|
||||
let filter = if let Some(ref filter) = config.filter {
|
||||
EnvFilter::new(filter.as_str())
|
||||
} else {
|
||||
EnvFilter::from_env("BFFH_LOG")
|
||||
};
|
||||
|
||||
let builder = tracing_subscriber::fmt().with_env_filter(filter);
|
||||
let format = &config.format;
|
||||
// TODO: Restore output format settings being settable
|
||||
let fmt_layer = tracing_subscriber::fmt::layer().with_filter(filter);
|
||||
|
||||
let format = config.format.to_lowercase();
|
||||
match format.as_str() {
|
||||
"compact" => builder.compact().init(),
|
||||
"pretty" => builder.pretty().init(),
|
||||
"full" => builder.init(),
|
||||
_ => builder.init(),
|
||||
}
|
||||
tracing::info!(format = format.as_str(), "Logging initialized")
|
||||
tracing_subscriber::registry()
|
||||
.with(fmt_layer)
|
||||
.with(console)
|
||||
.init();
|
||||
|
||||
tracing::info!(format = format.as_str(), "Logging initialized");
|
||||
|
||||
server
|
||||
}
|
||||
|
30
runtime/console/Cargo.toml
Normal file
30
runtime/console/Cargo.toml
Normal file
@ -0,0 +1,30 @@
|
||||
[package]
|
||||
name = "console"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
console-api = "0.3"
|
||||
prost-types = "0.10"
|
||||
tonic = { version = "0.7.2", default_features = false, features = [] }
|
||||
hyper = { version = "0.14", default_features = false, features = ["http2", "server", "stream"] }
|
||||
thread_local = "1.1"
|
||||
tracing = "0.1"
|
||||
tracing-core = "0.1"
|
||||
tracing-subscriber = { version = "0.3", default_features = false, features = ["registry"] }
|
||||
crossbeam-utils = "0.8"
|
||||
crossbeam-channel = "0.5"
|
||||
async-net = "1.6"
|
||||
async-compat = "0.2"
|
||||
async-channel = "1.6"
|
||||
async-oneshot = "0.5"
|
||||
async-io = "1.7"
|
||||
tokio-util = "0.7"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1.19", default_features = false, features = []}
|
||||
hdrhistogram = "7.5"
|
||||
|
||||
[dev-dependencies]
|
||||
tracing-subscriber = "0.3"
|
461
runtime/console/src/aggregate.rs
Normal file
461
runtime/console/src/aggregate.rs
Normal file
@ -0,0 +1,461 @@
|
||||
use crate::id_map::{IdMap, ToProto};
|
||||
use crate::server::{Watch, WatchRequest};
|
||||
use crate::stats::{TimeAnchor, Unsent};
|
||||
use crate::{server, stats};
|
||||
use crate::{Event, Shared};
|
||||
use console_api::{async_ops, instrument, resources, tasks};
|
||||
use crossbeam_channel::{Receiver, TryRecvError};
|
||||
use futures_util::{FutureExt, StreamExt};
|
||||
use std::collections::HashMap;
|
||||
use std::num::NonZeroU64;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use tracing::span;
|
||||
use tracing_core::Metadata;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Resource {
|
||||
id: span::Id,
|
||||
is_dirty: AtomicBool,
|
||||
parent_id: Option<span::Id>,
|
||||
metadata: &'static Metadata<'static>,
|
||||
concrete_type: String,
|
||||
kind: resources::resource::Kind,
|
||||
location: Option<console_api::Location>,
|
||||
is_internal: bool,
|
||||
}
|
||||
|
||||
/// Represents static data for tasks
|
||||
#[derive(Debug)]
|
||||
struct Task {
|
||||
id: span::Id,
|
||||
is_dirty: AtomicBool,
|
||||
metadata: &'static Metadata<'static>,
|
||||
fields: Vec<console_api::Field>,
|
||||
location: Option<console_api::Location>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct AsyncOp {
|
||||
id: span::Id,
|
||||
is_dirty: AtomicBool,
|
||||
parent_id: Option<span::Id>,
|
||||
resource_id: span::Id,
|
||||
metadata: &'static Metadata<'static>,
|
||||
source: String,
|
||||
}
|
||||
|
||||
impl ToProto for Task {
|
||||
type Output = tasks::Task;
|
||||
|
||||
fn to_proto(&self, _: &stats::TimeAnchor) -> Self::Output {
|
||||
tasks::Task {
|
||||
id: Some(self.id.clone().into()),
|
||||
// TODO: more kinds of tasks...
|
||||
kind: tasks::task::Kind::Spawn as i32,
|
||||
metadata: Some(self.metadata.into()),
|
||||
parents: Vec::new(), // TODO: implement parents nicely
|
||||
fields: self.fields.clone(),
|
||||
location: self.location.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Unsent for Task {
|
||||
fn take_unsent(&self) -> bool {
|
||||
self.is_dirty.swap(false, Ordering::AcqRel)
|
||||
}
|
||||
|
||||
fn is_unsent(&self) -> bool {
|
||||
self.is_dirty.load(Ordering::Acquire)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToProto for Resource {
|
||||
type Output = resources::Resource;
|
||||
|
||||
fn to_proto(&self, _: &stats::TimeAnchor) -> Self::Output {
|
||||
resources::Resource {
|
||||
id: Some(self.id.clone().into()),
|
||||
parent_resource_id: self.parent_id.clone().map(Into::into),
|
||||
kind: Some(self.kind.clone()),
|
||||
metadata: Some(self.metadata.into()),
|
||||
concrete_type: self.concrete_type.clone(),
|
||||
location: self.location.clone(),
|
||||
is_internal: self.is_internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Unsent for Resource {
|
||||
fn take_unsent(&self) -> bool {
|
||||
self.is_dirty.swap(false, Ordering::AcqRel)
|
||||
}
|
||||
|
||||
fn is_unsent(&self) -> bool {
|
||||
self.is_dirty.load(Ordering::Acquire)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToProto for AsyncOp {
|
||||
type Output = async_ops::AsyncOp;
|
||||
|
||||
fn to_proto(&self, _: &stats::TimeAnchor) -> Self::Output {
|
||||
async_ops::AsyncOp {
|
||||
id: Some(self.id.clone().into()),
|
||||
metadata: Some(self.metadata.into()),
|
||||
resource_id: Some(self.resource_id.clone().into()),
|
||||
source: self.source.clone(),
|
||||
parent_async_op_id: self.parent_id.clone().map(Into::into),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Unsent for AsyncOp {
|
||||
fn take_unsent(&self) -> bool {
|
||||
self.is_dirty.swap(false, Ordering::AcqRel)
|
||||
}
|
||||
|
||||
fn is_unsent(&self) -> bool {
|
||||
self.is_dirty.load(Ordering::Acquire)
|
||||
}
|
||||
}
|
||||
|
||||
/// Selects which entries of an `IdMap` are serialized into an update.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) enum Include {
    /// Every entry; used for the initial state sent to a new subscriber.
    All,
    /// Only entries whose unsent flag is currently set.
    UpdatedOnly,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Aggregator {
|
||||
shared: Arc<Shared>,
|
||||
events: Receiver<Event>,
|
||||
rpcs: async_channel::Receiver<server::Command>,
|
||||
watchers: Vec<Watch<instrument::Update>>,
|
||||
details_watchers: HashMap<span::Id, Vec<Watch<tasks::TaskDetails>>>,
|
||||
all_metadata: Vec<console_api::register_metadata::NewMetadata>,
|
||||
new_metadata: Vec<console_api::register_metadata::NewMetadata>,
|
||||
running: bool,
|
||||
publish_interval: Duration,
|
||||
base_time: TimeAnchor,
|
||||
tasks: IdMap<Task>,
|
||||
task_stats: IdMap<Arc<stats::TaskStats>>,
|
||||
resources: IdMap<Resource>,
|
||||
resource_stats: IdMap<Arc<stats::ResourceStats>>,
|
||||
async_ops: IdMap<AsyncOp>,
|
||||
async_op_stats: IdMap<Arc<stats::AsyncOpStats>>,
|
||||
poll_ops: Vec<console_api::resources::PollOp>,
|
||||
}
|
||||
|
||||
impl Aggregator {
|
||||
pub(crate) fn new(
|
||||
shared: Arc<Shared>,
|
||||
events: Receiver<Event>,
|
||||
rpcs: async_channel::Receiver<server::Command>,
|
||||
) -> Self {
|
||||
Self {
|
||||
shared,
|
||||
events,
|
||||
rpcs,
|
||||
watchers: Vec::new(),
|
||||
details_watchers: HashMap::default(),
|
||||
running: true,
|
||||
publish_interval: Duration::from_secs(1),
|
||||
all_metadata: Vec::new(),
|
||||
new_metadata: Vec::new(),
|
||||
base_time: TimeAnchor::new(),
|
||||
tasks: IdMap::default(),
|
||||
task_stats: IdMap::default(),
|
||||
resources: IdMap::default(),
|
||||
resource_stats: IdMap::default(),
|
||||
async_ops: IdMap::default(),
|
||||
async_op_stats: IdMap::default(),
|
||||
poll_ops: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn add_instrument_subscription(&mut self, subscription: Watch<instrument::Update>) {
|
||||
tracing::debug!("new instrument subscription");
|
||||
|
||||
let task_update = Some(self.task_update(Include::All));
|
||||
let resource_update = Some(self.resource_update(Include::All));
|
||||
let async_op_update = Some(self.async_op_update(Include::All));
|
||||
let now = Instant::now();
|
||||
|
||||
let update = &instrument::Update {
|
||||
task_update,
|
||||
resource_update,
|
||||
async_op_update,
|
||||
now: Some(self.base_time.to_timestamp(now)),
|
||||
new_metadata: Some(console_api::RegisterMetadata {
|
||||
metadata: (self.all_metadata).clone(),
|
||||
}),
|
||||
};
|
||||
|
||||
// Send the initial state --- if this fails, the subscription is already dead
|
||||
if subscription.update(update) {
|
||||
self.watchers.push(subscription)
|
||||
}
|
||||
}
|
||||
|
||||
/// Add the task details subscription to the watchers after sending the first update,
|
||||
/// if the task is found.
|
||||
fn add_task_detail_subscription(
|
||||
&mut self,
|
||||
watch_request: WatchRequest<console_api::tasks::TaskDetails>,
|
||||
) {
|
||||
let WatchRequest {
|
||||
id,
|
||||
mut stream_sender,
|
||||
buffer,
|
||||
} = watch_request;
|
||||
tracing::debug!(id = ?id, "new task details subscription");
|
||||
if let Some(stats) = self.task_stats.get(&id) {
|
||||
let (tx, rx) = async_channel::bounded(buffer);
|
||||
let subscription = Watch(tx);
|
||||
let now = Some(self.base_time.to_timestamp(Instant::now()));
|
||||
// Send back the stream receiver.
|
||||
// Then send the initial state --- if this fails, the subscription is already dead.
|
||||
if stream_sender.send(rx).is_ok()
|
||||
&& subscription.update(&console_api::tasks::TaskDetails {
|
||||
task_id: Some(id.clone().into()),
|
||||
now,
|
||||
poll_times_histogram: Some(stats.poll_duration_histogram()),
|
||||
})
|
||||
{
|
||||
self.details_watchers
|
||||
.entry(id.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(subscription);
|
||||
}
|
||||
}
|
||||
// If the task is not found, drop `stream_sender` which will result in a not found error
|
||||
}
|
||||
|
||||
fn task_update(&mut self, include: Include) -> tasks::TaskUpdate {
|
||||
tasks::TaskUpdate {
|
||||
new_tasks: self.tasks.as_proto_list(include, &self.base_time),
|
||||
stats_update: self.task_stats.as_proto(include, &self.base_time),
|
||||
dropped_events: self.shared.dropped_tasks.swap(0, Ordering::AcqRel) as u64,
|
||||
}
|
||||
}
|
||||
|
||||
fn resource_update(&mut self, include: Include) -> resources::ResourceUpdate {
|
||||
let new_poll_ops = match include {
|
||||
Include::All => self.poll_ops.clone(),
|
||||
Include::UpdatedOnly => std::mem::take(&mut self.poll_ops),
|
||||
};
|
||||
resources::ResourceUpdate {
|
||||
new_resources: self.resources.as_proto_list(include, &self.base_time),
|
||||
stats_update: self.resource_stats.as_proto(include, &self.base_time),
|
||||
new_poll_ops,
|
||||
dropped_events: self.shared.dropped_resources.swap(0, Ordering::AcqRel) as u64,
|
||||
}
|
||||
}
|
||||
|
||||
fn async_op_update(&mut self, include: Include) -> async_ops::AsyncOpUpdate {
|
||||
async_ops::AsyncOpUpdate {
|
||||
new_async_ops: self.async_ops.as_proto_list(include, &self.base_time),
|
||||
stats_update: self.async_op_stats.as_proto(include, &self.base_time),
|
||||
dropped_events: self.shared.dropped_async_ops.swap(0, Ordering::AcqRel) as u64,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
let mut timer = StreamExt::fuse(async_io::Timer::interval(self.publish_interval));
|
||||
loop {
|
||||
let mut recv = self.rpcs.recv().fuse();
|
||||
let should_send: bool = futures_util::select! {
|
||||
_ = timer.next() => self.running,
|
||||
cmd = recv => {
|
||||
match cmd {
|
||||
Ok(server::Command::Instrument(subscription)) => {
|
||||
self.add_instrument_subscription(subscription);
|
||||
}
|
||||
Ok(server::Command::WatchTaskDetail(request)) => {
|
||||
}
|
||||
Ok(server::Command::Pause) => {
|
||||
self.running = false;
|
||||
}
|
||||
Ok(server::Command::Resume) => {
|
||||
self.running = true;
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::debug!("rpc channel closed, exiting");
|
||||
return
|
||||
}
|
||||
}
|
||||
false
|
||||
},
|
||||
};
|
||||
|
||||
// drain and aggregate buffered events.
|
||||
//
|
||||
// Note: we *don't* want to actually await the call to `recv` --- we
|
||||
// don't want the aggregator task to be woken on every event,
|
||||
// because it will then be woken when its own `poll` calls are
|
||||
// exited. that would result in a busy-loop. instead, we only want
|
||||
// to be woken when the flush interval has elapsed, or when the
|
||||
// channel is almost full.
|
||||
while let Ok(event) = self.events.try_recv() {
|
||||
self.update_state(event);
|
||||
}
|
||||
if let Err(TryRecvError::Disconnected) = self.events.try_recv() {
|
||||
tracing::debug!("event channel closed; terminating");
|
||||
return;
|
||||
}
|
||||
|
||||
// flush data to clients, if there are any currently subscribed
|
||||
// watchers and we should send a new update.
|
||||
if !self.watchers.is_empty() && should_send {
|
||||
self.publish();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn publish(&mut self) {
|
||||
let new_metadata = if !self.new_metadata.is_empty() {
|
||||
Some(console_api::RegisterMetadata {
|
||||
metadata: std::mem::take(&mut self.new_metadata),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let task_update = Some(self.task_update(Include::UpdatedOnly));
|
||||
let resource_update = Some(self.resource_update(Include::UpdatedOnly));
|
||||
let async_op_update = Some(self.async_op_update(Include::UpdatedOnly));
|
||||
|
||||
let update = instrument::Update {
|
||||
now: Some(self.base_time.to_timestamp(Instant::now())),
|
||||
new_metadata,
|
||||
task_update,
|
||||
resource_update,
|
||||
async_op_update,
|
||||
};
|
||||
|
||||
self.watchers
|
||||
.retain(|watch: &Watch<instrument::Update>| watch.update(&update));
|
||||
|
||||
let stats = &self.task_stats;
|
||||
// Assuming there are much fewer task details subscribers than there are
|
||||
// stats updates, iterate over `details_watchers` and compact the map.
|
||||
self.details_watchers.retain(|id, watchers| {
|
||||
if let Some(task_stats) = stats.get(id) {
|
||||
let details = tasks::TaskDetails {
|
||||
task_id: Some(id.clone().into()),
|
||||
now: Some(self.base_time.to_timestamp(Instant::now())),
|
||||
poll_times_histogram: Some(task_stats.poll_duration_histogram()),
|
||||
};
|
||||
watchers.retain(|watch| watch.update(&details));
|
||||
!watchers.is_empty()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Update the current state with data from a single event.
|
||||
fn update_state(&mut self, event: Event) {
|
||||
// do state update
|
||||
match event {
|
||||
Event::Metadata(meta) => {
|
||||
self.all_metadata.push(meta.into());
|
||||
self.new_metadata.push(meta.into());
|
||||
}
|
||||
|
||||
Event::Spawn {
|
||||
id,
|
||||
metadata,
|
||||
stats,
|
||||
fields,
|
||||
location,
|
||||
} => {
|
||||
self.tasks.insert(
|
||||
id.clone(),
|
||||
Task {
|
||||
id: id.clone(),
|
||||
is_dirty: AtomicBool::new(true),
|
||||
metadata,
|
||||
fields,
|
||||
location,
|
||||
// TODO: parents
|
||||
},
|
||||
);
|
||||
|
||||
self.task_stats.insert(id, stats);
|
||||
}
|
||||
|
||||
Event::Resource {
|
||||
id,
|
||||
parent_id,
|
||||
metadata,
|
||||
kind,
|
||||
concrete_type,
|
||||
location,
|
||||
is_internal,
|
||||
stats,
|
||||
} => {
|
||||
self.resources.insert(
|
||||
id.clone(),
|
||||
Resource {
|
||||
id: id.clone(),
|
||||
is_dirty: AtomicBool::new(true),
|
||||
parent_id,
|
||||
kind,
|
||||
metadata,
|
||||
concrete_type,
|
||||
location,
|
||||
is_internal,
|
||||
},
|
||||
);
|
||||
|
||||
self.resource_stats.insert(id, stats);
|
||||
}
|
||||
|
||||
Event::PollOp {
|
||||
metadata,
|
||||
resource_id,
|
||||
op_name,
|
||||
async_op_id,
|
||||
task_id,
|
||||
is_ready,
|
||||
} => {
|
||||
let poll_op = resources::PollOp {
|
||||
metadata: Some(metadata.into()),
|
||||
resource_id: Some(resource_id.into()),
|
||||
name: op_name,
|
||||
task_id: Some(task_id.into()),
|
||||
async_op_id: Some(async_op_id.into()),
|
||||
is_ready,
|
||||
};
|
||||
|
||||
self.poll_ops.push(poll_op);
|
||||
}
|
||||
|
||||
Event::AsyncResourceOp {
|
||||
id,
|
||||
source,
|
||||
resource_id,
|
||||
metadata,
|
||||
parent_id,
|
||||
stats,
|
||||
} => {
|
||||
self.async_ops.insert(
|
||||
id.clone(),
|
||||
AsyncOp {
|
||||
id: id.clone(),
|
||||
is_dirty: AtomicBool::new(true),
|
||||
resource_id,
|
||||
metadata,
|
||||
source,
|
||||
parent_id,
|
||||
},
|
||||
);
|
||||
|
||||
self.async_op_stats.insert(id, stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
113
runtime/console/src/attribute.rs
Normal file
113
runtime/console/src/attribute.rs
Normal file
@ -0,0 +1,113 @@
|
||||
use std::collections::HashMap;
|
||||
use tracing_core::span::Id;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct Attributes {
|
||||
attributes: HashMap<FieldKey, console_api::Attribute>,
|
||||
}
|
||||
|
||||
impl Attributes {
|
||||
pub(crate) fn values(&self) -> impl Iterator<Item = &console_api::Attribute> {
|
||||
self.attributes.values()
|
||||
}
|
||||
|
||||
pub(crate) fn update(&mut self, id: &Id, update: &Update) {
|
||||
let field_name = match update.field.name.as_ref() {
|
||||
Some(name) => name.clone(),
|
||||
None => {
|
||||
tracing::warn!(?update.field, "field missing name, skipping...");
|
||||
return;
|
||||
}
|
||||
};
|
||||
let update_id = id.clone();
|
||||
let key = FieldKey {
|
||||
update_id,
|
||||
field_name,
|
||||
};
|
||||
|
||||
self.attributes
|
||||
.entry(key)
|
||||
.and_modify(|attr| update_attribute(attr, update))
|
||||
.or_insert_with(|| update.clone().into());
|
||||
}
|
||||
}
|
||||
|
||||
fn update_attribute(attribute: &mut console_api::Attribute, update: &Update) {
|
||||
use console_api::field::Value::*;
|
||||
let attribute_val = attribute.field.as_mut().and_then(|a| a.value.as_mut());
|
||||
let update_val = update.field.value.clone();
|
||||
let update_name = update.field.name.clone();
|
||||
match (attribute_val, update_val) {
|
||||
(Some(BoolVal(v)), Some(BoolVal(upd))) => *v = upd,
|
||||
|
||||
(Some(StrVal(v)), Some(StrVal(upd))) => *v = upd,
|
||||
|
||||
(Some(DebugVal(v)), Some(DebugVal(upd))) => *v = upd,
|
||||
|
||||
(Some(U64Val(v)), Some(U64Val(upd))) => match update.op {
|
||||
Some(UpdateOp::Add) => *v = v.saturating_add(upd),
|
||||
|
||||
Some(UpdateOp::Sub) => *v = v.saturating_sub(upd),
|
||||
|
||||
Some(UpdateOp::Override) => *v = upd,
|
||||
|
||||
None => tracing::warn!(
|
||||
"numeric attribute update {:?} needs to have an op field",
|
||||
update_name
|
||||
),
|
||||
},
|
||||
|
||||
(Some(I64Val(v)), Some(I64Val(upd))) => match update.op {
|
||||
Some(UpdateOp::Add) => *v = v.saturating_add(upd),
|
||||
|
||||
Some(UpdateOp::Sub) => *v = v.saturating_sub(upd),
|
||||
|
||||
Some(UpdateOp::Override) => *v = upd,
|
||||
|
||||
None => tracing::warn!(
|
||||
"numeric attribute update {:?} needs to have an op field",
|
||||
update_name
|
||||
),
|
||||
},
|
||||
|
||||
(val, update) => {
|
||||
tracing::warn!(
|
||||
"attribute {:?} cannot be updated by update {:?}",
|
||||
val,
|
||||
update
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Update> for console_api::Attribute {
|
||||
fn from(upd: Update) -> Self {
|
||||
console_api::Attribute {
|
||||
field: Some(upd.field),
|
||||
unit: upd.unit,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct Update {
|
||||
pub(crate) field: console_api::Field,
|
||||
pub(crate) op: Option<UpdateOp>,
|
||||
pub(crate) unit: Option<String>,
|
||||
}
|
||||
|
||||
/// Operation used to combine a numeric update with the stored value.
#[derive(Debug, Clone)]
pub(crate) enum UpdateOp {
    /// Add the update's value to the stored value (saturating).
    Add,
    /// Replace the stored value with the update's value.
    Override,
    /// Subtract the update's value from the stored value (saturating).
    Sub,
}
|
||||
|
||||
/// Represents a key for a `proto::field::Name`. Because the
|
||||
/// proto::field::Name might not be unique we also include the
|
||||
/// resource id in this key
|
||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||
struct FieldKey {
|
||||
update_id: Id,
|
||||
field_name: console_api::field::Name,
|
||||
}
|
75
runtime/console/src/callsites.rs
Normal file
75
runtime/console/src/callsites.rs
Normal file
@ -0,0 +1,75 @@
|
||||
use std::fmt;
|
||||
use std::fmt::Formatter;
|
||||
use std::ptr;
|
||||
use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
|
||||
use tracing::Metadata;
|
||||
|
||||
pub(crate) struct Callsites<const MAX_CALLSITES: usize> {
|
||||
array: [AtomicPtr<Metadata<'static>>; MAX_CALLSITES],
|
||||
len: AtomicUsize,
|
||||
}
|
||||
|
||||
impl<const MAX_CALLSITES: usize> Callsites<MAX_CALLSITES> {
|
||||
pub(crate) fn insert(&self, callsite: &'static Metadata<'static>) {
|
||||
if self.contains(callsite) {
|
||||
return;
|
||||
}
|
||||
|
||||
let idx = self.len.fetch_add(1, Ordering::AcqRel);
|
||||
if idx <= MAX_CALLSITES {
|
||||
self.array[idx]
|
||||
.compare_exchange(
|
||||
ptr::null_mut(),
|
||||
callsite as *const _ as *mut _,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Acquire,
|
||||
)
|
||||
.expect("would have clobbered callsite array");
|
||||
} else {
|
||||
todo!("Need to spill callsite over into backup storage");
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn contains(&self, callsite: &Metadata<'static>) -> bool {
|
||||
let mut idx = 0;
|
||||
let mut end = self.len.load(Ordering::Acquire);
|
||||
while {
|
||||
for cs in &self.array[idx..end] {
|
||||
let ptr = cs.load(Ordering::Acquire);
|
||||
let meta = unsafe { &*ptr };
|
||||
if meta.callsite() == callsite.callsite() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
idx = end;
|
||||
|
||||
// Check if new callsites were added since we iterated
|
||||
end = self.len.load(Ordering::Acquire);
|
||||
end > idx
|
||||
} {}
|
||||
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl<const MAX_CALLSITES: usize> Default for Callsites<MAX_CALLSITES> {
|
||||
fn default() -> Self {
|
||||
const NULLPTR: AtomicPtr<Metadata<'static>> = AtomicPtr::new(ptr::null_mut());
|
||||
Self {
|
||||
array: [NULLPTR; MAX_CALLSITES],
|
||||
len: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const MAX_CALLSITES: usize> fmt::Debug for Callsites<MAX_CALLSITES> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
let len = self.len.load(Ordering::Acquire);
|
||||
f.debug_struct("Callsites")
|
||||
.field("MAX_CALLSITES", &MAX_CALLSITES)
|
||||
.field("array", &&self.array[..len])
|
||||
.field("len", &len)
|
||||
.finish()
|
||||
}
|
||||
}
|
66
runtime/console/src/event.rs
Normal file
66
runtime/console/src/event.rs
Normal file
@ -0,0 +1,66 @@
|
||||
use crate::stats;
|
||||
use console_api::resources;
|
||||
use std::sync::Arc;
|
||||
use tracing::span;
|
||||
use tracing_core::Metadata;
|
||||
|
||||
pub(crate) enum Event {
|
||||
Metadata(&'static Metadata<'static>),
|
||||
Spawn {
|
||||
id: span::Id,
|
||||
metadata: &'static Metadata<'static>,
|
||||
stats: Arc<stats::TaskStats>,
|
||||
fields: Vec<console_api::Field>,
|
||||
location: Option<console_api::Location>,
|
||||
},
|
||||
Resource {
|
||||
id: span::Id,
|
||||
parent_id: Option<span::Id>,
|
||||
metadata: &'static Metadata<'static>,
|
||||
concrete_type: String,
|
||||
kind: resources::resource::Kind,
|
||||
location: Option<console_api::Location>,
|
||||
is_internal: bool,
|
||||
stats: Arc<stats::ResourceStats>,
|
||||
},
|
||||
PollOp {
|
||||
metadata: &'static Metadata<'static>,
|
||||
resource_id: span::Id,
|
||||
op_name: String,
|
||||
async_op_id: span::Id,
|
||||
task_id: span::Id,
|
||||
is_ready: bool,
|
||||
},
|
||||
AsyncResourceOp {
|
||||
id: span::Id,
|
||||
parent_id: Option<span::Id>,
|
||||
resource_id: span::Id,
|
||||
metadata: &'static Metadata<'static>,
|
||||
source: String,
|
||||
|
||||
stats: Arc<stats::AsyncOpStats>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Operation performed on a waker.
#[derive(Clone, Debug, Copy)]
pub(crate) enum WakeOp {
    /// The waker was consumed to wake the task.
    Wake { self_wake: bool },
    /// The task was woken through a reference to the waker.
    WakeByRef { self_wake: bool },
    /// The waker was cloned.
    Clone,
    /// The waker was dropped.
    Drop,
}

impl WakeOp {
    /// Returns `true` if `self` is a `Wake` or `WakeByRef` event.
    pub(crate) fn is_wake(self) -> bool {
        match self {
            Self::Wake { .. } | Self::WakeByRef { .. } => true,
            Self::Clone | Self::Drop => false,
        }
    }

    /// Returns `self` with its `self_wake` flag replaced by the given value.
    ///
    /// Variants without such a flag are returned unchanged.
    pub(crate) fn self_wake(self, self_wake: bool) -> Self {
        match self {
            Self::Wake { .. } => Self::Wake { self_wake },
            Self::WakeByRef { .. } => Self::WakeByRef { self_wake },
            Self::Clone | Self::Drop => self,
        }
    }
}
|
126
runtime/console/src/id_map.rs
Normal file
126
runtime/console/src/id_map.rs
Normal file
@ -0,0 +1,126 @@
|
||||
use crate::aggregate::Include;
|
||||
use crate::stats::{DroppedAt, TimeAnchor, Unsent};
|
||||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant};
|
||||
use tracing_core::span::Id;
|
||||
|
||||
pub(crate) trait ToProto {
|
||||
type Output;
|
||||
fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct IdMap<T> {
|
||||
data: HashMap<Id, T>,
|
||||
}
|
||||
|
||||
impl<T> Default for IdMap<T> {
|
||||
fn default() -> Self {
|
||||
IdMap {
|
||||
data: HashMap::<Id, T>::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Unsent> IdMap<T> {
|
||||
pub(crate) fn insert(&mut self, id: Id, data: T) {
|
||||
self.data.insert(id, data);
|
||||
}
|
||||
|
||||
pub(crate) fn since_last_update(&mut self) -> impl Iterator<Item = (&Id, &mut T)> {
|
||||
self.data.iter_mut().filter_map(|(id, data)| {
|
||||
if data.take_unsent() {
|
||||
Some((id, data))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn all(&self) -> impl Iterator<Item = (&Id, &T)> {
|
||||
self.data.iter()
|
||||
}
|
||||
|
||||
pub(crate) fn get(&self, id: &Id) -> Option<&T> {
|
||||
self.data.get(id)
|
||||
}
|
||||
|
||||
pub(crate) fn as_proto_list(
|
||||
&mut self,
|
||||
include: Include,
|
||||
base_time: &TimeAnchor,
|
||||
) -> Vec<T::Output>
|
||||
where
|
||||
T: ToProto,
|
||||
{
|
||||
match include {
|
||||
Include::UpdatedOnly => self
|
||||
.since_last_update()
|
||||
.map(|(_, d)| d.to_proto(base_time))
|
||||
.collect(),
|
||||
Include::All => self.all().map(|(_, d)| d.to_proto(base_time)).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_proto(
|
||||
&mut self,
|
||||
include: Include,
|
||||
base_time: &TimeAnchor,
|
||||
) -> HashMap<u64, T::Output>
|
||||
where
|
||||
T: ToProto,
|
||||
{
|
||||
match include {
|
||||
Include::UpdatedOnly => self
|
||||
.since_last_update()
|
||||
.map(|(id, d)| (id.into_u64(), d.to_proto(base_time)))
|
||||
.collect(),
|
||||
Include::All => self
|
||||
.all()
|
||||
.map(|(id, d)| (id.into_u64(), d.to_proto(base_time)))
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn drop_closed<R: DroppedAt + Unsent>(
|
||||
&mut self,
|
||||
stats: &mut IdMap<R>,
|
||||
now: Instant,
|
||||
retention: Duration,
|
||||
has_watchers: bool,
|
||||
) {
|
||||
let _span = tracing::debug_span!(
|
||||
"drop_closed",
|
||||
entity = %std::any::type_name::<T>(),
|
||||
stats = %std::any::type_name::<R>(),
|
||||
)
|
||||
.entered();
|
||||
|
||||
// drop closed entities
|
||||
tracing::trace!(?retention, has_watchers, "dropping closed");
|
||||
|
||||
stats.data.retain(|id, stats| {
|
||||
if let Some(dropped_at) = stats.dropped_at() {
|
||||
let dropped_for = now.checked_duration_since(dropped_at).unwrap_or_default();
|
||||
let dirty = stats.is_unsent();
|
||||
let should_drop =
|
||||
// if there are any clients watching, retain all dirty tasks regardless of age
|
||||
(dirty && has_watchers)
|
||||
|| dropped_for > retention;
|
||||
tracing::trace!(
|
||||
stats.id = ?id,
|
||||
stats.dropped_at = ?dropped_at,
|
||||
stats.dropped_for = ?dropped_for,
|
||||
stats.dirty = dirty,
|
||||
should_drop,
|
||||
);
|
||||
return !should_drop;
|
||||
}
|
||||
|
||||
true
|
||||
});
|
||||
|
||||
// drop closed entities which no longer have stats.
|
||||
self.data.retain(|id, _| stats.data.contains_key(id));
|
||||
}
|
||||
}
|
514
runtime/console/src/lib.rs
Normal file
514
runtime/console/src/lib.rs
Normal file
@ -0,0 +1,514 @@
|
||||
use crossbeam_channel::{Sender, TrySendError};
|
||||
use std::borrow::Borrow;
|
||||
use std::cell::RefCell;
|
||||
use std::net::IpAddr;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use thread_local::ThreadLocal;
|
||||
use tracing::span;
|
||||
use tracing_core::span::Attributes;
|
||||
use tracing_core::{Interest, Metadata, Subscriber};
|
||||
use tracing_subscriber::layer::{Context, Filter};
|
||||
use tracing_subscriber::registry::{LookupSpan, SpanRef};
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
mod aggregate;
|
||||
mod attribute;
|
||||
mod callsites;
|
||||
mod event;
|
||||
mod id_map;
|
||||
mod server;
|
||||
mod stack;
|
||||
mod stats;
|
||||
mod visitors;
|
||||
|
||||
use crate::aggregate::Aggregator;
|
||||
use crate::callsites::Callsites;
|
||||
use crate::visitors::{
|
||||
AsyncOpVisitor, PollOpVisitor, ResourceVisitor, ResourceVisitorResult, StateUpdateVisitor,
|
||||
TaskVisitor, WakerVisitor,
|
||||
};
|
||||
use event::Event;
|
||||
pub use server::Server;
|
||||
use stack::SpanStack;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ConsoleLayer {
|
||||
current_spans: ThreadLocal<RefCell<SpanStack>>,
|
||||
|
||||
tx: Sender<Event>,
|
||||
shared: Arc<Shared>,
|
||||
|
||||
spawn_callsites: Callsites<8>,
|
||||
waker_callsites: Callsites<8>,
|
||||
resource_callsites: Callsites<8>,
|
||||
|
||||
/// Set of callsites for spans representing async operations on resources
|
||||
///
|
||||
/// TODO: Take some time to determine more reasonable numbers
|
||||
async_op_callsites: Callsites<32>,
|
||||
|
||||
/// Set of callsites for spans representing async op poll operations
|
||||
///
|
||||
/// TODO: Take some time to determine more reasonable numbers
|
||||
async_op_poll_callsites: Callsites<32>,
|
||||
|
||||
/// Set of callsites for events representing poll operation invocations on resources
|
||||
///
|
||||
/// TODO: Take some time to determine more reasonable numbers
|
||||
poll_op_callsites: Callsites<32>,
|
||||
|
||||
/// Set of callsites for events representing state attribute state updates on resources
|
||||
///
|
||||
/// TODO: Take some time to determine more reasonable numbers
|
||||
resource_state_update_callsites: Callsites<32>,
|
||||
|
||||
/// Set of callsites for events representing state attribute state updates on async resource ops
|
||||
///
|
||||
/// TODO: Take some time to determine more reasonable numbers
|
||||
async_op_state_update_callsites: Callsites<32>,
|
||||
|
||||
max_poll_duration_nanos: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Builder {
|
||||
/// Network Address the console server will listen on
|
||||
server_addr: IpAddr,
|
||||
/// Network Port the console server will listen on
|
||||
server_port: u16,
|
||||
|
||||
/// Number of events that can be buffered before events are dropped.
|
||||
///
|
||||
/// A smaller number will reduce the memory footprint but may lead to more events being dropped
|
||||
/// during activity bursts.
|
||||
event_buffer_capacity: usize,
|
||||
|
||||
client_buffer_capacity: usize,
|
||||
|
||||
poll_duration_max: Duration,
|
||||
}
|
||||
impl Builder {
|
||||
pub fn build(self) -> (ConsoleLayer, Server) {
|
||||
ConsoleLayer::build(self)
|
||||
}
|
||||
}
|
||||
impl Default for Builder {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
// Listen on `::1` (aka localhost) by default
|
||||
server_addr: Server::DEFAULT_ADDR,
|
||||
server_port: Server::DEFAULT_PORT,
|
||||
event_buffer_capacity: ConsoleLayer::DEFAULT_EVENT_BUFFER_CAPACITY,
|
||||
client_buffer_capacity: 1024,
|
||||
poll_duration_max: ConsoleLayer::DEFAULT_POLL_DURATION_MAX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Counters shared between the layer and the aggregator.
///
/// Each counter is incremented by the layer whenever an event of the
/// corresponding category could not be sent because the event channel was
/// full.
#[derive(Debug, Default)]
struct Shared {
    /// Dropped task-related events.
    dropped_tasks: AtomicUsize,
    /// Dropped resource-related events.
    dropped_resources: AtomicUsize,
    /// Dropped async-op-related events.
    dropped_async_ops: AtomicUsize,
}
|
||||
|
||||
impl ConsoleLayer {
|
||||
pub fn new() -> (Self, Server) {
|
||||
Self::builder().build()
|
||||
}
|
||||
pub fn builder() -> Builder {
|
||||
Builder::default()
|
||||
}
|
||||
fn build(config: Builder) -> (Self, Server) {
|
||||
tracing::debug!(
|
||||
?config.server_addr,
|
||||
config.event_buffer_capacity,
|
||||
"configured console subscriber"
|
||||
);
|
||||
|
||||
let (tx, events) = crossbeam_channel::bounded(config.event_buffer_capacity);
|
||||
let shared = Arc::new(Shared::default());
|
||||
let (subscribe, rpcs) = async_channel::bounded(config.client_buffer_capacity);
|
||||
let aggregator = Aggregator::new(shared.clone(), events, rpcs);
|
||||
let server = Server::new(aggregator, config.client_buffer_capacity, subscribe);
|
||||
let layer = Self {
|
||||
current_spans: ThreadLocal::new(),
|
||||
tx,
|
||||
shared,
|
||||
spawn_callsites: Callsites::default(),
|
||||
waker_callsites: Callsites::default(),
|
||||
resource_callsites: Callsites::default(),
|
||||
async_op_callsites: Callsites::default(),
|
||||
async_op_poll_callsites: Callsites::default(),
|
||||
poll_op_callsites: Callsites::default(),
|
||||
resource_state_update_callsites: Callsites::default(),
|
||||
async_op_state_update_callsites: Callsites::default(),
|
||||
max_poll_duration_nanos: config.poll_duration_max.as_nanos() as u64,
|
||||
};
|
||||
|
||||
(layer, server)
|
||||
}
|
||||
}
|
||||
|
||||
impl ConsoleLayer {
|
||||
const DEFAULT_EVENT_BUFFER_CAPACITY: usize = 1024;
|
||||
const DEFAULT_CLIENT_BUFFER_CAPACITY: usize = 1024;
|
||||
|
||||
/// The default maximum value for task poll duration histograms.
|
||||
///
|
||||
/// Any poll duration exceeding this will be clamped to this value. By
|
||||
/// default, the maximum poll duration is one second.
|
||||
///
|
||||
/// See also [`Builder::poll_duration_histogram_max`].
|
||||
pub const DEFAULT_POLL_DURATION_MAX: Duration = Duration::from_secs(1);
|
||||
|
||||
fn is_spawn(&self, metadata: &Metadata<'static>) -> bool {
|
||||
self.spawn_callsites.contains(metadata)
|
||||
}
|
||||
|
||||
fn is_waker(&self, metadata: &Metadata<'static>) -> bool {
|
||||
self.waker_callsites.contains(metadata)
|
||||
}
|
||||
|
||||
fn is_resource(&self, meta: &'static Metadata<'static>) -> bool {
|
||||
self.resource_callsites.contains(meta)
|
||||
}
|
||||
|
||||
fn is_async_op(&self, meta: &'static Metadata<'static>) -> bool {
|
||||
self.async_op_callsites.contains(meta)
|
||||
}
|
||||
|
||||
fn is_id_spawned<S>(&self, id: &span::Id, cx: &Context<'_, S>) -> bool
|
||||
where
|
||||
S: Subscriber + for<'a> LookupSpan<'a>,
|
||||
{
|
||||
cx.span(id)
|
||||
.map(|span| self.is_spawn(span.metadata()))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn is_id_resource<S>(&self, id: &span::Id, cx: &Context<'_, S>) -> bool
|
||||
where
|
||||
S: Subscriber + for<'a> LookupSpan<'a>,
|
||||
{
|
||||
cx.span(id)
|
||||
.map(|span| self.is_resource(span.metadata()))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn is_id_async_op<S>(&self, id: &span::Id, cx: &Context<'_, S>) -> bool
|
||||
where
|
||||
S: Subscriber + for<'a> LookupSpan<'a>,
|
||||
{
|
||||
cx.span(id)
|
||||
.map(|span| self.is_async_op(span.metadata()))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn first_entered<P>(&self, stack: &SpanStack, p: P) -> Option<span::Id>
|
||||
where
|
||||
P: Fn(&span::Id) -> bool,
|
||||
{
|
||||
stack
|
||||
.stack()
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|id| p(id.id()))
|
||||
.map(|id| id.id())
|
||||
.cloned()
|
||||
}
|
||||
|
||||
fn send_stats<S>(
|
||||
&self,
|
||||
dropped: &AtomicUsize,
|
||||
mkEvent: impl FnOnce() -> (Event, S),
|
||||
) -> Option<S> {
|
||||
if self.tx.is_full() {
|
||||
dropped.fetch_add(1, Ordering::Release);
|
||||
return None;
|
||||
}
|
||||
|
||||
let (event, stats) = mkEvent();
|
||||
match self.tx.try_send(event) {
|
||||
Ok(()) => Some(stats),
|
||||
Err(TrySendError::Full(_)) => {
|
||||
dropped.fetch_add(1, Ordering::Release);
|
||||
None
|
||||
}
|
||||
Err(TrySendError::Disconnected(_)) => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn send_metadata(&self, dropped: &AtomicUsize, event: Event) -> bool {
|
||||
self.send_stats(dropped, || (event, ())).is_some()
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> Layer<S> for ConsoleLayer
|
||||
where
|
||||
S: Subscriber + for<'a> LookupSpan<'a>,
|
||||
{
|
||||
fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
|
||||
let dropped = match (metadata.name(), metadata.target()) {
|
||||
(_, TaskVisitor::SPAWN_TARGET) | (TaskVisitor::SPAWN_NAME, _) => {
|
||||
self.spawn_callsites.insert(metadata);
|
||||
&self.shared.dropped_tasks
|
||||
}
|
||||
(_, WakerVisitor::WAKER_EVENT_TARGET) => {
|
||||
self.waker_callsites.insert(metadata);
|
||||
&self.shared.dropped_tasks
|
||||
}
|
||||
(ResourceVisitor::RES_SPAN_NAME, _) => {
|
||||
self.resource_callsites.insert(metadata);
|
||||
&self.shared.dropped_resources
|
||||
}
|
||||
(AsyncOpVisitor::ASYNC_OP_SPAN_NAME, _) => {
|
||||
self.async_op_callsites.insert(metadata);
|
||||
&self.shared.dropped_async_ops
|
||||
}
|
||||
(AsyncOpVisitor::ASYNC_OP_POLL_NAME, _) => {
|
||||
self.async_op_poll_callsites.insert(metadata);
|
||||
&self.shared.dropped_async_ops
|
||||
}
|
||||
(_, PollOpVisitor::POLL_OP_EVENT_TARGET) => {
|
||||
self.poll_op_callsites.insert(metadata);
|
||||
&self.shared.dropped_async_ops
|
||||
}
|
||||
(_, StateUpdateVisitor::RE_STATE_UPDATE_EVENT_TARGET) => {
|
||||
self.resource_state_update_callsites.insert(metadata);
|
||||
&self.shared.dropped_resources
|
||||
}
|
||||
(_, StateUpdateVisitor::AO_STATE_UPDATE_EVENT_TARGET) => {
|
||||
self.async_op_state_update_callsites.insert(metadata);
|
||||
&self.shared.dropped_async_ops
|
||||
}
|
||||
(_, _) => &self.shared.dropped_tasks,
|
||||
};
|
||||
|
||||
self.send_metadata(dropped, Event::Metadata(metadata));
|
||||
|
||||
Interest::always()
|
||||
}
|
||||
|
||||
fn on_new_span(&self, attrs: &Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) {
|
||||
let metadata = attrs.metadata();
|
||||
if self.is_spawn(metadata) {
|
||||
let at = Instant::now();
|
||||
let mut task_visitor = TaskVisitor::new(metadata.into());
|
||||
attrs.record(&mut task_visitor);
|
||||
let (fields, location) = task_visitor.result();
|
||||
if let Some(stats) = self.send_stats(&self.shared.dropped_tasks, move || {
|
||||
let stats = Arc::new(stats::TaskStats::new(self.max_poll_duration_nanos, at));
|
||||
let event = Event::Spawn {
|
||||
id: id.clone(),
|
||||
stats: stats.clone(),
|
||||
metadata,
|
||||
fields,
|
||||
location,
|
||||
};
|
||||
(event, stats)
|
||||
}) {
|
||||
ctx.span(id)
|
||||
.expect("`on_new_span` called with nonexistent span. This is a tracing bug.")
|
||||
.extensions_mut()
|
||||
.insert(stats);
|
||||
}
|
||||
} else if self.is_resource(metadata) {
|
||||
let at = Instant::now();
|
||||
let mut resource_visitor = ResourceVisitor::default();
|
||||
attrs.record(&mut resource_visitor);
|
||||
if let Some(result) = resource_visitor.result() {
|
||||
let ResourceVisitorResult {
|
||||
concrete_type,
|
||||
kind,
|
||||
location,
|
||||
is_internal,
|
||||
inherit_child_attrs,
|
||||
} = result;
|
||||
let parent_id = self.current_spans.get().and_then(|stack| {
|
||||
self.first_entered(&stack.borrow(), |id| self.is_id_resource(id, &ctx))
|
||||
});
|
||||
if let Some(stats) = self.send_stats(&self.shared.dropped_resources, move || {
|
||||
let stats = Arc::new(stats::ResourceStats::new(
|
||||
at,
|
||||
inherit_child_attrs,
|
||||
parent_id.clone(),
|
||||
));
|
||||
let event = Event::Resource {
|
||||
id: id.clone(),
|
||||
parent_id,
|
||||
metadata,
|
||||
concrete_type,
|
||||
kind,
|
||||
location,
|
||||
is_internal,
|
||||
stats: stats.clone(),
|
||||
};
|
||||
(event, stats)
|
||||
}) {
|
||||
ctx.span(id)
|
||||
.expect("if `on_new_span` was called, the span must exist; this is a `tracing` bug!")
|
||||
.extensions_mut()
|
||||
.insert(stats);
|
||||
}
|
||||
}
|
||||
} else if self.is_async_op(metadata) {
|
||||
let at = Instant::now();
|
||||
let mut async_op_visitor = AsyncOpVisitor::default();
|
||||
attrs.record(&mut async_op_visitor);
|
||||
if let Some((source, inherit_child_attrs)) = async_op_visitor.result() {
|
||||
let resource_id = self.current_spans.get().and_then(|stack| {
|
||||
self.first_entered(&stack.borrow(), |id| self.is_id_resource(id, &ctx))
|
||||
});
|
||||
|
||||
let parent_id = self.current_spans.get().and_then(|stack| {
|
||||
self.first_entered(&stack.borrow(), |id| self.is_id_async_op(id, &ctx))
|
||||
});
|
||||
|
||||
if let Some(resource_id) = resource_id {
|
||||
if let Some(stats) =
|
||||
self.send_stats(&self.shared.dropped_async_ops, move || {
|
||||
let stats = Arc::new(stats::AsyncOpStats::new(
|
||||
at,
|
||||
inherit_child_attrs,
|
||||
parent_id.clone(),
|
||||
));
|
||||
let event = Event::AsyncResourceOp {
|
||||
id: id.clone(),
|
||||
parent_id,
|
||||
resource_id,
|
||||
metadata,
|
||||
source,
|
||||
stats: stats.clone(),
|
||||
};
|
||||
(event, stats)
|
||||
})
|
||||
{
|
||||
ctx.span(id)
|
||||
.expect("if `on_new_span` was called, the span must exist; this is a `tracing` bug!")
|
||||
.extensions_mut()
|
||||
.insert(stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn on_event(&self, event: &tracing::Event<'_>, ctx: Context<'_, S>) {
|
||||
let metadata = event.metadata();
|
||||
if self.waker_callsites.contains(metadata) {
|
||||
let at = Instant::now();
|
||||
let mut visitor = WakerVisitor::default();
|
||||
event.record(&mut visitor);
|
||||
if let Some((id, mut op)) = visitor.result() {
|
||||
if let Some(span) = ctx.span(&id) {
|
||||
let exts = span.extensions();
|
||||
if let Some(stats) = exts.get::<Arc<stats::TaskStats>>() {
|
||||
if op.is_wake() {
|
||||
let self_wake = self
|
||||
.current_spans
|
||||
.get()
|
||||
.map(|spans| spans.borrow().iter().any(|span| span == &id))
|
||||
.unwrap_or(false);
|
||||
op = op.self_wake(self_wake);
|
||||
}
|
||||
|
||||
stats.record_wake_op(op, at);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if self.poll_op_callsites.contains(metadata) {
|
||||
}
|
||||
}
|
||||
|
||||
fn on_enter(&self, id: &span::Id, cx: Context<'_, S>) {
|
||||
fn update<S: Subscriber + for<'a> LookupSpan<'a>>(
|
||||
span: &SpanRef<S>,
|
||||
at: Option<Instant>,
|
||||
) -> Option<Instant> {
|
||||
let exts = span.extensions();
|
||||
// if the span we are entering is a task or async op, record the
|
||||
// poll stats.
|
||||
if let Some(stats) = exts.get::<Arc<stats::TaskStats>>() {
|
||||
let at = at.unwrap_or_else(Instant::now);
|
||||
stats.start_poll(at);
|
||||
Some(at)
|
||||
} else if let Some(stats) = exts.get::<Arc<stats::AsyncOpStats>>() {
|
||||
let at = at.unwrap_or_else(Instant::now);
|
||||
stats.start_poll(at);
|
||||
Some(at)
|
||||
// otherwise, is the span a resource? in that case, we also want
|
||||
// to enter it, although we don't care about recording poll
|
||||
// stats.
|
||||
} else if exts.get::<Arc<stats::ResourceStats>>().is_some() {
|
||||
Some(at.unwrap_or_else(Instant::now))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(span) = cx.span(id) {
|
||||
if let Some(now) = update(&span, None) {
|
||||
if let Some(parent) = span.parent() {
|
||||
update(&parent, Some(now));
|
||||
}
|
||||
self.current_spans
|
||||
.get_or_default()
|
||||
.borrow_mut()
|
||||
.push(id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn on_exit(&self, id: &span::Id, cx: Context<'_, S>) {
|
||||
fn update<S: Subscriber + for<'a> LookupSpan<'a>>(
|
||||
span: &SpanRef<S>,
|
||||
at: Option<Instant>,
|
||||
) -> Option<Instant> {
|
||||
let exts = span.extensions();
|
||||
// if the span we are entering is a task or async op, record the
|
||||
// poll stats.
|
||||
if let Some(stats) = exts.get::<Arc<stats::TaskStats>>() {
|
||||
let at = at.unwrap_or_else(Instant::now);
|
||||
stats.end_poll(at);
|
||||
Some(at)
|
||||
} else if let Some(stats) = exts.get::<Arc<stats::AsyncOpStats>>() {
|
||||
let at = at.unwrap_or_else(Instant::now);
|
||||
stats.end_poll(at);
|
||||
Some(at)
|
||||
// otherwise, is the span a resource? in that case, we also want
|
||||
// to enter it, although we don't care about recording poll
|
||||
// stats.
|
||||
} else if exts.get::<Arc<stats::ResourceStats>>().is_some() {
|
||||
Some(at.unwrap_or_else(Instant::now))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(span) = cx.span(id) {
|
||||
if let Some(now) = update(&span, None) {
|
||||
if let Some(parent) = span.parent() {
|
||||
update(&parent, Some(now));
|
||||
}
|
||||
self.current_spans.get_or_default().borrow_mut().pop(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn on_close(&self, id: span::Id, cx: Context<'_, S>) {
|
||||
if let Some(span) = cx.span(&id) {
|
||||
let now = Instant::now();
|
||||
let exts = span.extensions();
|
||||
if let Some(stats) = exts.get::<Arc<stats::TaskStats>>() {
|
||||
stats.drop_task(now);
|
||||
} else if let Some(stats) = exts.get::<Arc<stats::AsyncOpStats>>() {
|
||||
stats.drop_async_op(now);
|
||||
} else if let Some(stats) = exts.get::<Arc<stats::ResourceStats>>() {
|
||||
stats.drop_resource(now);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
223
runtime/console/src/server.rs
Normal file
223
runtime/console/src/server.rs
Normal file
@ -0,0 +1,223 @@
|
||||
use crate::Aggregator;
|
||||
use async_channel::{Receiver, Sender};
|
||||
use async_compat::CompatExt;
|
||||
use console_api::instrument;
|
||||
use console_api::instrument::instrument_server::{Instrument, InstrumentServer};
|
||||
use console_api::tasks;
|
||||
use futures_util::TryStreamExt;
|
||||
use std::error::Error;
|
||||
use std::future::Future;
|
||||
use std::io::IoSlice;
|
||||
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::AsyncRead as TokioAsyncRead;
|
||||
use tokio::io::{AsyncWrite as TokioAsyncWrite, ReadBuf};
|
||||
use tonic::transport::server::Connected;
|
||||
use tonic::Status;
|
||||
use tracing_core::span::Id;
|
||||
|
||||
struct StreamWrapper<T>(T);
|
||||
impl<T> Connected for StreamWrapper<T> {
|
||||
type ConnectInfo = ();
|
||||
|
||||
fn connect_info(&self) -> Self::ConnectInfo {
|
||||
()
|
||||
}
|
||||
}
|
||||
impl<T: TokioAsyncWrite + Unpin> TokioAsyncWrite for StreamWrapper<T> {
|
||||
fn poll_write(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &[u8],
|
||||
) -> Poll<Result<usize, std::io::Error>> {
|
||||
TokioAsyncWrite::poll_write(Pin::new(&mut self.0), cx, buf)
|
||||
}
|
||||
|
||||
fn poll_flush(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
) -> Poll<Result<(), std::io::Error>> {
|
||||
TokioAsyncWrite::poll_flush(Pin::new(&mut self.0), cx)
|
||||
}
|
||||
|
||||
fn poll_shutdown(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
) -> Poll<Result<(), std::io::Error>> {
|
||||
TokioAsyncWrite::poll_shutdown(Pin::new(&mut self.0), cx)
|
||||
}
|
||||
|
||||
fn poll_write_vectored(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
bufs: &[IoSlice<'_>],
|
||||
) -> Poll<Result<usize, std::io::Error>> {
|
||||
TokioAsyncWrite::poll_write_vectored(Pin::new(&mut self.0), cx, bufs)
|
||||
}
|
||||
|
||||
fn is_write_vectored(&self) -> bool {
|
||||
TokioAsyncWrite::is_write_vectored(&self.0)
|
||||
}
|
||||
}
|
||||
impl<T: TokioAsyncRead + Unpin> TokioAsyncRead for StreamWrapper<T> {
|
||||
fn poll_read(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &mut ReadBuf<'_>,
|
||||
) -> Poll<std::io::Result<()>> {
|
||||
TokioAsyncRead::poll_read(Pin::new(&mut self.0), cx, buf)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Server {
|
||||
pub aggregator: Option<Aggregator>,
|
||||
client_buffer_size: usize,
|
||||
subscribe: Sender<Command>,
|
||||
}
|
||||
|
||||
impl Server {
|
||||
//pub(crate) const DEFAULT_ADDR: IpAddr = IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1));
|
||||
pub(crate) const DEFAULT_ADDR: IpAddr = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1));
|
||||
pub(crate) const DEFAULT_PORT: u16 = 49289;
|
||||
|
||||
pub(crate) fn new(
|
||||
aggregator: Aggregator,
|
||||
client_buffer_size: usize,
|
||||
subscribe: Sender<Command>,
|
||||
) -> Self {
|
||||
Self {
|
||||
aggregator: Some(aggregator),
|
||||
client_buffer_size,
|
||||
subscribe,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn serve(
|
||||
mut self, /*, incoming: I */
|
||||
) -> Result<(), Box<dyn Error + Send + Sync + 'static>> {
|
||||
let svc = InstrumentServer::new(self);
|
||||
|
||||
tonic::transport::Server::builder()
|
||||
.add_service(svc)
|
||||
.serve(SocketAddr::new(Self::DEFAULT_ADDR, Self::DEFAULT_PORT))
|
||||
.compat()
|
||||
.await?;
|
||||
|
||||
// TODO: Kill the aggregator task if the serve task has ended.
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Watch<T>(pub(crate) Sender<Result<T, tonic::Status>>);
|
||||
impl<T: Clone> Watch<T> {
|
||||
pub fn update(&self, update: &T) -> bool {
|
||||
self.0.try_send(Ok(update.clone())).is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct WatchRequest<T> {
|
||||
pub id: Id,
|
||||
pub stream_sender: async_oneshot::Sender<Receiver<Result<T, tonic::Status>>>,
|
||||
pub buffer: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum Command {
|
||||
Instrument(Watch<instrument::Update>),
|
||||
WatchTaskDetail(WatchRequest<tasks::TaskDetails>),
|
||||
Pause,
|
||||
Resume,
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl Instrument for Server {
|
||||
type WatchUpdatesStream = async_channel::Receiver<Result<instrument::Update, Status>>;
|
||||
|
||||
async fn watch_updates(
|
||||
&self,
|
||||
request: tonic::Request<instrument::InstrumentRequest>,
|
||||
) -> Result<tonic::Response<Self::WatchUpdatesStream>, tonic::Status> {
|
||||
match request.remote_addr() {
|
||||
Some(addr) => tracing::debug!(client.addr = %addr, "starting a new watch"),
|
||||
None => tracing::debug!(client.addr = %"<unknown>", "starting a new watch"),
|
||||
}
|
||||
|
||||
if !self.subscribe.is_full() {
|
||||
let (tx, rx) = async_channel::bounded(self.client_buffer_size);
|
||||
self.subscribe.send(Command::Instrument(Watch(tx))).await;
|
||||
tracing::debug!("watch started");
|
||||
Ok(tonic::Response::new(rx))
|
||||
} else {
|
||||
Err(tonic::Status::internal(
|
||||
"cannot start new watch, aggregation task is not running",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
type WatchTaskDetailsStream = async_channel::Receiver<Result<tasks::TaskDetails, Status>>;
|
||||
|
||||
async fn watch_task_details(
|
||||
&self,
|
||||
request: tonic::Request<instrument::TaskDetailsRequest>,
|
||||
) -> Result<tonic::Response<Self::WatchTaskDetailsStream>, tonic::Status> {
|
||||
let task_id = request
|
||||
.into_inner()
|
||||
.id
|
||||
.ok_or_else(|| tonic::Status::invalid_argument("missing task_id"))?
|
||||
.id;
|
||||
|
||||
// `tracing` reserves span ID 0 for niche optimization for `Option<Id>`.
|
||||
let id = std::num::NonZeroU64::new(task_id)
|
||||
.map(Id::from_non_zero_u64)
|
||||
.ok_or_else(|| tonic::Status::invalid_argument("task_id cannot be 0"))?;
|
||||
|
||||
if !self.subscribe.is_full() {
|
||||
// Check with the aggregator task to request a stream if the task exists.
|
||||
let (stream_sender, stream_recv) = async_oneshot::oneshot();
|
||||
self.subscribe
|
||||
.send(Command::WatchTaskDetail(WatchRequest {
|
||||
id,
|
||||
stream_sender,
|
||||
buffer: self.client_buffer_size,
|
||||
}))
|
||||
.await;
|
||||
// If the aggregator drops the sender, the task doesn't exist.
|
||||
let rx = stream_recv.await.map_err(|_| {
|
||||
tracing::warn!(id = ?task_id, "requested task not found");
|
||||
tonic::Status::not_found("task not found")
|
||||
})?;
|
||||
|
||||
tracing::debug!(id = ?task_id, "task details watch started");
|
||||
Ok(tonic::Response::new(rx))
|
||||
} else {
|
||||
Err(tonic::Status::internal(
|
||||
"cannot start new watch, aggregation task is not running",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
async fn pause(
|
||||
&self,
|
||||
_request: tonic::Request<instrument::PauseRequest>,
|
||||
) -> Result<tonic::Response<instrument::PauseResponse>, tonic::Status> {
|
||||
self.subscribe.send(Command::Pause).await.map_err(|_| {
|
||||
tonic::Status::internal("cannot pause, aggregation task is not running")
|
||||
})?;
|
||||
Ok(tonic::Response::new(instrument::PauseResponse {}))
|
||||
}
|
||||
|
||||
async fn resume(
|
||||
&self,
|
||||
_request: tonic::Request<instrument::ResumeRequest>,
|
||||
) -> Result<tonic::Response<instrument::ResumeResponse>, tonic::Status> {
|
||||
self.subscribe.send(Command::Resume).await.map_err(|_| {
|
||||
tonic::Status::internal("cannot resume, aggregation task is not running")
|
||||
})?;
|
||||
Ok(tonic::Response::new(instrument::ResumeResponse {}))
|
||||
}
|
||||
}
|
64
runtime/console/src/stack.rs
Normal file
64
runtime/console/src/stack.rs
Normal file
@ -0,0 +1,64 @@
|
||||
use tracing_core::span::Id;
|
||||
|
||||
// This has been copied from tracing-subscriber. Once the library adds
|
||||
// the ability to iterate over entered spans, this code will
|
||||
// no longer be needed here
|
||||
//
|
||||
// https://github.com/tokio-rs/tracing/blob/master/tracing-subscriber/src/registry/stack.rs
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct ContextId {
|
||||
id: Id,
|
||||
duplicate: bool,
|
||||
}
|
||||
|
||||
impl ContextId {
|
||||
pub fn id(&self) -> &Id {
|
||||
&self.id
|
||||
}
|
||||
}
|
||||
|
||||
/// `SpanStack` tracks what spans are currently executing on a thread-local basis.
|
||||
///
|
||||
/// A "separate current span" for each thread is a semantic choice, as each span
|
||||
/// can be executing in a different thread.
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct SpanStack {
|
||||
stack: Vec<ContextId>,
|
||||
}
|
||||
|
||||
impl SpanStack {
|
||||
#[inline]
|
||||
pub(crate) fn push(&mut self, id: Id) -> bool {
|
||||
let duplicate = self.stack.iter().any(|i| i.id == id);
|
||||
self.stack.push(ContextId { id, duplicate });
|
||||
!duplicate
|
||||
}
|
||||
|
||||
/// Pop a currently entered span.
|
||||
///
|
||||
/// Returns `true` if the span was actually exited.
|
||||
#[inline]
|
||||
pub(crate) fn pop(&mut self, expected_id: &Id) -> bool {
|
||||
if let Some((idx, _)) = self
|
||||
.stack
|
||||
.iter()
|
||||
.enumerate()
|
||||
.rev()
|
||||
.find(|(_, ctx_id)| ctx_id.id == *expected_id)
|
||||
{
|
||||
let ContextId { id: _, duplicate } = self.stack.remove(idx);
|
||||
return !duplicate;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub(crate) fn iter(&self) -> impl Iterator<Item = &Id> {
|
||||
self.stack
|
||||
.iter()
|
||||
.filter_map(|ContextId { id, duplicate }| if *duplicate { None } else { Some(id) })
|
||||
}
|
||||
|
||||
pub(crate) fn stack(&self) -> &Vec<ContextId> {
|
||||
&self.stack
|
||||
}
|
||||
}
|
610
runtime/console/src/stats.rs
Normal file
610
runtime/console/src/stats.rs
Normal file
@ -0,0 +1,610 @@
|
||||
use crate::id_map::ToProto;
|
||||
use crate::{attribute, event};
|
||||
use crossbeam_utils::atomic::AtomicCell;
|
||||
use hdrhistogram::serialization::{Serializer, V2Serializer};
|
||||
use std::cmp;
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
use tracing::span;
|
||||
|
||||
/// Tracks whether a value has changes that were not yet sent to clients.
///
/// A value that changed since data was last sent is "dirty" and has to be
/// part of the next update; a value that did not change may be left out.
pub(crate) trait Unsent {
    /// Reports whether there are unsent updates and, if so, clears the flag.
    ///
    /// Used when selecting what goes into the current update: a value that
    /// answers `true` is included and is therefore no longer dirty.
    fn take_unsent(&self) -> bool;

    /// Reports whether there are unsent updates, leaving the flag untouched.
    fn is_unsent(&self) -> bool;
}
|
||||
|
||||
/// An entity (e.g. a task or a resource) that can be dropped at some point
/// in time.
///
/// This generally refers to spans that have been closed, indicating that a
/// task, async op or resource is no longer in use.
pub(crate) trait DroppedAt {
    /// Returns when the entity was dropped, or `None` if it has not been.
    fn dropped_at(&self) -> Option<Instant>;
}
|
||||
|
||||
impl<T: DroppedAt> DroppedAt for Arc<T> {
|
||||
fn dropped_at(&self) -> Option<Instant> {
|
||||
T::dropped_at(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Forwards to the value behind the `Arc`.
impl<T: Unsent> Unsent for Arc<T> {
    fn take_unsent(&self) -> bool {
        let inner: &T = self;
        inner.take_unsent()
    }

    fn is_unsent(&self) -> bool {
        let inner: &T = self;
        inner.is_unsent()
    }
}
|
||||
|
||||
impl<T: ToProto> ToProto for Arc<T> {
|
||||
type Output = T::Output;
|
||||
fn to_proto(&self, base_time: &TimeAnchor) -> T::Output {
|
||||
T::to_proto(self, base_time)
|
||||
}
|
||||
}
|
||||
|
||||
/// Anchors an `Instant` with a `SystemTime` timestamp to allow converting
|
||||
/// monotonic `Instant`s into timestamps that can be sent over the wire.
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct TimeAnchor {
|
||||
mono: Instant,
|
||||
sys: SystemTime,
|
||||
}
|
||||
|
||||
impl TimeAnchor {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
mono: Instant::now(),
|
||||
sys: SystemTime::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn to_system_time(&self, t: Instant) -> SystemTime {
|
||||
let dur = t
|
||||
.checked_duration_since(self.mono)
|
||||
.unwrap_or_else(|| Duration::from_secs(0));
|
||||
self.sys + dur
|
||||
}
|
||||
|
||||
pub(crate) fn to_timestamp(&self, t: Instant) -> prost_types::Timestamp {
|
||||
self.to_system_time(t).into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Something that can record the duration of a completed poll.
trait RecordPoll {
    /// Records one poll that took `duration`.
    fn record_poll_duration(&mut self, duration: Duration);
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct PollStats<H> {
|
||||
/// The number of polls in progress
|
||||
current_polls: AtomicUsize,
|
||||
/// The total number of polls
|
||||
polls: AtomicUsize,
|
||||
timestamps: Mutex<PollTimestamps<H>>,
|
||||
}
|
||||
|
||||
impl<H: RecordPoll> PollStats<H> {
    /// Records that a poll started at `at`.
    ///
    /// Polls may nest; timestamps and the total poll count are only updated
    /// when there was no poll in progress before this one.
    fn start_poll(&self, at: Instant) {
        if self.current_polls.fetch_add(1, Ordering::AcqRel) == 0 {
            // We are starting the first poll
            let mut timestamps = self.timestamps.lock().unwrap();
            if timestamps.first_poll.is_none() {
                timestamps.first_poll = Some(at);
            }

            timestamps.last_poll_started = Some(at);

            self.polls.fetch_add(1, Ordering::Release);
        }
    }

    /// Records that a poll ended at `at`.
    ///
    /// Only the end of the outermost poll in progress is measured: the time
    /// since the last recorded poll start is added to the busy time and fed
    /// to the histogram. Inconsistent timestamps are reported on stderr and
    /// otherwise ignored.
    fn end_poll(&self, at: Instant) {
        // Are we ending the last current poll?
        if self.current_polls.fetch_sub(1, Ordering::AcqRel) > 1 {
            return;
        }

        let mut timestamps = self.timestamps.lock().unwrap();
        let started = match timestamps.last_poll_started {
            Some(last_poll) => last_poll,
            None => {
                eprintln!(
                    "a poll ended, but no start timestamp was recorded. \
                     this is probably a `console-subscriber` bug"
                );
                return;
            }
        };

        timestamps.last_poll_ended = Some(at);
        let elapsed = match at.checked_duration_since(started) {
            Some(elapsed) => elapsed,
            None => {
                eprintln!(
                    "possible Instant clock skew detected: a poll's end timestamp \
                     was before its start timestamp\nstart = {:?}\n end = {:?}",
                    started, at
                );
                return;
            }
        };

        // Record the duration of this poll in the histogram.
        timestamps.histogram.record_poll_duration(elapsed);

        timestamps.busy_time += elapsed;
    }
}
|
||||
|
||||
impl<H> ToProto for PollStats<H> {
|
||||
type Output = console_api::PollStats;
|
||||
|
||||
fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output {
|
||||
let timestamps = self.timestamps.lock().unwrap();
|
||||
console_api::PollStats {
|
||||
polls: self.polls.load(Ordering::Acquire) as u64,
|
||||
first_poll: timestamps.first_poll.map(|at| base_time.to_timestamp(at)),
|
||||
last_poll_started: timestamps
|
||||
.last_poll_started
|
||||
.map(|at| base_time.to_timestamp(at)),
|
||||
last_poll_ended: timestamps
|
||||
.last_poll_ended
|
||||
.map(|at| base_time.to_timestamp(at)),
|
||||
busy_time: Some(timestamps.busy_time.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stats associated with a task.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct TaskStats {
|
||||
is_dirty: AtomicBool,
|
||||
is_dropped: AtomicBool,
|
||||
// task stats
|
||||
pub(crate) created_at: Instant,
|
||||
timestamps: Mutex<TaskTimestamps>,
|
||||
|
||||
// waker stats
|
||||
wakes: AtomicUsize,
|
||||
waker_clones: AtomicUsize,
|
||||
waker_drops: AtomicUsize,
|
||||
self_wakes: AtomicUsize,
|
||||
|
||||
/// Poll durations and other stats.
|
||||
poll_stats: PollStats<Histogram>,
|
||||
}
|
||||
|
||||
impl TaskStats {
|
||||
pub(crate) fn new(poll_duration_max: u64, created_at: Instant) -> Self {
|
||||
Self {
|
||||
is_dirty: AtomicBool::new(true),
|
||||
is_dropped: AtomicBool::new(false),
|
||||
created_at,
|
||||
timestamps: Mutex::new(TaskTimestamps::default()),
|
||||
poll_stats: PollStats {
|
||||
timestamps: Mutex::new(PollTimestamps {
|
||||
histogram: Histogram::new(poll_duration_max),
|
||||
first_poll: None,
|
||||
last_poll_started: None,
|
||||
last_poll_ended: None,
|
||||
busy_time: Duration::new(0, 0),
|
||||
}),
|
||||
current_polls: AtomicUsize::new(0),
|
||||
polls: AtomicUsize::new(0),
|
||||
},
|
||||
wakes: AtomicUsize::new(0),
|
||||
waker_clones: AtomicUsize::new(1),
|
||||
waker_drops: AtomicUsize::new(0),
|
||||
self_wakes: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn poll_duration_histogram(
|
||||
&self,
|
||||
) -> console_api::tasks::task_details::PollTimesHistogram {
|
||||
let hist = self
|
||||
.poll_stats
|
||||
.timestamps
|
||||
.lock()
|
||||
.unwrap()
|
||||
.histogram
|
||||
.to_proto();
|
||||
console_api::tasks::task_details::PollTimesHistogram::Histogram(hist)
|
||||
}
|
||||
|
||||
pub(crate) fn record_wake_op(&self, op: event::WakeOp, at: Instant) {
|
||||
use event::WakeOp;
|
||||
match op {
|
||||
WakeOp::Wake { self_wake } => {
|
||||
self.wake(at, self_wake);
|
||||
}
|
||||
WakeOp::WakeByRef { self_wake } => {
|
||||
self.wake(at, self_wake);
|
||||
}
|
||||
WakeOp::Clone => {
|
||||
self.waker_clones.fetch_add(1, Ordering::Release);
|
||||
}
|
||||
WakeOp::Drop => {
|
||||
self.waker_drops.fetch_add(1, Ordering::Release);
|
||||
}
|
||||
}
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
fn wake(&self, at: Instant, self_wake: bool) {
|
||||
let mut timestamps = self.timestamps.lock().unwrap();
|
||||
timestamps.last_wake = cmp::max(timestamps.last_wake, Some(at));
|
||||
if self_wake {
|
||||
self.wakes.fetch_add(1, Ordering::Release);
|
||||
}
|
||||
self.wakes.fetch_add(1, Ordering::Release);
|
||||
}
|
||||
|
||||
pub(crate) fn start_poll(&self, at: Instant) {
|
||||
self.poll_stats.start_poll(at);
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
pub(crate) fn end_poll(&self, at: Instant) {
|
||||
self.poll_stats.end_poll(at);
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
pub(crate) fn drop_task(&self, dropped_at: Instant) {
|
||||
if self.is_dropped.swap(true, Ordering::AcqRel) {
|
||||
// The task was already dropped.
|
||||
// TODO(eliza): this could maybe panic in debug mode...
|
||||
return;
|
||||
}
|
||||
|
||||
let mut timestamps = self.timestamps.lock().unwrap();
|
||||
let _prev = timestamps.dropped_at.replace(dropped_at);
|
||||
debug_assert_eq!(_prev, None, "tried to drop a task twice; this is a bug!");
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
fn make_dirty(&self) {
|
||||
self.is_dirty.swap(true, Ordering::AcqRel);
|
||||
}
|
||||
}
|
||||
|
||||
impl ToProto for TaskStats {
|
||||
type Output = console_api::tasks::Stats;
|
||||
|
||||
fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output {
|
||||
let poll_stats = Some(self.poll_stats.to_proto(base_time));
|
||||
let timestamps = self.timestamps.lock().unwrap();
|
||||
console_api::tasks::Stats {
|
||||
poll_stats,
|
||||
created_at: Some(base_time.to_timestamp(self.created_at)),
|
||||
dropped_at: timestamps.dropped_at.map(|at| base_time.to_timestamp(at)),
|
||||
wakes: self.wakes.load(Ordering::Acquire) as u64,
|
||||
waker_clones: self.waker_clones.load(Ordering::Acquire) as u64,
|
||||
self_wakes: self.self_wakes.load(Ordering::Acquire) as u64,
|
||||
waker_drops: self.waker_drops.load(Ordering::Acquire) as u64,
|
||||
last_wake: timestamps.last_wake.map(|at| base_time.to_timestamp(at)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Unsent for TaskStats {
|
||||
#[inline]
|
||||
fn take_unsent(&self) -> bool {
|
||||
self.is_dirty.swap(false, Ordering::AcqRel)
|
||||
}
|
||||
|
||||
fn is_unsent(&self) -> bool {
|
||||
self.is_dirty.load(Ordering::Acquire)
|
||||
}
|
||||
}
|
||||
|
||||
impl DroppedAt for TaskStats {
|
||||
fn dropped_at(&self) -> Option<Instant> {
|
||||
// avoid acquiring the lock if we know we haven't tried to drop this
|
||||
// thing yet
|
||||
if self.is_dropped.load(Ordering::Acquire) {
|
||||
return self.timestamps.lock().unwrap().dropped_at;
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Stats associated with an async operation.
|
||||
///
|
||||
/// This shares all of the same fields as [`ResourceStats]`, with the addition
|
||||
/// of [`PollStats`] tracking when the async operation is polled, and the task
|
||||
/// ID of the last task to poll the async op.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct AsyncOpStats {
|
||||
/// The task ID of the last task to poll this async op.
|
||||
///
|
||||
/// This is set every time the async op is polled, in case a future is
|
||||
/// passed between tasks.
|
||||
task_id: AtomicCell<u64>,
|
||||
|
||||
/// Fields shared with `ResourceStats`.
|
||||
pub(crate) stats: ResourceStats,
|
||||
|
||||
/// Poll durations and other stats.
|
||||
poll_stats: PollStats<()>,
|
||||
}
|
||||
|
||||
impl AsyncOpStats {
|
||||
pub(crate) fn new(
|
||||
created_at: Instant,
|
||||
inherit_child_attributes: bool,
|
||||
parent_id: Option<span::Id>,
|
||||
) -> Self {
|
||||
Self {
|
||||
task_id: AtomicCell::new(0),
|
||||
stats: ResourceStats::new(created_at, inherit_child_attributes, parent_id),
|
||||
poll_stats: PollStats::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn task_id(&self) -> Option<u64> {
|
||||
let id = self.task_id.load();
|
||||
if id > 0 {
|
||||
Some(id as u64)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn set_task_id(&self, id: &tracing::span::Id) {
|
||||
self.task_id.store(id.into_u64());
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
pub(crate) fn drop_async_op(&self, dropped_at: Instant) {
|
||||
self.stats.drop_resource(dropped_at)
|
||||
}
|
||||
|
||||
pub(crate) fn start_poll(&self, at: Instant) {
|
||||
self.poll_stats.start_poll(at);
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
pub(crate) fn end_poll(&self, at: Instant) {
|
||||
self.poll_stats.end_poll(at);
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn make_dirty(&self) {
|
||||
self.stats.make_dirty()
|
||||
}
|
||||
}
|
||||
|
||||
/// Dirty tracking is delegated to the shared [`ResourceStats`].
impl Unsent for AsyncOpStats {
    #[inline]
    fn take_unsent(&self) -> bool {
        let shared = &self.stats;
        shared.take_unsent()
    }

    #[inline]
    fn is_unsent(&self) -> bool {
        let shared = &self.stats;
        shared.is_unsent()
    }
}
|
||||
|
||||
impl DroppedAt for AsyncOpStats {
|
||||
fn dropped_at(&self) -> Option<Instant> {
|
||||
self.stats.dropped_at()
|
||||
}
|
||||
}
|
||||
|
||||
impl ToProto for AsyncOpStats {
|
||||
type Output = console_api::async_ops::Stats;
|
||||
|
||||
fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output {
|
||||
let attributes = self
|
||||
.stats
|
||||
.attributes
|
||||
.lock()
|
||||
.unwrap()
|
||||
.values()
|
||||
.cloned()
|
||||
.collect();
|
||||
console_api::async_ops::Stats {
|
||||
poll_stats: Some(self.poll_stats.to_proto(base_time)),
|
||||
created_at: Some(base_time.to_timestamp(self.stats.created_at)),
|
||||
dropped_at: self
|
||||
.stats
|
||||
.dropped_at
|
||||
.lock()
|
||||
.unwrap()
|
||||
.map(|at| base_time.to_timestamp(at)),
|
||||
task_id: self.task_id().map(Into::into),
|
||||
attributes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stats associated with a resource.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ResourceStats {
|
||||
is_dirty: AtomicBool,
|
||||
is_dropped: AtomicBool,
|
||||
created_at: Instant,
|
||||
dropped_at: Mutex<Option<Instant>>,
|
||||
attributes: Mutex<attribute::Attributes>,
|
||||
pub(crate) inherit_child_attributes: bool,
|
||||
pub(crate) parent_id: Option<span::Id>,
|
||||
}
|
||||
|
||||
impl ResourceStats {
|
||||
pub(crate) fn new(
|
||||
created_at: Instant,
|
||||
inherit_child_attributes: bool,
|
||||
parent_id: Option<span::Id>,
|
||||
) -> Self {
|
||||
Self {
|
||||
is_dirty: AtomicBool::new(true),
|
||||
is_dropped: AtomicBool::new(false),
|
||||
created_at,
|
||||
dropped_at: Mutex::new(None),
|
||||
attributes: Default::default(),
|
||||
inherit_child_attributes,
|
||||
parent_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn update_attribute(&self, id: &span::Id, update: &attribute::Update) {
|
||||
self.attributes.lock().unwrap().update(id, update);
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn drop_resource(&self, dropped_at: Instant) {
|
||||
if self.is_dropped.swap(true, Ordering::AcqRel) {
|
||||
// The task was already dropped.
|
||||
// TODO(eliza): this could maybe panic in debug mode...
|
||||
return;
|
||||
}
|
||||
|
||||
let mut timestamp = self.dropped_at.lock().unwrap();
|
||||
let _prev = timestamp.replace(dropped_at);
|
||||
debug_assert_eq!(
|
||||
_prev, None,
|
||||
"tried to drop a resource/async op twice; this is a bug!"
|
||||
);
|
||||
self.make_dirty();
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn make_dirty(&self) {
|
||||
self.is_dirty.swap(true, Ordering::AcqRel);
|
||||
}
|
||||
}
|
||||
|
||||
impl Unsent for ResourceStats {
|
||||
#[inline]
|
||||
fn take_unsent(&self) -> bool {
|
||||
self.is_dirty.swap(false, Ordering::AcqRel)
|
||||
}
|
||||
|
||||
fn is_unsent(&self) -> bool {
|
||||
self.is_dirty.load(Ordering::Acquire)
|
||||
}
|
||||
}
|
||||
|
||||
impl DroppedAt for ResourceStats {
|
||||
fn dropped_at(&self) -> Option<Instant> {
|
||||
// avoid acquiring the lock if we know we haven't tried to drop this
|
||||
// thing yet
|
||||
if self.is_dropped.load(Ordering::Acquire) {
|
||||
return *self.dropped_at.lock().unwrap();
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl ToProto for ResourceStats {
|
||||
type Output = console_api::resources::Stats;
|
||||
|
||||
fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output {
|
||||
let attributes = self.attributes.lock().unwrap().values().cloned().collect();
|
||||
console_api::resources::Stats {
|
||||
created_at: Some(base_time.to_timestamp(self.created_at)),
|
||||
dropped_at: self
|
||||
.dropped_at
|
||||
.lock()
|
||||
.unwrap()
|
||||
.map(|at| base_time.to_timestamp(at)),
|
||||
attributes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Timestamps tracked for a task; both start out as `None`.
#[derive(Debug, Default)]
struct TaskTimestamps {
    /// When the task was dropped.
    dropped_at: Option<Instant>,
    /// When the task was last woken.
    last_wake: Option<Instant>,
}
|
||||
|
||||
/// Poll timestamps and accumulated busy time, together with the recorder `H`
/// that receives individual poll durations.
#[derive(Debug, Default)]
struct PollTimestamps<H> {
    /// When the first poll started.
    first_poll: Option<Instant>,
    /// When the most recent poll started.
    last_poll_started: Option<Instant>,
    /// When the most recent poll ended.
    last_poll_ended: Option<Instant>,
    /// Sum of the recorded poll durations.
    busy_time: Duration,
    /// Recorder for individual poll durations.
    histogram: H,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Histogram {
|
||||
histogram: hdrhistogram::Histogram<u64>,
|
||||
max: u64,
|
||||
outliers: u64,
|
||||
max_outlier: Option<u64>,
|
||||
}
|
||||
|
||||
impl Histogram {
|
||||
fn new(max: u64) -> Self {
|
||||
// significant figures should be in the [0-5] range and memory usage
|
||||
// grows exponentially with higher a sigfig
|
||||
let histogram = hdrhistogram::Histogram::new_with_max(max, 2).unwrap();
|
||||
Self {
|
||||
histogram,
|
||||
max,
|
||||
max_outlier: None,
|
||||
outliers: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_proto(&self) -> console_api::tasks::DurationHistogram {
|
||||
let mut serializer = V2Serializer::new();
|
||||
let mut raw_histogram = Vec::new();
|
||||
serializer
|
||||
.serialize(&self.histogram, &mut raw_histogram)
|
||||
.expect("histogram failed to serialize");
|
||||
console_api::tasks::DurationHistogram {
|
||||
raw_histogram,
|
||||
max_value: self.max,
|
||||
high_outliers: self.outliers,
|
||||
highest_outlier: self.max_outlier,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RecordPoll for Histogram {
|
||||
fn record_poll_duration(&mut self, duration: Duration) {
|
||||
let mut duration_ns = duration.as_nanos() as u64;
|
||||
|
||||
// clamp the duration to the histogram's max value
|
||||
if duration_ns > self.max {
|
||||
self.outliers += 1;
|
||||
self.max_outlier = cmp::max(self.max_outlier, Some(duration_ns));
|
||||
duration_ns = self.max;
|
||||
}
|
||||
|
||||
self.histogram
|
||||
.record(duration_ns)
|
||||
.expect("duration has already been clamped to histogram max value")
|
||||
}
|
||||
}
|
||||
|
||||
impl RecordPoll for () {
|
||||
fn record_poll_duration(&mut self, _: Duration) {
|
||||
// do nothing
|
||||
}
|
||||
}
|
535
runtime/console/src/visitors.rs
Normal file
535
runtime/console/src/visitors.rs
Normal file
@ -0,0 +1,535 @@
|
||||
use crate::{attribute, event};
|
||||
use console_api::resources::resource;
|
||||
use tracing::{field, span};
|
||||
use tracing_core::field::Visit;
|
||||
|
||||
/// Name of the field carrying the source file of a location.
const LOCATION_FILE: &str = "loc.file";
/// Name of the field carrying the source line of a location.
const LOCATION_LINE: &str = "loc.line";
/// Name of the field carrying the source column of a location.
const LOCATION_COLUMN: &str = "loc.col";
/// Name of the boolean field controlling child attribute inheritance.
const INHERIT_FIELD_NAME: &str = "inherits_child_attrs";
|
||||
|
||||
/// Used to extract the fields needed to construct
|
||||
/// an Event::Resource from the metadata of a tracing span
|
||||
/// that has the following shape:
|
||||
///
|
||||
/// tracing::trace_span!(
|
||||
/// "runtime.resource",
|
||||
/// concrete_type = "Sleep",
|
||||
/// kind = "timer",
|
||||
/// is_internal = true,
|
||||
/// inherits_child_attrs = true,
|
||||
/// );
|
||||
///
|
||||
/// Fields:
|
||||
/// concrete_type - indicates the concrete rust type for this resource
|
||||
/// kind - indicates the type of resource (i.e. timer, sync, io )
|
||||
/// is_internal - whether this is a resource type that is not exposed publicly (i.e. BatchSemaphore)
|
||||
/// inherits_child_attrs - whether this resource should inherit the state attributes of its children
|
||||
#[derive(Default)]
|
||||
pub(crate) struct ResourceVisitor {
|
||||
concrete_type: Option<String>,
|
||||
kind: Option<resource::Kind>,
|
||||
is_internal: bool,
|
||||
inherit_child_attrs: bool,
|
||||
line: Option<u32>,
|
||||
file: Option<String>,
|
||||
column: Option<u32>,
|
||||
}
|
||||
|
||||
pub(crate) struct ResourceVisitorResult {
|
||||
pub(crate) concrete_type: String,
|
||||
pub(crate) kind: resource::Kind,
|
||||
pub(crate) location: Option<console_api::Location>,
|
||||
pub(crate) is_internal: bool,
|
||||
pub(crate) inherit_child_attrs: bool,
|
||||
}
|
||||
|
||||
/// Used to extract all fields from the metadata
|
||||
/// of a tracing span
|
||||
pub(crate) struct FieldVisitor {
|
||||
fields: Vec<console_api::Field>,
|
||||
meta_id: console_api::MetaId,
|
||||
}
|
||||
|
||||
/// Used to extract the fields needed to construct
|
||||
/// an `Event::Spawn` from the metadata of a tracing span
|
||||
/// that has the following shape:
|
||||
///
|
||||
/// ```
|
||||
/// tracing::trace_span!(
|
||||
/// target: "tokio::task",
|
||||
/// "runtime.spawn",
|
||||
/// kind = "local",
|
||||
/// task.name = "some_name",
|
||||
/// loc.file = "some_file.rs",
|
||||
/// loc.line = 555,
|
||||
/// loc.col = 5,
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// This visitor has special behavior for `loc.line`, `loc.file`, and `loc.col`
|
||||
/// fields, which are interpreted as a Rust source code location where the task
|
||||
/// was spawned, if they are present. Other fields are recorded as arbitrary
|
||||
/// key-value pairs.
|
||||
pub(crate) struct TaskVisitor {
|
||||
field_visitor: FieldVisitor,
|
||||
line: Option<u32>,
|
||||
file: Option<String>,
|
||||
column: Option<u32>,
|
||||
}
|
||||
|
||||
/// Collects the fields needed to construct an `Event::AsyncOp` from a
/// tracing span shaped like this:
///
/// ```ignore
/// tracing::trace_span!(
///     "runtime.resource.async_op",
///     source = "Sleep::new_timeout",
/// );
/// ```
///
/// # Fields
///
/// - `source` - the method which has created an instance of this async
///   operation
#[derive(Default)]
pub(crate) struct AsyncOpVisitor {
    /// Value of the `source` field, once seen.
    source: Option<String>,
    /// Value of the `inherits_child_attrs` field (`false` until seen).
    inherit_child_attrs: bool,
}
|
||||
|
||||
/// Used to extract the fields needed to construct
|
||||
/// an Event::Waker from the metadata of a tracing span
|
||||
/// that has the following shape:
|
||||
///
|
||||
/// tracing::trace!(
|
||||
/// target: "tokio::task::waker",
|
||||
/// op = "waker.clone",
|
||||
/// task.id = id.into_u64(),
|
||||
/// );
|
||||
///
|
||||
/// Fields:
|
||||
/// task.id - the id of the task this waker will wake
|
||||
/// op - the operation associated with this waker event
|
||||
#[derive(Default)]
|
||||
pub(crate) struct WakerVisitor {
|
||||
id: Option<span::Id>,
|
||||
op: Option<event::WakeOp>,
|
||||
}
|
||||
|
||||
/// Used to extract the fields needed to construct
/// an `Event::PollOp` from the metadata of a tracing event
/// that has the following shape:
///
/// ```ignore
/// tracing::trace!(
///     target: "runtime::resource::poll_op",
///     op_name = "poll_elapsed",
///     is_ready = false,
/// );
/// ```
///
/// # Fields
///
/// - `op_name` - the name of this resource poll operation
/// - `is_ready` - the result of invoking this poll op, as a boolean
///   describing its readiness
#[derive(Default)]
pub(crate) struct PollOpVisitor {
    /// Value of the `op_name` field, once seen.
    op_name: Option<String>,
    /// Value of the `is_ready` field, once seen.
    is_ready: Option<bool>,
}
|
||||
|
||||
/// Used to extract the fields needed to construct
|
||||
/// an Event::StateUpdate from the metadata of a tracing event
|
||||
/// that has the following shape:
|
||||
///
|
||||
/// tracing::trace!(
|
||||
/// target: "runtime::resource::state_update",
|
||||
/// duration = duration,
|
||||
/// duration.unit = "ms",
|
||||
/// duration.op = "override",
|
||||
/// );
|
||||
///
|
||||
/// Fields:
|
||||
/// attribute_name - a field value for a field that has the name of the resource attribute being updated
|
||||
/// value - the value for this update
|
||||
/// unit - the unit for the value being updated (e.g. ms, s, bytes)
|
||||
/// op - the operation that this update performs to the value of the resource attribute (one of: ovr, sub, add)
|
||||
pub(crate) struct StateUpdateVisitor {
|
||||
meta_id: console_api::MetaId,
|
||||
field: Option<console_api::Field>,
|
||||
unit: Option<String>,
|
||||
op: Option<attribute::UpdateOp>,
|
||||
}
|
||||
|
||||
impl ResourceVisitor {
|
||||
pub(crate) const RES_SPAN_NAME: &'static str = "runtime.resource";
|
||||
const RES_CONCRETE_TYPE_FIELD_NAME: &'static str = "concrete_type";
|
||||
const RES_VIZ_FIELD_NAME: &'static str = "is_internal";
|
||||
const RES_KIND_FIELD_NAME: &'static str = "kind";
|
||||
const RES_KIND_TIMER: &'static str = "timer";
|
||||
|
||||
pub(crate) fn result(self) -> Option<ResourceVisitorResult> {
|
||||
let concrete_type = self.concrete_type?;
|
||||
let kind = self.kind?;
|
||||
|
||||
let location = if self.file.is_some() && self.line.is_some() && self.column.is_some() {
|
||||
Some(console_api::Location {
|
||||
file: self.file,
|
||||
line: self.line,
|
||||
column: self.column,
|
||||
..Default::default()
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Some(ResourceVisitorResult {
|
||||
concrete_type,
|
||||
kind,
|
||||
location,
|
||||
is_internal: self.is_internal,
|
||||
inherit_child_attrs: self.inherit_child_attrs,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Visit for ResourceVisitor {
|
||||
fn record_debug(&mut self, _: &field::Field, _: &dyn std::fmt::Debug) {}
|
||||
|
||||
fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
|
||||
match field.name() {
|
||||
Self::RES_CONCRETE_TYPE_FIELD_NAME => self.concrete_type = Some(value.to_string()),
|
||||
Self::RES_KIND_FIELD_NAME => {
|
||||
let kind = Some(match value {
|
||||
Self::RES_KIND_TIMER => {
|
||||
resource::kind::Kind::Known(resource::kind::Known::Timer as i32)
|
||||
}
|
||||
other => resource::kind::Kind::Other(other.to_string()),
|
||||
});
|
||||
self.kind = Some(resource::Kind { kind });
|
||||
}
|
||||
LOCATION_FILE => self.file = Some(value.to_string()),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
|
||||
match field.name() {
|
||||
Self::RES_VIZ_FIELD_NAME => self.is_internal = value,
|
||||
INHERIT_FIELD_NAME => self.inherit_child_attrs = value,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn record_u64(&mut self, field: &tracing_core::Field, value: u64) {
|
||||
match field.name() {
|
||||
LOCATION_LINE => self.line = Some(value as u32),
|
||||
LOCATION_COLUMN => self.column = Some(value as u32),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FieldVisitor {
|
||||
pub(crate) fn new(meta_id: console_api::MetaId) -> Self {
|
||||
FieldVisitor {
|
||||
fields: Vec::default(),
|
||||
meta_id,
|
||||
}
|
||||
}
|
||||
pub(crate) fn result(self) -> Vec<console_api::Field> {
|
||||
self.fields
|
||||
}
|
||||
}
|
||||
|
||||
impl TaskVisitor {
|
||||
pub(crate) const SPAWN_TARGET: &'static str = "executor::task";
|
||||
pub(crate) const SPAWN_NAME: &'static str = "runtime.spawn";
|
||||
|
||||
pub(crate) fn new(meta_id: console_api::MetaId) -> Self {
|
||||
TaskVisitor {
|
||||
field_visitor: FieldVisitor::new(meta_id),
|
||||
line: None,
|
||||
file: None,
|
||||
column: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn result(self) -> (Vec<console_api::Field>, Option<console_api::Location>) {
|
||||
let fields = self.field_visitor.result();
|
||||
let location = if self.file.is_some() && self.line.is_some() && self.column.is_some() {
|
||||
Some(console_api::Location {
|
||||
file: self.file,
|
||||
line: self.line,
|
||||
column: self.column,
|
||||
..Default::default()
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
(fields, location)
|
||||
}
|
||||
}
|
||||
|
||||
impl Visit for TaskVisitor {
|
||||
fn record_debug(&mut self, field: &field::Field, value: &dyn std::fmt::Debug) {
|
||||
self.field_visitor.record_debug(field, value);
|
||||
}
|
||||
|
||||
fn record_i64(&mut self, field: &tracing_core::Field, value: i64) {
|
||||
self.field_visitor.record_i64(field, value);
|
||||
}
|
||||
|
||||
fn record_u64(&mut self, field: &tracing_core::Field, value: u64) {
|
||||
match field.name() {
|
||||
LOCATION_LINE => self.line = Some(value as u32),
|
||||
LOCATION_COLUMN => self.column = Some(value as u32),
|
||||
_ => self.field_visitor.record_u64(field, value),
|
||||
}
|
||||
}
|
||||
|
||||
fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
|
||||
self.field_visitor.record_bool(field, value);
|
||||
}
|
||||
|
||||
fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
|
||||
if field.name() == LOCATION_FILE {
|
||||
self.file = Some(value.to_string());
|
||||
} else {
|
||||
self.field_visitor.record_str(field, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Visit for FieldVisitor {
|
||||
fn record_debug(&mut self, field: &field::Field, value: &dyn std::fmt::Debug) {
|
||||
self.fields.push(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
|
||||
fn record_i64(&mut self, field: &tracing_core::Field, value: i64) {
|
||||
self.fields.push(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
|
||||
fn record_u64(&mut self, field: &tracing_core::Field, value: u64) {
|
||||
self.fields.push(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
|
||||
fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
|
||||
self.fields.push(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
|
||||
fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
|
||||
self.fields.push(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncOpVisitor {
|
||||
pub(crate) const ASYNC_OP_SPAN_NAME: &'static str = "runtime.resource.async_op";
|
||||
pub(crate) const ASYNC_OP_POLL_NAME: &'static str = "runtime.resource.async_op.poll";
|
||||
const ASYNC_OP_SRC_FIELD_NAME: &'static str = "source";
|
||||
|
||||
pub(crate) fn result(self) -> Option<(String, bool)> {
|
||||
let inherit = self.inherit_child_attrs;
|
||||
self.source.map(|s| (s, inherit))
|
||||
}
|
||||
}
|
||||
|
||||
impl Visit for AsyncOpVisitor {
|
||||
fn record_debug(&mut self, _: &field::Field, _: &dyn std::fmt::Debug) {}
|
||||
|
||||
fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
|
||||
if field.name() == Self::ASYNC_OP_SRC_FIELD_NAME {
|
||||
self.source = Some(value.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
|
||||
if field.name() == INHERIT_FIELD_NAME {
|
||||
self.inherit_child_attrs = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WakerVisitor {
|
||||
pub(crate) const WAKER_EVENT_TARGET: &'static str = "executor::waker";
|
||||
|
||||
const WAKE: &'static str = "waker.wake";
|
||||
const WAKE_BY_REF: &'static str = "waker.wake_by_ref";
|
||||
const CLONE: &'static str = "waker.clone";
|
||||
const DROP: &'static str = "waker.drop";
|
||||
const TASK_ID_FIELD_NAME: &'static str = "task.id";
|
||||
|
||||
pub(crate) fn result(self) -> Option<(span::Id, event::WakeOp)> {
|
||||
self.id.zip(self.op)
|
||||
}
|
||||
}
|
||||
|
||||
impl Visit for WakerVisitor {
|
||||
fn record_debug(&mut self, _: &field::Field, _: &dyn std::fmt::Debug) {
|
||||
// don't care (yet?)
|
||||
}
|
||||
|
||||
fn record_u64(&mut self, field: &tracing_core::Field, value: u64) {
|
||||
if field.name() == Self::TASK_ID_FIELD_NAME {
|
||||
self.id = Some(span::Id::from_u64(value));
|
||||
}
|
||||
}
|
||||
|
||||
fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
|
||||
use crate::event::WakeOp;
|
||||
if field.name() == "op" {
|
||||
self.op = Some(match value {
|
||||
Self::WAKE => WakeOp::Wake { self_wake: false },
|
||||
Self::WAKE_BY_REF => WakeOp::WakeByRef { self_wake: false },
|
||||
Self::CLONE => WakeOp::Clone,
|
||||
Self::DROP => WakeOp::Drop,
|
||||
_ => return,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PollOpVisitor {
|
||||
pub(crate) const POLL_OP_EVENT_TARGET: &'static str = "runtime::resource::poll_op";
|
||||
const OP_NAME_FIELD_NAME: &'static str = "op_name";
|
||||
const OP_READINESS_FIELD_NAME: &'static str = "is_ready";
|
||||
|
||||
pub(crate) fn result(self) -> Option<(String, bool)> {
|
||||
let op_name = self.op_name?;
|
||||
let is_ready = self.is_ready?;
|
||||
Some((op_name, is_ready))
|
||||
}
|
||||
}
|
||||
|
||||
impl Visit for PollOpVisitor {
|
||||
fn record_debug(&mut self, _: &field::Field, _: &dyn std::fmt::Debug) {}
|
||||
|
||||
fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
|
||||
if field.name() == Self::OP_READINESS_FIELD_NAME {
|
||||
self.is_ready = Some(value)
|
||||
}
|
||||
}
|
||||
|
||||
fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
|
||||
if field.name() == Self::OP_NAME_FIELD_NAME {
|
||||
self.op_name = Some(value.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StateUpdateVisitor {
|
||||
pub(crate) const RE_STATE_UPDATE_EVENT_TARGET: &'static str = "runtime::resource::state_update";
|
||||
pub(crate) const AO_STATE_UPDATE_EVENT_TARGET: &'static str =
|
||||
"runtime::resource::async_op::state_update";
|
||||
|
||||
const STATE_OP_SUFFIX: &'static str = ".op";
|
||||
const STATE_UNIT_SUFFIX: &'static str = ".unit";
|
||||
|
||||
const OP_ADD: &'static str = "add";
|
||||
const OP_SUB: &'static str = "sub";
|
||||
const OP_OVERRIDE: &'static str = "override";
|
||||
|
||||
pub(crate) fn new(meta_id: console_api::MetaId) -> Self {
|
||||
StateUpdateVisitor {
|
||||
meta_id,
|
||||
field: None,
|
||||
unit: None,
|
||||
op: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn result(self) -> Option<attribute::Update> {
|
||||
Some(attribute::Update {
|
||||
field: self.field?,
|
||||
op: self.op,
|
||||
unit: self.unit,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Visit for StateUpdateVisitor {
|
||||
fn record_debug(&mut self, field: &field::Field, value: &dyn std::fmt::Debug) {
|
||||
if !field.name().ends_with(Self::STATE_OP_SUFFIX)
|
||||
&& !field.name().ends_with(Self::STATE_UNIT_SUFFIX)
|
||||
{
|
||||
self.field = Some(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn record_i64(&mut self, field: &field::Field, value: i64) {
|
||||
if !field.name().ends_with(Self::STATE_OP_SUFFIX)
|
||||
&& !field.name().ends_with(Self::STATE_UNIT_SUFFIX)
|
||||
{
|
||||
self.field = Some(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn record_u64(&mut self, field: &field::Field, value: u64) {
|
||||
if !field.name().ends_with(Self::STATE_OP_SUFFIX)
|
||||
&& !field.name().ends_with(Self::STATE_UNIT_SUFFIX)
|
||||
{
|
||||
self.field = Some(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn record_bool(&mut self, field: &field::Field, value: bool) {
|
||||
if !field.name().ends_with(Self::STATE_OP_SUFFIX)
|
||||
&& !field.name().ends_with(Self::STATE_UNIT_SUFFIX)
|
||||
{
|
||||
self.field = Some(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn record_str(&mut self, field: &field::Field, value: &str) {
|
||||
if field.name().ends_with(Self::STATE_OP_SUFFIX) {
|
||||
match value {
|
||||
Self::OP_ADD => self.op = Some(attribute::UpdateOp::Add),
|
||||
Self::OP_SUB => self.op = Some(attribute::UpdateOp::Sub),
|
||||
Self::OP_OVERRIDE => self.op = Some(attribute::UpdateOp::Override),
|
||||
_ => {}
|
||||
};
|
||||
} else if field.name().ends_with(Self::STATE_UNIT_SUFFIX) {
|
||||
self.unit = Some(value.to_string());
|
||||
} else {
|
||||
self.field = Some(console_api::Field {
|
||||
name: Some(field.name().into()),
|
||||
value: Some(value.into()),
|
||||
metadata_id: Some(self.meta_id.clone()),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
@ -39,14 +39,21 @@ lazy_static = "1.4"
|
||||
libc = "0.2"
|
||||
num_cpus = "1.13"
|
||||
pin-utils = "0.1.0"
|
||||
slab = "0.4"
|
||||
parking_lot = "0.12"
|
||||
|
||||
# Allocator
|
||||
arrayvec = { version = "0.7.0" }
|
||||
futures-timer = "3.0.2"
|
||||
once_cell = "1.4.0"
|
||||
tracing = "0.1.19"
|
||||
crossbeam-queue = "0.3.0"
|
||||
hdrhistogram = "7.5"
|
||||
|
||||
# Stats & Tracing
|
||||
tracing = "0.1"
|
||||
|
||||
# Supervision trees
|
||||
sharded-slab = "0.1"
|
||||
thread_local = "1.1"
|
||||
|
||||
[dev-dependencies]
|
||||
async-std = "1.10.0"
|
||||
|
@ -32,6 +32,7 @@ pub mod manage;
|
||||
pub mod placement;
|
||||
pub mod pool;
|
||||
pub mod run;
|
||||
mod supervision;
|
||||
mod thread_manager;
|
||||
mod worker;
|
||||
|
||||
@ -39,4 +40,6 @@ mod worker;
|
||||
/// Prelude of Bastion Executor
|
||||
pub mod prelude {
|
||||
pub use crate::pool::*;
|
||||
pub use crate::supervision::SupervisionRegistry;
|
||||
pub use lightproc::GroupId;
|
||||
}
|
||||
|
@ -8,11 +8,13 @@
|
||||
//! [`Worker`]: crate::run_queue::Worker
|
||||
|
||||
use crate::run::block;
|
||||
use crate::supervision::SupervisionRegistry;
|
||||
use crate::thread_manager::{DynamicRunner, ThreadManager};
|
||||
use crate::worker::{Sleeper, WorkerThread};
|
||||
use crossbeam_deque::{Injector, Stealer};
|
||||
use lightproc::lightproc::LightProc;
|
||||
use lightproc::recoverable_handle::RecoverableHandle;
|
||||
use lightproc::GroupId;
|
||||
use std::cell::Cell;
|
||||
use std::future::Future;
|
||||
use std::iter::Iterator;
|
||||
@ -20,6 +22,9 @@ use std::marker::PhantomData;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tracing::field::FieldSet;
|
||||
use tracing::metadata::Kind;
|
||||
use tracing::{Instrument, Level, Span};
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Spooler<'a> {
|
||||
@ -45,12 +50,19 @@ impl Spooler<'_> {
|
||||
/// Global executor
|
||||
pub struct Executor<'a> {
|
||||
spooler: Arc<Spooler<'a>>,
|
||||
root_cgroup: GroupId,
|
||||
}
|
||||
|
||||
impl<'a, 'executor: 'a> Executor<'executor> {
|
||||
pub fn new() -> Self {
|
||||
let root_cgroup = SupervisionRegistry::with(|registry| {
|
||||
let cgroup = registry.new_root_group();
|
||||
registry.set_current(&cgroup);
|
||||
cgroup
|
||||
});
|
||||
Executor {
|
||||
spooler: Arc::new(Spooler::new()),
|
||||
root_cgroup,
|
||||
}
|
||||
}
|
||||
|
||||
@ -94,22 +106,75 @@ impl<'a, 'executor: 'a> Executor<'executor> {
|
||||
/// );
|
||||
/// # }
|
||||
/// ```
|
||||
#[track_caller]
|
||||
pub fn spawn<F, R>(&self, future: F) -> RecoverableHandle<R>
|
||||
where
|
||||
F: Future<Output = R> + Send + 'a,
|
||||
R: Send + 'a,
|
||||
{
|
||||
let (task, handle) = LightProc::recoverable(future, self.schedule());
|
||||
let location = std::panic::Location::caller();
|
||||
let cgroup = SupervisionRegistry::current();
|
||||
let id = cgroup.as_ref().map(|id| id.into_u64()).unwrap_or(0);
|
||||
let span = tracing::trace_span!(
|
||||
target: "executor::task",
|
||||
"runtime.spawn",
|
||||
loc.file = location.file(),
|
||||
loc.line = location.line(),
|
||||
loc.col = location.column(),
|
||||
kind = "global",
|
||||
cgroup = id,
|
||||
);
|
||||
|
||||
let (task, handle) = LightProc::recoverable(future, self.schedule(), span, cgroup);
|
||||
tracing::trace!("spawning sendable task");
|
||||
task.schedule();
|
||||
handle
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
pub fn spawn_local<F, R>(&self, future: F) -> RecoverableHandle<R>
|
||||
where
|
||||
F: Future<Output = R> + 'a,
|
||||
R: Send + 'a,
|
||||
{
|
||||
let (task, handle) = LightProc::recoverable(future, schedule_local());
|
||||
let location = std::panic::Location::caller();
|
||||
let cgroup = SupervisionRegistry::current();
|
||||
let id = cgroup.as_ref().map(|id| id.into_u64()).unwrap_or(0);
|
||||
let span = tracing::trace_span!(
|
||||
target: "executor::task",
|
||||
"runtime.spawn",
|
||||
loc.file = location.file(),
|
||||
loc.line = location.line(),
|
||||
loc.col = location.column(),
|
||||
kind = "local",
|
||||
cgroup = id,
|
||||
);
|
||||
|
||||
let (task, handle) = LightProc::recoverable(future, schedule_local(), span, cgroup);
|
||||
tracing::trace!("spawning sendable task");
|
||||
task.schedule();
|
||||
handle
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
pub fn spawn_local_cgroup<F, R>(&self, future: F, cgroup: GroupId) -> RecoverableHandle<R>
|
||||
where
|
||||
F: Future<Output = R> + 'a,
|
||||
R: Send + 'a,
|
||||
{
|
||||
let location = std::panic::Location::caller();
|
||||
let span = tracing::trace_span!(
|
||||
target: "executor::task",
|
||||
"runtime.spawn",
|
||||
loc.file = location.file(),
|
||||
loc.line = location.line(),
|
||||
loc.col = location.column(),
|
||||
kind = "local",
|
||||
cgroup = cgroup.into_u64(),
|
||||
);
|
||||
|
||||
let (task, handle) = LightProc::recoverable(future, schedule_local(), span, Some(cgroup));
|
||||
tracing::trace!("spawning sendable task");
|
||||
task.schedule();
|
||||
handle
|
||||
}
|
||||
|
179
runtime/executor/src/supervision.rs
Normal file
179
runtime/executor/src/supervision.rs
Normal file
@ -0,0 +1,179 @@
|
||||
use lightproc::GroupId;
|
||||
use once_cell::sync::OnceCell;
|
||||
use sharded_slab::pool::Ref;
|
||||
use sharded_slab::{Clear, Pool};
|
||||
use std::borrow::Borrow;
|
||||
use std::cell;
|
||||
use std::cell::RefCell;
|
||||
use std::sync::atomic::{fence, AtomicUsize, Ordering};
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
static REGISTRY: OnceCell<SupervisionRegistry> = OnceCell::new();
|
||||
|
||||
fn id_to_idx(id: &GroupId) -> usize {
|
||||
(id.into_u64() as usize).wrapping_sub(1)
|
||||
}
|
||||
|
||||
fn idx_to_id(idx: usize) -> GroupId {
|
||||
GroupId::from_u64(idx.wrapping_add(1) as u64)
|
||||
}
|
||||
|
||||
pub struct SupervisionRegistry {
|
||||
groups: Pool<GroupInner>,
|
||||
// TODO: would this be better as the full stack?
|
||||
current: ThreadLocal<RefCell<GroupId>>,
|
||||
}
|
||||
|
||||
impl SupervisionRegistry {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
groups: Pool::new(),
|
||||
current: ThreadLocal::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with<T>(f: impl FnOnce(&Self) -> T) -> T {
|
||||
let this = REGISTRY.get_or_init(SupervisionRegistry::new);
|
||||
f(&this)
|
||||
}
|
||||
|
||||
pub(crate) fn get(&self, id: &GroupId) -> Option<Ref<'_, GroupInner>> {
|
||||
self.groups.get(id_to_idx(id))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn current_ref(&self) -> Option<cell::Ref<GroupId>> {
|
||||
self.current.get().map(|c| c.borrow())
|
||||
}
|
||||
|
||||
pub fn current() -> Option<GroupId> {
|
||||
Self::with(|this| this.current_ref().map(|id| this.clone_group(&id)))
|
||||
}
|
||||
|
||||
pub(crate) fn set_current(&self, id: &GroupId) {
|
||||
self.current.get_or(|| RefCell::new(id.clone()));
|
||||
}
|
||||
|
||||
/// Allocate a new group that has no parent.
pub fn new_root_group(&self) -> GroupId {
    let no_parent = None;
    self.new_group_inner(no_parent)
}
|
||||
|
||||
/// Allocate a new group whose parent is the calling thread's current group.
///
/// If the thread has no current group the new group has no parent. Otherwise the parent's
/// reference count is incremented, as the child keeps a handle to it.
pub fn new_group(&self) -> GroupId {
    let parent = self
        .current_ref()
        .as_deref()
        .map(|current| self.clone_group(current));
    self.new_group_inner(parent)
}
|
||||
|
||||
fn new_group_inner(&self, parent: Option<GroupId>) -> GroupId {
|
||||
tracing::trace_span!(
|
||||
target: "executor::supervision",
|
||||
"new_group"
|
||||
);
|
||||
let parent_id = parent.as_ref().map(|id| id.into_non_zero_u64());
|
||||
let idx = self
|
||||
.groups
|
||||
.create_with(|group| {
|
||||
group.parent = parent;
|
||||
|
||||
let ref_cnt = group.ref_count.get_mut();
|
||||
debug_assert_eq!(0, *ref_cnt);
|
||||
*ref_cnt = 1;
|
||||
})
|
||||
.expect("Failed to allocate a new group");
|
||||
|
||||
let id = idx_to_id(idx);
|
||||
tracing::trace!(
|
||||
target: "executor::supervision",
|
||||
parent = parent_id,
|
||||
id = id.into_non_zero_u64(),
|
||||
"process group created"
|
||||
);
|
||||
|
||||
id
|
||||
}
|
||||
|
||||
fn clone_group(&self, id: &GroupId) -> GroupId {
|
||||
tracing::trace!(
|
||||
target: "executor::supervision",
|
||||
id = id.into_u64(),
|
||||
"cloning process group"
|
||||
);
|
||||
let group = self
|
||||
.get(&id)
|
||||
.unwrap_or_else(|| panic!("tried to clone group {:?}, but no such group exists!", id));
|
||||
|
||||
let ref_cnt = group.ref_count.fetch_add(1, Ordering::Relaxed);
|
||||
assert_ne!(
|
||||
0, ref_cnt,
|
||||
"tried cloning group {:?} that was already closed",
|
||||
id
|
||||
);
|
||||
id.clone()
|
||||
}
|
||||
|
||||
/// Try to close the group with the given ID
|
||||
///
|
||||
/// If this method returns `true` the Group was closed. Otherwise there are still references
|
||||
/// left open.
|
||||
fn try_close(&self, id: GroupId) -> bool {
|
||||
tracing::trace!(
|
||||
target: "executor::supervision",
|
||||
id = id.into_u64(),
|
||||
"dropping process group"
|
||||
);
|
||||
let group = match self.get(&id) {
|
||||
None if std::thread::panicking() => return false,
|
||||
None => panic!("tried to drop a ref to {:?}, but no such group exists!", id),
|
||||
Some(group) => group,
|
||||
};
|
||||
|
||||
// Reference count *decreases* on the other hand must observe strong ordering — when
|
||||
let remaining = group.ref_count.fetch_sub(1, Ordering::Release);
|
||||
if !std::thread::panicking() {
|
||||
assert!(remaining < usize::MAX, "group reference count overflow");
|
||||
}
|
||||
if remaining > 1 {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Generate a compiler fence making sure that all other calls to `try_close` are finished
|
||||
// before the one that returns `true`.
|
||||
fence(Ordering::Acquire);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct GroupInner {
|
||||
parent: Option<GroupId>,
|
||||
ref_count: AtomicUsize,
|
||||
}
|
||||
|
||||
impl GroupInner {
|
||||
#[inline]
|
||||
/// Increment the reference count of this group and return the previous value
|
||||
fn increment_refcnt(&self) -> usize {
|
||||
// Reference count increases don't need strong ordering. The increments can be done in
|
||||
// any order as long as they *do* happen.
|
||||
self.ref_count.fetch_add(1, Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for GroupInner {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
parent: None,
|
||||
ref_count: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Clear for GroupInner {
|
||||
fn clear(&mut self) {
|
||||
// A group is always alive as long as at least one of its children is alive. So each
|
||||
// Group holds a reference to its parent if it has one. If a group is being deleted this
|
||||
// reference must be closed too, i.e. the parent reference count reduced by one.
|
||||
if let Some(parent) = self.parent.take() {
|
||||
SupervisionRegistry::with(|reg| reg.try_close(parent));
|
||||
}
|
||||
}
|
||||
}
|
@ -15,6 +15,7 @@ crossbeam-utils = "0.8"
|
||||
pin-utils = "0.1.0"
|
||||
bitfield = "0.13.2"
|
||||
bitflags = "1.3.2"
|
||||
tracing = "0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
crossbeam = "0.8"
|
||||
|
@ -16,9 +16,9 @@
|
||||
#![forbid(missing_docs)]
|
||||
#![forbid(missing_debug_implementations)]
|
||||
#![forbid(unused_import_braces)]
|
||||
#![forbid(unused_imports)]
|
||||
#![warn(unused_imports)]
|
||||
#![forbid(unused_must_use)]
|
||||
#![forbid(unused_variables)]
|
||||
//TODO: reenable #![forbid(unused_variables)]
|
||||
|
||||
mod catch_unwind;
|
||||
mod layout_helpers;
|
||||
@ -33,6 +33,8 @@ pub mod lightproc;
|
||||
pub mod proc_handle;
|
||||
pub mod recoverable_handle;
|
||||
|
||||
pub use proc_data::GroupId;
|
||||
|
||||
/// The lightproc prelude.
|
||||
///
|
||||
/// The prelude re-exports lightproc structs and handles from this crate.
|
||||
|
@ -31,11 +31,13 @@ use crate::proc_ext::ProcFutureExt;
|
||||
use crate::proc_handle::ProcHandle;
|
||||
use crate::raw_proc::RawProc;
|
||||
use crate::recoverable_handle::RecoverableHandle;
|
||||
use crate::GroupId;
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::future::Future;
|
||||
use std::mem;
|
||||
use std::mem::ManuallyDrop;
|
||||
use std::panic::AssertUnwindSafe;
|
||||
use std::ptr::NonNull;
|
||||
use tracing::Span;
|
||||
|
||||
/// Shared functionality for both Send and !Send LightProc
|
||||
pub struct LightProc {
|
||||
@ -45,8 +47,8 @@ pub struct LightProc {
|
||||
|
||||
// LightProc is both Sync and Send because it explicitly handles synchronization internally:
|
||||
// The state of a `LightProc` is only modified atomically guaranteeing a consistent view from all
|
||||
// threads. Existing handles are atomically reference counted so the proc itself will not be dropped
|
||||
// until all pointers to it are themselves dropped.
|
||||
// threads. Existing wakers (and the proc_handle) are atomically reference counted so the proc
|
||||
// itself will not be dropped until all pointers to it are themselves dropped.
|
||||
// However, if the future or result inside the LightProc is !Send the executor must ensure that
|
||||
// the `schedule` function does not move the LightProc to a different thread.
|
||||
unsafe impl Send for LightProc {}
|
||||
@ -76,14 +78,19 @@ impl LightProc {
|
||||
/// println!("future panicked!: {}", &reason);
|
||||
/// });
|
||||
/// ```
|
||||
pub fn recoverable<'a, F, R, S>(future: F, schedule: S) -> (Self, RecoverableHandle<R>)
|
||||
pub fn recoverable<'a, F, R, S>(
|
||||
future: F,
|
||||
schedule: S,
|
||||
span: Span,
|
||||
cgroup: Option<GroupId>,
|
||||
) -> (Self, RecoverableHandle<R>)
|
||||
where
|
||||
F: Future<Output = R> + 'a,
|
||||
R: 'a,
|
||||
S: Fn(LightProc) + 'a,
|
||||
{
|
||||
let recovery_future = AssertUnwindSafe(future).catch_unwind();
|
||||
let (proc, handle) = Self::build(recovery_future, schedule);
|
||||
let (proc, handle) = Self::build(recovery_future, schedule, span, cgroup);
|
||||
(proc, RecoverableHandle::new(handle))
|
||||
}
|
||||
|
||||
@ -92,6 +99,7 @@ impl LightProc {
|
||||
///
|
||||
/// # Example
|
||||
/// ```rust
|
||||
/// # use tracing::Span;
|
||||
/// # use lightproc::prelude::*;
|
||||
/// #
|
||||
/// # // ... future that does work
|
||||
@ -113,15 +121,22 @@ impl LightProc {
|
||||
/// let standard = LightProc::build(
|
||||
/// future,
|
||||
/// schedule_function,
|
||||
/// Span::current(),
|
||||
/// None,
|
||||
/// );
|
||||
/// ```
|
||||
pub fn build<'a, F, R, S>(future: F, schedule: S) -> (Self, ProcHandle<R>)
|
||||
pub fn build<'a, F, R, S>(
|
||||
future: F,
|
||||
schedule: S,
|
||||
span: Span,
|
||||
cgroup: Option<GroupId>,
|
||||
) -> (Self, ProcHandle<R>)
|
||||
where
|
||||
F: Future<Output = R> + 'a,
|
||||
R: 'a,
|
||||
S: Fn(LightProc) + 'a,
|
||||
{
|
||||
let raw_proc = RawProc::allocate(future, schedule);
|
||||
let raw_proc = RawProc::allocate(future, schedule, span, cgroup);
|
||||
let proc = LightProc { raw_proc };
|
||||
let handle = ProcHandle::new(raw_proc);
|
||||
(proc, handle)
|
||||
@ -130,9 +145,9 @@ impl LightProc {
|
||||
///
|
||||
/// Schedule the lightweight process with passed `schedule` function at the build time.
|
||||
pub fn schedule(self) {
|
||||
let ptr = self.raw_proc.as_ptr();
|
||||
let this = ManuallyDrop::new(self);
|
||||
let ptr = this.raw_proc.as_ptr();
|
||||
let pdata = ptr as *const ProcData;
|
||||
mem::forget(self);
|
||||
|
||||
unsafe {
|
||||
((*pdata).vtable.schedule)(ptr);
|
||||
@ -144,9 +159,9 @@ impl LightProc {
|
||||
/// "Running" a lightproc means ticking it once and if it doesn't complete
|
||||
/// immediately re-scheduling it as soon as its Waker wakes it back up.
|
||||
pub fn run(self) {
|
||||
let ptr = self.raw_proc.as_ptr();
|
||||
let this = ManuallyDrop::new(self);
|
||||
let ptr = this.raw_proc.as_ptr();
|
||||
let pdata = ptr as *const ProcData;
|
||||
mem::forget(self);
|
||||
|
||||
unsafe {
|
||||
((*pdata).vtable.tick)(ptr);
|
||||
|
@ -3,8 +3,49 @@ use crate::state::*;
|
||||
use crossbeam_utils::Backoff;
|
||||
use std::cell::Cell;
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::num::NonZeroU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::task::Waker;
|
||||
use tracing::Span;
|
||||
|
||||
/// Opaque id of the group this proc belongs to
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
#[repr(transparent)]
pub struct GroupId(NonZeroU64);

impl GroupId {
    /// Construct an ID from an u64
    ///
    /// # Panics
    /// - if the provided `u64` is `0`.
    pub fn from_u64(i: u64) -> Self {
        NonZeroU64::new(i).map(Self).expect("group id must be > 0")
    }

    /// Construct an ID from a NonZeroU64
    ///
    /// This method can't fail
    #[inline]
    pub const fn from_non_zero_u64(i: NonZeroU64) -> Self {
        Self(i)
    }

    /// Convert a GroupId into a u64
    ///
    /// The returned value is never `0`.
    #[allow(clippy::wrong_self_convention)]
    //noinspection RsSelfConvention
    #[inline]
    pub const fn into_u64(&self) -> u64 {
        self.into_non_zero_u64().get()
    }

    /// Convert a GroupId into a NonZeroU64
    #[allow(clippy::wrong_self_convention)]
    //noinspection RsSelfConvention
    #[inline]
    pub const fn into_non_zero_u64(&self) -> NonZeroU64 {
        self.0
    }
}
|
||||
|
||||
/// The pdata of a proc.
|
||||
///
|
||||
@ -25,6 +66,17 @@ pub(crate) struct ProcData {
|
||||
/// In addition to the actual waker virtual table, it also contains pointers to several other
|
||||
/// methods necessary for bookkeeping the heap-allocated proc.
|
||||
pub(crate) vtable: &'static ProcVTable,
|
||||
|
||||
/// The span assigned to this process.
|
||||
///
|
||||
/// A lightproc has an associated tracing span that allows recording occurrences of vtable calls
|
||||
/// for this process.
|
||||
pub(crate) span: Span,
|
||||
|
||||
/// Control group assigned to this process.
|
||||
///
|
||||
/// The control group links this process to its supervision tree
|
||||
pub(crate) cgroup: Option<GroupId>,
|
||||
}
|
||||
|
||||
impl ProcData {
|
||||
@ -61,7 +113,7 @@ impl ProcData {
|
||||
}
|
||||
}
|
||||
|
||||
/// Notifies the proc blocked on the proc.
|
||||
/// Notifies the proc blocked on this proc, if any.
|
||||
///
|
||||
/// If there is a registered waker, it will be removed from the pdata and woken.
|
||||
#[inline]
|
||||
|
@ -6,6 +6,7 @@ use crate::state::*;
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::future::Future;
|
||||
use std::marker::{PhantomData, Unpin};
|
||||
use std::mem::MaybeUninit;
|
||||
use std::pin::Pin;
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::atomic::Ordering;
|
||||
@ -22,7 +23,9 @@ pub struct ProcHandle<R> {
|
||||
pub(crate) raw_proc: NonNull<()>,
|
||||
|
||||
/// A marker capturing the generic type `R`.
|
||||
pub(crate) result: PhantomData<R>,
|
||||
// TODO: Instead of writing the future output to the RawProc on heap, put it in the handle
|
||||
// (if still available).
|
||||
pub(crate) marker: PhantomData<R>,
|
||||
}
|
||||
|
||||
unsafe impl<R: Send> Send for ProcHandle<R> {}
|
||||
@ -34,7 +37,7 @@ impl<R> ProcHandle<R> {
|
||||
pub(crate) fn new(raw_proc: NonNull<()>) -> Self {
|
||||
Self {
|
||||
raw_proc,
|
||||
result: PhantomData,
|
||||
marker: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
@ -48,6 +51,13 @@ impl<R> ProcHandle<R> {
|
||||
let pdata = ptr as *const ProcData;
|
||||
|
||||
unsafe {
|
||||
let id = (&(*pdata).span).id().map(|id| id.into_u64()).unwrap_or(0);
|
||||
tracing::trace!(
|
||||
target: "executor::handle",
|
||||
op = "handle.cancel",
|
||||
task.id = id,
|
||||
);
|
||||
|
||||
let mut state = (*pdata).state.load(Ordering::Acquire);
|
||||
|
||||
loop {
|
||||
@ -189,6 +199,14 @@ impl<R> Drop for ProcHandle<R> {
|
||||
let mut output = None;
|
||||
|
||||
unsafe {
|
||||
// Record dropping the handle for this task
|
||||
let id = (&(*pdata).span).id().map(|id| id.into_u64()).unwrap_or(0);
|
||||
tracing::trace!(
|
||||
target: "executor::handle",
|
||||
op = "handle.drop",
|
||||
task.id = id,
|
||||
);
|
||||
|
||||
// Optimistically assume the `ProcHandle` is being dropped just after creating the
|
||||
// proc. This is a common case so if the handle is not used, the overhead of it is only
|
||||
// one compare-exchange operation.
|
||||
|
@ -15,17 +15,30 @@ use std::pin::Pin;
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use crate::GroupId;
|
||||
use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker};
|
||||
use tracing::Span;
|
||||
|
||||
/// Raw pointers to the fields of a proc.
|
||||
// TODO: Make generic over the Allocator used!
|
||||
// TODO: The actual layout stored could be expressed as a struct w/ union. Maybe do that?
|
||||
pub(crate) struct RawProc<'a, F, R, S> {
|
||||
pub(crate) pdata: *const ProcData,
|
||||
pub(crate) schedule: *const S,
|
||||
pub(crate) future: *mut F,
|
||||
// TODO: Replace with `*mut MaybeUninit`? And also, store the result in the handle if that's
|
||||
// still available, instead of copying it to the heap.
|
||||
pub(crate) output: *mut R,
|
||||
|
||||
// Make the lifetime 'a of the future invariant
|
||||
_marker: PhantomData<&'a ()>,
|
||||
// TODO: We should link a proc to a process group for scheduling and tracing
|
||||
// - sub-tasks should start in the same group by default
|
||||
// - that data needs to be available when calling `spawn` and to decide which task to run.
|
||||
// So there must be a thread-local reference to it that's managed by the executor, and
|
||||
// updated when a new task is being polled.
|
||||
// Additionally `schedule` must have a reference to it to be able to push to the right
|
||||
// queue? The `schedule` fn could just come from the group instead.
|
||||
}
|
||||
|
||||
impl<'a, F, R, S> RawProc<'a, F, R, S>
|
||||
@ -37,7 +50,12 @@ where
|
||||
/// Allocates a proc with the given `future` and `schedule` function.
|
||||
///
|
||||
/// It is assumed there are initially only the `LightProc` reference and the `ProcHandle`.
|
||||
pub(crate) fn allocate(future: F, schedule: S) -> NonNull<()> {
|
||||
pub(crate) fn allocate(
|
||||
future: F,
|
||||
schedule: S,
|
||||
span: Span,
|
||||
cgroup: Option<GroupId>,
|
||||
) -> NonNull<()> {
|
||||
// Compute the layout of the proc for allocation. Abort if the computation fails.
|
||||
let proc_layout = Self::proc_layout();
|
||||
|
||||
@ -70,6 +88,8 @@ where
|
||||
destroy: Self::destroy,
|
||||
tick: Self::tick,
|
||||
},
|
||||
span,
|
||||
cgroup,
|
||||
});
|
||||
|
||||
// Write the schedule function as the third field of the proc.
|
||||
@ -128,6 +148,15 @@ where
|
||||
/// Wakes a waker.
|
||||
unsafe fn wake(ptr: *const ()) {
|
||||
let raw = Self::from_ptr(ptr);
|
||||
let id = (&(*raw.pdata).span)
|
||||
.id()
|
||||
.map(|id| id.into_u64())
|
||||
.unwrap_or(0);
|
||||
tracing::trace!(
|
||||
target: "executor::waker",
|
||||
op = "waker.wake",
|
||||
task.id = id,
|
||||
);
|
||||
|
||||
let mut state = (*raw.pdata).state.load(Ordering::Acquire);
|
||||
|
||||
@ -191,6 +220,15 @@ where
|
||||
/// Wakes a waker by reference.
|
||||
unsafe fn wake_by_ref(ptr: *const ()) {
|
||||
let raw = Self::from_ptr(ptr);
|
||||
let id = (&(*raw.pdata).span)
|
||||
.id()
|
||||
.map(|id| id.into_u64())
|
||||
.unwrap_or(0);
|
||||
tracing::trace!(
|
||||
target: "executor::waker",
|
||||
op = "waker.wake_by_ref",
|
||||
task.id = id,
|
||||
);
|
||||
|
||||
let mut state = (*raw.pdata).state.load(Ordering::Acquire);
|
||||
|
||||
@ -250,6 +288,16 @@ where
|
||||
/// Clones a waker.
|
||||
unsafe fn clone_waker(ptr: *const ()) -> RawWaker {
|
||||
let raw = Self::from_ptr(ptr);
|
||||
let id = (&(*raw.pdata).span)
|
||||
.id()
|
||||
.map(|id| id.into_u64())
|
||||
.unwrap_or(0);
|
||||
tracing::trace!(
|
||||
target: "executor::waker",
|
||||
op = "waker.clone",
|
||||
task.id = id,
|
||||
);
|
||||
|
||||
let raw_waker = &(*raw.pdata).vtable.raw_waker;
|
||||
|
||||
// Increment the reference count. With any kind of reference-counted data structure,
|
||||
@ -271,6 +319,15 @@ where
|
||||
#[inline]
|
||||
unsafe fn decrement(ptr: *const ()) {
|
||||
let raw = Self::from_ptr(ptr);
|
||||
let id = (&(*raw.pdata).span)
|
||||
.id()
|
||||
.map(|id| id.into_u64())
|
||||
.unwrap_or(0);
|
||||
tracing::trace!(
|
||||
target: "executor::waker",
|
||||
op = "waker.drop",
|
||||
task.id = id,
|
||||
);
|
||||
|
||||
// Decrement the reference count.
|
||||
let new = (*raw.pdata).state.fetch_sub(1, Ordering::AcqRel);
|
||||
@ -310,10 +367,11 @@ where
|
||||
raw.output as *const ()
|
||||
}
|
||||
|
||||
/// Cleans up proc's resources and deallocates it.
|
||||
/// Cleans up the procs resources and deallocates the associated memory.
|
||||
///
|
||||
/// If the proc has not been closed, then its future or the output will be dropped. The
|
||||
/// schedule function gets dropped too.
|
||||
/// The future or output stored will *not* be dropped, but its memory will be freed. Callers
|
||||
/// must ensure that they are correctly dropped beforehand if either of those is still alive to
|
||||
/// prevent use-after-free.
|
||||
#[inline]
|
||||
unsafe fn destroy(ptr: *const ()) {
|
||||
let raw = Self::from_ptr(ptr);
|
||||
@ -323,6 +381,9 @@ where
|
||||
// Drop the schedule function.
|
||||
(raw.schedule as *mut S).drop_in_place();
|
||||
|
||||
// Drop the proc data containing the associated Span
|
||||
(raw.pdata as *mut ProcData).drop_in_place();
|
||||
|
||||
// Finally, deallocate the memory reserved by the proc.
|
||||
alloc::dealloc(ptr as *mut u8, proc_layout.layout);
|
||||
}
|
||||
@ -332,9 +393,11 @@ where
|
||||
/// Ticking will call `poll` once and re-schedule the task if it returns `Poll::Pending`. If
|
||||
/// polling its future panics, the proc will be closed and the panic propagated into the caller.
|
||||
unsafe fn tick(ptr: *const ()) {
|
||||
let raw = Self::from_ptr(ptr);
|
||||
let mut raw = Self::from_ptr(ptr);
|
||||
// Enter the span associated with the process to track execution time if enabled.
|
||||
let _guard = (&(*raw.pdata).span).enter();
|
||||
|
||||
// Create a context from the raw proc pointer and the vtable inside the its pdata.
|
||||
// Create a context from the raw proc pointer and the vtable inside its pdata.
|
||||
let waker = ManuallyDrop::new(Waker::from_raw(RawWaker::new(
|
||||
ptr,
|
||||
&(*raw.pdata).vtable.raw_waker,
|
||||
@ -380,9 +443,9 @@ where
|
||||
|
||||
// Poll the inner future, but surround it with a guard that closes the proc in case polling
|
||||
// panics.
|
||||
let guard = Guard(raw);
|
||||
let poll = <F as Future>::poll(Pin::new_unchecked(&mut *raw.future), cx);
|
||||
mem::forget(guard);
|
||||
let drop_guard = Guard(&mut raw);
|
||||
let poll = <F as Future>::poll(drop_guard.pin_future(), cx);
|
||||
drop_guard.disable();
|
||||
|
||||
match poll {
|
||||
Poll::Ready(out) => {
|
||||
@ -497,21 +560,43 @@ impl<'a, F, R, S> Clone for RawProc<'a, F, R, S> {
|
||||
}
|
||||
impl<'a, F, R, S> Copy for RawProc<'a, F, R, S> {}
|
||||
|
||||
#[repr(transparent)]
|
||||
/// A guard that closes the proc if polling its future panics.
|
||||
struct Guard<'a, F, R, S>(RawProc<'a, F, R, S>)
|
||||
struct Guard<'guard, 'a, F, R, S>(&'guard mut RawProc<'a, F, R, S>)
|
||||
where
|
||||
F: Future<Output = R> + 'a,
|
||||
R: 'a,
|
||||
S: Fn(LightProc) + 'a;
|
||||
|
||||
impl<'a, F, R, S> Drop for Guard<'a, F, R, S>
|
||||
impl<'guard, 'a, F, R, S> Guard<'guard, 'a, F, R, S>
|
||||
where
|
||||
F: Future<Output = R> + 'a,
|
||||
R: 'a,
|
||||
S: Fn(LightProc) + 'a,
|
||||
{
|
||||
#[inline(always)]
|
||||
/// Disable the guard again.
|
||||
///
|
||||
/// This does essentially nothing but prevents the Drop implementation from being called
|
||||
fn disable(self) {
|
||||
// Put `self` in a ManuallyDrop telling the compiler to explicitly not call Drop::drop
|
||||
let _ = ManuallyDrop::new(self);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn pin_future(&self) -> Pin<&mut F> {
|
||||
Pin::new_unchecked(&mut *self.0.future)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, F, R, S> Drop for Guard<'_, 'a, F, R, S>
|
||||
where
|
||||
F: Future<Output = R> + 'a,
|
||||
R: 'a,
|
||||
S: Fn(LightProc) + 'a,
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
let raw = self.0;
|
||||
let raw = &self.0;
|
||||
let ptr = raw.pdata as *const ();
|
||||
|
||||
unsafe {
|
||||
|
Loading…
Reference in New Issue
Block a user