Merge branch 'feature/runtime-improvements' into development

* feature/runtime-improvements:
  Runtime things furthermore
  Allow tracking cgroups with futures
  Oh whoops handle that
  Get started on supervision trees
  Attach a GroupID to all LightProcs
  Noting down improvement ideas for procs
  More ideas about how to record data
  A number of small updates batched into one commit
  Improve Drop guards
  Even more console shenanigans
  tracing more data
  Some bits work \o/
  Console is attached and compiles
  More console features
  Use `ManuallyDrop` instead of `mem::forget` where appropriate
  More console implementation stuff
  Start on the runtime console subscriber
This commit is contained in:
Nadja Reitzenstein 2022-06-24 12:25:11 +02:00
commit fac0a9ba94
27 changed files with 4057 additions and 321 deletions

994
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -49,6 +49,7 @@ dirs = "4.0.0"
# Runtime
executor = { path = "runtime/executor" }
console = { path = "runtime/console" }
# Catch&Handle POSIX process signals
signal-hook = "0.3.13"
@ -72,9 +73,9 @@ rust-argon2 = "0.8.3"
rand = "0.8.4"
# Async aware logging and tracing
tracing = "0.1.28"
tracing-subscriber = { version = "0.2.25", features = ["env-filter"] }
tracing-futures = { version = "0.2.5", features = ["futures-03"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "registry", "std"] }
tracing-futures = { version = "0.2", features = ["futures-03"] }
# API
api = { path = "api" }
@ -123,4 +124,4 @@ tempfile = "3.2"
shadow-rs = "0.11"
[workspace]
members = ["modules/*", "api"]
members = ["runtime/*", "modules/*", "api"]

View File

@ -51,7 +51,7 @@ impl AuditLog {
let mut ser = Serializer::new(&mut writer);
line.serialize(&mut ser)
.expect("failed to serialize audit log line");
writer.write("\n".as_bytes())?;
writer.write_all("\n".as_bytes())?;
Ok(())
}
}

View File

@ -3,7 +3,7 @@ use async_net::TcpListener;
use capnp_rpc::rpc_twoparty_capnp::Side;
use capnp_rpc::twoparty::VatNetwork;
use capnp_rpc::RpcSystem;
use executor::prelude::Executor;
use executor::prelude::{Executor, GroupId, SupervisionRegistry};
use futures_rustls::server::TlsStream;
use futures_rustls::TlsAcceptor;
use futures_util::stream::FuturesUnordered;
@ -167,6 +167,7 @@ impl APIServer {
tracing::error!("Error during RPC handling: {}", e);
}
};
self.executor.spawn_local(f);
let cgroup = SupervisionRegistry::with(SupervisionRegistry::new_group);
self.executor.spawn_local_cgroup(f, cgroup);
}
}

View File

@ -62,6 +62,7 @@ use crate::users::db::UserDB;
use crate::users::Users;
use executor::pool::Executor;
use signal_hook::consts::signal::*;
use tracing::Span;
pub struct Diflouroborane {
config: Config,
@ -70,20 +71,44 @@ pub struct Diflouroborane {
pub users: Users,
pub roles: Roles,
pub resources: ResourcesHandle,
span: Span,
}
pub static RESOURCES: OnceCell<ResourcesHandle> = OnceCell::new();
impl Diflouroborane {
pub fn new(config: Config) -> miette::Result<Self> {
logging::init(&config.logging);
let mut server = logging::init(&config.logging);
let span = tracing::info_span!(
target: "bffh",
"bffh"
);
let span2 = span.clone();
let _guard = span2.enter();
tracing::info!(version = env::VERSION, "Starting BFFH");
let span = tracing::info_span!("setup");
let _guard = span.enter();
let executor = Executor::new();
if let Some(aggregator) = server.aggregator.take() {
executor.spawn(aggregator.run());
}
tracing::info!("Server is being spawned");
let handle = executor.spawn(server.serve());
std::thread::spawn(move || {
let result = async_io::block_on(handle);
match result {
Some(Ok(())) => {
tracing::info!("console server finished without error");
}
Some(Err(error)) => {
tracing::info!(%error, "console server finished with error");
}
None => {
tracing::info!("console server finished with panic");
}
}
});
let env = StateDB::open_env(&config.db_path)?;
let statedb = StateDB::create_with_env(env.clone())?;
@ -111,10 +136,12 @@ impl Diflouroborane {
users,
roles,
resources,
span,
})
}
pub fn run(&mut self) -> miette::Result<()> {
let _guard = self.span.enter();
let mut signals = signal_hook_async_std::Signals::new(&[SIGINT, SIGQUIT, SIGTERM])
.into_diagnostic()
.wrap_err("Failed to construct signal handler")?;

View File

@ -1,6 +1,7 @@
use tracing_subscriber::EnvFilter;
use serde::{Deserialize, Serialize};
use tracing_subscriber::prelude::*;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogConfig {
@ -24,21 +25,25 @@ impl Default for LogConfig {
}
}
pub fn init(config: &LogConfig) {
pub fn init(config: &LogConfig) -> console::Server {
let (console, server) = console::ConsoleLayer::new();
let filter = if let Some(ref filter) = config.filter {
EnvFilter::new(filter.as_str())
} else {
EnvFilter::from_env("BFFH_LOG")
};
let builder = tracing_subscriber::fmt().with_env_filter(filter);
let format = &config.format;
// TODO: Restore output format settings being settable
let fmt_layer = tracing_subscriber::fmt::layer().with_filter(filter);
let format = config.format.to_lowercase();
match format.as_str() {
"compact" => builder.compact().init(),
"pretty" => builder.pretty().init(),
"full" => builder.init(),
_ => builder.init(),
}
tracing::info!(format = format.as_str(), "Logging initialized")
tracing_subscriber::registry()
.with(fmt_layer)
.with(console)
.init();
tracing::info!(format = format.as_str(), "Logging initialized");
server
}

View File

@ -0,0 +1,30 @@
[package]
name = "console"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
console-api = "0.3"
prost-types = "0.10"
tonic = { version = "0.7.2", default_features = false, features = [] }
hyper = { version = "0.14", default_features = false, features = ["http2", "server", "stream"] }
thread_local = "1.1"
tracing = "0.1"
tracing-core = "0.1"
tracing-subscriber = { version = "0.3", default_features = false, features = ["registry"] }
crossbeam-utils = "0.8"
crossbeam-channel = "0.5"
async-net = "1.6"
async-compat = "0.2"
async-channel = "1.6"
async-oneshot = "0.5"
async-io = "1.7"
tokio-util = "0.7"
futures-util = "0.3"
tokio = { version = "1.19", default_features = false, features = []}
hdrhistogram = "7.5"
[dev-dependencies]
tracing-subscriber = "0.3"

View File

@ -0,0 +1,461 @@
use crate::id_map::{IdMap, ToProto};
use crate::server::{Watch, WatchRequest};
use crate::stats::{TimeAnchor, Unsent};
use crate::{server, stats};
use crate::{Event, Shared};
use console_api::{async_ops, instrument, resources, tasks};
use crossbeam_channel::{Receiver, TryRecvError};
use futures_util::{FutureExt, StreamExt};
use std::collections::HashMap;
use std::num::NonZeroU64;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tracing::span;
use tracing_core::Metadata;
/// Static (non-statistical) description of a resource, as stored by the aggregator.
///
/// Built from an `Event::Resource` in `Aggregator::update_state` and converted into a
/// `resources::Resource` message by this type's `ToProto` impl.
#[derive(Debug)]
struct Resource {
/// Span id of the resource; also used as its key in the aggregator's map.
id: span::Id,
/// Unsent marker: created as `true` and reset to `false` by `Unsent::take_unsent`.
is_dirty: AtomicBool,
/// Span id of the parent resource, if any; sent as `parent_resource_id`.
parent_id: Option<span::Id>,
/// Static `tracing` metadata received with the event.
metadata: &'static Metadata<'static>,
/// Sent verbatim as the message's `concrete_type`.
concrete_type: String,
/// Sent as the message's `kind`.
kind: resources::resource::Kind,
/// Source location forwarded to clients, if one was provided.
location: Option<console_api::Location>,
/// Sent verbatim as the message's `is_internal` flag.
is_internal: bool,
}
/// Represents static data for tasks.
///
/// Built from an `Event::Spawn` in `Aggregator::update_state` and converted into a
/// `tasks::Task` message by this type's `ToProto` impl.
#[derive(Debug)]
struct Task {
/// Span id of the task; also used as its key in the aggregator's map.
id: span::Id,
/// Unsent marker: created as `true` and reset to `false` by `Unsent::take_unsent`.
is_dirty: AtomicBool,
/// Static `tracing` metadata received with the event.
metadata: &'static Metadata<'static>,
/// Fields received with the spawn event; cloned into every emitted message.
fields: Vec<console_api::Field>,
/// Source location forwarded to clients, if one was provided.
location: Option<console_api::Location>,
}
/// Static description of an async operation, as stored by the aggregator.
///
/// Built from an `Event::AsyncResourceOp` in `Aggregator::update_state` and converted into an
/// `async_ops::AsyncOp` message by this type's `ToProto` impl.
#[derive(Debug)]
struct AsyncOp {
/// Span id of the async op; also used as its key in the aggregator's map.
id: span::Id,
/// Unsent marker: created as `true` and reset to `false` by `Unsent::take_unsent`.
is_dirty: AtomicBool,
/// Span id of the parent async op, if any; sent as `parent_async_op_id`.
parent_id: Option<span::Id>,
/// Span id of the resource this op belongs to; sent as `resource_id`.
resource_id: span::Id,
/// Static `tracing` metadata received with the event.
metadata: &'static Metadata<'static>,
/// Sent verbatim as the message's `source`.
source: String,
}
impl ToProto for Task {
    type Output = tasks::Task;

    /// Builds the `tasks::Task` message for this task.
    ///
    /// The time anchor is ignored: none of the static task data is a timestamp.
    fn to_proto(&self, _: &stats::TimeAnchor) -> Self::Output {
        let id = Some(self.id.clone().into());
        let metadata = Some(self.metadata.into());
        // TODO: more kinds of tasks...
        let kind = tasks::task::Kind::Spawn as i32;
        // TODO: implement parents nicely
        let parents = Vec::new();

        tasks::Task {
            id,
            kind,
            metadata,
            parents,
            fields: self.fields.clone(),
            location: self.location.clone(),
        }
    }
}
impl Unsent for Task {
    /// Clears the dirty flag and returns the value it held before.
    fn take_unsent(&self) -> bool {
        let flag = &self.is_dirty;
        flag.swap(false, Ordering::AcqRel)
    }

    /// Reads the dirty flag without modifying it.
    fn is_unsent(&self) -> bool {
        let flag = &self.is_dirty;
        flag.load(Ordering::Acquire)
    }
}
impl ToProto for Resource {
    type Output = resources::Resource;

    /// Builds the `resources::Resource` message for this resource.
    ///
    /// The time anchor is ignored: none of the static resource data is a timestamp.
    fn to_proto(&self, _: &stats::TimeAnchor) -> Self::Output {
        let id = Some(self.id.clone().into());
        let parent_resource_id = self.parent_id.clone().map(|parent| parent.into());
        let kind = Some(self.kind.clone());
        let metadata = Some(self.metadata.into());

        resources::Resource {
            id,
            parent_resource_id,
            kind,
            metadata,
            concrete_type: self.concrete_type.clone(),
            location: self.location.clone(),
            is_internal: self.is_internal,
        }
    }
}
impl Unsent for Resource {
    /// Clears the dirty flag and returns the value it held before.
    fn take_unsent(&self) -> bool {
        let flag = &self.is_dirty;
        flag.swap(false, Ordering::AcqRel)
    }

    /// Reads the dirty flag without modifying it.
    fn is_unsent(&self) -> bool {
        let flag = &self.is_dirty;
        flag.load(Ordering::Acquire)
    }
}
impl ToProto for AsyncOp {
    type Output = async_ops::AsyncOp;

    /// Builds the `async_ops::AsyncOp` message for this operation.
    ///
    /// The time anchor is ignored: none of the static async-op data is a timestamp.
    fn to_proto(&self, _: &stats::TimeAnchor) -> Self::Output {
        let id = Some(self.id.clone().into());
        let metadata = Some(self.metadata.into());
        let resource_id = Some(self.resource_id.clone().into());
        let parent_async_op_id = self.parent_id.clone().map(|parent| parent.into());

        async_ops::AsyncOp {
            id,
            metadata,
            resource_id,
            source: self.source.clone(),
            parent_async_op_id,
        }
    }
}
impl Unsent for AsyncOp {
    /// Clears the dirty flag and returns the value it held before.
    fn take_unsent(&self) -> bool {
        let flag = &self.is_dirty;
        flag.swap(false, Ordering::AcqRel)
    }

    /// Reads the dirty flag without modifying it.
    fn is_unsent(&self) -> bool {
        let flag = &self.is_dirty;
        flag.load(Ordering::Acquire)
    }
}
/// Selects which entries of an `IdMap` are turned into protobuf messages.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) enum Include {
/// Every stored entry; used for the initial snapshot sent to a new subscriber.
All,
/// Only entries whose unsent flag is set; the flag is cleared as they are collected.
UpdatedOnly,
}
/// Collects events emitted by the console layer, keeps the aggregated state and periodically
/// publishes updates to subscribed clients (see `Aggregator::run`).
#[derive(Debug)]
pub struct Aggregator {
/// Counters of dropped events, shared with the layer.
shared: Arc<Shared>,
/// Receiving end of the event channel fed by the layer.
events: Receiver<Event>,
/// Commands (subscriptions, pause/resume) sent by the server.
rpcs: async_channel::Receiver<server::Command>,
/// Active instrument-update subscriptions.
watchers: Vec<Watch<instrument::Update>>,
/// Active task-details subscriptions, keyed by the watched task's span id.
details_watchers: HashMap<span::Id, Vec<Watch<tasks::TaskDetails>>>,
/// Every metadata entry seen so far; sent to newly subscribed clients.
all_metadata: Vec<console_api::register_metadata::NewMetadata>,
/// Metadata entries not yet published; drained by `publish`.
new_metadata: Vec<console_api::register_metadata::NewMetadata>,
/// `false` while paused; timer ticks only trigger a publish when this is `true`.
running: bool,
/// Interval of the publish timer (set to one second in `new`).
publish_interval: Duration,
/// Anchor used to convert `Instant`s into protobuf timestamps.
base_time: TimeAnchor,
/// Static task data by span id.
tasks: IdMap<Task>,
/// Task statistics by span id.
task_stats: IdMap<Arc<stats::TaskStats>>,
/// Static resource data by span id.
resources: IdMap<Resource>,
/// Resource statistics by span id.
resource_stats: IdMap<Arc<stats::ResourceStats>>,
/// Static async-op data by span id.
async_ops: IdMap<AsyncOp>,
/// Async-op statistics by span id.
async_op_stats: IdMap<Arc<stats::AsyncOpStats>>,
/// Poll operations recorded since they were last taken by an `UpdatedOnly` resource update.
poll_ops: Vec<console_api::resources::PollOp>,
}
impl Aggregator {
/// Creates an aggregator with empty state.
///
/// * `shared` - dropped-event counters shared with the layer.
/// * `events` - receiving end of the layer's event channel.
/// * `rpcs` - receiving end of the server's command channel.
///
/// The aggregator starts in the running (not paused) state and publishes once per second.
pub(crate) fn new(
    shared: Arc<Shared>,
    events: Receiver<Event>,
    rpcs: async_channel::Receiver<server::Command>,
) -> Self {
    let publish_interval = Duration::from_secs(1);

    Self {
        shared,
        events,
        rpcs,
        watchers: Vec::new(),
        details_watchers: HashMap::new(),
        all_metadata: Vec::new(),
        new_metadata: Vec::new(),
        running: true,
        publish_interval,
        base_time: TimeAnchor::new(),
        tasks: IdMap::default(),
        task_stats: IdMap::default(),
        resources: IdMap::default(),
        resource_stats: IdMap::default(),
        async_ops: IdMap::default(),
        async_op_stats: IdMap::default(),
        poll_ops: Vec::new(),
    }
}
/// Registers a new instrument subscription.
///
/// The subscriber first receives a full snapshot (all tasks, resources, async ops and all
/// metadata seen so far). It is only added to the watcher list if that snapshot was accepted.
fn add_instrument_subscription(&mut self, subscription: Watch<instrument::Update>) {
    tracing::debug!("new instrument subscription");

    let task_snapshot = self.task_update(Include::All);
    let resource_snapshot = self.resource_update(Include::All);
    let async_op_snapshot = self.async_op_update(Include::All);
    let timestamp = self.base_time.to_timestamp(Instant::now());

    let snapshot = instrument::Update {
        task_update: Some(task_snapshot),
        resource_update: Some(resource_snapshot),
        async_op_update: Some(async_op_snapshot),
        now: Some(timestamp),
        new_metadata: Some(console_api::RegisterMetadata {
            metadata: self.all_metadata.clone(),
        }),
    };

    // Send the initial state --- if this fails, the subscription is already dead
    if !subscription.update(&snapshot) {
        return;
    }
    self.watchers.push(subscription);
}
/// Registers a task-details subscription for the task named in `watch_request`.
///
/// When the task is known, the stream receiver is handed back through the request's sender,
/// an initial `TaskDetails` message is sent, and the subscription is stored. When either step
/// fails the subscription is discarded. When the task is unknown nothing is sent back.
fn add_task_detail_subscription(
    &mut self,
    watch_request: WatchRequest<console_api::tasks::TaskDetails>,
) {
    let WatchRequest {
        id,
        mut stream_sender,
        buffer,
    } = watch_request;
    tracing::debug!(id = ?id, "new task details subscription");

    let stats = match self.task_stats.get(&id) {
        Some(stats) => stats,
        // If the task is not found, drop `stream_sender` which will result in a not found error
        None => return,
    };

    let (tx, rx) = async_channel::bounded(buffer);
    let subscription = Watch(tx);
    let now = Some(self.base_time.to_timestamp(Instant::now()));

    // Send back the stream receiver.
    if stream_sender.send(rx).is_err() {
        return;
    }

    // Then send the initial state --- if this fails, the subscription is already dead.
    let initial = console_api::tasks::TaskDetails {
        task_id: Some(id.clone().into()),
        now,
        poll_times_histogram: Some(stats.poll_duration_histogram()),
    };
    if !subscription.update(&initial) {
        return;
    }

    self.details_watchers
        .entry(id)
        .or_insert_with(Vec::new)
        .push(subscription);
}
/// Assembles a `TaskUpdate` from the task map and task statistics.
///
/// Also reports the number of dropped task events and resets that counter to zero.
fn task_update(&mut self, include: Include) -> tasks::TaskUpdate {
    let new_tasks = self.tasks.as_proto_list(include, &self.base_time);
    let stats_update = self.task_stats.as_proto(include, &self.base_time);
    let dropped_events = self.shared.dropped_tasks.swap(0, Ordering::AcqRel) as u64;

    tasks::TaskUpdate {
        new_tasks,
        stats_update,
        dropped_events,
    }
}
/// Assembles a `ResourceUpdate` from the resource map, resource statistics and poll ops.
///
/// With `Include::All` the recorded poll ops are copied; with `Include::UpdatedOnly` they are
/// moved out, leaving the list empty. Also reports the number of dropped resource events and
/// resets that counter to zero.
fn resource_update(&mut self, include: Include) -> resources::ResourceUpdate {
    let new_poll_ops = match include {
        Include::UpdatedOnly => std::mem::take(&mut self.poll_ops),
        Include::All => self.poll_ops.clone(),
    };
    let new_resources = self.resources.as_proto_list(include, &self.base_time);
    let stats_update = self.resource_stats.as_proto(include, &self.base_time);
    let dropped_events = self.shared.dropped_resources.swap(0, Ordering::AcqRel) as u64;

    resources::ResourceUpdate {
        new_resources,
        stats_update,
        new_poll_ops,
        dropped_events,
    }
}
/// Assembles an `AsyncOpUpdate` from the async-op map and async-op statistics.
///
/// Also reports the number of dropped async-op events and resets that counter to zero.
fn async_op_update(&mut self, include: Include) -> async_ops::AsyncOpUpdate {
    let new_async_ops = self.async_ops.as_proto_list(include, &self.base_time);
    let stats_update = self.async_op_stats.as_proto(include, &self.base_time);
    let dropped_events = self.shared.dropped_async_ops.swap(0, Ordering::AcqRel) as u64;

    async_ops::AsyncOpUpdate {
        new_async_ops,
        stats_update,
        dropped_events,
    }
}
/// Runs the aggregator until the command channel or the event channel is closed.
///
/// Each loop iteration waits for either a tick of the publish timer or a command from the
/// server, then drains all buffered events into the aggregated state. An update is published
/// only after a timer tick, while not paused, and when at least one watcher is subscribed.
pub async fn run(mut self) {
    let mut timer = StreamExt::fuse(async_io::Timer::interval(self.publish_interval));
    loop {
        let mut recv = self.rpcs.recv().fuse();
        let should_send: bool = futures_util::select! {
            _ = timer.next() => self.running,
            cmd = recv => {
                match cmd {
                    Ok(server::Command::Instrument(subscription)) => {
                        self.add_instrument_subscription(subscription);
                    }
                    Ok(server::Command::WatchTaskDetail(request)) => {
                        // Previously the request was dropped here, so every task-details
                        // watch was rejected without the task even being looked up.
                        self.add_task_detail_subscription(request);
                    }
                    Ok(server::Command::Pause) => {
                        self.running = false;
                    }
                    Ok(server::Command::Resume) => {
                        self.running = true;
                    }
                    Err(_) => {
                        tracing::debug!("rpc channel closed, exiting");
                        return
                    }
                }
                false
            },
        };

        // drain and aggregate buffered events.
        //
        // Note: we *don't* want to actually await the call to `recv` --- we
        // don't want the aggregator task to be woken on every event,
        // because it will then be woken when its own `poll` calls are
        // exited. that would result in a busy-loop. instead, we only want
        // to be woken when the flush interval has elapsed, or when the
        // channel is almost full.
        //
        // A single `try_recv` per iteration decides between "event", "empty" and
        // "disconnected". Probing for disconnection with a separate `try_recv` could receive
        // an event that arrived in the meantime and silently throw it away.
        loop {
            match self.events.try_recv() {
                Ok(event) => self.update_state(event),
                Err(TryRecvError::Empty) => break,
                Err(TryRecvError::Disconnected) => {
                    tracing::debug!("event channel closed; terminating");
                    return;
                }
            }
        }

        // flush data to clients, if there are any currently subscribed
        // watchers and we should send a new update.
        if !self.watchers.is_empty() && should_send {
            self.publish();
        }
    }
}
/// Sends one incremental update to every subscriber.
///
/// Instrument watchers receive the entries changed since the last publish plus any metadata
/// not yet published. Task-details watchers receive the current poll-time histogram of their
/// task. Watchers whose `update` call reports failure are removed, as are details watchers
/// of tasks that no longer have statistics.
fn publish(&mut self) {
    let new_metadata = if self.new_metadata.is_empty() {
        None
    } else {
        Some(console_api::RegisterMetadata {
            metadata: std::mem::take(&mut self.new_metadata),
        })
    };

    let task_update = Some(self.task_update(Include::UpdatedOnly));
    let resource_update = Some(self.resource_update(Include::UpdatedOnly));
    let async_op_update = Some(self.async_op_update(Include::UpdatedOnly));

    let update = instrument::Update {
        now: Some(self.base_time.to_timestamp(Instant::now())),
        new_metadata,
        task_update,
        resource_update,
        async_op_update,
    };

    self.watchers
        .retain(|watch: &Watch<instrument::Update>| watch.update(&update));

    let base_time = &self.base_time;
    let task_stats = &self.task_stats;
    // Assuming there are much fewer task details subscribers than there are
    // stats updates, iterate over `details_watchers` and compact the map.
    self.details_watchers.retain(|id, watchers| {
        let stats = match task_stats.get(id) {
            Some(stats) => stats,
            None => return false,
        };

        let details = tasks::TaskDetails {
            task_id: Some(id.clone().into()),
            now: Some(base_time.to_timestamp(Instant::now())),
            poll_times_histogram: Some(stats.poll_duration_histogram()),
        };
        watchers.retain(|watch| watch.update(&details));
        !watchers.is_empty()
    });
}
/// Update the current state with data from a single event.
fn update_state(&mut self, event: Event) {
// do state update
match event {
Event::Metadata(meta) => {
self.all_metadata.push(meta.into());
self.new_metadata.push(meta.into());
}
Event::Spawn {
id,
metadata,
stats,
fields,
location,
} => {
self.tasks.insert(
id.clone(),
Task {
id: id.clone(),
is_dirty: AtomicBool::new(true),
metadata,
fields,
location,
// TODO: parents
},
);
self.task_stats.insert(id, stats);
}
Event::Resource {
id,
parent_id,
metadata,
kind,
concrete_type,
location,
is_internal,
stats,
} => {
self.resources.insert(
id.clone(),
Resource {
id: id.clone(),
is_dirty: AtomicBool::new(true),
parent_id,
kind,
metadata,
concrete_type,
location,
is_internal,
},
);
self.resource_stats.insert(id, stats);
}
Event::PollOp {
metadata,
resource_id,
op_name,
async_op_id,
task_id,
is_ready,
} => {
let poll_op = resources::PollOp {
metadata: Some(metadata.into()),
resource_id: Some(resource_id.into()),
name: op_name,
task_id: Some(task_id.into()),
async_op_id: Some(async_op_id.into()),
is_ready,
};
self.poll_ops.push(poll_op);
}
Event::AsyncResourceOp {
id,
source,
resource_id,
metadata,
parent_id,
stats,
} => {
self.async_ops.insert(
id.clone(),
AsyncOp {
id: id.clone(),
is_dirty: AtomicBool::new(true),
resource_id,
metadata,
source,
parent_id,
},
);
self.async_op_stats.insert(id, stats);
}
}
}
}

View File

@ -0,0 +1,113 @@
use std::collections::HashMap;
use tracing_core::span::Id;
/// Collection of attributes, each identified by the span id and field name it was reported
/// under (see `FieldKey`).
#[derive(Debug, Default)]
pub(crate) struct Attributes {
/// Current value of every attribute seen so far.
attributes: HashMap<FieldKey, console_api::Attribute>,
}
impl Attributes {
    /// Iterates over the current value of every stored attribute, in no particular order.
    pub(crate) fn values(&self) -> impl Iterator<Item = &console_api::Attribute> {
        self.attributes.values()
    }

    /// Applies `update` to the attribute identified by `id` and the update's field name.
    ///
    /// An attribute that does not exist yet is created from the update. An existing one is
    /// modified through `update_attribute`. Updates whose field has no name are logged and
    /// ignored.
    pub(crate) fn update(&mut self, id: &Id, update: &Update) {
        let field_name = if let Some(name) = &update.field.name {
            name.clone()
        } else {
            tracing::warn!(?update.field, "field missing name, skipping...");
            return;
        };

        let key = FieldKey {
            update_id: id.clone(),
            field_name,
        };

        match self.attributes.entry(key) {
            std::collections::hash_map::Entry::Occupied(mut slot) => {
                update_attribute(slot.get_mut(), update);
            }
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(update.clone().into());
            }
        }
    }
}
/// Applies `update` to an existing `attribute` in place.
///
/// Boolean, string and debug values are overwritten. Unsigned and signed integers are
/// combined according to `update.op`, with saturating addition or subtraction, or a plain
/// overwrite. A numeric update without an op, and any combination of value kinds that do not
/// match, only produces a warning and leaves the attribute untouched.
fn update_attribute(attribute: &mut console_api::Attribute, update: &Update) {
    use console_api::field::Value::*;

    let current = attribute
        .field
        .as_mut()
        .and_then(|field| field.value.as_mut());
    let incoming = update.field.value.clone();

    match (current, incoming) {
        (Some(BoolVal(slot)), Some(BoolVal(new))) => *slot = new,

        (Some(StrVal(slot)), Some(StrVal(new))) => *slot = new,

        (Some(DebugVal(slot)), Some(DebugVal(new))) => *slot = new,

        (Some(U64Val(slot)), Some(U64Val(operand))) => match update.op {
            Some(UpdateOp::Add) => *slot = slot.saturating_add(operand),
            Some(UpdateOp::Sub) => *slot = slot.saturating_sub(operand),
            Some(UpdateOp::Override) => *slot = operand,
            None => tracing::warn!(
                "numeric attribute update {:?} needs to have an op field",
                update.field.name
            ),
        },

        (Some(I64Val(slot)), Some(I64Val(operand))) => match update.op {
            Some(UpdateOp::Add) => *slot = slot.saturating_add(operand),
            Some(UpdateOp::Sub) => *slot = slot.saturating_sub(operand),
            Some(UpdateOp::Override) => *slot = operand,
            None => tracing::warn!(
                "numeric attribute update {:?} needs to have an op field",
                update.field.name
            ),
        },

        (current, incoming) => {
            tracing::warn!(
                "attribute {:?} cannot be updated by update {:?}",
                current,
                incoming
            );
        }
    }
}
impl From<Update> for console_api::Attribute {
fn from(upd: Update) -> Self {
console_api::Attribute {
field: Some(upd.field),
unit: upd.unit,
}
}
}
/// A single change to an attribute, as applied by `Attributes::update`.
#[derive(Debug, Clone)]
pub(crate) struct Update {
/// Field carrying the attribute's name and its new value (or operand).
pub(crate) field: console_api::Field,
/// How a numeric value is combined with the stored one; numeric updates without an op are
/// rejected with a warning by `update_attribute`.
pub(crate) op: Option<UpdateOp>,
/// Unit copied into the attribute when the update creates it.
pub(crate) unit: Option<String>,
}
/// Operation applied to numeric attribute values by `update_attribute`.
#[derive(Debug, Clone)]
pub(crate) enum UpdateOp {
/// Saturating addition of the update value to the stored value.
Add,
/// Replace the stored value with the update value.
Override,
/// Saturating subtraction of the update value from the stored value.
Sub,
}
/// Represents a key for a `proto::field::Name`. Because the
/// `proto::field::Name` might not be unique on its own, the id passed to
/// `Attributes::update` (the resource id, per the original note here) is
/// included in the key as well.
#[derive(Debug, Hash, PartialEq, Eq)]
struct FieldKey {
/// Id the update was reported for.
update_id: Id,
/// Name of the updated field.
field_name: console_api::field::Name,
}

View File

@ -0,0 +1,75 @@
use std::fmt;
use std::fmt::Formatter;
use std::ptr;
use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
use tracing::Metadata;
/// Fixed-capacity set of callsite metadata pointers, accessed through atomics only.
pub(crate) struct Callsites<const MAX_CALLSITES: usize> {
/// Slots holding pointers to registered metadata; every slot starts out as a null pointer.
array: [AtomicPtr<Metadata<'static>>; MAX_CALLSITES],
/// Number of insertions attempted so far; incremented before the corresponding slot is written.
len: AtomicUsize,
}
impl<const MAX_CALLSITES: usize> Callsites<MAX_CALLSITES> {
    /// Registers `callsite` unless an equal callsite is already present.
    ///
    /// # Panics
    ///
    /// Panics when all `MAX_CALLSITES` slots are taken (spilling into backup storage is not
    /// implemented yet), or when the reserved slot turns out to be occupied already.
    pub(crate) fn insert(&self, callsite: &'static Metadata<'static>) {
        if self.contains(callsite) {
            return;
        }

        let idx = self.len.fetch_add(1, Ordering::AcqRel);
        // Valid indices are `0..MAX_CALLSITES`. The comparison has to be strict: with `<=`
        // the index `MAX_CALLSITES` was used to access the array and panicked out of bounds.
        if idx < MAX_CALLSITES {
            self.array[idx]
                .compare_exchange(
                    ptr::null_mut(),
                    callsite as *const _ as *mut _,
                    Ordering::AcqRel,
                    Ordering::Acquire,
                )
                .expect("would have clobbered callsite array");
        } else {
            todo!("Need to spill callsite over into backup storage");
        }
    }

    /// Returns `true` if a callsite equal to `callsite` has been inserted.
    ///
    /// Slots that were reserved by a concurrent `insert` but not written yet are skipped, so
    /// a callsite whose insertion is still in progress may be reported as absent.
    pub(crate) fn contains(&self, callsite: &Metadata<'static>) -> bool {
        let mut idx = 0;
        // `len` counts insertion attempts and can exceed the capacity once the array is full,
        // so it must be clamped before it is used as a slice bound.
        let mut end = self.len.load(Ordering::Acquire).min(MAX_CALLSITES);
        loop {
            for cs in &self.array[idx..end] {
                let ptr = cs.load(Ordering::Acquire);
                // `insert` bumps `len` before it stores the pointer, so a slot below `len`
                // can still be null. Dereferencing it would be undefined behaviour.
                if ptr.is_null() {
                    continue;
                }
                // SAFETY: the only non-null values ever stored in `array` are created from
                // `&'static Metadata<'static>` in `insert`, so the pointer is valid for the
                // whole program and never written through.
                let meta = unsafe { &*ptr };
                if meta.callsite() == callsite.callsite() {
                    return true;
                }
            }

            idx = end;
            // Check if new callsites were added since we iterated
            end = self.len.load(Ordering::Acquire).min(MAX_CALLSITES);
            if end <= idx {
                return false;
            }
        }
    }
}
impl<const MAX_CALLSITES: usize> Default for Callsites<MAX_CALLSITES> {
    /// Creates an empty set: every slot is a null pointer and the length is zero.
    fn default() -> Self {
        // `AtomicPtr` is not `Copy`; repeating a `const` item is what makes the array
        // repeat expression below legal.
        const EMPTY_SLOT: AtomicPtr<Metadata<'static>> = AtomicPtr::new(ptr::null_mut());

        let array = [EMPTY_SLOT; MAX_CALLSITES];
        let len = AtomicUsize::new(0);
        Self { array, len }
    }
}
impl<const MAX_CALLSITES: usize> fmt::Debug for Callsites<MAX_CALLSITES> {
    /// Formats the capacity, the slots in use and the raw length counter.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let len = self.len.load(Ordering::Acquire);
        // `len` is incremented on every insertion attempt, including those that found the
        // array full, so it can exceed the capacity. Clamp it before slicing to keep `Debug`
        // from panicking; the raw counter is still reported in the `len` field.
        let in_use = len.min(MAX_CALLSITES);
        f.debug_struct("Callsites")
            .field("MAX_CALLSITES", &MAX_CALLSITES)
            .field("array", &&self.array[..in_use])
            .field("len", &len)
            .finish()
    }
}

View File

@ -0,0 +1,66 @@
use crate::stats;
use console_api::resources;
use std::sync::Arc;
use tracing::span;
use tracing_core::Metadata;
/// Messages sent from the console layer to the aggregator, which folds them into its state in
/// `Aggregator::update_state`.
pub(crate) enum Event {
/// Static callsite metadata to be forwarded to clients.
Metadata(&'static Metadata<'static>),
/// A new task; stored under `id` together with its statistics handle.
Spawn {
id: span::Id,
metadata: &'static Metadata<'static>,
stats: Arc<stats::TaskStats>,
fields: Vec<console_api::Field>,
location: Option<console_api::Location>,
},
/// A new resource; stored under `id` together with its statistics handle.
Resource {
id: span::Id,
parent_id: Option<span::Id>,
metadata: &'static Metadata<'static>,
concrete_type: String,
kind: resources::resource::Kind,
location: Option<console_api::Location>,
is_internal: bool,
stats: Arc<stats::ResourceStats>,
},
/// A poll operation; appended to the aggregator's list of pending poll ops.
PollOp {
metadata: &'static Metadata<'static>,
resource_id: span::Id,
op_name: String,
async_op_id: span::Id,
task_id: span::Id,
is_ready: bool,
},
/// A new async operation on a resource; stored under `id` together with its statistics handle.
AsyncResourceOp {
id: span::Id,
parent_id: Option<span::Id>,
resource_id: span::Id,
metadata: &'static Metadata<'static>,
source: String,
stats: Arc<stats::AsyncOpStats>,
},
}
/// An operation performed on a waker.
#[derive(Clone, Debug, Copy)]
pub(crate) enum WakeOp {
    Wake { self_wake: bool },
    WakeByRef { self_wake: bool },
    Clone,
    Drop,
}

impl WakeOp {
    /// Returns `true` if `self` is a `Wake` or `WakeByRef` event.
    pub(crate) fn is_wake(self) -> bool {
        match self {
            Self::Wake { .. } | Self::WakeByRef { .. } => true,
            Self::Clone | Self::Drop => false,
        }
    }

    /// Returns `self` with its `self_wake` flag replaced by the given value.
    ///
    /// `Clone` and `Drop` carry no such flag and are returned unchanged.
    pub(crate) fn self_wake(self, self_wake: bool) -> Self {
        match self {
            Self::Wake { .. } => Self::Wake { self_wake },
            Self::WakeByRef { .. } => Self::WakeByRef { self_wake },
            other @ (Self::Clone | Self::Drop) => other,
        }
    }
}

View File

@ -0,0 +1,126 @@
use crate::aggregate::Include;
use crate::stats::{DroppedAt, TimeAnchor, Unsent};
use std::collections::HashMap;
use std::time::{Duration, Instant};
use tracing_core::span::Id;
/// Conversion of an aggregator-side value into the message type sent to clients.
pub(crate) trait ToProto {
/// Message type produced by the conversion.
type Output;
/// Builds the message; `base_time` is available for converting instants into timestamps.
fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output;
}
/// Map from span id to a value of type `T`, with helpers for emitting either all entries or
/// only the entries that are still marked as unsent.
#[derive(Debug)]
pub(crate) struct IdMap<T> {
/// The stored entries.
data: HashMap<Id, T>,
}
impl<T> Default for IdMap<T> {
fn default() -> Self {
IdMap {
data: HashMap::<Id, T>::new(),
}
}
}
impl<T: Unsent> IdMap<T> {
/// Stores `data` under `id`, replacing any entry already stored for that id.
pub(crate) fn insert(&mut self, id: Id, data: T) {
self.data.insert(id, data);
}
/// Iterates over the entries that are marked as unsent.
///
/// The unsent flag of an entry is cleared at the moment the iterator inspects it, so
/// entries the iterator never reaches keep their flag.
pub(crate) fn since_last_update(&mut self) -> impl Iterator<Item = (&Id, &mut T)> {
    self.data
        .iter_mut()
        .filter_map(|(id, data)| data.take_unsent().then(|| (id, data)))
}
/// Iterates over every entry without touching any unsent flag.
pub(crate) fn all(&self) -> impl Iterator<Item = (&Id, &T)> {
self.data.iter()
}
/// Returns the entry stored under `id`, if there is one.
pub(crate) fn get(&self, id: &Id) -> Option<&T> {
self.data.get(id)
}
/// Converts the selected entries into a list of messages.
///
/// `Include::All` converts every entry and leaves the unsent flags alone.
/// `Include::UpdatedOnly` converts only unsent entries and clears their flags.
pub(crate) fn as_proto_list(
    &mut self,
    include: Include,
    base_time: &TimeAnchor,
) -> Vec<T::Output>
where
    T: ToProto,
{
    let mut protos = Vec::new();
    match include {
        Include::All => {
            protos.extend(self.all().map(|(_, entry)| entry.to_proto(base_time)));
        }
        Include::UpdatedOnly => {
            protos.extend(
                self.since_last_update()
                    .map(|(_, entry)| entry.to_proto(base_time)),
            );
        }
    }
    protos
}
/// Converts the selected entries into a map from numeric span id to message.
///
/// `Include::All` converts every entry and leaves the unsent flags alone.
/// `Include::UpdatedOnly` converts only unsent entries and clears their flags.
pub(crate) fn as_proto(
    &mut self,
    include: Include,
    base_time: &TimeAnchor,
) -> HashMap<u64, T::Output>
where
    T: ToProto,
{
    let mut protos = HashMap::new();
    match include {
        Include::All => {
            for (id, entry) in self.all() {
                protos.insert(id.into_u64(), entry.to_proto(base_time));
            }
        }
        Include::UpdatedOnly => {
            for (id, entry) in self.since_last_update() {
                protos.insert(id.into_u64(), entry.to_proto(base_time));
            }
        }
    }
    protos
}
/// Removes closed entities from `stats`, and then every entry of `self` that no longer has
/// a counterpart in `stats`.
///
/// * `stats` - statistics map belonging to the entities stored in `self`.
/// * `now` - current time, used to compute how long ago an entity was dropped.
/// * `retention` - how long a dropped entity is kept around.
/// * `has_watchers` - whether any client is currently subscribed.
///
/// An entity is removed once it has been dropped for longer than `retention`. The exception
/// is an entity that still has unsent data while clients are watching: it is kept regardless
/// of age so that its final state can be delivered. Entities that were never dropped are
/// always kept.
pub(crate) fn drop_closed<R: DroppedAt + Unsent>(
    &mut self,
    stats: &mut IdMap<R>,
    now: Instant,
    retention: Duration,
    has_watchers: bool,
) {
    let _span = tracing::debug_span!(
        "drop_closed",
        entity = %std::any::type_name::<T>(),
        stats = %std::any::type_name::<R>(),
    )
    .entered();

    // drop closed entities
    tracing::trace!(?retention, has_watchers, "dropping closed");

    stats.data.retain(|id, stats| {
        if let Some(dropped_at) = stats.dropped_at() {
            let dropped_for = now.checked_duration_since(dropped_at).unwrap_or_default();
            let dirty = stats.is_unsent();
            // if there are any clients watching, retain all dirty tasks regardless of age
            //
            // The previous expression `(dirty && has_watchers) || dropped_for > retention`
            // did the opposite: it dropped exactly those dirty entities immediately.
            let must_retain = dirty && has_watchers;
            let should_drop = !must_retain && dropped_for > retention;
            tracing::trace!(
                stats.id = ?id,
                stats.dropped_at = ?dropped_at,
                stats.dropped_for = ?dropped_for,
                stats.dirty = dirty,
                should_drop,
            );
            return !should_drop;
        }

        true
    });

    // drop closed entities which no longer have stats.
    self.data.retain(|id, _| stats.data.contains_key(id));
}
}

514
runtime/console/src/lib.rs Normal file
View File

@ -0,0 +1,514 @@
use crossbeam_channel::{Sender, TrySendError};
use std::borrow::Borrow;
use std::cell::RefCell;
use std::net::IpAddr;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use thread_local::ThreadLocal;
use tracing::span;
use tracing_core::span::Attributes;
use tracing_core::{Interest, Metadata, Subscriber};
use tracing_subscriber::layer::{Context, Filter};
use tracing_subscriber::registry::{LookupSpan, SpanRef};
use tracing_subscriber::Layer;
mod aggregate;
mod attribute;
mod callsites;
mod event;
mod id_map;
mod server;
mod stack;
mod stats;
mod visitors;
use crate::aggregate::Aggregator;
use crate::callsites::Callsites;
use crate::visitors::{
AsyncOpVisitor, PollOpVisitor, ResourceVisitor, ResourceVisitorResult, StateUpdateVisitor,
TaskVisitor, WakerVisitor,
};
use event::Event;
pub use server::Server;
use stack::SpanStack;
/// `tracing` layer that classifies callsites as they are registered and forwards events to
/// the aggregator through a bounded channel.
#[derive(Debug)]
pub struct ConsoleLayer {
/// One `SpanStack` per thread.
current_spans: ThreadLocal<RefCell<SpanStack>>,
/// Sending end of the event channel whose receiving end is owned by the aggregator.
tx: Sender<Event>,
/// Counters of dropped events, shared with the aggregator.
shared: Arc<Shared>,
/// Set of callsites registered as task spawns
spawn_callsites: Callsites<8>,
/// Set of callsites registered as waker events
waker_callsites: Callsites<8>,
/// Set of callsites registered as resource spans
resource_callsites: Callsites<8>,
/// Set of callsites for spans representing async operations on resources
///
/// TODO: Take some time to determine more reasonable numbers
async_op_callsites: Callsites<32>,
/// Set of callsites for spans representing async op poll operations
///
/// TODO: Take some time to determine more reasonable numbers
async_op_poll_callsites: Callsites<32>,
/// Set of callsites for events representing poll operation invocations on resources
///
/// TODO: Take some time to determine more reasonable numbers
poll_op_callsites: Callsites<32>,
/// Set of callsites for events representing attribute state updates on resources
///
/// TODO: Take some time to determine more reasonable numbers
resource_state_update_callsites: Callsites<32>,
/// Set of callsites for events representing attribute state updates on async resource ops
///
/// TODO: Take some time to determine more reasonable numbers
async_op_state_update_callsites: Callsites<32>,
/// The builder's `poll_duration_max`, expressed in nanoseconds.
max_poll_duration_nanos: u64,
}
/// Configuration from which a `ConsoleLayer` and its `Server` are built.
#[derive(Debug)]
pub struct Builder {
/// Network Address the console server will listen on
server_addr: IpAddr,
/// Network Port the console server will listen on
server_port: u16,
/// Number of events that can be buffered before events are dropped.
///
/// A smaller number will reduce the memory footprint but may lead to more events being dropped
/// during activity bursts.
event_buffer_capacity: usize,
/// Capacity of the command channel between server and aggregator; the same value is also
/// handed to `Server::new`.
client_buffer_capacity: usize,
/// Upper bound for recorded poll durations; stored in the layer as nanoseconds.
poll_duration_max: Duration,
}
impl Builder {
/// Consumes the builder and returns the configured layer together with its `Server`
/// (delegates to `ConsoleLayer::build`).
pub fn build(self) -> (ConsoleLayer, Server) {
ConsoleLayer::build(self)
}
}
impl Default for Builder {
    /// Returns the default configuration, taken from the defaults defined on `Server` and
    /// `ConsoleLayer`.
    fn default() -> Self {
        Self {
            // Listen on `::1` (aka localhost) by default
            server_addr: Server::DEFAULT_ADDR,
            server_port: Server::DEFAULT_PORT,
            event_buffer_capacity: ConsoleLayer::DEFAULT_EVENT_BUFFER_CAPACITY,
            // Use the named constant (same value as the literal `1024` that used to be
            // written here) so that the default is defined in exactly one place.
            client_buffer_capacity: ConsoleLayer::DEFAULT_CLIENT_BUFFER_CAPACITY,
            poll_duration_max: ConsoleLayer::DEFAULT_POLL_DURATION_MAX,
        }
    }
}
/// Counters shared between the layer and the aggregator.
///
/// The layer increments a counter whenever an event could not be queued; the aggregator
/// reads and resets the counters when it assembles an update.
#[derive(Debug, Default)]
struct Shared {
/// Dropped events accounted to tasks.
dropped_tasks: AtomicUsize,
/// Dropped events accounted to resources.
dropped_resources: AtomicUsize,
/// Dropped events accounted to async ops.
dropped_async_ops: AtomicUsize,
}
impl ConsoleLayer {
    /// Creates a layer and its server using the default configuration.
    pub fn new() -> (Self, Server) {
        Self::builder().build()
    }

    /// Returns a builder holding the default configuration.
    pub fn builder() -> Builder {
        Builder::default()
    }

    /// Wires up the layer, the aggregator and the server according to `config`.
    ///
    /// Two bounded channels are created: one carrying events from the layer to the
    /// aggregator, one carrying commands from the server to the aggregator. The aggregator
    /// is handed to the returned `Server`.
    fn build(config: Builder) -> (Self, Server) {
        tracing::debug!(
            ?config.server_addr,
            config.event_buffer_capacity,
            "configured console subscriber"
        );

        let (tx, events) = crossbeam_channel::bounded(config.event_buffer_capacity);
        let shared = Arc::new(Shared::default());
        let (subscribe, rpcs) = async_channel::bounded(config.client_buffer_capacity);
        let aggregator = Aggregator::new(shared.clone(), events, rpcs);
        let server = Server::new(aggregator, config.client_buffer_capacity, subscribe);

        // `Duration::as_nanos` returns a `u128`. Saturate instead of casting directly, which
        // would silently wrap an extremely large limit around to a small one.
        let max_poll_duration_nanos = config
            .poll_duration_max
            .as_nanos()
            .min(u128::from(u64::MAX)) as u64;

        let layer = Self {
            current_spans: ThreadLocal::new(),
            tx,
            shared,
            spawn_callsites: Callsites::default(),
            waker_callsites: Callsites::default(),
            resource_callsites: Callsites::default(),
            async_op_callsites: Callsites::default(),
            async_op_poll_callsites: Callsites::default(),
            poll_op_callsites: Callsites::default(),
            resource_state_update_callsites: Callsites::default(),
            async_op_state_update_callsites: Callsites::default(),
            max_poll_duration_nanos,
        };

        (layer, server)
    }
}
impl ConsoleLayer {
    /// Default capacity of the event channel between layer and aggregator.
    const DEFAULT_EVENT_BUFFER_CAPACITY: usize = 1024;
    /// Default client buffer capacity.
    const DEFAULT_CLIENT_BUFFER_CAPACITY: usize = 1024;

    /// The default maximum value for task poll duration histograms.
    ///
    /// Any poll duration exceeding this will be clamped to this value. By
    /// default, the maximum poll duration is one second.
    ///
    /// See also [`Builder::poll_duration_histogram_max`].
    pub const DEFAULT_POLL_DURATION_MAX: Duration = Duration::from_secs(1);

    /// Returns `true` if `metadata` was registered as a spawn callsite.
    fn is_spawn(&self, metadata: &Metadata<'static>) -> bool {
        self.spawn_callsites.contains(metadata)
    }

    /// Returns `true` if `metadata` was registered as a waker callsite.
    fn is_waker(&self, metadata: &Metadata<'static>) -> bool {
        self.waker_callsites.contains(metadata)
    }

    /// Returns `true` if `meta` was registered as a resource callsite.
    fn is_resource(&self, meta: &'static Metadata<'static>) -> bool {
        self.resource_callsites.contains(meta)
    }

    /// Returns `true` if `meta` was registered as an async-op callsite.
    fn is_async_op(&self, meta: &'static Metadata<'static>) -> bool {
        self.async_op_callsites.contains(meta)
    }

    /// Returns `true` if the span `id` exists and its callsite is a spawn callsite.
    fn is_id_spawned<S>(&self, id: &span::Id, cx: &Context<'_, S>) -> bool
    where
        S: Subscriber + for<'a> LookupSpan<'a>,
    {
        cx.span(id)
            .map(|span| self.is_spawn(span.metadata()))
            .unwrap_or(false)
    }

    /// Returns `true` if the span `id` exists and its callsite is a resource callsite.
    fn is_id_resource<S>(&self, id: &span::Id, cx: &Context<'_, S>) -> bool
    where
        S: Subscriber + for<'a> LookupSpan<'a>,
    {
        cx.span(id)
            .map(|span| self.is_resource(span.metadata()))
            .unwrap_or(false)
    }

    /// Returns `true` if the span `id` exists and its callsite is an async-op callsite.
    fn is_id_async_op<S>(&self, id: &span::Id, cx: &Context<'_, S>) -> bool
    where
        S: Subscriber + for<'a> LookupSpan<'a>,
    {
        cx.span(id)
            .map(|span| self.is_async_op(span.metadata()))
            .unwrap_or(false)
    }

    /// Walks `stack` from its last element towards its first and returns the id of the first
    /// span for which `p` returns `true`.
    fn first_entered<P>(&self, stack: &SpanStack, p: P) -> Option<span::Id>
    where
        P: Fn(&span::Id) -> bool,
    {
        stack
            .stack()
            .iter()
            .rev()
            .find(|id| p(id.id()))
            .map(|id| id.id())
            .cloned()
    }

    /// Tries to queue the event produced by `mk_event` and returns the accompanying value
    /// on success.
    ///
    /// `mk_event` is only invoked when the channel is not already full, so building the
    /// event costs nothing when it would be dropped anyway. Whenever the channel is full,
    /// `dropped` is incremented and `None` is returned. `None` is also returned, without
    /// counting, when the receiving side has disconnected.
    fn send_stats<S>(
        &self,
        dropped: &AtomicUsize,
        mk_event: impl FnOnce() -> (Event, S),
    ) -> Option<S> {
        if self.tx.is_full() {
            dropped.fetch_add(1, Ordering::Release);
            return None;
        }

        let (event, stats) = mk_event();
        match self.tx.try_send(event) {
            Ok(()) => Some(stats),
            // The channel filled up between the check above and the send.
            Err(TrySendError::Full(_)) => {
                dropped.fetch_add(1, Ordering::Release);
                None
            }
            Err(TrySendError::Disconnected(_)) => None,
        }
    }

    /// Queues an already constructed event; returns whether it was accepted.
    fn send_metadata(&self, dropped: &AtomicUsize, event: Event) -> bool {
        self.send_stats(dropped, || (event, ())).is_some()
    }
}
/// `Layer` implementation for `ConsoleLayer`: classifies callsites, records
/// spawn/resource/async-op spans and waker events, and maintains the
/// `current_spans` stack of entered spans.
impl<S> Layer<S> for ConsoleLayer
where
S: Subscriber + for<'a> LookupSpan<'a>,
{
/// Sorts the callsite into the matching callsite set based on its name and
/// target, forwards its metadata as `Event::Metadata`, and always reports
/// `Interest::always()`.
fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
// `dropped` is the counter that gets bumped if the metadata event cannot
// be sent; it is chosen per callsite category.
let dropped = match (metadata.name(), metadata.target()) {
(_, TaskVisitor::SPAWN_TARGET) | (TaskVisitor::SPAWN_NAME, _) => {
self.spawn_callsites.insert(metadata);
&self.shared.dropped_tasks
}
(_, WakerVisitor::WAKER_EVENT_TARGET) => {
self.waker_callsites.insert(metadata);
&self.shared.dropped_tasks
}
(ResourceVisitor::RES_SPAN_NAME, _) => {
self.resource_callsites.insert(metadata);
&self.shared.dropped_resources
}
(AsyncOpVisitor::ASYNC_OP_SPAN_NAME, _) => {
self.async_op_callsites.insert(metadata);
&self.shared.dropped_async_ops
}
(AsyncOpVisitor::ASYNC_OP_POLL_NAME, _) => {
self.async_op_poll_callsites.insert(metadata);
&self.shared.dropped_async_ops
}
(_, PollOpVisitor::POLL_OP_EVENT_TARGET) => {
self.poll_op_callsites.insert(metadata);
&self.shared.dropped_async_ops
}
(_, StateUpdateVisitor::RE_STATE_UPDATE_EVENT_TARGET) => {
self.resource_state_update_callsites.insert(metadata);
&self.shared.dropped_resources
}
(_, StateUpdateVisitor::AO_STATE_UPDATE_EVENT_TARGET) => {
self.async_op_state_update_callsites.insert(metadata);
&self.shared.dropped_async_ops
}
// Unclassified callsites are accounted against the task counter.
(_, _) => &self.shared.dropped_tasks,
};
self.send_metadata(dropped, Event::Metadata(metadata));
Interest::always()
}
/// Handles creation of spawn, resource and async-op spans.
///
/// For each recognised span kind the span's fields are visited, the
/// corresponding event is sent, and, only if sending succeeded, the
/// freshly created stats `Arc` is stored in the span's extensions so the
/// other callbacks can find it. Spans of any other kind are ignored.
fn on_new_span(&self, attrs: &Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) {
let metadata = attrs.metadata();
if self.is_spawn(metadata) {
let at = Instant::now();
let mut task_visitor = TaskVisitor::new(metadata.into());
attrs.record(&mut task_visitor);
let (fields, location) = task_visitor.result();
if let Some(stats) = self.send_stats(&self.shared.dropped_tasks, move || {
let stats = Arc::new(stats::TaskStats::new(self.max_poll_duration_nanos, at));
let event = Event::Spawn {
id: id.clone(),
stats: stats.clone(),
metadata,
fields,
location,
};
(event, stats)
}) {
ctx.span(id)
.expect("`on_new_span` called with nonexistent span. This is a tracing bug.")
.extensions_mut()
.insert(stats);
}
} else if self.is_resource(metadata) {
let at = Instant::now();
let mut resource_visitor = ResourceVisitor::default();
attrs.record(&mut resource_visitor);
if let Some(result) = resource_visitor.result() {
let ResourceVisitorResult {
concrete_type,
kind,
location,
is_internal,
inherit_child_attrs,
} = result;
// The parent is the most recently entered resource span on the
// current span stack, if any.
let parent_id = self.current_spans.get().and_then(|stack| {
self.first_entered(&stack.borrow(), |id| self.is_id_resource(id, &ctx))
});
if let Some(stats) = self.send_stats(&self.shared.dropped_resources, move || {
let stats = Arc::new(stats::ResourceStats::new(
at,
inherit_child_attrs,
parent_id.clone(),
));
let event = Event::Resource {
id: id.clone(),
parent_id,
metadata,
concrete_type,
kind,
location,
is_internal,
stats: stats.clone(),
};
(event, stats)
}) {
ctx.span(id)
.expect("if `on_new_span` was called, the span must exist; this is a `tracing` bug!")
.extensions_mut()
.insert(stats);
}
}
} else if self.is_async_op(metadata) {
let at = Instant::now();
let mut async_op_visitor = AsyncOpVisitor::default();
attrs.record(&mut async_op_visitor);
if let Some((source, inherit_child_attrs)) = async_op_visitor.result() {
// The owning resource is the most recently entered resource span.
let resource_id = self.current_spans.get().and_then(|stack| {
self.first_entered(&stack.borrow(), |id| self.is_id_resource(id, &ctx))
});
// The parent is the most recently entered async-op span, if any.
let parent_id = self.current_spans.get().and_then(|stack| {
self.first_entered(&stack.borrow(), |id| self.is_id_async_op(id, &ctx))
});
// An async op without an enclosing resource span is not recorded.
if let Some(resource_id) = resource_id {
if let Some(stats) =
self.send_stats(&self.shared.dropped_async_ops, move || {
let stats = Arc::new(stats::AsyncOpStats::new(
at,
inherit_child_attrs,
parent_id.clone(),
));
let event = Event::AsyncResourceOp {
id: id.clone(),
parent_id,
resource_id,
metadata,
source,
stats: stats.clone(),
};
(event, stats)
})
{
ctx.span(id)
.expect("if `on_new_span` was called, the span must exist; this is a `tracing` bug!")
.extensions_mut()
.insert(stats);
}
}
}
}
}
/// Handles waker events by recording the wake operation on the stats of
/// the task span the event refers to.
///
/// Poll-op events are recognised but currently not processed.
fn on_event(&self, event: &tracing::Event<'_>, ctx: Context<'_, S>) {
let metadata = event.metadata();
if self.waker_callsites.contains(metadata) {
let at = Instant::now();
let mut visitor = WakerVisitor::default();
event.record(&mut visitor);
if let Some((id, mut op)) = visitor.result() {
if let Some(span) = ctx.span(&id) {
let exts = span.extensions();
if let Some(stats) = exts.get::<Arc<stats::TaskStats>>() {
if op.is_wake() {
// A wake is a self-wake when the woken task's span is
// currently on the entered-span stack.
let self_wake = self
.current_spans
.get()
.map(|spans| spans.borrow().iter().any(|span| span == &id))
.unwrap_or(false);
op = op.self_wake(self_wake);
}
stats.record_wake_op(op, at);
}
}
}
} else if self.poll_op_callsites.contains(metadata) {
// TODO: poll-op events are not handled yet; this branch is intentionally empty.
}
}
/// Records the start of a poll for the entered span and its parent, and
/// pushes the span onto the `current_spans` stack.
///
/// Spans without task, async-op or resource stats are ignored.
fn on_enter(&self, id: &span::Id, cx: Context<'_, S>) {
// Starts a poll on `span` if it carries stats. Returns the timestamp
// used (reusing `at` when given), or `None` if the span is untracked.
fn update<S: Subscriber + for<'a> LookupSpan<'a>>(
span: &SpanRef<S>,
at: Option<Instant>,
) -> Option<Instant> {
let exts = span.extensions();
// if the span we are entering is a task or async op, record the
// poll stats.
if let Some(stats) = exts.get::<Arc<stats::TaskStats>>() {
let at = at.unwrap_or_else(Instant::now);
stats.start_poll(at);
Some(at)
} else if let Some(stats) = exts.get::<Arc<stats::AsyncOpStats>>() {
let at = at.unwrap_or_else(Instant::now);
stats.start_poll(at);
Some(at)
// otherwise, is the span a resource? in that case, we also want
// to enter it, although we don't care about recording poll
// stats.
} else if exts.get::<Arc<stats::ResourceStats>>().is_some() {
Some(at.unwrap_or_else(Instant::now))
} else {
None
}
}
if let Some(span) = cx.span(id) {
if let Some(now) = update(&span, None) {
// The parent shares the same timestamp as the span itself.
if let Some(parent) = span.parent() {
update(&parent, Some(now));
}
self.current_spans
.get_or_default()
.borrow_mut()
.push(id.clone());
}
}
}
/// Records the end of a poll for the exited span and its parent, and pops
/// the span from the `current_spans` stack.
///
/// Spans without task, async-op or resource stats are ignored.
fn on_exit(&self, id: &span::Id, cx: Context<'_, S>) {
// Ends a poll on `span` if it carries stats. Returns the timestamp
// used (reusing `at` when given), or `None` if the span is untracked.
fn update<S: Subscriber + for<'a> LookupSpan<'a>>(
span: &SpanRef<S>,
at: Option<Instant>,
) -> Option<Instant> {
let exts = span.extensions();
// if the span we are exiting is a task or async op, record the
// poll stats.
if let Some(stats) = exts.get::<Arc<stats::TaskStats>>() {
let at = at.unwrap_or_else(Instant::now);
stats.end_poll(at);
Some(at)
} else if let Some(stats) = exts.get::<Arc<stats::AsyncOpStats>>() {
let at = at.unwrap_or_else(Instant::now);
stats.end_poll(at);
Some(at)
// otherwise, is the span a resource? in that case, we also want
// to exit it, although we don't care about recording poll
// stats.
} else if exts.get::<Arc<stats::ResourceStats>>().is_some() {
Some(at.unwrap_or_else(Instant::now))
} else {
None
}
}
if let Some(span) = cx.span(id) {
if let Some(now) = update(&span, None) {
// The parent shares the same timestamp as the span itself.
if let Some(parent) = span.parent() {
update(&parent, Some(now));
}
self.current_spans.get_or_default().borrow_mut().pop(id);
}
}
}
/// Marks the stats attached to the closed span (task, async op or
/// resource) as dropped at the current instant.
fn on_close(&self, id: span::Id, cx: Context<'_, S>) {
if let Some(span) = cx.span(&id) {
let now = Instant::now();
let exts = span.extensions();
if let Some(stats) = exts.get::<Arc<stats::TaskStats>>() {
stats.drop_task(now);
} else if let Some(stats) = exts.get::<Arc<stats::AsyncOpStats>>() {
stats.drop_async_op(now);
} else if let Some(stats) = exts.get::<Arc<stats::ResourceStats>>() {
stats.drop_resource(now);
}
}
}
}

View File

@ -0,0 +1,223 @@
use crate::Aggregator;
use async_channel::{Receiver, Sender};
use async_compat::CompatExt;
use console_api::instrument;
use console_api::instrument::instrument_server::{Instrument, InstrumentServer};
use console_api::tasks;
use futures_util::TryStreamExt;
use std::error::Error;
use std::future::Future;
use std::io::IoSlice;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::AsyncRead as TokioAsyncRead;
use tokio::io::{AsyncWrite as TokioAsyncWrite, ReadBuf};
use tonic::transport::server::Connected;
use tonic::Status;
use tracing_core::span::Id;
/// Newtype around an I/O stream `T`, used as the carrier for the `Connected`
/// and tokio `AsyncRead`/`AsyncWrite` implementations in this module.
struct StreamWrapper<T>(T);
/// A wrapped stream exposes no per-connection metadata: its connect info is
/// the unit type.
impl<T> Connected for StreamWrapper<T> {
    type ConnectInfo = ();

    fn connect_info(&self) -> Self::ConnectInfo {}
}
/// Forwards every `AsyncWrite` operation unchanged to the wrapped stream.
impl<T: TokioAsyncWrite + Unpin> TokioAsyncWrite for StreamWrapper<T> {
    fn poll_write(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<std::io::Result<usize>> {
        // `StreamWrapper<T>` is `Unpin` because `T` is, so the pin can be
        // unwrapped and re-created around the inner stream.
        let inner = Pin::new(&mut self.get_mut().0);
        <T as TokioAsyncWrite>::poll_write(inner, cx, buf)
    }

    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
        let inner = Pin::new(&mut self.get_mut().0);
        <T as TokioAsyncWrite>::poll_flush(inner, cx)
    }

    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
        let inner = Pin::new(&mut self.get_mut().0);
        <T as TokioAsyncWrite>::poll_shutdown(inner, cx)
    }

    fn poll_write_vectored(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &[IoSlice<'_>],
    ) -> Poll<std::io::Result<usize>> {
        let inner = Pin::new(&mut self.get_mut().0);
        <T as TokioAsyncWrite>::poll_write_vectored(inner, cx, bufs)
    }

    fn is_write_vectored(&self) -> bool {
        let Self(inner) = self;
        <T as TokioAsyncWrite>::is_write_vectored(inner)
    }
}
/// Forwards `AsyncRead::poll_read` unchanged to the wrapped stream.
impl<T: TokioAsyncRead + Unpin> TokioAsyncRead for StreamWrapper<T> {
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<std::io::Result<()>> {
        // `StreamWrapper<T>` is `Unpin` because `T` is, so the pin can be
        // unwrapped and re-created around the inner stream.
        let inner = Pin::new(&mut self.get_mut().0);
        <T as TokioAsyncRead>::poll_read(inner, cx, buf)
    }
}
/// The gRPC side of the console: implements the `Instrument` service and
/// forwards client requests to the aggregator as `Command`s.
#[derive(Debug)]
pub struct Server {
/// The aggregator handed to `Server::new`; stored as `Some` on construction.
// NOTE(review): presumably taken out by whoever spawns the aggregator task; confirm at the call site.
pub aggregator: Option<Aggregator>,
/// Capacity used for the per-client update channels created by the watch
/// RPCs.
client_buffer_size: usize,
/// Sending half of the command channel.
subscribe: Sender<Command>,
}
impl Server {
    //pub(crate) const DEFAULT_ADDR: IpAddr = IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1));
    /// Default listen address: the IPv4 loopback address `127.0.0.1`.
    pub(crate) const DEFAULT_ADDR: IpAddr = IpAddr::V4(Ipv4Addr::LOCALHOST);
    /// Default listen port.
    pub(crate) const DEFAULT_PORT: u16 = 49289;

    /// Creates a server that owns `aggregator`, sends commands over
    /// `subscribe`, and gives every client stream a buffer of
    /// `client_buffer_size` entries.
    pub(crate) fn new(
        aggregator: Aggregator,
        client_buffer_size: usize,
        subscribe: Sender<Command>,
    ) -> Self {
        Self {
            aggregator: Some(aggregator),
            client_buffer_size,
            subscribe,
        }
    }

    /// Serves the `Instrument` gRPC service on
    /// `DEFAULT_ADDR:DEFAULT_PORT` until the transport stops.
    ///
    /// # Errors
    ///
    /// Returns the transport error if the tonic server fails.
    pub async fn serve(
        self, /*, incoming: I */
    ) -> Result<(), Box<dyn Error + Send + Sync + 'static>> {
        let svc = InstrumentServer::new(self);
        tonic::transport::Server::builder()
            .add_service(svc)
            .serve(SocketAddr::new(Self::DEFAULT_ADDR, Self::DEFAULT_PORT))
            // Adapt the tokio-based tonic future to the surrounding executor.
            .compat()
            .await?;
        // TODO: Kill the aggregator task if the serve task has ended.
        Ok(())
    }
}
/// Sending half of a client's update stream.
#[derive(Debug)]
pub(crate) struct Watch<T>(pub(crate) Sender<Result<T, tonic::Status>>);

impl<T: Clone> Watch<T> {
    /// Tries to queue a clone of `update` for the client without blocking.
    ///
    /// Returns `true` if the update was queued, `false` if the non-blocking
    /// send failed.
    pub fn update(&self, update: &T) -> bool {
        let Self(tx) = self;
        matches!(tx.try_send(Ok(update.clone())), Ok(()))
    }
}
/// A request to watch the details of a single span, sent to the aggregator
/// inside `Command::WatchTaskDetail`.
#[derive(Debug)]
pub(crate) struct WatchRequest<T> {
/// ID of the span whose details are requested.
pub id: Id,
/// One-shot channel on which the receiving end of the detail stream is
/// handed back to the requester.
pub stream_sender: async_oneshot::Sender<Receiver<Result<T, tonic::Status>>>,
/// Requested buffer size; the server passes its `client_buffer_size` here.
pub buffer: usize,
}
/// Commands the `Server` sends over its `subscribe` channel.
#[derive(Debug)]
pub(crate) enum Command {
/// Register a new watcher for `instrument::Update`s.
Instrument(Watch<instrument::Update>),
/// Request a stream of `tasks::TaskDetails` for one task.
WatchTaskDetail(WatchRequest<tasks::TaskDetails>),
/// Sent by the `pause` RPC.
Pause,
/// Sent by the `resume` RPC.
Resume,
}
/// gRPC `Instrument` service: every RPC is translated into a `Command` that
/// is sent over the `subscribe` channel.
#[tonic::async_trait]
impl Instrument for Server {
    type WatchUpdatesStream = async_channel::Receiver<Result<instrument::Update, Status>>;
    type WatchTaskDetailsStream = async_channel::Receiver<Result<tasks::TaskDetails, Status>>;

    /// Starts a new stream of instrumentation updates for the calling client.
    ///
    /// # Errors
    ///
    /// Returns an `internal` status if the command channel is full or the
    /// command could not be delivered.
    async fn watch_updates(
        &self,
        request: tonic::Request<instrument::InstrumentRequest>,
    ) -> Result<tonic::Response<Self::WatchUpdatesStream>, tonic::Status> {
        match request.remote_addr() {
            Some(addr) => tracing::debug!(client.addr = %addr, "starting a new watch"),
            None => tracing::debug!(client.addr = %"<unknown>", "starting a new watch"),
        }
        // NOTE(review): a full channel is reported as "not running"; the
        // receiver may merely be lagging.
        if self.subscribe.is_full() {
            return Err(tonic::Status::internal(
                "cannot start new watch, aggregation task is not running",
            ));
        }
        let (tx, rx) = async_channel::bounded(self.client_buffer_size);
        // A failed send means the receiving side is gone; report that instead
        // of handing the client a stream that will never yield anything.
        self.subscribe
            .send(Command::Instrument(Watch(tx)))
            .await
            .map_err(|_| {
                tonic::Status::internal("cannot start new watch, aggregation task is not running")
            })?;
        tracing::debug!("watch started");
        Ok(tonic::Response::new(rx))
    }

    /// Starts a stream of detail updates for the task named in the request.
    ///
    /// # Errors
    ///
    /// * `invalid_argument` if the task ID is missing or zero.
    /// * `internal` if the command channel is full or the command could not
    ///   be delivered.
    /// * `not_found` if no stream is handed back for the requested task.
    async fn watch_task_details(
        &self,
        request: tonic::Request<instrument::TaskDetailsRequest>,
    ) -> Result<tonic::Response<Self::WatchTaskDetailsStream>, tonic::Status> {
        let task_id = request
            .into_inner()
            .id
            .ok_or_else(|| tonic::Status::invalid_argument("missing task_id"))?
            .id;
        // `tracing` reserves span ID 0 for niche optimization for `Option<Id>`.
        let id = std::num::NonZeroU64::new(task_id)
            .map(Id::from_non_zero_u64)
            .ok_or_else(|| tonic::Status::invalid_argument("task_id cannot be 0"))?;
        // NOTE(review): a full channel is reported as "not running"; the
        // receiver may merely be lagging.
        if self.subscribe.is_full() {
            return Err(tonic::Status::internal(
                "cannot start new watch, aggregation task is not running",
            ));
        }
        // Check with the aggregator task to request a stream if the task exists.
        let (stream_sender, stream_recv) = async_oneshot::oneshot();
        // Without this check a closed command channel would surface below as
        // a misleading "task not found".
        self.subscribe
            .send(Command::WatchTaskDetail(WatchRequest {
                id,
                stream_sender,
                buffer: self.client_buffer_size,
            }))
            .await
            .map_err(|_| {
                tonic::Status::internal("cannot start new watch, aggregation task is not running")
            })?;
        // If the aggregator drops the sender, the task doesn't exist.
        let rx = stream_recv.await.map_err(|_| {
            tracing::warn!(id = ?task_id, "requested task not found");
            tonic::Status::not_found("task not found")
        })?;
        tracing::debug!(id = ?task_id, "task details watch started");
        Ok(tonic::Response::new(rx))
    }

    /// Sends `Command::Pause` over the command channel.
    ///
    /// # Errors
    ///
    /// Returns an `internal` status if the command could not be delivered.
    async fn pause(
        &self,
        _request: tonic::Request<instrument::PauseRequest>,
    ) -> Result<tonic::Response<instrument::PauseResponse>, tonic::Status> {
        self.subscribe.send(Command::Pause).await.map_err(|_| {
            tonic::Status::internal("cannot pause, aggregation task is not running")
        })?;
        Ok(tonic::Response::new(instrument::PauseResponse {}))
    }

    /// Sends `Command::Resume` over the command channel.
    ///
    /// # Errors
    ///
    /// Returns an `internal` status if the command could not be delivered.
    async fn resume(
        &self,
        _request: tonic::Request<instrument::ResumeRequest>,
    ) -> Result<tonic::Response<instrument::ResumeResponse>, tonic::Status> {
        self.subscribe.send(Command::Resume).await.map_err(|_| {
            tonic::Status::internal("cannot resume, aggregation task is not running")
        })?;
        Ok(tonic::Response::new(instrument::ResumeResponse {}))
    }
}

View File

@ -0,0 +1,64 @@
use tracing_core::span::Id;
// This has been copied from tracing-subscriber. Once the library adds
// the ability to iterate over entered spans, this code will
// no longer be needed here
//
// https://github.com/tokio-rs/tracing/blob/master/tracing-subscriber/src/registry/stack.rs
/// One entry of a `SpanStack`: a span ID plus a flag telling whether the
/// same ID was already on the stack when this entry was pushed.
#[derive(Debug, Clone)]
pub(crate) struct ContextId {
// The ID of the entered span.
id: Id,
// `true` if an entry with the same ID already existed at push time.
duplicate: bool,
}
impl ContextId {
/// Returns the span ID of this entry.
pub fn id(&self) -> &Id {
&self.id
}
}
/// `SpanStack` tracks what spans are currently executing on a thread-local basis.
///
/// A "separate current span" for each thread is a semantic choice, as each span
/// can be executing in a different thread.
#[derive(Debug, Default)]
pub(crate) struct SpanStack {
    stack: Vec<ContextId>,
}

impl SpanStack {
    /// Push a span onto the stack.
    ///
    /// Returns `true` if the span was not already on the stack, i.e. this is
    /// the first (non-duplicate) entry for `id`.
    #[inline]
    pub(crate) fn push(&mut self, id: Id) -> bool {
        let already_entered = self.stack.iter().any(|entry| entry.id == id);
        self.stack.push(ContextId {
            id,
            duplicate: already_entered,
        });
        !already_entered
    }

    /// Pop a currently entered span.
    ///
    /// Returns `true` if the span was actually exited.
    #[inline]
    pub(crate) fn pop(&mut self, expected_id: &Id) -> bool {
        // Search from the top of the stack so the most recent entry for the
        // ID is the one that gets removed.
        match self
            .stack
            .iter()
            .rposition(|entry| entry.id == *expected_id)
        {
            Some(idx) => !self.stack.remove(idx).duplicate,
            None => false,
        }
    }

    /// Iterates over the IDs of all non-duplicate entries, oldest first.
    pub(crate) fn iter(&self) -> impl Iterator<Item = &Id> {
        self.stack
            .iter()
            .filter(|entry| !entry.duplicate)
            .map(|entry| &entry.id)
    }

    /// Returns the raw entries, duplicates included, oldest first.
    pub(crate) fn stack(&self) -> &Vec<ContextId> {
        &self.stack
    }
}

View File

@ -0,0 +1,610 @@
use crate::id_map::ToProto;
use crate::{attribute, event};
use crossbeam_utils::atomic::AtomicCell;
use hdrhistogram::serialization::{Serializer, V2Serializer};
use std::cmp;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant, SystemTime};
use tracing::span;
/// A type which records whether it has unsent updates.
///
/// If something implementing this trait has been changed since the last time
/// data was sent to a client, it will indicate that it is "dirty". If it has
/// not been changed, it does not have to be included in the current update.
///
/// Both methods take `&self`, so implementations need interior mutability
/// for the flag.
pub(crate) trait Unsent {
/// Returns `true` if this type has unsent updates, and if it does, clears
/// the flag indicating there are unsent updates.
///
/// This is called when filtering which stats need to be included in the
/// current update. If this returns `true`, it will be included, so it
/// becomes no longer dirty.
fn take_unsent(&self) -> bool;
/// Returns `true` if this type has unsent updates, without changing the
/// flag.
fn is_unsent(&self) -> bool;
}
/// An entity (e.g. a task or a resource) that can be dropped at some point
/// in time.
///
/// This generally refers to spans that have been closed, indicating that a
/// task, async op or resource is not in use anymore.
pub(crate) trait DroppedAt {
/// Returns the instant at which the entity was dropped, or `None` if it
/// has not been dropped.
fn dropped_at(&self) -> Option<Instant>;
}
/// An `Arc<T>` reports the drop time of the value it points to.
impl<T: DroppedAt> DroppedAt for Arc<T> {
    fn dropped_at(&self) -> Option<Instant> {
        let inner: &T = self;
        inner.dropped_at()
    }
}

/// An `Arc<T>` shares the dirty flag of the value it points to.
impl<T: Unsent> Unsent for Arc<T> {
    fn take_unsent(&self) -> bool {
        let inner: &T = self;
        inner.take_unsent()
    }

    fn is_unsent(&self) -> bool {
        let inner: &T = self;
        inner.is_unsent()
    }
}

/// An `Arc<T>` converts to the same protobuf message as the value it points to.
impl<T: ToProto> ToProto for Arc<T> {
    type Output = T::Output;

    fn to_proto(&self, base_time: &TimeAnchor) -> T::Output {
        let inner: &T = self;
        inner.to_proto(base_time)
    }
}
/// Anchors an `Instant` with a `SystemTime` timestamp to allow converting
/// monotonic `Instant`s into timestamps that can be sent over the wire.
#[derive(Debug, Clone)]
pub(crate) struct TimeAnchor {
mono: Instant,
sys: SystemTime,
}
impl TimeAnchor {
pub(crate) fn new() -> Self {
Self {
mono: Instant::now(),
sys: SystemTime::now(),
}
}
pub(crate) fn to_system_time(&self, t: Instant) -> SystemTime {
let dur = t
.checked_duration_since(self.mono)
.unwrap_or_else(|| Duration::from_secs(0));
self.sys + dur
}
pub(crate) fn to_timestamp(&self, t: Instant) -> prost_types::Timestamp {
self.to_system_time(t).into()
}
}
/// A sink for the measured duration of completed polls.
trait RecordPoll {
/// Records the duration of one finished poll.
fn record_poll_duration(&mut self, duration: Duration);
}
/// Poll bookkeeping shared by tasks and async ops.
///
/// `H` is the type stored in the `histogram` field of the timestamps.
#[derive(Debug, Default)]
struct PollStats<H> {
/// The number of polls in progress
current_polls: AtomicUsize,
/// The total number of polls
polls: AtomicUsize,
/// Poll timestamps, busy time and the histogram, guarded by one lock.
timestamps: Mutex<PollTimestamps<H>>,
}
impl<H: RecordPoll> PollStats<H> {
    /// Registers that a poll started at `at`.
    ///
    /// Only the transition from zero to one in-progress polls updates the
    /// timestamps and the total poll count; nested starts merely bump the
    /// in-progress counter.
    fn start_poll(&self, at: Instant) {
        if self.current_polls.fetch_add(1, Ordering::AcqRel) == 0 {
            // We are starting the first poll
            let mut timestamps = self.timestamps.lock().unwrap();
            if timestamps.first_poll.is_none() {
                timestamps.first_poll = Some(at);
            }
            timestamps.last_poll_started = Some(at);
            self.polls.fetch_add(1, Ordering::Release);
        }
    }

    /// Registers that a poll ended at `at`.
    ///
    /// Only the end of the last in-progress poll records the end timestamp,
    /// the elapsed duration and the busy time. Inconsistent timestamps are
    /// reported on stderr and otherwise ignored.
    fn end_poll(&self, at: Instant) {
        // Are we ending the last current poll?
        if self.current_polls.fetch_sub(1, Ordering::AcqRel) > 1 {
            return;
        }
        let mut timestamps = self.timestamps.lock().unwrap();
        let started = match timestamps.last_poll_started {
            Some(last_poll) => last_poll,
            None => {
                eprintln!(
                    "a poll ended, but no start timestamp was recorded. \
                     this is probably a `console-subscriber` bug"
                );
                return;
            }
        };
        timestamps.last_poll_ended = Some(at);
        let elapsed = match at.checked_duration_since(started) {
            Some(elapsed) => elapsed,
            None => {
                eprintln!(
                    "possible Instant clock skew detected: a poll's end timestamp \
                     was before its start timestamp\nstart = {:?}\n  end = {:?}",
                    started, at
                );
                return;
            }
        };
        // if we have a poll time histogram, add the timestamp
        timestamps.histogram.record_poll_duration(elapsed);
        timestamps.busy_time += elapsed;
    }
}
impl<H> ToProto for PollStats<H> {
type Output = console_api::PollStats;
fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output {
let timestamps = self.timestamps.lock().unwrap();
console_api::PollStats {
polls: self.polls.load(Ordering::Acquire) as u64,
first_poll: timestamps.first_poll.map(|at| base_time.to_timestamp(at)),
last_poll_started: timestamps
.last_poll_started
.map(|at| base_time.to_timestamp(at)),
last_poll_ended: timestamps
.last_poll_ended
.map(|at| base_time.to_timestamp(at)),
busy_time: Some(timestamps.busy_time.into()),
}
}
}
/// Stats associated with a task.
#[derive(Debug)]
pub(crate) struct TaskStats {
// Set whenever the stats change; cleared by `Unsent::take_unsent`.
is_dirty: AtomicBool,
// Set once by `drop_task`.
is_dropped: AtomicBool,
// task stats
pub(crate) created_at: Instant,
// Drop and last-wake timestamps.
timestamps: Mutex<TaskTimestamps>,
// waker stats
wakes: AtomicUsize,
waker_clones: AtomicUsize,
waker_drops: AtomicUsize,
self_wakes: AtomicUsize,
/// Poll durations and other stats.
poll_stats: PollStats<Histogram>,
}
impl TaskStats {
    /// Creates the stats for a task created at `created_at`.
    ///
    /// `poll_duration_max` is the maximum value of the poll duration
    /// histogram. New stats start out dirty, with one recorded waker clone.
    pub(crate) fn new(poll_duration_max: u64, created_at: Instant) -> Self {
        Self {
            is_dirty: AtomicBool::new(true),
            is_dropped: AtomicBool::new(false),
            created_at,
            timestamps: Mutex::new(TaskTimestamps::default()),
            poll_stats: PollStats {
                timestamps: Mutex::new(PollTimestamps {
                    histogram: Histogram::new(poll_duration_max),
                    first_poll: None,
                    last_poll_started: None,
                    last_poll_ended: None,
                    busy_time: Duration::new(0, 0),
                }),
                current_polls: AtomicUsize::new(0),
                polls: AtomicUsize::new(0),
            },
            wakes: AtomicUsize::new(0),
            waker_clones: AtomicUsize::new(1),
            waker_drops: AtomicUsize::new(0),
            self_wakes: AtomicUsize::new(0),
        }
    }

    /// Returns the poll duration histogram in its protobuf representation.
    pub(crate) fn poll_duration_histogram(
        &self,
    ) -> console_api::tasks::task_details::PollTimesHistogram {
        let hist = self
            .poll_stats
            .timestamps
            .lock()
            .unwrap()
            .histogram
            .to_proto();
        console_api::tasks::task_details::PollTimesHistogram::Histogram(hist)
    }

    /// Records a waker operation observed at `at` and marks the stats dirty.
    pub(crate) fn record_wake_op(&self, op: event::WakeOp, at: Instant) {
        use event::WakeOp;
        match op {
            WakeOp::Wake { self_wake } | WakeOp::WakeByRef { self_wake } => {
                self.wake(at, self_wake);
            }
            WakeOp::Clone => {
                self.waker_clones.fetch_add(1, Ordering::Release);
            }
            WakeOp::Drop => {
                self.waker_drops.fetch_add(1, Ordering::Release);
            }
        }
        self.make_dirty();
    }

    /// Records a wake at `at`.
    ///
    /// Every wake counts towards `wakes`; a self-wake additionally counts
    /// towards `self_wakes`.
    fn wake(&self, at: Instant, self_wake: bool) {
        let mut timestamps = self.timestamps.lock().unwrap();
        // Never move the last-wake timestamp backwards.
        timestamps.last_wake = cmp::max(timestamps.last_wake, Some(at));
        if self_wake {
            self.self_wakes.fetch_add(1, Ordering::Release);
        }
        self.wakes.fetch_add(1, Ordering::Release);
    }

    /// Records the start of a poll and marks the stats dirty.
    pub(crate) fn start_poll(&self, at: Instant) {
        self.poll_stats.start_poll(at);
        self.make_dirty();
    }

    /// Records the end of a poll and marks the stats dirty.
    pub(crate) fn end_poll(&self, at: Instant) {
        self.poll_stats.end_poll(at);
        self.make_dirty();
    }

    /// Marks the task as dropped at `dropped_at`.
    ///
    /// Only the first call has an effect; later calls return immediately.
    pub(crate) fn drop_task(&self, dropped_at: Instant) {
        if self.is_dropped.swap(true, Ordering::AcqRel) {
            // The task was already dropped.
            // TODO(eliza): this could maybe panic in debug mode...
            return;
        }
        let mut timestamps = self.timestamps.lock().unwrap();
        let _prev = timestamps.dropped_at.replace(dropped_at);
        debug_assert_eq!(_prev, None, "tried to drop a task twice; this is a bug!");
        self.make_dirty();
    }

    /// Flags the stats as having unsent changes.
    fn make_dirty(&self) {
        self.is_dirty.swap(true, Ordering::AcqRel);
    }
}
impl ToProto for TaskStats {
type Output = console_api::tasks::Stats;
fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output {
let poll_stats = Some(self.poll_stats.to_proto(base_time));
let timestamps = self.timestamps.lock().unwrap();
console_api::tasks::Stats {
poll_stats,
created_at: Some(base_time.to_timestamp(self.created_at)),
dropped_at: timestamps.dropped_at.map(|at| base_time.to_timestamp(at)),
wakes: self.wakes.load(Ordering::Acquire) as u64,
waker_clones: self.waker_clones.load(Ordering::Acquire) as u64,
self_wakes: self.self_wakes.load(Ordering::Acquire) as u64,
waker_drops: self.waker_drops.load(Ordering::Acquire) as u64,
last_wake: timestamps.last_wake.map(|at| base_time.to_timestamp(at)),
}
}
}
/// Dirty-flag handling for task stats.
impl Unsent for TaskStats {
    #[inline]
    fn take_unsent(&self) -> bool {
        // Read and clear the flag in one atomic step.
        let was_dirty = self.is_dirty.swap(false, Ordering::AcqRel);
        was_dirty
    }

    fn is_unsent(&self) -> bool {
        let dirty = self.is_dirty.load(Ordering::Acquire);
        dirty
    }
}

/// Drop-time lookup for task stats.
impl DroppedAt for TaskStats {
    fn dropped_at(&self) -> Option<Instant> {
        // avoid acquiring the lock if we know we haven't tried to drop this
        // thing yet
        if !self.is_dropped.load(Ordering::Acquire) {
            return None;
        }
        self.timestamps.lock().unwrap().dropped_at
    }
}
/// Stats associated with an async operation.
///
/// This shares all of the same fields as [`ResourceStats`], with the addition
/// of [`PollStats`] tracking when the async operation is polled, and the task
/// ID of the last task to poll the async op.
#[derive(Debug)]
pub(crate) struct AsyncOpStats {
/// The task ID of the last task to poll this async op.
///
/// This is set every time the async op is polled, in case a future is
/// passed between tasks.
///
/// A stored value of `0` means that no task ID has been set.
task_id: AtomicCell<u64>,
/// Fields shared with `ResourceStats`.
pub(crate) stats: ResourceStats,
/// Poll durations and other stats.
poll_stats: PollStats<()>,
}
impl AsyncOpStats {
    /// Creates the stats for an async op created at `created_at`, with no
    /// task ID set yet.
    pub(crate) fn new(
        created_at: Instant,
        inherit_child_attributes: bool,
        parent_id: Option<span::Id>,
    ) -> Self {
        let stats = ResourceStats::new(created_at, inherit_child_attributes, parent_id);
        Self {
            task_id: AtomicCell::new(0),
            stats,
            poll_stats: PollStats::default(),
        }
    }

    /// Returns the ID of the last task recorded for this async op, or `None`
    /// if no task ID has been set.
    pub(crate) fn task_id(&self) -> Option<u64> {
        // Zero is the "unset" marker.
        match self.task_id.load() {
            0 => None,
            id => Some(id),
        }
    }

    /// Stores `id` as the polling task and marks the stats dirty.
    pub(crate) fn set_task_id(&self, id: &tracing::span::Id) {
        let raw = id.into_u64();
        self.task_id.store(raw);
        self.make_dirty();
    }

    /// Marks the async op as dropped at `dropped_at`.
    pub(crate) fn drop_async_op(&self, dropped_at: Instant) {
        self.stats.drop_resource(dropped_at)
    }

    /// Records the start of a poll and marks the stats dirty.
    pub(crate) fn start_poll(&self, at: Instant) {
        self.poll_stats.start_poll(at);
        self.make_dirty();
    }

    /// Records the end of a poll and marks the stats dirty.
    pub(crate) fn end_poll(&self, at: Instant) {
        self.poll_stats.end_poll(at);
        self.make_dirty();
    }

    /// Flags the shared resource stats as having unsent changes.
    #[inline]
    fn make_dirty(&self) {
        self.stats.make_dirty()
    }
}
impl Unsent for AsyncOpStats {
#[inline]
fn take_unsent(&self) -> bool {
self.stats.take_unsent()
}
#[inline]
fn is_unsent(&self) -> bool {
self.stats.is_unsent()
}
}
impl DroppedAt for AsyncOpStats {
fn dropped_at(&self) -> Option<Instant> {
self.stats.dropped_at()
}
}
/// Converts the async-op stats into their protobuf representation.
impl ToProto for AsyncOpStats {
    type Output = console_api::async_ops::Stats;

    fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output {
        let shared = &self.stats;
        // Snapshot the attribute values while holding the lock only briefly.
        let attributes = shared.attributes.lock().unwrap().values().cloned().collect();
        let poll_stats = Some(self.poll_stats.to_proto(base_time));
        let created_at = Some(base_time.to_timestamp(shared.created_at));
        let dropped_at = shared
            .dropped_at
            .lock()
            .unwrap()
            .map(|at| base_time.to_timestamp(at));
        let task_id = self.task_id().map(Into::into);

        console_api::async_ops::Stats {
            poll_stats,
            created_at,
            dropped_at,
            task_id,
            attributes,
        }
    }
}
/// Stats associated with a resource.
#[derive(Debug)]
pub(crate) struct ResourceStats {
// Set whenever the stats change; cleared by `Unsent::take_unsent`.
is_dirty: AtomicBool,
// Set once by `drop_resource`.
is_dropped: AtomicBool,
// When the resource was created.
created_at: Instant,
// When the resource was dropped, if it has been.
dropped_at: Mutex<Option<Instant>>,
// The resource's attributes, changed through `update_attribute`.
attributes: Mutex<attribute::Attributes>,
/// Value of the `inherit_child_attributes` argument passed to `new`.
pub(crate) inherit_child_attributes: bool,
/// ID of the parent span passed to `new`, if any.
pub(crate) parent_id: Option<span::Id>,
}
impl ResourceStats {
    /// Creates the stats for a resource created at `created_at`.
    ///
    /// New stats start out dirty, not dropped, and with default attributes.
    pub(crate) fn new(
        created_at: Instant,
        inherit_child_attributes: bool,
        parent_id: Option<span::Id>,
    ) -> Self {
        let is_dirty = AtomicBool::new(true);
        let is_dropped = AtomicBool::new(false);
        Self {
            is_dirty,
            is_dropped,
            created_at,
            dropped_at: Mutex::new(None),
            attributes: Default::default(),
            inherit_child_attributes,
            parent_id,
        }
    }

    /// Applies `update` to the attributes under `id` and marks the stats
    /// dirty.
    pub(crate) fn update_attribute(&self, id: &span::Id, update: &attribute::Update) {
        self.attributes.lock().unwrap().update(id, update);
        self.make_dirty();
    }

    /// Marks the resource as dropped at `dropped_at`.
    ///
    /// Only the first call has an effect; later calls return immediately.
    #[inline]
    pub(crate) fn drop_resource(&self, dropped_at: Instant) {
        let already_dropped = self.is_dropped.swap(true, Ordering::AcqRel);
        if already_dropped {
            // The resource/async op was already dropped.
            // TODO(eliza): this could maybe panic in debug mode...
            return;
        }
        let mut slot = self.dropped_at.lock().unwrap();
        let previous = slot.replace(dropped_at);
        debug_assert_eq!(
            previous, None,
            "tried to drop a resource/async op twice; this is a bug!"
        );
        self.make_dirty();
    }

    /// Flags the stats as having unsent changes.
    #[inline]
    fn make_dirty(&self) {
        self.is_dirty.swap(true, Ordering::AcqRel);
    }
}
/// Dirty-flag handling for resource stats.
impl Unsent for ResourceStats {
    #[inline]
    fn take_unsent(&self) -> bool {
        // Read and clear the flag in one atomic step.
        let was_dirty = self.is_dirty.swap(false, Ordering::AcqRel);
        was_dirty
    }

    fn is_unsent(&self) -> bool {
        let dirty = self.is_dirty.load(Ordering::Acquire);
        dirty
    }
}

/// Drop-time lookup for resource stats.
impl DroppedAt for ResourceStats {
    fn dropped_at(&self) -> Option<Instant> {
        // avoid acquiring the lock if we know we haven't tried to drop this
        // thing yet
        if !self.is_dropped.load(Ordering::Acquire) {
            return None;
        }
        *self.dropped_at.lock().unwrap()
    }
}
/// Converts the resource stats into their protobuf representation.
impl ToProto for ResourceStats {
    type Output = console_api::resources::Stats;

    fn to_proto(&self, base_time: &TimeAnchor) -> Self::Output {
        // Snapshot the attribute values while holding the lock only briefly.
        let attributes = self.attributes.lock().unwrap().values().cloned().collect();
        let created_at = Some(base_time.to_timestamp(self.created_at));
        let dropped_at = self
            .dropped_at
            .lock()
            .unwrap()
            .map(|at| base_time.to_timestamp(at));

        console_api::resources::Stats {
            created_at,
            dropped_at,
            attributes,
        }
    }
}
/// Timestamps of a task that are updated together under one lock.
#[derive(Debug, Default)]
struct TaskTimestamps {
// When the task was dropped, if it has been.
dropped_at: Option<Instant>,
// The latest wake recorded for the task, if any.
last_wake: Option<Instant>,
}
/// Poll timing data that is updated together under one lock.
#[derive(Debug, Default)]
struct PollTimestamps<H> {
// Start of the very first poll.
first_poll: Option<Instant>,
// Start of the most recent poll.
last_poll_started: Option<Instant>,
// End of the most recent poll.
last_poll_ended: Option<Instant>,
// Sum of the durations of all completed polls.
busy_time: Duration,
// Receives the duration of every completed poll.
histogram: H,
}
/// A poll duration histogram with an upper bound; durations above the bound
/// are clamped and counted as outliers.
#[derive(Debug)]
struct Histogram {
// The underlying histogram of (clamped) durations in nanoseconds.
histogram: hdrhistogram::Histogram<u64>,
// The maximum value that is recorded without clamping.
max: u64,
// Number of recorded durations that exceeded `max`.
outliers: u64,
// The largest duration that exceeded `max`, if any.
max_outlier: Option<u64>,
}
impl Histogram {
    /// Creates an empty histogram whose largest recordable value is `max`.
    ///
    /// # Panics
    ///
    /// Panics if the underlying histogram cannot be created for `max`.
    fn new(max: u64) -> Self {
        Self {
            // significant figures should be in the [0-5] range and memory usage
            // grows exponentially with higher a sigfig
            histogram: hdrhistogram::Histogram::new_with_max(max, 2).unwrap(),
            max,
            outliers: 0,
            max_outlier: None,
        }
    }

    /// Serializes the histogram (V2 format) together with its outlier
    /// bookkeeping into the protobuf representation.
    fn to_proto(&self) -> console_api::tasks::DurationHistogram {
        let mut raw_histogram = Vec::new();
        V2Serializer::new()
            .serialize(&self.histogram, &mut raw_histogram)
            .expect("histogram failed to serialize");
        let (max_value, high_outliers, highest_outlier) =
            (self.max, self.outliers, self.max_outlier);
        console_api::tasks::DurationHistogram {
            raw_histogram,
            max_value,
            high_outliers,
            highest_outlier,
        }
    }
}
/// Records poll durations in nanoseconds, clamping outliers to the maximum.
impl RecordPoll for Histogram {
    fn record_poll_duration(&mut self, duration: Duration) {
        let raw_ns = duration.as_nanos() as u64;
        if raw_ns > self.max {
            // Remember how often, and by how much at most, the bound was
            // exceeded.
            self.outliers += 1;
            self.max_outlier = self.max_outlier.max(Some(raw_ns));
        }
        // clamp the duration to the histogram's max value
        let clamped_ns = raw_ns.min(self.max);
        self.histogram
            .record(clamped_ns)
            .expect("duration has already been clamped to histogram max value")
    }
}

/// The unit type is the "no histogram" recorder: durations are discarded.
impl RecordPoll for () {
    fn record_poll_duration(&mut self, _: Duration) {}
}

View File

@ -0,0 +1,535 @@
use crate::{attribute, event};
use console_api::resources::resource;
use tracing::{field, span};
use tracing_core::field::Visit;
// Field names of the source-location fields (`loc.file`, `loc.line`,
// `loc.col`) that spans may carry.
const LOCATION_FILE: &str = "loc.file";
const LOCATION_LINE: &str = "loc.line";
const LOCATION_COLUMN: &str = "loc.col";
// Field name of the `inherits_child_attrs` flag.
const INHERIT_FIELD_NAME: &str = "inherits_child_attrs";
/// Used to extract the fields needed to construct
/// an Event::Resource from the metadata of a tracing span
/// that has the following shape:
///
/// tracing::trace_span!(
/// "runtime.resource",
/// concrete_type = "Sleep",
/// kind = "timer",
/// is_internal = true,
/// inherits_child_attrs = true,
/// );
///
/// Fields:
/// concrete_type - indicates the concrete rust type for this resource
/// kind - indicates the type of resource (i.e. timer, sync, io )
/// is_internal - whether this is a resource type that is not exposed publicly (i.e. BatchSemaphore)
/// inherits_child_attrs - whether this resource should inherit the state attributes of its children
#[derive(Default)]
pub(crate) struct ResourceVisitor {
concrete_type: Option<String>,
kind: Option<resource::Kind>,
is_internal: bool,
inherit_child_attrs: bool,
// Source location parts.
// NOTE(review): presumably filled from the `loc.line`, `loc.file` and `loc.col` fields; confirm in the `Visit` impl.
line: Option<u32>,
file: Option<String>,
column: Option<u32>,
}
/// The values extracted by a `ResourceVisitor`, with the mandatory ones
/// (`concrete_type`, `kind`) no longer optional.
pub(crate) struct ResourceVisitorResult {
/// The concrete Rust type of the resource.
pub(crate) concrete_type: String,
/// The kind of resource.
pub(crate) kind: resource::Kind,
/// Source location of the resource, if one is available.
pub(crate) location: Option<console_api::Location>,
/// Whether the resource type is not exposed publicly.
pub(crate) is_internal: bool,
/// Whether the resource inherits the state attributes of its children.
pub(crate) inherit_child_attrs: bool,
}
/// Used to extract all fields from the metadata
/// of a tracing span
pub(crate) struct FieldVisitor {
// The fields collected so far.
fields: Vec<console_api::Field>,
// NOTE(review): presumably the ID of the metadata the fields belong to; confirm at the construction site.
meta_id: console_api::MetaId,
}
/// Used to extract the fields needed to construct
/// an `Event::Spawn` from the metadata of a tracing span
/// that has the following shape:
///
/// ```
/// tracing::trace_span!(
/// target: "tokio::task",
/// "runtime.spawn",
/// kind = "local",
/// task.name = "some_name",
/// loc.file = "some_file.rs",
/// loc.line = 555,
/// loc.col = 5,
/// );
/// ```
///
/// # Fields
///
/// This visitor has special behavior for `loc.line`, `loc.file`, and `loc.col`
/// fields, which are interpreted as a Rust source code location where the task
/// was spawned, if they are present. Other fields are recorded as arbitrary
/// key-value pairs.
pub(crate) struct TaskVisitor {
// Collects the fields that are recorded as arbitrary key-value pairs.
field_visitor: FieldVisitor,
// Value of the `loc.line` field, if present.
line: Option<u32>,
// Value of the `loc.file` field, if present.
file: Option<String>,
// Value of the `loc.col` field, if present.
column: Option<u32>,
}
/// Used to extract the fields needed to construct
/// an Event::AsyncOp from the metadata of a tracing span
/// that has the following shape:
///
/// tracing::trace_span!(
/// "runtime.resource.async_op",
/// source = "Sleep::new_timeout",
/// );
///
/// Fields:
/// source - the method which has created an instance of this async operation
#[derive(Default)]
pub(crate) struct AsyncOpVisitor {
source: Option<String>,
// NOTE(review): presumably set from the `inherits_child_attrs` field, as for resources; confirm in the `Visit` impl.
inherit_child_attrs: bool,
}
/// Used to extract the fields needed to construct
/// an Event::Waker from the metadata of a tracing event
/// that has the following shape:
///
/// tracing::trace!(
/// target: "executor::waker",
/// op = "waker.clone",
/// task.id = id.into_u64(),
/// );
///
/// Fields:
/// task.id - the id of the task this waker will wake
/// op - the operation associated with this waker event
/// (one of: waker.wake, waker.wake_by_ref, waker.clone, waker.drop)
#[derive(Default)]
pub(crate) struct WakerVisitor {
/// Span id built from the `task.id` u64 field.
id: Option<span::Id>,
/// Operation parsed from the `op` string field; unknown op names are ignored.
op: Option<event::WakeOp>,
}
/// Used to extract the fields needed to construct
/// an Event::PollOp from the metadata of a tracing event
/// that has the following shape:
///
/// tracing::trace!(
/// target: "runtime::resource::poll_op",
/// op_name = "poll_elapsed",
/// is_ready = false
/// );
///
/// Fields:
/// op_name - the name of this resource poll operation
/// is_ready - bool result of invoking this poll op, describing its readiness
#[derive(Default)]
pub(crate) struct PollOpVisitor {
/// Value of the `op_name` string field, if one was recorded.
op_name: Option<String>,
/// Value of the `is_ready` bool field, if one was recorded.
is_ready: Option<bool>,
}
/// Used to extract the fields needed to construct
/// an Event::StateUpdate from the metadata of a tracing event
/// that has the following shape:
///
/// tracing::trace!(
/// target: "runtime::resource::state_update",
/// duration = duration,
/// duration.unit = "ms",
/// duration.op = "override",
/// );
///
/// Fields:
/// attribute_name - a field value for a field that has the name of the resource attribute being updated
/// value - the value for this update
/// unit - the unit for the value being updated (e.g. ms, s, bytes)
/// op - the operation that this update performs to the value of the resource attribute (one of: override, sub, add)
pub(crate) struct StateUpdateVisitor {
/// Metadata id that is cloned into the recorded attribute field.
meta_id: console_api::MetaId,
/// The attribute value: the last recorded field whose name ends in neither `.op` nor `.unit`.
field: Option<console_api::Field>,
/// Value of the string field whose name ends in `.unit`.
unit: Option<String>,
/// Operation parsed from the string field whose name ends in `.op`.
op: Option<attribute::UpdateOp>,
}
impl ResourceVisitor {
    /// Name of the span that describes a runtime resource.
    pub(crate) const RES_SPAN_NAME: &'static str = "runtime.resource";
    /// Name of the string field carrying the concrete type.
    const RES_CONCRETE_TYPE_FIELD_NAME: &'static str = "concrete_type";
    /// Name of the bool field carrying the internal/public flag.
    const RES_VIZ_FIELD_NAME: &'static str = "is_internal";
    /// Name of the string field carrying the resource kind.
    const RES_KIND_FIELD_NAME: &'static str = "kind";
    /// `kind` value that maps to the known timer kind.
    const RES_KIND_TIMER: &'static str = "timer";

    /// Consumes the visitor and returns what it collected.
    ///
    /// Returns `None` unless both the concrete type and the kind were recorded. The location is
    /// only filled in when file, line and column are all present.
    pub(crate) fn result(self) -> Option<ResourceVisitorResult> {
        let Self {
            concrete_type,
            kind,
            is_internal,
            inherit_child_attrs,
            line,
            file,
            column,
        } = self;

        let concrete_type = concrete_type?;
        let kind = kind?;

        let location = match (file, line, column) {
            (Some(file), Some(line), Some(column)) => Some(console_api::Location {
                file: Some(file),
                line: Some(line),
                column: Some(column),
                ..Default::default()
            }),
            _ => None,
        };

        Some(ResourceVisitorResult {
            concrete_type,
            kind,
            location,
            is_internal,
            inherit_child_attrs,
        })
    }
}
impl Visit for ResourceVisitor {
fn record_debug(&mut self, _: &field::Field, _: &dyn std::fmt::Debug) {}
fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
match field.name() {
Self::RES_CONCRETE_TYPE_FIELD_NAME => self.concrete_type = Some(value.to_string()),
Self::RES_KIND_FIELD_NAME => {
let kind = Some(match value {
Self::RES_KIND_TIMER => {
resource::kind::Kind::Known(resource::kind::Known::Timer as i32)
}
other => resource::kind::Kind::Other(other.to_string()),
});
self.kind = Some(resource::Kind { kind });
}
LOCATION_FILE => self.file = Some(value.to_string()),
_ => {}
}
}
fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
match field.name() {
Self::RES_VIZ_FIELD_NAME => self.is_internal = value,
INHERIT_FIELD_NAME => self.inherit_child_attrs = value,
_ => {}
}
}
fn record_u64(&mut self, field: &tracing_core::Field, value: u64) {
match field.name() {
LOCATION_LINE => self.line = Some(value as u32),
LOCATION_COLUMN => self.column = Some(value as u32),
_ => {}
}
}
}
impl FieldVisitor {
    /// Creates a visitor with no collected fields that tags every field with `meta_id`.
    pub(crate) fn new(meta_id: console_api::MetaId) -> Self {
        let fields = Vec::new();
        Self { fields, meta_id }
    }

    /// Consumes the visitor and hands back the collected fields.
    pub(crate) fn result(self) -> Vec<console_api::Field> {
        let Self { fields, .. } = self;
        fields
    }
}
impl TaskVisitor {
    /// Target of the spans that describe a spawned task.
    pub(crate) const SPAWN_TARGET: &'static str = "executor::task";
    /// Name of the spans that describe a spawned task.
    pub(crate) const SPAWN_NAME: &'static str = "runtime.spawn";

    /// Creates a visitor with no location and an empty field list tagged with `meta_id`.
    pub(crate) fn new(meta_id: console_api::MetaId) -> Self {
        Self {
            field_visitor: FieldVisitor::new(meta_id),
            line: None,
            file: None,
            column: None,
        }
    }

    /// Consumes the visitor, returning the generic fields and the spawn location.
    ///
    /// The location is only present when file, line and column were all recorded.
    pub(crate) fn result(self) -> (Vec<console_api::Field>, Option<console_api::Location>) {
        let Self {
            field_visitor,
            line,
            file,
            column,
        } = self;

        let location = match (file, line, column) {
            (Some(file), Some(line), Some(column)) => Some(console_api::Location {
                file: Some(file),
                line: Some(line),
                column: Some(column),
                ..Default::default()
            }),
            _ => None,
        };

        (field_visitor.result(), location)
    }
}
impl Visit for TaskVisitor {
fn record_debug(&mut self, field: &field::Field, value: &dyn std::fmt::Debug) {
self.field_visitor.record_debug(field, value);
}
fn record_i64(&mut self, field: &tracing_core::Field, value: i64) {
self.field_visitor.record_i64(field, value);
}
fn record_u64(&mut self, field: &tracing_core::Field, value: u64) {
match field.name() {
LOCATION_LINE => self.line = Some(value as u32),
LOCATION_COLUMN => self.column = Some(value as u32),
_ => self.field_visitor.record_u64(field, value),
}
}
fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
self.field_visitor.record_bool(field, value);
}
fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
if field.name() == LOCATION_FILE {
self.file = Some(value.to_string());
} else {
self.field_visitor.record_str(field, value);
}
}
}
// Every callback does the same thing: wrap the value in a `console_api::Field` tagged with this
// visitor's metadata id and append it to the list.
impl Visit for FieldVisitor {
    fn record_debug(&mut self, field: &field::Field, value: &dyn std::fmt::Debug) {
        let metadata_id = Some(self.meta_id.clone());
        let recorded = console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id,
        };
        self.fields.push(recorded);
    }

    fn record_i64(&mut self, field: &tracing_core::Field, value: i64) {
        let metadata_id = Some(self.meta_id.clone());
        let recorded = console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id,
        };
        self.fields.push(recorded);
    }

    fn record_u64(&mut self, field: &tracing_core::Field, value: u64) {
        let metadata_id = Some(self.meta_id.clone());
        let recorded = console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id,
        };
        self.fields.push(recorded);
    }

    fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
        let metadata_id = Some(self.meta_id.clone());
        let recorded = console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id,
        };
        self.fields.push(recorded);
    }

    fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
        let metadata_id = Some(self.meta_id.clone());
        let recorded = console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id,
        };
        self.fields.push(recorded);
    }
}
impl AsyncOpVisitor {
    /// Name of the span that describes an async operation.
    pub(crate) const ASYNC_OP_SPAN_NAME: &'static str = "runtime.resource.async_op";
    /// Name of the span that describes polling an async operation.
    pub(crate) const ASYNC_OP_POLL_NAME: &'static str = "runtime.resource.async_op.poll";
    /// Name of the string field carrying the source of the operation.
    const ASYNC_OP_SRC_FIELD_NAME: &'static str = "source";

    /// Consumes the visitor, returning `(source, inherit_child_attrs)`.
    ///
    /// Returns `None` if no `source` field was recorded.
    pub(crate) fn result(self) -> Option<(String, bool)> {
        let Self {
            source,
            inherit_child_attrs,
        } = self;
        Some((source?, inherit_child_attrs))
    }
}
impl Visit for AsyncOpVisitor {
    /// Debug-formatted values carry nothing this visitor needs.
    fn record_debug(&mut self, _: &field::Field, _: &dyn std::fmt::Debug) {}

    /// Picks up the `source` field; other string fields are ignored.
    fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
        match field.name() {
            Self::ASYNC_OP_SRC_FIELD_NAME => self.source = Some(value.to_string()),
            _ => {}
        }
    }

    /// Picks up the inherit-child-attributes flag; other bool fields are ignored.
    fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
        match field.name() {
            INHERIT_FIELD_NAME => self.inherit_child_attrs = value,
            _ => {}
        }
    }
}
impl WakerVisitor {
    /// Target of the events that describe waker operations.
    pub(crate) const WAKER_EVENT_TARGET: &'static str = "executor::waker";
    // Recognised values of the `op` field.
    const WAKE: &'static str = "waker.wake";
    const WAKE_BY_REF: &'static str = "waker.wake_by_ref";
    const CLONE: &'static str = "waker.clone";
    const DROP: &'static str = "waker.drop";
    /// Name of the u64 field carrying the id of the task being woken.
    const TASK_ID_FIELD_NAME: &'static str = "task.id";

    /// Consumes the visitor, returning the task id and the operation.
    ///
    /// Returns `None` unless both were recorded.
    pub(crate) fn result(self) -> Option<(span::Id, event::WakeOp)> {
        let task_id = self.id?;
        let wake_op = self.op?;
        Some((task_id, wake_op))
    }
}
impl Visit for WakerVisitor {
    fn record_debug(&mut self, _: &field::Field, _: &dyn std::fmt::Debug) {
        // don't care (yet?)
    }

    /// Picks up the id of the task the waker belongs to.
    fn record_u64(&mut self, field: &tracing_core::Field, value: u64) {
        if field.name() != Self::TASK_ID_FIELD_NAME {
            return;
        }
        self.id = Some(span::Id::from_u64(value));
    }

    /// Parses the `op` field; unknown operation names leave the visitor untouched.
    fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
        use crate::event::WakeOp;

        if field.name() != "op" {
            return;
        }
        let parsed = match value {
            Self::WAKE => WakeOp::Wake { self_wake: false },
            Self::WAKE_BY_REF => WakeOp::WakeByRef { self_wake: false },
            Self::CLONE => WakeOp::Clone,
            Self::DROP => WakeOp::Drop,
            _ => return,
        };
        self.op = Some(parsed);
    }
}
impl PollOpVisitor {
    /// Target of the events that describe a resource poll operation.
    pub(crate) const POLL_OP_EVENT_TARGET: &'static str = "runtime::resource::poll_op";
    /// Name of the string field carrying the operation name.
    const OP_NAME_FIELD_NAME: &'static str = "op_name";
    /// Name of the bool field carrying the readiness of the poll.
    const OP_READINESS_FIELD_NAME: &'static str = "is_ready";

    /// Consumes the visitor, returning `(op_name, is_ready)`.
    ///
    /// Returns `None` unless both fields were recorded.
    pub(crate) fn result(self) -> Option<(String, bool)> {
        self.op_name.zip(self.is_ready)
    }
}
impl Visit for PollOpVisitor {
    /// Debug-formatted values carry nothing this visitor needs.
    fn record_debug(&mut self, _: &field::Field, _: &dyn std::fmt::Debug) {}

    /// Picks up the `is_ready` flag; other bool fields are ignored.
    fn record_bool(&mut self, field: &tracing_core::Field, value: bool) {
        match field.name() {
            Self::OP_READINESS_FIELD_NAME => self.is_ready = Some(value),
            _ => {}
        }
    }

    /// Picks up the `op_name` field; other string fields are ignored.
    fn record_str(&mut self, field: &tracing_core::Field, value: &str) {
        match field.name() {
            Self::OP_NAME_FIELD_NAME => self.op_name = Some(value.to_string()),
            _ => {}
        }
    }
}
impl StateUpdateVisitor {
    /// Target of the events that update the state of a resource.
    pub(crate) const RE_STATE_UPDATE_EVENT_TARGET: &'static str = "runtime::resource::state_update";
    /// Target of the events that update the state of an async operation.
    pub(crate) const AO_STATE_UPDATE_EVENT_TARGET: &'static str =
        "runtime::resource::async_op::state_update";

    /// Suffix of the field naming the update operation.
    const STATE_OP_SUFFIX: &'static str = ".op";
    /// Suffix of the field naming the unit of the value.
    const STATE_UNIT_SUFFIX: &'static str = ".unit";

    // Recognised values of the `.op` field.
    const OP_ADD: &'static str = "add";
    const OP_SUB: &'static str = "sub";
    const OP_OVERRIDE: &'static str = "override";

    /// Creates a visitor that has recorded nothing yet and tags its field with `meta_id`.
    pub(crate) fn new(meta_id: console_api::MetaId) -> Self {
        Self {
            meta_id,
            field: None,
            unit: None,
            op: None,
        }
    }

    /// Consumes the visitor and builds the attribute update.
    ///
    /// Returns `None` if no attribute value was recorded; `op` and `unit` are passed through as
    /// recorded (possibly `None`).
    pub(crate) fn result(self) -> Option<attribute::Update> {
        let Self {
            field, unit, op, ..
        } = self;
        let field = field?;
        Some(attribute::Update { field, op, unit })
    }
}
impl StateUpdateVisitor {
    /// Returns `true` if `name` denotes the attribute value itself, i.e. it carries neither the
    /// `.op` nor the `.unit` suffix.
    fn names_attribute_value(name: &str) -> bool {
        !(name.ends_with(Self::STATE_OP_SUFFIX) || name.ends_with(Self::STATE_UNIT_SUFFIX))
    }
}

impl Visit for StateUpdateVisitor {
    /// Stores the value as the attribute field unless the name has the `.op`/`.unit` suffix.
    fn record_debug(&mut self, field: &field::Field, value: &dyn std::fmt::Debug) {
        if !Self::names_attribute_value(field.name()) {
            return;
        }
        self.field = Some(console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id: Some(self.meta_id.clone()),
        });
    }

    /// Stores the value as the attribute field unless the name has the `.op`/`.unit` suffix.
    fn record_i64(&mut self, field: &field::Field, value: i64) {
        if !Self::names_attribute_value(field.name()) {
            return;
        }
        self.field = Some(console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id: Some(self.meta_id.clone()),
        });
    }

    /// Stores the value as the attribute field unless the name has the `.op`/`.unit` suffix.
    fn record_u64(&mut self, field: &field::Field, value: u64) {
        if !Self::names_attribute_value(field.name()) {
            return;
        }
        self.field = Some(console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id: Some(self.meta_id.clone()),
        });
    }

    /// Stores the value as the attribute field unless the name has the `.op`/`.unit` suffix.
    fn record_bool(&mut self, field: &field::Field, value: bool) {
        if !Self::names_attribute_value(field.name()) {
            return;
        }
        self.field = Some(console_api::Field {
            name: Some(field.name().into()),
            value: Some(value.into()),
            metadata_id: Some(self.meta_id.clone()),
        });
    }

    /// Interprets `.op` and `.unit` fields; any other string is the attribute value.
    fn record_str(&mut self, field: &field::Field, value: &str) {
        let name = field.name();

        // The `.op` check comes first, so it wins should a name match both suffixes.
        if name.ends_with(Self::STATE_OP_SUFFIX) {
            // Unknown operation names are ignored and leave any earlier op in place.
            if value == Self::OP_ADD {
                self.op = Some(attribute::UpdateOp::Add);
            } else if value == Self::OP_SUB {
                self.op = Some(attribute::UpdateOp::Sub);
            } else if value == Self::OP_OVERRIDE {
                self.op = Some(attribute::UpdateOp::Override);
            }
            return;
        }

        if name.ends_with(Self::STATE_UNIT_SUFFIX) {
            self.unit = Some(value.to_string());
            return;
        }

        self.field = Some(console_api::Field {
            name: Some(name.into()),
            value: Some(value.into()),
            metadata_id: Some(self.meta_id.clone()),
        });
    }
}

View File

@ -39,14 +39,21 @@ lazy_static = "1.4"
libc = "0.2"
num_cpus = "1.13"
pin-utils = "0.1.0"
slab = "0.4"
parking_lot = "0.12"
# Allocator
arrayvec = { version = "0.7.0" }
futures-timer = "3.0.2"
once_cell = "1.4.0"
tracing = "0.1.19"
crossbeam-queue = "0.3.0"
hdrhistogram = "7.5"
# Stats & Tracing
tracing = "0.1"
# Supervision trees
sharded-slab = "0.1"
thread_local = "1.1"
[dev-dependencies]
async-std = "1.10.0"

View File

@ -32,6 +32,7 @@ pub mod manage;
pub mod placement;
pub mod pool;
pub mod run;
mod supervision;
mod thread_manager;
mod worker;
@ -39,4 +40,6 @@ mod worker;
/// Prelude of Bastion Executor
///
/// Re-exports everything from the `pool` module together with the supervision registry and the
/// process group id type.
pub mod prelude {
pub use crate::pool::*;
// Registry that hands out and tracks process groups.
pub use crate::supervision::SupervisionRegistry;
// Group id type, re-exported from the `lightproc` crate.
pub use lightproc::GroupId;
}

View File

@ -8,11 +8,13 @@
//! [`Worker`]: crate::run_queue::Worker
use crate::run::block;
use crate::supervision::SupervisionRegistry;
use crate::thread_manager::{DynamicRunner, ThreadManager};
use crate::worker::{Sleeper, WorkerThread};
use crossbeam_deque::{Injector, Stealer};
use lightproc::lightproc::LightProc;
use lightproc::recoverable_handle::RecoverableHandle;
use lightproc::GroupId;
use std::cell::Cell;
use std::future::Future;
use std::iter::Iterator;
@ -20,6 +22,9 @@ use std::marker::PhantomData;
use std::mem::MaybeUninit;
use std::sync::Arc;
use std::time::Duration;
use tracing::field::FieldSet;
use tracing::metadata::Kind;
use tracing::{Instrument, Level, Span};
#[derive(Debug)]
struct Spooler<'a> {
@ -45,12 +50,19 @@ impl Spooler<'_> {
/// Global executor
pub struct Executor<'a> {
// Shared handle to the spooler backing this executor.
spooler: Arc<Spooler<'a>>,
// Root process group of this executor; allocated in `Executor::new` through
// `SupervisionRegistry::new_root_group`.
root_cgroup: GroupId,
}
impl<'a, 'executor: 'a> Executor<'executor> {
/// Creates a new executor with a fresh spooler and its own root process group.
///
/// The root group is allocated in the global `SupervisionRegistry` and handed to
/// `set_current` for the calling thread.
pub fn new() -> Self {
    let root_cgroup = SupervisionRegistry::with(|reg| {
        let root = reg.new_root_group();
        reg.set_current(&root);
        root
    });
    let spooler = Arc::new(Spooler::new());

    Executor {
        spooler,
        root_cgroup,
    }
}
@ -94,22 +106,75 @@ impl<'a, 'executor: 'a> Executor<'executor> {
/// );
/// # }
/// ```
#[track_caller]
pub fn spawn<F, R>(&self, future: F) -> RecoverableHandle<R>
where
F: Future<Output = R> + Send + 'a,
R: Send + 'a,
{
let (task, handle) = LightProc::recoverable(future, self.schedule());
let location = std::panic::Location::caller();
let cgroup = SupervisionRegistry::current();
let id = cgroup.as_ref().map(|id| id.into_u64()).unwrap_or(0);
let span = tracing::trace_span!(
target: "executor::task",
"runtime.spawn",
loc.file = location.file(),
loc.line = location.line(),
loc.col = location.column(),
kind = "global",
cgroup = id,
);
let (task, handle) = LightProc::recoverable(future, self.schedule(), span, cgroup);
tracing::trace!("spawning sendable task");
task.schedule();
handle
}
#[track_caller]
pub fn spawn_local<F, R>(&self, future: F) -> RecoverableHandle<R>
where
F: Future<Output = R> + 'a,
R: Send + 'a,
{
let (task, handle) = LightProc::recoverable(future, schedule_local());
let location = std::panic::Location::caller();
let cgroup = SupervisionRegistry::current();
let id = cgroup.as_ref().map(|id| id.into_u64()).unwrap_or(0);
let span = tracing::trace_span!(
target: "executor::task",
"runtime.spawn",
loc.file = location.file(),
loc.line = location.line(),
loc.col = location.column(),
kind = "local",
cgroup = id,
);
let (task, handle) = LightProc::recoverable(future, schedule_local(), span, cgroup);
tracing::trace!("spawning sendable task");
task.schedule();
handle
}
/// Spawns a thread-local future as a member of the given process group.
///
/// Unlike `spawn`, the future does not need to be `Send`; it is scheduled through
/// `schedule_local()`. The caller's source location and the group id are attached to the
/// task's `runtime.spawn` span.
///
/// Returns a handle to the task's result.
#[track_caller]
pub fn spawn_local_cgroup<F, R>(&self, future: F, cgroup: GroupId) -> RecoverableHandle<R>
where
    F: Future<Output = R> + 'a,
    R: Send + 'a,
{
    // `#[track_caller]` makes this the location of the code calling `spawn_local_cgroup`.
    let location = std::panic::Location::caller();
    let span = tracing::trace_span!(
        target: "executor::task",
        "runtime.spawn",
        loc.file = location.file(),
        loc.line = location.line(),
        loc.col = location.column(),
        kind = "local",
        cgroup = cgroup.into_u64(),
    );

    let (task, handle) = LightProc::recoverable(future, schedule_local(), span, Some(cgroup));
    // This task is *not* required to be `Send`, so don't describe it as sendable.
    tracing::trace!("spawning local task");
    task.schedule();

    handle
}

View File

@ -0,0 +1,179 @@
use lightproc::GroupId;
use once_cell::sync::OnceCell;
use sharded_slab::pool::Ref;
use sharded_slab::{Clear, Pool};
use std::borrow::Borrow;
use std::cell;
use std::cell::RefCell;
use std::sync::atomic::{fence, AtomicUsize, Ordering};
use thread_local::ThreadLocal;
// Process-wide registry instance; lazily created on the first call to `SupervisionRegistry::with`.
static REGISTRY: OnceCell<SupervisionRegistry> = OnceCell::new();
/// Maps a group id to its index in the group pool (ids are offset by one from pool indices).
fn id_to_idx(id: &GroupId) -> usize {
    let raw = id.into_u64() as usize;
    raw.wrapping_sub(1)
}
/// Maps a pool index to the corresponding group id (the index plus one).
fn idx_to_id(idx: usize) -> GroupId {
    let raw = idx.wrapping_add(1) as u64;
    GroupId::from_u64(raw)
}
/// Registry of process groups.
///
/// Groups live in a pool and are addressed by `GroupId`; in addition the registry remembers one
/// "current" group per thread.
pub struct SupervisionRegistry {
// Pool storing the per-group bookkeeping (parent link and reference count).
groups: Pool<GroupInner>,
// TODO: would this be better as the full stack?
// The current group of each thread that has had one set.
current: ThreadLocal<RefCell<GroupId>>,
}
impl SupervisionRegistry {
    /// Creates an empty registry: no groups, and no thread has a current group.
    fn new() -> Self {
        Self {
            groups: Pool::new(),
            current: ThreadLocal::new(),
        }
    }

    /// Runs `f` with the process-wide registry, creating the registry on first use.
    pub fn with<T>(f: impl FnOnce(&Self) -> T) -> T {
        let this = REGISTRY.get_or_init(SupervisionRegistry::new);
        f(this)
    }

    /// Looks up the bookkeeping data of the group with the given id, if such a group exists.
    pub(crate) fn get(&self, id: &GroupId) -> Option<Ref<'_, GroupInner>> {
        self.groups.get(id_to_idx(id))
    }

    /// Borrows the current group of the calling thread, if one has been set.
    ///
    /// The returned guard borrows a `RefCell`; holding it across a call to `set_current` on the
    /// same thread will panic.
    #[inline]
    pub fn current_ref(&self) -> Option<cell::Ref<'_, GroupId>> {
        self.current.get().map(|c| c.borrow())
    }

    /// Returns a new reference to the current group of the calling thread, if one has been set.
    ///
    /// The reference count of the group is incremented.
    pub fn current() -> Option<GroupId> {
        Self::with(|this| this.current_ref().map(|id| this.clone_group(&id)))
    }

    /// Makes `id` the current group of the calling thread.
    ///
    /// Any group previously set for this thread is replaced. The id is copied as-is; the
    /// reference count of the group is not touched.
    pub(crate) fn set_current(&self, id: &GroupId) {
        match self.current.get() {
            // `get_or` alone would only initialise the slot and silently keep an existing
            // value, so an already-initialised slot has to be overwritten explicitly.
            Some(slot) => *slot.borrow_mut() = id.clone(),
            None => {
                self.current.get_or(|| RefCell::new(id.clone()));
            }
        }
    }

    /// Creates a new group without a parent.
    pub fn new_root_group(&self) -> GroupId {
        self.new_group_inner(None)
    }

    /// Creates a new group whose parent is the calling thread's current group, if there is one.
    ///
    /// The new group holds a reference to its parent.
    pub fn new_group(&self) -> GroupId {
        let parent = self.current_ref().map(|id| self.clone_group(&id));
        self.new_group_inner(parent)
    }

    /// Allocates a group with the given parent and an initial reference count of one.
    ///
    /// # Panics
    /// Panics if the pool cannot allocate a new group.
    fn new_group_inner(&self, parent: Option<GroupId>) -> GroupId {
        // Enter the span so that the event below is recorded inside of it. Creating the span
        // without entering it would drop it again immediately, which has no effect.
        let span = tracing::trace_span!(
            target: "executor::supervision",
            "new_group"
        );
        let _entered = span.enter();

        let parent_id = parent.as_ref().map(|id| id.into_non_zero_u64());
        let idx = self
            .groups
            .create_with(|group| {
                group.parent = parent;

                let ref_cnt = group.ref_count.get_mut();
                debug_assert_eq!(0, *ref_cnt);
                *ref_cnt = 1;
            })
            .expect("Failed to allocate a new group");

        let id = idx_to_id(idx);
        tracing::trace!(
            target: "executor::supervision",
            parent = parent_id,
            id = id.into_non_zero_u64(),
            "process group created"
        );

        id
    }

    /// Creates another reference to the group `id`, incrementing its reference count.
    ///
    /// # Panics
    /// Panics if no such group exists or if its reference count was already zero.
    fn clone_group(&self, id: &GroupId) -> GroupId {
        tracing::trace!(
            target: "executor::supervision",
            id = id.into_u64(),
            "cloning process group"
        );
        let group = self
            .get(id)
            .unwrap_or_else(|| panic!("tried to clone group {:?}, but no such group exists!", id));

        let ref_cnt = group.increment_refcnt();
        assert_ne!(
            0, ref_cnt,
            "tried cloning group {:?} that was already closed",
            id
        );
        id.clone()
    }

    /// Try to close the group with the given ID
    ///
    /// Drops one reference to the group. If this method returns `true` that was the last
    /// reference; otherwise there are still references left open.
    ///
    /// # Panics
    /// Panics if no such group exists, unless the thread is already panicking.
    // NOTE(review): this only updates the reference count. Nothing visible here removes the
    // group from the pool once the last reference is gone — confirm who is meant to do that.
    fn try_close(&self, id: GroupId) -> bool {
        tracing::trace!(
            target: "executor::supervision",
            id = id.into_u64(),
            "dropping process group"
        );
        let group = match self.get(&id) {
            // Don't turn an unwinding panic into an abort by panicking a second time.
            None if std::thread::panicking() => return false,
            None => panic!("tried to drop a ref to {:?}, but no such group exists!", id),
            Some(group) => group,
        };

        // Reference count *decreases* on the other hand must observe strong ordering: the
        // `Release` here pairs with the `Acquire` fence below, so that everything done through
        // a reference before it was dropped is visible to the thread dropping the last one.
        let remaining = group.ref_count.fetch_sub(1, Ordering::Release);
        if !std::thread::panicking() {
            assert!(remaining < usize::MAX, "group reference count overflow");
        }
        if remaining > 1 {
            return false;
        }

        // Issue an atomic acquire fence making sure that all other calls to `try_close` have
        // finished before the one that returns `true`.
        fence(Ordering::Acquire);
        true
    }
}
/// Bookkeeping data stored in the registry's pool for one process group.
#[derive(Debug)]
pub(crate) struct GroupInner {
// The parent group, if any. A group holds one reference to its parent.
parent: Option<GroupId>,
// Number of open references to this group; zero for a slot that is not in use.
ref_count: AtomicUsize,
}
impl GroupInner {
#[inline]
/// Increment the reference count of this group and return the previous value
fn increment_refcnt(&self) -> usize {
// Reference count increases don't need strong ordering. The increments can be done in
// any order as long as they *do* happen.
self.ref_count.fetch_add(1, Ordering::Relaxed)
}
}
impl Default for GroupInner {
    /// An unused group slot: no parent and a reference count of zero.
    fn default() -> Self {
        let ref_count = AtomicUsize::new(0);
        Self {
            parent: None,
            ref_count,
        }
    }
}
impl Clear for GroupInner {
    /// Resets the slot, releasing the reference this group held on its parent.
    fn clear(&mut self) {
        // Every group keeps its parent alive by holding one reference to it, so a group stays
        // alive for as long as any of its children does. When the group goes away that
        // reference has to be given back, i.e. the parent's reference count drops by one.
        let parent = match self.parent.take() {
            Some(parent) => parent,
            None => return,
        };
        // NOTE(review): whether this was the parent's last reference is not acted upon here.
        SupervisionRegistry::with(|registry| registry.try_close(parent));
    }
}

View File

@ -15,6 +15,7 @@ crossbeam-utils = "0.8"
pin-utils = "0.1.0"
bitfield = "0.13.2"
bitflags = "1.3.2"
tracing = "0.1"
[dev-dependencies]
crossbeam = "0.8"

View File

@ -16,9 +16,9 @@
#![forbid(missing_docs)]
#![forbid(missing_debug_implementations)]
#![forbid(unused_import_braces)]
#![forbid(unused_imports)]
#![warn(unused_imports)]
#![forbid(unused_must_use)]
#![forbid(unused_variables)]
//TODO: reenable #![forbid(unused_variables)]
mod catch_unwind;
mod layout_helpers;
@ -33,6 +33,8 @@ pub mod lightproc;
pub mod proc_handle;
pub mod recoverable_handle;
pub use proc_data::GroupId;
/// The lightproc prelude.
///
/// The prelude re-exports lightproc structs and handles from this crate.

View File

@ -31,11 +31,13 @@ use crate::proc_ext::ProcFutureExt;
use crate::proc_handle::ProcHandle;
use crate::raw_proc::RawProc;
use crate::recoverable_handle::RecoverableHandle;
use crate::GroupId;
use std::fmt::{self, Debug, Formatter};
use std::future::Future;
use std::mem;
use std::mem::ManuallyDrop;
use std::panic::AssertUnwindSafe;
use std::ptr::NonNull;
use tracing::Span;
/// Shared functionality for both Send and !Send LightProc
pub struct LightProc {
@ -45,8 +47,8 @@ pub struct LightProc {
// LightProc is both Sync and Send because it explicitly handles synchronization internally:
// The state of a `LightProc` is only modified atomically guaranteeing a consistent view from all
// threads. Existing handles are atomically reference counted so the proc itself will not be dropped
// until all pointers to it are themselves dropped.
// threads. Existing wakers (and the proc_handle) are atomically reference counted so the proc
// itself will not be dropped until all pointers to it are themselves dropped.
// However, if the future or result inside the LightProc is !Send the executor must ensure that
// the `schedule` function does not move the LightProc to a different thread.
unsafe impl Send for LightProc {}
@ -76,14 +78,19 @@ impl LightProc {
/// println!("future panicked!: {}", &reason);
/// });
/// ```
pub fn recoverable<'a, F, R, S>(future: F, schedule: S) -> (Self, RecoverableHandle<R>)
pub fn recoverable<'a, F, R, S>(
future: F,
schedule: S,
span: Span,
cgroup: Option<GroupId>,
) -> (Self, RecoverableHandle<R>)
where
F: Future<Output = R> + 'a,
R: 'a,
S: Fn(LightProc) + 'a,
{
let recovery_future = AssertUnwindSafe(future).catch_unwind();
let (proc, handle) = Self::build(recovery_future, schedule);
let (proc, handle) = Self::build(recovery_future, schedule, span, cgroup);
(proc, RecoverableHandle::new(handle))
}
@ -92,6 +99,7 @@ impl LightProc {
///
/// # Example
/// ```rust
/// # use tracing::Span;
/// # use lightproc::prelude::*;
/// #
/// # // ... future that does work
@ -113,15 +121,22 @@ impl LightProc {
/// let standard = LightProc::build(
/// future,
/// schedule_function,
/// Span::current(),
/// None,
/// );
/// ```
pub fn build<'a, F, R, S>(future: F, schedule: S) -> (Self, ProcHandle<R>)
pub fn build<'a, F, R, S>(
future: F,
schedule: S,
span: Span,
cgroup: Option<GroupId>,
) -> (Self, ProcHandle<R>)
where
F: Future<Output = R> + 'a,
R: 'a,
S: Fn(LightProc) + 'a,
{
let raw_proc = RawProc::allocate(future, schedule);
let raw_proc = RawProc::allocate(future, schedule, span, cgroup);
let proc = LightProc { raw_proc };
let handle = ProcHandle::new(raw_proc);
(proc, handle)
@ -130,9 +145,9 @@ impl LightProc {
///
/// Schedule the lightweight process with passed `schedule` function at the build time.
pub fn schedule(self) {
let ptr = self.raw_proc.as_ptr();
let this = ManuallyDrop::new(self);
let ptr = this.raw_proc.as_ptr();
let pdata = ptr as *const ProcData;
mem::forget(self);
unsafe {
((*pdata).vtable.schedule)(ptr);
@ -144,9 +159,9 @@ impl LightProc {
/// "Running" a lightproc means ticking it once and if it doesn't complete
/// immediately re-scheduling it as soon as it's Waker wakes it back up.
pub fn run(self) {
let ptr = self.raw_proc.as_ptr();
let this = ManuallyDrop::new(self);
let ptr = this.raw_proc.as_ptr();
let pdata = ptr as *const ProcData;
mem::forget(self);
unsafe {
((*pdata).vtable.tick)(ptr);

View File

@ -3,8 +3,49 @@ use crate::state::*;
use crossbeam_utils::Backoff;
use std::cell::Cell;
use std::fmt::{self, Debug, Formatter};
use std::num::NonZeroU64;
use std::sync::atomic::Ordering;
use std::task::Waker;
use tracing::Span;
/// Opaque id of the group this proc belongs to
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
#[repr(transparent)]
pub struct GroupId(NonZeroU64);

impl GroupId {
    /// Construct an ID from an u64
    ///
    /// # Panics
    /// - if the provided `u64` is `0`.
    pub fn from_u64(i: u64) -> Self {
        let non_zero = NonZeroU64::new(i).expect("group id must be > 0");
        Self::from_non_zero_u64(non_zero)
    }

    /// Construct an ID from a NonZeroU64
    ///
    /// This method can't fail
    #[inline]
    pub const fn from_non_zero_u64(i: NonZeroU64) -> Self {
        GroupId(i)
    }

    /// Convert a GroupId into a u64
    //noinspection RsSelfConvention
    #[allow(clippy::wrong_self_convention)]
    #[inline]
    pub const fn into_u64(&self) -> u64 {
        self.into_non_zero_u64().get()
    }

    /// Convert a GroupId into a NonZeroU64
    //noinspection RsSelfConvention
    #[allow(clippy::wrong_self_convention)]
    #[inline]
    pub const fn into_non_zero_u64(&self) -> NonZeroU64 {
        let GroupId(inner) = self;
        *inner
    }
}
/// The pdata of a proc.
///
@ -25,6 +66,17 @@ pub(crate) struct ProcData {
/// In addition to the actual waker virtual table, it also contains pointers to several other
/// methods necessary for bookkeeping the heap-allocated proc.
pub(crate) vtable: &'static ProcVTable,
/// The span assigned to this process.
///
/// A lightproc has an associated tracing span that allows recording occurrences of vtable calls
/// for this process.
pub(crate) span: Span,
/// Control group assigned to this process.
///
/// The control group links this process to its supervision tree
pub(crate) cgroup: Option<GroupId>,
}
impl ProcData {
@ -61,7 +113,7 @@ impl ProcData {
}
}
/// Notifies the proc blocked on the proc.
/// Notifies the proc blocked on this proc, if any.
///
/// If there is a registered waker, it will be removed from the pdata and woken.
#[inline]

View File

@ -6,6 +6,7 @@ use crate::state::*;
use std::fmt::{self, Debug, Formatter};
use std::future::Future;
use std::marker::{PhantomData, Unpin};
use std::mem::MaybeUninit;
use std::pin::Pin;
use std::ptr::NonNull;
use std::sync::atomic::Ordering;
@ -22,7 +23,9 @@ pub struct ProcHandle<R> {
pub(crate) raw_proc: NonNull<()>,
/// A marker capturing the generic type `R`.
pub(crate) result: PhantomData<R>,
// TODO: Instead of writing the future output to the RawProc on heap, put it in the handle
// (if still available).
pub(crate) marker: PhantomData<R>,
}
unsafe impl<R: Send> Send for ProcHandle<R> {}
@ -34,7 +37,7 @@ impl<R> ProcHandle<R> {
pub(crate) fn new(raw_proc: NonNull<()>) -> Self {
Self {
raw_proc,
result: PhantomData,
marker: PhantomData,
}
}
@ -48,6 +51,13 @@ impl<R> ProcHandle<R> {
let pdata = ptr as *const ProcData;
unsafe {
let id = (&(*pdata).span).id().map(|id| id.into_u64()).unwrap_or(0);
tracing::trace!(
target: "executor::handle",
op = "handle.cancel",
task.id = id,
);
let mut state = (*pdata).state.load(Ordering::Acquire);
loop {
@ -189,6 +199,14 @@ impl<R> Drop for ProcHandle<R> {
let mut output = None;
unsafe {
// Record dropping the handle for this task
let id = (&(*pdata).span).id().map(|id| id.into_u64()).unwrap_or(0);
tracing::trace!(
target: "executor::handle",
op = "handle.drop",
task.id = id,
);
// Optimistically assume the `ProcHandle` is being dropped just after creating the
// proc. This is a common case so if the handle is not used, the overhead of it is only
// one compare-exchange operation.

View File

@ -15,17 +15,30 @@ use std::pin::Pin;
use std::ptr::NonNull;
use std::sync::atomic::Ordering;
use crate::GroupId;
use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker};
use tracing::Span;
/// Raw pointers to the fields of a proc.
// TODO: Make generic over the Allocator used!
// TODO: The actual layout stored could be expressed as a struct w/ union. Maybe do that?
pub(crate) struct RawProc<'a, F, R, S> {
// Pointer to the proc's `ProcData`.
pub(crate) pdata: *const ProcData,
// Pointer to the schedule function of type `S`.
pub(crate) schedule: *const S,
// Pointer to the future of type `F`.
pub(crate) future: *mut F,
// TODO: Replace with `*mut MaybeUninit`? And also, store the result in the handle if that's
// still available, instead of copying it to the heap.
// Pointer to the output slot of type `R`.
pub(crate) output: *mut R,
// Make the lifetime 'a of the future invariant
_marker: PhantomData<&'a ()>,
// TODO: We should link a proc to a process group for scheduling and tracing
// - sub-tasks should start in the same group by default
// - that data needs to be available when calling `spawn` and to decide which task to run.
// So there must be a thread-local reference to it that's managed by the executor, and
// updated when a new task is being polled.
// Additionally `schedule` must have a reference to it to be able to push to the right
// queue? The `schedule` fn could just come from the group instead.
}
impl<'a, F, R, S> RawProc<'a, F, R, S>
@ -37,7 +50,12 @@ where
/// Allocates a proc with the given `future` and `schedule` function.
///
/// It is assumed there are initially only the `LightProc` reference and the `ProcHandle`.
pub(crate) fn allocate(future: F, schedule: S) -> NonNull<()> {
pub(crate) fn allocate(
future: F,
schedule: S,
span: Span,
cgroup: Option<GroupId>,
) -> NonNull<()> {
// Compute the layout of the proc for allocation. Abort if the computation fails.
let proc_layout = Self::proc_layout();
@ -70,6 +88,8 @@ where
destroy: Self::destroy,
tick: Self::tick,
},
span,
cgroup,
});
// Write the schedule function as the third field of the proc.
@ -128,6 +148,15 @@ where
/// Wakes a waker.
unsafe fn wake(ptr: *const ()) {
let raw = Self::from_ptr(ptr);
let id = (&(*raw.pdata).span)
.id()
.map(|id| id.into_u64())
.unwrap_or(0);
tracing::trace!(
target: "executor::waker",
op = "waker.wake",
task.id = id,
);
let mut state = (*raw.pdata).state.load(Ordering::Acquire);
@ -191,6 +220,15 @@ where
/// Wakes a waker by reference.
unsafe fn wake_by_ref(ptr: *const ()) {
let raw = Self::from_ptr(ptr);
let id = (&(*raw.pdata).span)
.id()
.map(|id| id.into_u64())
.unwrap_or(0);
tracing::trace!(
target: "executor::waker",
op = "waker.wake_by_ref",
task.id = id,
);
let mut state = (*raw.pdata).state.load(Ordering::Acquire);
@ -250,6 +288,16 @@ where
/// Clones a waker.
unsafe fn clone_waker(ptr: *const ()) -> RawWaker {
let raw = Self::from_ptr(ptr);
let id = (&(*raw.pdata).span)
.id()
.map(|id| id.into_u64())
.unwrap_or(0);
tracing::trace!(
target: "executor::waker",
op = "waker.clone",
task.id = id,
);
let raw_waker = &(*raw.pdata).vtable.raw_waker;
// Increment the reference count. With any kind of reference-counted data structure,
@ -271,6 +319,15 @@ where
#[inline]
unsafe fn decrement(ptr: *const ()) {
let raw = Self::from_ptr(ptr);
let id = (&(*raw.pdata).span)
.id()
.map(|id| id.into_u64())
.unwrap_or(0);
tracing::trace!(
target: "executor::waker",
op = "waker.drop",
task.id = id,
);
// Decrement the reference count.
let new = (*raw.pdata).state.fetch_sub(1, Ordering::AcqRel);
@ -310,10 +367,11 @@ where
raw.output as *const ()
}
/// Cleans up proc's resources and deallocates it.
/// Cleans up the proc's resources and deallocates the associated memory.
///
/// If the proc has not been closed, then its future or the output will be dropped. The
/// schedule function gets dropped too.
/// The future or output stored will *not* be dropped, but its memory will be freed. Callers
/// must ensure that they are correctly dropped beforehand if either of those is still alive to
/// prevent use-after-free.
#[inline]
unsafe fn destroy(ptr: *const ()) {
let raw = Self::from_ptr(ptr);
@ -323,6 +381,9 @@ where
// Drop the schedule function.
(raw.schedule as *mut S).drop_in_place();
// Drop the proc data containing the associated Span
(raw.pdata as *mut ProcData).drop_in_place();
// Finally, deallocate the memory reserved by the proc.
alloc::dealloc(ptr as *mut u8, proc_layout.layout);
}
@ -332,9 +393,11 @@ where
/// Ticking will call `poll` once and re-schedule the task if it returns `Poll::Pending`. If
/// polling its future panics, the proc will be closed and the panic propagated into the caller.
unsafe fn tick(ptr: *const ()) {
let raw = Self::from_ptr(ptr);
let mut raw = Self::from_ptr(ptr);
// Enter the span associated with the process to track execution time if enabled.
let _guard = (&(*raw.pdata).span).enter();
// Create a context from the raw proc pointer and the vtable inside the its pdata.
// Create a context from the raw proc pointer and the vtable inside its pdata.
let waker = ManuallyDrop::new(Waker::from_raw(RawWaker::new(
ptr,
&(*raw.pdata).vtable.raw_waker,
@ -380,9 +443,9 @@ where
// Poll the inner future, but surround it with a guard that closes the proc in case polling
// panics.
let guard = Guard(raw);
let poll = <F as Future>::poll(Pin::new_unchecked(&mut *raw.future), cx);
mem::forget(guard);
let drop_guard = Guard(&mut raw);
let poll = <F as Future>::poll(drop_guard.pin_future(), cx);
drop_guard.disable();
match poll {
Poll::Ready(out) => {
@ -497,21 +560,43 @@ impl<'a, F, R, S> Clone for RawProc<'a, F, R, S> {
}
// Marker impl: a `RawProc` handle can be duplicated by plain assignment. The impl places no
// bounds on `F`, `R` or `S`, so it applies to every instantiation of `RawProc`.
impl<'a, F, R, S> Copy for RawProc<'a, F, R, S> {}
#[repr(transparent)]
/// A guard that closes the proc if polling its future panics.
struct Guard<'a, F, R, S>(RawProc<'a, F, R, S>)
struct Guard<'guard, 'a, F, R, S>(&'guard mut RawProc<'a, F, R, S>)
where
F: Future<Output = R> + 'a,
R: 'a,
S: Fn(LightProc) + 'a;
impl<'a, F, R, S> Drop for Guard<'a, F, R, S>
impl<'guard, 'a, F, R, S> Guard<'guard, 'a, F, R, S>
where
    F: Future<Output = R> + 'a,
    R: 'a,
    S: Fn(LightProc) + 'a,
{
    /// Disarms the guard by consuming it without running its `Drop` implementation.
    ///
    /// Call this once the guarded operation has completed normally and the cleanup performed on
    /// drop is no longer wanted.
    #[inline(always)]
    fn disable(self) {
        // Moving `self` into a `ManuallyDrop` suppresses `Drop::drop` for the guard. The wrapper
        // itself has nothing to clean up, so letting it go out of scope is a no-op.
        let _disarmed = ManuallyDrop::new(self);
    }

    /// Returns a pinned mutable reference to the future of the guarded proc.
    ///
    /// # Safety
    ///
    /// The `future` pointer of the wrapped `RawProc` is dereferenced and pinned unchecked.
    /// Callers must ensure that it points to a live future that has not been dropped, that the
    /// future is never moved afterwards, and that no other reference to it is used while the
    /// returned pin is alive. The last point is not enforced by the borrow checker, since this
    /// method only takes `&self`.
    #[inline(always)]
    unsafe fn pin_future(&self) -> Pin<&mut F> {
        let future = self.0.future;
        Pin::new_unchecked(&mut *future)
    }
}
impl<'a, F, R, S> Drop for Guard<'_, 'a, F, R, S>
where
F: Future<Output = R> + 'a,
R: 'a,
S: Fn(LightProc) + 'a,
{
fn drop(&mut self) {
let raw = self.0;
let raw = &self.0;
let ptr = raw.pdata as *const ();
unsafe {