Forked from
syslab / tapir
36 commits behind the upstream repository.
-
Naveen Kr. Sharma authoredcd3706f4
latency.cc 10.72 KiB
// -*- mode: c++; c-file-style: "k&r"; c-basic-offset: 4 -*-
/***********************************************************************
*
* latency.cc:
* latency profiling functions
*
* Copyright 2013-2015 Irene Zhang <iyzhang@cs.washington.edu>
* Naveen Kr. Sharma <naveenks@cs.washington.edu>
* Dan R. K. Ports <drkp@cs.washington.edu>
* Copyright 2009-2012 Massachusetts Institute of Technology
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************/
#include "latency.h"
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <iostream>
#include <fstream>
#include "lib/message.h"
#include "lib/latency-format.pb.h"
static struct Latency_t *latencyHead;
static void
LatencyInit(Latency_t *l, const char *name)
{
memset(l, 0, sizeof *l);
l->name = name;
for (int i = 0; i < LATENCY_DIST_POOL_SIZE; ++i) {
Latency_Dist_t *d = &l->distPool[i];
d->min = ~0ll;
}
}
void
_Latency_Init(Latency_t *l, const char *name)
{
LatencyInit(l, name);
l->next = latencyHead;
latencyHead = l;
}
static void
LatencyMaybeFlush(void)
{
static struct timespec lastFlush;
struct timespec now;
if (clock_gettime(CLOCK_MONOTONIC, &now) < 0)
PPanic("Failed to get CLOCK_MONOTONIC");
if (now.tv_sec != lastFlush.tv_sec) {
lastFlush = now;
Latency_Flush();
}
}
static inline Latency_Dist_t *
LatencyAddHist(Latency_t *l, char type, uint64_t val, uint32_t count)
{
if (!l->dists[(int)type]) {
if (l->distPoolNext == LATENCY_DIST_POOL_SIZE) {
Panic("Too many distributions; maybe increase "
"LATENCY_DIST_POOL_SIZE");
}
l->dists[(int)type] = &l->distPool[l->distPoolNext++];
l->dists[(int)type]->type = type;
}
Latency_Dist_t *d = l->dists[(int)type];
int bucket = 0;
val >>= 1;
while (val) {
val >>= 1;
++bucket;
}
Assert(bucket < LATENCY_NUM_BUCKETS);
d->buckets[bucket] += count;
return d;
}
static void
LatencyAdd(Latency_t *l, char type, uint64_t val)
{
Latency_Dist_t *d = LatencyAddHist(l, type, val, 1);
if (val < d->min)
d->min = val;
if (val > d->max)
d->max = val;
d->total += val;
++d->count;
}
static void
LatencyMap(void (*f)(Latency_t *))
{
Latency_t *l = latencyHead;
for (; l; l = l->next)
f(l);
}
void
Latency_StartRec(Latency_t *l, Latency_Frame_t *fr)
{
fr->accum = 0;
fr->parent = l->bottom;
l->bottom = fr;
Latency_Resume(l);
}
void
Latency_EndRecType(Latency_t *l, Latency_Frame_t *fr, char type)
{
Latency_Pause(l);
Assert(l->bottom == fr);
l->bottom = fr->parent;
LatencyAdd(l, type, fr->accum);
LatencyMaybeFlush();
}
void
Latency_Pause(Latency_t *l)
{
struct timespec end;
if (clock_gettime(CLOCK_MONOTONIC, &end) < 0)
PPanic("Failed to get CLOCK_MONOTONIC");
Latency_Frame_t *fr = l->bottom;
uint64_t delta;
delta = end.tv_sec - fr->start.tv_sec;
delta *= 1000000000ll;
if (end.tv_nsec < fr->start.tv_nsec) {
delta -= 1000000000ll;
delta += (end.tv_nsec + 1000000000ll) - fr->start.tv_nsec;
} else {
delta += end.tv_nsec - fr->start.tv_nsec;
}
fr->accum += delta;
}
void
Latency_Resume(Latency_t *l)
{
if (clock_gettime(CLOCK_MONOTONIC, &l->bottom->start) < 0)
PPanic("Failed to get CLOCK_MONOTONIC");
}
void
Latency_Sum(Latency_t *dest, Latency_t *summand)
{
for (int i = 0; i < summand->distPoolNext; ++i) {
Latency_Dist_t *d = &summand->distPool[i];
for (int b = 0; b < LATENCY_NUM_BUCKETS; ++b) {
if (d->buckets[b] == 0)
continue;
LatencyAddHist(dest, d->type, 1ll<<b, d->buckets[b]);
}
}
for (int i = 0; i < LATENCY_MAX_DIST; ++i) {
Latency_Dist_t *dd = dest->dists[i];
Latency_Dist_t *ds = summand->dists[i];
if (!ds)
continue;
if (ds->min < dd->min)
dd->min = ds->min;
if (ds->max > dd->max)
dd->max = ds->max;
dd->total += ds->total;
dd->count += ds->count;
}
}
static char *
LatencyFmtNS(uint64_t ns, char *buf)
{
static const char *units[] = {"ns", "us", "ms", "s"};
unsigned int unit = 0;
while (ns >= 10000 && unit < (sizeof units / sizeof units[0]) - 1) {
ns /= 1000;
++unit;
}
sprintf(buf, "%" PRIu64 " %s", ns, units[unit]);
return buf;
}
void
Latency_Dump(Latency_t *l)
{
if (l->distPoolNext == 0) {
// No distributions yet
return;
}
char buf[5][64];
// Keep track of the index of the first used distribution, and
// for each other used distribution, the index of the next
// used distribution. This way we only have to make one scan
// over all the distributions and the rest of our scans
// (especially when printing the histograms) are fast.
int firstType = -1;
int nextTypes[LATENCY_MAX_DIST];
int *ppnext = &firstType;
for (int type = 0; type < LATENCY_MAX_DIST; ++type) {
Latency_Dist_t *d = l->dists[type];
if (!d)
continue;
*ppnext = type;
ppnext = &nextTypes[type];
// Find the median
uint64_t accum = 0;
int medianBucket;
for (medianBucket = 0; medianBucket < LATENCY_NUM_BUCKETS;
++medianBucket) {
accum += d->buckets[medianBucket];
if (accum >= d->count / 2)
break;
}
char extra[3] = {'/', (char)type, 0};
if (type == '=')
extra[0] = '\0';
QNotice("LATENCY %s%s: %s %s/%s %s (%" PRIu64 " samples, %s total)",
l->name, extra, LatencyFmtNS(d->min, buf[0]),
LatencyFmtNS(d->total / d->count, buf[1]),
LatencyFmtNS((uint64_t)1 << medianBucket, buf[2]),
LatencyFmtNS(d->max, buf[3]), d->count,
LatencyFmtNS(d->total, buf[4]));
}
*ppnext = -1;
// Find the count of the largest bucket so we can scale the
// histogram
uint64_t largestCount = LATENCY_HISTOGRAM_WIDTH;
for (int i = 0; i < LATENCY_NUM_BUCKETS; ++i) {
uint64_t total = 0;
for (int dist = 0; dist < l->distPoolNext; ++dist) {
Latency_Dist_t *d = &l->distPool[dist];
total += d->buckets[i];
}
if (total > largestCount)
largestCount = total;
}
// Display the histogram
int lastPrinted = LATENCY_NUM_BUCKETS;
for (int i = 0; i < LATENCY_NUM_BUCKETS; ++i) {
char bar[LATENCY_HISTOGRAM_WIDTH + 1];
int pos = 0;
uint64_t total = 0;
for (int type = firstType; type != -1; type = nextTypes[type]) {
Latency_Dist_t *d = l->dists[type];
if (!d)
continue;
total += d->buckets[i];
int goal = ((total * LATENCY_HISTOGRAM_WIDTH)
/ largestCount);
for (; pos < goal; ++pos)
bar[pos] = type;
}
if (total > 0) {
bar[pos] = '\0';
if (lastPrinted < i - 3) {
QNotice("%10s |", "...");
} else {
for (++lastPrinted; lastPrinted < i;
++lastPrinted)
QNotice("%10s | %10ld |",
LatencyFmtNS((uint64_t)1 << lastPrinted,
buf[0]), 0L);
}
QNotice("%10s | %10ld | %s",
LatencyFmtNS((uint64_t)1 << i, buf[0]),
total,
bar);
lastPrinted = i;
}
}
}
void
Latency_DumpAll(void)
{
LatencyMap(Latency_Dump);
}
void
Latency_FlushTo(const char *fname)
{
std::ofstream outfile(fname);
Latency_t *l = latencyHead;
::transport::latency::format::LatencyFile out;
for (; l; l = l->next) {
::transport::latency::format::Latency lout;
Latency_Put(l, lout);
*(out.add_latencies()) = lout;
}
if (!out.SerializeToOstream(&outfile)) {
Panic("Failed to write latency stats to file");
}
}
void
Latency_Flush(void)
{
if (access("/tmp/stats/", R_OK) < 0) {
mkdir("/tmp/stats", 0777);
chmod("/tmp/stats", 0777);
}
char fname[128];
snprintf(fname, sizeof fname, "/tmp/stats/%d-l", getpid());
Latency_FlushTo(fname);
}
void
Latency_Put(Latency_t *l, ::transport::latency::format::Latency &out)
{
out.Clear();
out.set_name(l->name);
for (int i = 0; i < l->distPoolNext; ++i) {
Latency_Dist_t *d = &l->distPool[i];
::transport::latency::format::LatencyDist *outd = out.add_dists();
outd->set_type(d->type);
outd->set_min(d->min);
outd->set_max(d->max);
outd->set_total(d->total);
outd->set_count(d->count);
for (int b = 0; b < LATENCY_NUM_BUCKETS; ++b) {
outd->add_buckets(d->buckets[b]);
}
}
}
bool
Latency_TryGet(const ::transport::latency::format::Latency &in, Latency_t *l)
{
LatencyInit(l, strdup(in.name().c_str())); // XXX Memory leak
l->distPoolNext = in.dists_size();
for (int i = 0; i < l->distPoolNext; ++i) {
const ::transport::latency::format::LatencyDist &ind =
in.dists(i);
Latency_Dist_t *d = &l->distPool[i];
d->type = ind.type();
l->dists[(int)d->type] = d;
d->min = ind.min();
d->max = ind.max();
d->total = ind.total();
d->count = ind.count();
for (int b = 0; b < LATENCY_NUM_BUCKETS; ++b)
d->buckets[b] = ind.buckets(b);
}
return true;
}