From 855301bf26c29828bbdd90d957c90faa80d0b3be Mon Sep 17 00:00:00 2001 From: Stanley Feng <stfeng@google.com> Date: Sat, 31 Oct 2015 11:35:00 -0700 Subject: [PATCH] Add a new and optional measurement type called "RAW" which outputs every datapoint of a run. Raw datapoint output is quite useful and important for two reasons: 1) Allows the user to do more thorough statistical analysis on the complete set of the data. 2) Using timestamps that are logged with raw data points, it becomes much easier to investigate and correlate latency outliers at the backend service. --- .../yahoo/ycsb/measurements/Measurements.java | 40 +++- .../ycsb/measurements/OneMeasurementRaw.java | 192 ++++++++++++++++++ workloads/workload_template | 17 ++ 3 files changed, 238 insertions(+), 11 deletions(-) create mode 100644 core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java diff --git a/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java b/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java index e9137584..6c84f80a 100644 --- a/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java +++ b/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java @@ -29,8 +29,18 @@ import com.yahoo.ycsb.measurements.exporter.MeasurementsExporter; * @author cooperb * */ -public class Measurements -{ +public class Measurements { + /** + * All supported measurement types are defined in this enum. + * + */ + public enum MeasurementType { + HISTOGRAM, + HDRHISTOGRAM, + HDRHISTOGRAM_AND_HISTOGRAM, + TIMESERIES, + RAW + } public static final String MEASUREMENT_TYPE_PROPERTY = "measurementtype"; private static final String MEASUREMENT_TYPE_PROPERTY_DEFAULT = "hdrhistogram"; @@ -60,7 +70,7 @@ public class Measurements final ConcurrentHashMap<String,OneMeasurement> _opToMesurementMap; final ConcurrentHashMap<String,OneMeasurement> _opToIntendedMesurementMap; - final int _measurementType; + final MeasurementType _measurementType; final int _measurementInterval; private Properties _props; @@ -77,19 +87,23 @@ public class Measurements String mTypeString = _props.getProperty(MEASUREMENT_TYPE_PROPERTY, MEASUREMENT_TYPE_PROPERTY_DEFAULT); if (mTypeString.equals("histogram")) { - _measurementType = 0; + _measurementType = MeasurementType.HISTOGRAM; } else if (mTypeString.equals("hdrhistogram")) { - _measurementType = 1; + _measurementType = MeasurementType.HDRHISTOGRAM; } else if (mTypeString.equals("hdrhistogram+histogram")) { - _measurementType = 2; + _measurementType = MeasurementType.HDRHISTOGRAM_AND_HISTOGRAM; } else if (mTypeString.equals("timeseries")) { - _measurementType = 3; + _measurementType = MeasurementType.TIMESERIES; + } + else if (mTypeString.equals("raw")) + { + _measurementType = MeasurementType.RAW; } else { throw new IllegalArgumentException("unknown "+MEASUREMENT_TYPE_PROPERTY+"="+mTypeString); @@ -117,16 +131,20 @@ public class Measurements { switch (_measurementType) { - case 0: + case HISTOGRAM: return new OneMeasurementHistogram(name, _props); - case 1: + case HDRHISTOGRAM: return new OneMeasurementHdrHistogram(name, _props); - case 2: + case HDRHISTOGRAM_AND_HISTOGRAM: return new TwoInOneMeasurement(name, new OneMeasurementHdrHistogram("Hdr"+name, _props), new OneMeasurementHistogram("Bucket"+name, _props)); - default: + case TIMESERIES: return new OneMeasurementTimeSeries(name, _props); + case RAW: + return new OneMeasurementRaw(name, _props); + default: + throw new AssertionError("Impossible to be here. Dead code reached. Bugs?"); } } diff --git a/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java b/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java new file mode 100644 index 00000000..b401b42f --- /dev/null +++ b/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java @@ -0,0 +1,192 @@ +/** + * Copyright (c) 2015 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package com.yahoo.ycsb.measurements; + +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.Properties; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import com.yahoo.ycsb.measurements.exporter.MeasurementsExporter; + +/** + * Record a series of measurements as raw data points without down sampling, + * optionally write to an output file when configured. + * + * @author stfeng + * + */ +public class OneMeasurementRaw extends OneMeasurement { + /** + * One raw data point, two fields: timestamp (ms) when the datapoint is + * inserted, and the value. + */ + class RawDataPoint { + private final long timestamp; + private final int value; + + public RawDataPoint(int value) { + this.timestamp = System.currentTimeMillis(); + this.value = value; + } + + public long timeStamp() { + return timestamp; + } + + public int value() { + return value; + } + } + + class RawDataPointComparator implements Comparator<RawDataPoint> { + @Override + public int compare(RawDataPoint p1, RawDataPoint p2){ + if (p1.value() < p2.value()){ + return -1; + } else if (p1.value() == p2.value()) { + return 0; + } else { + return 1; + } + } + } + + /** + * Optionally, user can configure an output file to save the raw data points. + * Default is none, raw results will be written to stdout. + * + */ + public static final String OUTPUT_FILE_PATH = "measurement.raw.output_file"; + public static final String OUTPUT_FILE_PATH_DEFAULT = ""; + private String outputFilePath = ""; + private final PrintStream outputStream; + + private ArrayList<RawDataPoint> measurements; + private long totalLatency = 0; + + // A window of stats to print summary for at the next getSummary() call. + // It's supposed to be a one line summary, so we will just print count and + // average. + private int windowOperations = 0; + private long windowTotalLatency = 0; + + public OneMeasurementRaw(String name, Properties props) { + super(name); + + outputFilePath = props.getProperty(OUTPUT_FILE_PATH, + OUTPUT_FILE_PATH_DEFAULT); + if (!outputFilePath.isEmpty()) { + System.out.println("Raw data measurement: will output to result file: " + + outputFilePath); + + try { + outputStream = new PrintStream( + new FileOutputStream(outputFilePath, true), + true); + } catch (FileNotFoundException e) { + throw new RuntimeException("Failed to open raw data output file", e); + } + + } else{ + System.out.println("Raw data measurement: will output to stdout."); + outputStream = System.out; + + } + measurements = new ArrayList<RawDataPoint>(1000); + } + + @Override + public synchronized void measure(int latency) { + totalLatency += latency; + windowTotalLatency += latency; + windowOperations++; + + measurements.add(new RawDataPoint(latency)); + } + + @Override + public void exportMeasurements(MeasurementsExporter exporter) + throws IOException { + // Output raw data points first then print out a summary of percentiles to + // stdout. + + outputStream.println(getName() + + " latency raw data: op, timestamp(ms), latency(us)"); + for (RawDataPoint point : measurements) { + outputStream.println( + String.format("%s,%d,%d", getName(), point.timeStamp(), + point.value())); + } + if (outputStream != System.out) { + outputStream.close(); + } + + int totalOps = measurements.size(); + exporter.write(getName(), "Total Operations", totalOps); + if (totalOps > 0) { + exporter.write(getName(), + "Below is a summary of latency in microseconds:", -1); + exporter.write(getName(), "Average", + (double)totalLatency / (double)totalOps); + + Collections.sort(measurements, new RawDataPointComparator()); + + exporter.write(getName(), "Min", measurements.get(0).value()); + exporter.write( + getName(), "Max", measurements.get(totalOps - 1).value()); + exporter.write( + getName(), "p1", measurements.get((int)(totalOps*0.01)).value()); + exporter.write( + getName(), "p5", measurements.get((int)(totalOps*0.05)).value()); + exporter.write( + getName(), "p50", measurements.get((int)(totalOps*0.5)).value()); + exporter.write( + getName(), "p90", measurements.get((int)(totalOps*0.9)).value()); + exporter.write( + getName(), "p95", measurements.get((int)(totalOps*0.95)).value()); + exporter.write( + getName(), "p99", measurements.get((int)(totalOps*0.99)).value()); + exporter.write(getName(), "p99.9", + measurements.get((int)(totalOps*0.999)).value()); + exporter.write(getName(), "p99.99", + measurements.get((int)(totalOps*0.9999)).value()); + } + + exportReturnCodes(exporter); + } + + @Override + public synchronized String getSummary() { + if (windowOperations == 0) { + return ""; + } + + String toReturn = String.format("%s count: %d, average latency(us): %.2f", + getName(), windowOperations, + (double)windowTotalLatency / (double)windowOperations); + + windowTotalLatency=0; + windowOperations=0; + + return toReturn; + } +} diff --git a/workloads/workload_template b/workloads/workload_template index e59bace1..f95d1d48 100644 --- a/workloads/workload_template +++ b/workloads/workload_template @@ -115,6 +115,23 @@ table=usertable # How the latency measurements are presented measurementtype=histogram #measurementtype=timeseries +#measurementtype=raw +# When measurementtype is set to raw, measurements will be output +# as RAW datapoints in the following csv format: +# "operation, timestamp of the measurement, latency in us" +# +# Raw datapoints are collected in-memory while the test is running. Each +# data point consumes about 20 bytes (including java object overhead). +# For a typical run of 1 million to 10 million operations, this should +# easily fit into memory. If you plan to do a run with 100s of millions of +# operations, consider increasing your jvm heap size before you enable the +# RAW measurement type, or split the run into multiple runs. +# +# Optionally, you can specify an output file to save raw datapoints. +# Otherwise, raw datapoints will be written to stdout. +# The output file will be appended to if it already exists, otherwise +# a new output file will be created. +#measurement.raw.output_file = /tmp/your_output_file_for_this_run # The range of latencies to track in the histogram (milliseconds) histogram.buckets=1000 -- GitLab