Skip to content
Snippets Groups Projects
Commit 2c5daad0 authored by Russell Sears's avatar Russell Sears
Browse files

add ability to specify field length distributions

parent e204c050
No related branches found
No related tags found
No related merge requests found
/**
* Copyright (c) 2010 Yahoo! Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License. See accompanying
* LICENSE file.
*/
package com.yahoo.ycsb.generator;
/**
 * Degenerate integer generator: every draw yields the single value supplied
 * at construction time.
 *
 * @author sears
 */
public class ConstantIntegerGenerator extends IntegerGenerator {
    /** The one value this generator ever produces. */
    private final int constant;

    /**
     * @param i The integer that this generator will always return.
     */
    public ConstantIntegerGenerator(int i) {
        constant = i;
    }

    /**
     * @return the value passed to the constructor
     */
    @Override
    public int nextInt() {
        return constant;
    }

    /**
     * @return the constant value, widened to a double (the mean of a constant is the constant)
     */
    @Override
    public double mean() {
        return constant;
    }
}
......@@ -45,5 +45,8 @@ public class CounterGenerator extends IntegerGenerator
return lastint;
}
/**
 * Always fails: this generator does not provide a mean.
 *
 * @throws UnsupportedOperationException on every call
 */
@Override
public double mean() {
    final String reason = "Can't compute mean of non-stationary distribution!";
    throw new UnsupportedOperationException(reason);
}
}
/**
* Copyright (c) 2010 Yahoo! Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License. See accompanying
* LICENSE file.
*/
package com.yahoo.ycsb.generator;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import com.yahoo.ycsb.generator.IntegerGenerator;
/**
 * Generate integers according to a histogram distribution. The histogram
 * buckets are of width one, but the values are multiplied by a block size.
 * Therefore, instead of drawing sizes uniformly at random within each
 * bucket, we always draw the largest value in the current bucket, so the value
 * drawn is always a multiple of block_size: bucket {@code i} yields
 * {@code (i + 1) * block_size}.
 *
 * The minimum value this distribution returns is block_size (not zero).
 *
 * The histogram file is tab separated. Its first line is
 * {@code BlockSize<TAB>size}; every following line is
 * {@code bucket<TAB>count}.
 *
 * Modified Nov 19 2010 by sears
 *
 * @author snjones
 *
 */
public class HistogramGenerator extends IntegerGenerator {
    long block_size;
    long[] buckets;
    /** Sum of all bucket counts. */
    long area;
    Random rand = new Random();
    /** Sum over all buckets of {@code (i + 1) * buckets[i]}. */
    long weighted_area = 0;
    double mean_size = 0;

    /**
     * Load a histogram from a tab separated file.
     *
     * @param histogramfile path of the histogram file to read
     * @throws IOException if the file cannot be read, is empty, or does not
     *         start with a well-formed BlockSize line
     */
    public HistogramGenerator(String histogramfile) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(histogramfile));
        try {
            String str = in.readLine();
            if (str == null) {
                throw new IOException("Empty input file!\n");
            }
            String[] line = str.split("\t");
            // Check the length too, so a header without a value is reported
            // as a bad header instead of an ArrayIndexOutOfBoundsException.
            if (line.length < 2 || line[0].compareTo("BlockSize") != 0) {
                throw new IOException("First line of histogram is not the BlockSize!\n");
            }
            block_size = Integer.parseInt(line[1]);

            ArrayList<Integer> a = new ArrayList<Integer>();
            while ((str = in.readLine()) != null) {
                // [0] is the bucket, [1] is the value
                line = str.split("\t");
                a.add(Integer.parseInt(line[0]), Integer.parseInt(line[1]));
            }

            buckets = new long[a.size()];
            for (int i = 0; i < a.size(); i++) {
                buckets[i] = a.get(i);
            }
        } finally {
            // Release the file handle even when parsing fails part way through.
            in.close();
        }
        init();
    }

    /**
     * Build a generator from an in-memory histogram.
     *
     * @param buckets bucket counts; the array is used as-is, not copied
     * @param block_size multiplier applied to the (one-based) bucket number
     */
    public HistogramGenerator(long[] buckets, int block_size) {
        this.block_size = block_size;
        this.buckets = buckets;
        init();
    }

    /**
     * Accumulate the total and weighted bucket counts and derive the mean of
     * the values that {@link #nextInt()} returns.
     */
    private void init() {
        for (int i = 0; i < buckets.length; i++) {
            area += buckets[i];
            // Bucket i is reported as (i + 1) * block_size by nextInt(), so it
            // is weighted by (i + 1) here; the sum must accumulate (+=).
            weighted_area += (i + 1) * buckets[i];
        }
        // calculate average file size
        mean_size = ((double) block_size) * ((double) weighted_area) / (double) (area);
    }

    /**
     * Draw a value: bucket {@code i} is chosen with probability
     * {@code buckets[i] / area} and reported as {@code (i + 1) * block_size}.
     *
     * @return a positive multiple of block_size
     */
    @Override
    public int nextInt() {
        // NOTE(review): area is narrowed to int for Random.nextInt; histograms
        // whose total count exceeds Integer.MAX_VALUE are not handled.
        long remaining = rand.nextInt((int) area);
        for (int i = 0; i < buckets.length; i++) {
            remaining -= buckets[i];
            // Strictly negative: draws 0 .. buckets[0]-1 belong to bucket 0,
            // the next buckets[1] draws to bucket 1, and so on.
            if (remaining < 0) {
                return (int) ((i + 1) * block_size);
            }
        }
        // Only reachable if area disagrees with the bucket counts; fall back
        // to the value of the last bucket.
        return (int) (buckets.length * block_size);
    }

    /**
     * @return the mean computed when this generator was constructed
     */
    @Override
    public double mean() {
        return mean_size;
    }
}
......@@ -67,4 +67,8 @@ public abstract class IntegerGenerator extends Generator
{
return lastint;
}
/**
* Return the expected value (mean) of the values this generator will return.
*
* @return the mean of the values produced by this generator
* @throws UnsupportedOperationException in implementations that do not provide a mean
*/
public abstract double mean();
}
......@@ -126,4 +126,12 @@ public class ScrambledZipfianGenerator extends IntegerGenerator
System.out.println(""+gen.nextInt());
}
}
/**
 * The values are scrambled (hopefully uniformly), so the mean is taken to be
 * the midpoint of the range.
 *
 * @return the average of {@code _min} and {@code _max}, summed as longs
 */
@Override
public double mean() {
    final long lower = (long) _min;
    final long upper = (long) _max;
    final double total = (double) (lower + upper);
    return total / 2.0;
}
}
......@@ -53,4 +53,9 @@ public class SkewedLatestGenerator extends IntegerGenerator
}
/**
 * Always fails: this generator does not provide a mean.
 *
 * @throws UnsupportedOperationException on every call
 */
@Override
public double mean() {
    final String reason = "Can't compute mean of non-stationary distribution!";
    throw new UnsupportedOperationException(reason);
}
}
......@@ -50,4 +50,8 @@ public class UniformIntegerGenerator extends IntegerGenerator
return ret;
}
/**
 * @return half of the sum of {@code _lb} and {@code _ub}, with the sum taken as a long
 */
@Override
public double mean() {
    final long boundSum = (long) (_lb + (long) _ub);
    return ((double) boundSum) / 2.0;
}
}
......@@ -316,4 +316,12 @@ public class ZipfianGenerator extends IntegerGenerator
{
new ZipfianGenerator(ScrambledZipfianGenerator.ITEM_COUNT);
}
/**
 * Not implemented yet; every call fails.
 *
 * @todo Implement ZipfianGenerator.mean()
 * @throws UnsupportedOperationException on every call
 */
@Override
public double mean() {
    final String reason = "@todo implement ZipfianGenerator.mean()";
    throw new UnsupportedOperationException(reason);
}
}
......@@ -22,7 +22,9 @@ import com.yahoo.ycsb.*;
import com.yahoo.ycsb.generator.CounterGenerator;
import com.yahoo.ycsb.generator.DiscreteGenerator;
import com.yahoo.ycsb.generator.Generator;
import com.yahoo.ycsb.generator.ConstantIntegerGenerator;
import com.yahoo.ycsb.generator.HotspotIntegerGenerator;
import com.yahoo.ycsb.generator.HistogramGenerator;
import com.yahoo.ycsb.generator.IntegerGenerator;
import com.yahoo.ycsb.generator.ScrambledZipfianGenerator;
import com.yahoo.ycsb.generator.SkewedLatestGenerator;
......@@ -30,6 +32,7 @@ import com.yahoo.ycsb.generator.UniformIntegerGenerator;
import com.yahoo.ycsb.generator.ZipfianGenerator;
import com.yahoo.ycsb.measurements.Measurements;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Vector;
......@@ -84,18 +87,41 @@ public class CoreWorkload extends Workload
int fieldcount;
/**
* The name of the property for the field length distribution. Options are "uniform", "zipfian" (favoring short records), "constant", and "histogram".
*
* If "constant", every field has exactly the length specified by the fieldlength property. If "uniform" or "zipfian", the fieldlength property is
* the maximum field length (the minimum is 1). If "histogram", the histogram is read from the file named by the "fieldlengthhistogram" property.
*/
public static final String FIELD_LENGTH_DISTRIBUTION_PROPERTY="fieldlengthdistribution";
/**
* The default field length distribution.
*/
public static final String FIELD_LENGTH_DISTRIBUTION_PROPERTY_DEFAULT = "constant";
/**
* The name of the property for the length of a field in bytes.
*/
public static final String FIELD_LENGTH_PROPERTY="fieldlength";
/**
* The default length of a field in bytes.
* The default maximum length of a field in bytes.
*/
public static final String FIELD_LENGTH_PROPERTY_DEFAULT="100";
int fieldlength;
/**
* The name of a property that specifies the filename containing the field length histogram (only used if fieldlengthdistribution is "histogram").
*/
public static final String FIELD_LENGTH_HISTOGRAM_FILE_PROPERTY = "fieldlengthhistogram";
/**
* The default filename containing a field length histogram.
*/
public static final String FIELD_LENGTH_HISTOGRAM_FILE_PROPERTY_DEFAULT = "hist.txt";
/**
* Generator object that produces field lengths. The value of this depends on the properties that start with "FIELD_LENGTH_".
*/
IntegerGenerator fieldlengthgenerator;
/**
* The name of the property for deciding whether to read one field (false) or all fields (true) of a record.
*/
......@@ -247,6 +273,29 @@ public class CoreWorkload extends Workload
int recordcount;
/**
 * Create the field length generator selected by the workload properties.
 *
 * Reads the field length distribution, field length and field length
 * histogram file properties (falling back to their defaults) and returns a
 * constant, uniform, zipfian or histogram generator accordingly.
 *
 * @param p the workload properties
 * @return the generator for the configured field length distribution
 * @throws WorkloadException if the distribution name is not recognised or
 *         the histogram file cannot be read
 */
protected static IntegerGenerator getFieldLengthGenerator(Properties p) throws WorkloadException {
    final String distribution =
        p.getProperty(FIELD_LENGTH_DISTRIBUTION_PROPERTY, FIELD_LENGTH_DISTRIBUTION_PROPERTY_DEFAULT);
    // Parsed up front for every distribution, including "histogram".
    final int length =
        Integer.parseInt(p.getProperty(FIELD_LENGTH_PROPERTY, FIELD_LENGTH_PROPERTY_DEFAULT));
    final String histogramFile =
        p.getProperty(FIELD_LENGTH_HISTOGRAM_FILE_PROPERTY, FIELD_LENGTH_HISTOGRAM_FILE_PROPERTY_DEFAULT);

    if (distribution.equals("constant")) {
        return new ConstantIntegerGenerator(length);
    }
    if (distribution.equals("uniform")) {
        return new UniformIntegerGenerator(1, length);
    }
    if (distribution.equals("zipfian")) {
        return new ZipfianGenerator(1, length);
    }
    if (distribution.equals("histogram")) {
        try {
            return new HistogramGenerator(histogramFile);
        } catch (IOException e) {
            throw new WorkloadException("Couldn't read field length histogram file: " + histogramFile, e);
        }
    }
    throw new WorkloadException("Unknown field length distribution \"" + distribution + "\"");
}
/**
* Initialize the scenario.
* Called once, in the main client thread, before any operations are started.
......@@ -254,8 +303,10 @@ public class CoreWorkload extends Workload
public void init(Properties p) throws WorkloadException
{
table = p.getProperty(TABLENAME_PROPERTY,TABLENAME_PROPERTY_DEFAULT);
fieldcount=Integer.parseInt(p.getProperty(FIELD_COUNT_PROPERTY,FIELD_COUNT_PROPERTY_DEFAULT));
fieldlength=Integer.parseInt(p.getProperty(FIELD_LENGTH_PROPERTY,FIELD_LENGTH_PROPERTY_DEFAULT));
int fieldcount=Integer.parseInt(p.getProperty(FIELD_COUNT_PROPERTY,FIELD_COUNT_PROPERTY_DEFAULT));
fieldlengthgenerator = CoreWorkload.getFieldLengthGenerator(p);
double readproportion=Double.parseDouble(p.getProperty(READ_PROPORTION_PROPERTY,READ_PROPORTION_PROPERTY_DEFAULT));
double updateproportion=Double.parseDouble(p.getProperty(UPDATE_PROPORTION_PROPERTY,UPDATE_PROPORTION_PROPERTY_DEFAULT));
double insertproportion=Double.parseDouble(p.getProperty(INSERT_PROPORTION_PROPERTY,INSERT_PROPORTION_PROPERTY_DEFAULT));
......@@ -341,7 +392,7 @@ public class CoreWorkload extends Workload
}
else
{
throw new WorkloadException("Unknown distribution \""+requestdistrib+"\"");
throw new WorkloadException("Unknown request distribution \""+requestdistrib+"\"");
}
fieldchooser=new UniformIntegerGenerator(0,fieldcount-1);
......@@ -378,7 +429,7 @@ public class CoreWorkload extends Workload
for (int i=0; i<fieldcount; i++)
{
String fieldkey="field"+i;
String data=Utils.ASCIIString(fieldlength);
String data=Utils.ASCIIString(fieldlengthgenerator.nextInt());
values.put(fieldkey,data);
}
if (db.insert(table,dbkey,values) == 0)
......@@ -486,7 +537,7 @@ public class CoreWorkload extends Workload
for (int i=0; i<fieldcount; i++)
{
String fieldname="field"+i;
String data=Utils.ASCIIString(fieldlength);
String data=Utils.ASCIIString(fieldlengthgenerator.nextInt());
values.put(fieldname,data);
}
}
......@@ -494,7 +545,7 @@ public class CoreWorkload extends Workload
{
//update a random field
String fieldname="field"+fieldchooser.nextString();
String data=Utils.ASCIIString(fieldlength);
String data=Utils.ASCIIString(fieldlengthgenerator.nextInt());
values.put(fieldname,data);
}
......@@ -568,7 +619,7 @@ public class CoreWorkload extends Workload
for (int i=0; i<fieldcount; i++)
{
String fieldname="field"+i;
String data=Utils.ASCIIString(fieldlength);
String data=Utils.ASCIIString(fieldlengthgenerator.nextInt());
values.put(fieldname,data);
}
}
......@@ -576,7 +627,7 @@ public class CoreWorkload extends Workload
{
//update a random field
String fieldname="field"+fieldchooser.nextString();
String data=Utils.ASCIIString(fieldlength);
String data=Utils.ASCIIString(fieldlengthgenerator.nextInt());
values.put(fieldname,data);
}
......@@ -597,7 +648,7 @@ public class CoreWorkload extends Workload
for (int i=0; i<fieldcount; i++)
{
String fieldkey="field"+i;
String data=Utils.ASCIIString(fieldlength);
String data=Utils.ASCIIString(fieldlengthgenerator.nextInt());
values.put(fieldkey,data);
}
db.insert(table,dbkey,values);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment