Saturday, March 8, 2008

Java collections perf redux

I finally got some time to run my microbenchmark for various Java collection objects.

Here's the source:

package com.solyanik.perftesting;

import java.util.Arrays;
import java.util.Vector;
import java.util.ArrayList;

/**
* Implements a micro-benchmark using
* high-resolution RDTSC timer.
*
* @author sergey@solyanik.com (Sergey Solyanik)
*/
/**
 * Implements a micro-benchmark of Java collection stores using the
 * high-resolution RDTSC timer (via JNI).
 *
 * @author sergey@solyanik.com (Sergey Solyanik)
 */
public class MicroBenchmark {

    /*
     * RDTSC - returns the number of clocks since the core was last reset.
     *
     * NOTE: no serializing instruction is executed before RDTSC, so this
     * is not suitable for really short runs, as the second RDTSC can
     * execute before the instruction we're trying to measure.
     *
     * NOTE2: Different cores have different clock counters, and they are
     * not synchronized — hence fixthread() below.
     */
    private native static long rdtsc();

    /**
     * A function that consists of one instruction - ret.
     */
    private native static void nop();

    /**
     * Fixes the thread to the first CPU and core, so all RDTSC readings
     * come from the same per-core counter.
     */
    private native static void fixthread();

    /**
     * Loads the library containing our native code.
     */
    static {
        System.loadLibrary("jnipc");
    }

    /**
     * Number of loop iterations per sample.
     */
    private static final int ITERATIONS = 10000;

    /**
     * Number of samples.
     */
    private static final int SAMPLES = 1000;

    /**
     * Number of buckets for the histogram.
     */
    private static final int BUCKETS = 20;

    /**
     * Bucket scale for the histogram (width of the first bucket, in
     * clocks; each subsequent bucket doubles in width).
     */
    private static final int BUCKET_SCALE = 1000;

    /**
     * Prints median/average statistics and a log2-scaled histogram for an
     * array of clock intervals.
     *
     * NOTE: sorts {@code clocks} in place as a side effect.
     *
     * @param header label printed before the statistics
     * @param clocks the array of per-sample clock counts
     * @param iterations number of iterations per sample
     * @param bucket_number how many buckets to use for the histogram
     * @param scale scale for the histogram (first bucket's upper bound)
     */
    private static void Analyze(
            String header, long[] clocks,
            int iterations, int bucket_number,
            int scale) {
        Arrays.sort(clocks);
        double average = 0.0;

        long[] buckets = new long[bucket_number];

        double log2 = Math.log(2.0);

        for (int i = 0; i < clocks.length; ++i) {
            average += clocks[i];
            // Bucket index = floor(log2(clocks[i] / scale)), clamped to
            // [0, bucket_number - 1]: bucket 0 holds samples below scale,
            // the last bucket collects everything past the largest range.
            int ndx = (int) (Math.log(
                    (double) clocks[i]
                    / (double) scale) / log2);
            if (ndx < 0)
                buckets[0]++;
            else if (ndx < buckets.length - 1)
                buckets[ndx + 1]++;
            else
                buckets[buckets.length - 1]++;
        }

        average /= clocks.length;

        System.out.println(header + ":");
        System.out.println("Median for " +
                iterations + " iterations: " +
                clocks[clocks.length / 2]);
        System.out.println("Average for " +
                iterations + " iterations: " +
                average);
        // BUG FIX: this was long/int integer division, which truncated the
        // per-iteration median to a whole number of clocks while the
        // average right below was printed with full precision. Divide as
        // double so both statistics are reported consistently.
        System.out.println("Median per iteration: "
                + (double) clocks[clocks.length / 2]
                / iterations);
        System.out.println("Average per iteration: "
                + average / iterations);

        System.out.println("Histogram:");
        int prev = 0;
        int curr = scale;
        for (int i = 0; i < buckets.length - 1; ++i) {
            System.out.println(prev + " - " + curr
                    + ": " + buckets[i]);
            prev = curr;
            curr *= 2;
        }

        System.out.println(" > " + prev + ": " +
                buckets[buckets.length - 1]);
    }

    /**
     * Main function. Benchmarks per-element stores into a raw array, an
     * expanding and a pre-sized Vector, and an expanding and a
     * pre-sized ArrayList, printing statistics for each.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        fixthread();
        System.out.println("Let the fun begin!");

        long[] clocks = new long[SAMPLES];

        // BUG FIX: was `new Boolean(true)` — a deprecated constructor that
        // always allocates. The canonical cached instance behaves
        // identically for this benchmark (we only store the reference).
        Boolean b = Boolean.TRUE;

        for (int i = 0; i < clocks.length; ++i) {
            Boolean[] a = new Boolean[ITERATIONS];
            long start = rdtsc();
            for (int j = 0; j < ITERATIONS; ++j)
                a[j] = b;
            long stop = rdtsc();
            clocks[i] = stop - start;
        }

        Analyze("Array",
                clocks, ITERATIONS, BUCKETS,
                BUCKET_SCALE);

        for (int i = 0; i < clocks.length; ++i) {
            Vector<Object> v = new Vector<Object>();

            long start = rdtsc();
            for (int j = 0; j < ITERATIONS; ++j)
                v.add(b);
            long stop = rdtsc();
            clocks[i] = stop - start;
        }

        Analyze("Expanding Vector", clocks,
                ITERATIONS, BUCKETS, BUCKET_SCALE);

        for (int i = 0; i < clocks.length; ++i) {
            Vector<Object> v = new Vector<Object>();
            v.setSize(ITERATIONS);
            long start = rdtsc();
            for (int j = 0; j < ITERATIONS; ++j)
                v.set(j, b);
            long stop = rdtsc();
            clocks[i] = stop - start;
        }

        Analyze("Pre-allocated Vector",
                clocks, ITERATIONS, BUCKETS,
                BUCKET_SCALE);

        for (int i = 0; i < clocks.length; ++i) {
            ArrayList<Object> al = new ArrayList<Object>();
            long start = rdtsc();
            for (int j = 0; j < ITERATIONS; ++j)
                al.add(b);
            long stop = rdtsc();
            clocks[i] = stop - start;
        }

        Analyze("Expanding ArrayList", clocks,
                ITERATIONS, BUCKETS, BUCKET_SCALE);

        for (int i = 0; i < clocks.length; ++i) {
            ArrayList<Object> al = new ArrayList<Object>();
            al.ensureCapacity(ITERATIONS);
            long start = rdtsc();
            for (int j = 0; j < ITERATIONS; ++j)
                al.add(b);
            long stop = rdtsc();
            clocks[i] = stop - start;
        }

        Analyze("Pre-allocated ArrayList", clocks,
                ITERATIONS, BUCKETS, BUCKET_SCALE);
    }
}

And here's what I've got:

Median Average
Array 14 15
Expanding vector 124 133
Pre-allocated vector 115 119
Expanding ArrayList 57 66
Pre-allocated ArrayList 43 46
As a reminder, the C++ numbers were as follows:

Median Average
Array 2 2
Pre-allocated vector,
assignment 2 2
Pre-allocated vector,
push_back 8 8
Expanding vector,
push_back 145 148 (*)

(*) Obviously, this number depends very much on the number of expansions: since the STL vector grows by 50%, very big vectors will have a smaller per-element cost. See my previous posts for details on the C++ testing.

No comments: