Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added serialization #162

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ log
.classpath
.project
.settings/
**/.factorypath

# Latex and R related files
_region_.prv/
Expand Down
6 changes: 3 additions & 3 deletions benchmark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<compilerVersion>1.7</compilerVersion>
<source>1.7</source>
<target>1.7</target>
<compilerVersion>${java.version}</compilerVersion>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>

Expand Down
4 changes: 2 additions & 2 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@
<configuration>
<verbose>true</verbose>
<compilerVersion>1.8</compilerVersion>
<source>1.7</source>
<target>1.7</target>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.tdunning.math.stats;

import java.util.Properties;

public abstract class AbstractTDigestSerializer<T extends TDigest, O extends Object> implements TDigestSerializer<T, O> {
private Properties properties;

public AbstractTDigestSerializer(Properties properties) {
this.properties = properties;
}

public Properties getProperties() {
return properties;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.tdunning.math.stats;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Properties;

public class TDigestDefaultSerializer extends AbstractTDigestSerializer<TDigest, byte[]> {

public TDigestDefaultSerializer(Properties properties) {
super(properties);
}

@Override
public byte[] serialize(TDigest tDigest) throws TDigestSerializerException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(5120);
try (ObjectOutputStream out = new ObjectOutputStream(baos)) {
out.writeObject(tDigest);
return baos.toByteArray();
} catch (IOException e) {
throw new TDigestSerializerException(e);
}
}

@Override
public TDigest deserialize(byte[] object) throws TDigestSerializerException {
try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(object))) {
return (TDigest) in.readObject();
} catch (ClassCastException | ClassNotFoundException | IOException e) {
throw new TDigestSerializerException(e);
}
}

}
32 changes: 32 additions & 0 deletions core/src/main/java/com/tdunning/math/stats/TDigestSerializer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.tdunning.math.stats;

import java.util.Properties;

public interface TDigestSerializer<T extends TDigest, O extends Object> {

/**
* Serializes a TDigest object
*
* @param tDigest
* @return
* @throws TDigestSerializerException
*/
public O serialize(T tDigest) throws TDigestSerializerException;

/**
* De-serializes an Object into a TDigest
*
* @param object
* @return
* @throws TDigestSerializerException
*/
public T deserialize(O object) throws TDigestSerializerException;

/**
* Returns properties defined in serialization.properties file
*
* @return
*/
public Properties getProperties();

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.tdunning.math.stats;

public class TDigestSerializerException extends Exception {

public TDigestSerializerException(String error) {
super(error);
}

public TDigestSerializerException(String error, Exception e) {
super(error, e);
}

public TDigestSerializerException(Exception e) {
super(e);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package com.tdunning.math.stats;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public class TDigestSerializerFactory {
private static final String PROPERTIES_FILE_NAME = "serialization.properties";
private static final String SERIALIZER_PROP_KEY = "tdigest.serializerClass";

private Class<? extends TDigestSerializer<? extends TDigest, ? extends Object>> clazz;
private Properties serializationProperties;

@SuppressWarnings("unchecked")
public TDigestSerializerFactory() {
Properties props = new Properties();
try {
File f = new File(PROPERTIES_FILE_NAME);
InputStream is = f.exists() ?
new FileInputStream(f)
: getClass().getClassLoader().getResourceAsStream(PROPERTIES_FILE_NAME);

props.load(is);
if(!props.containsKey(SERIALIZER_PROP_KEY))
throw new IllegalStateException(PROPERTIES_FILE_NAME + " does not contain key " + SERIALIZER_PROP_KEY);

clazz = (Class<? extends TDigestSerializer<? extends TDigest, ? extends Object>>) Class.forName(props.getProperty(SERIALIZER_PROP_KEY));
serializationProperties = props;
} catch (IOException | ClassNotFoundException e) {
clazz = TDigestDefaultSerializer.class;
e.printStackTrace(System.err);
}
}

@SuppressWarnings("rawtypes")
public TDigestSerializer create() throws TDigestSerializerException {
try {
return (TDigestSerializer)clazz.getConstructor(Properties.class).newInstance(serializationProperties);
} catch (Exception e) {
throw new TDigestSerializerException(e);
}
}

}
1 change: 1 addition & 0 deletions core/src/main/resources/serialization.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tdigest.serializerClass=com.tdunning.math.stats.TDigestDefaultSerializer
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.junit.Test;

import java.io.FileNotFoundException;
import java.io.IOException;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ public void testK() throws FileNotFoundException {
}
}
}
out.close();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,66 +17,55 @@

package com.tdunning.math.stats;

import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Iterator;
import java.util.Random;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import org.junit.Before;
import org.junit.Test;

/**
* Verifies that the various TDigest implementations can be serialized.
* <p>
* Serializability is important, for example, if we want to use t-digests with Spark.
*/
public class TDigestSerializationTest {

private static TDigestSerializer<TDigest, byte[]> tDigestSerializer = null;

@Before
@SuppressWarnings("unchecked")
public void setup() throws TDigestSerializerException {
tDigestSerializer = new TDigestSerializerFactory().create();
}

@Test
public void testMergingDigest() throws IOException {
public void testMergingDigest() throws TDigestSerializerException {
assertSerializesAndDeserializes(new MergingDigest(100));
}

@Test
public void testAVLTreeDigest() throws IOException {
public void testAVLTreeDigest() throws TDigestSerializerException {
assertSerializesAndDeserializes(new AVLTreeDigest(100));
}


private <T extends TDigest> void assertSerializesAndDeserializes(T tdigest) throws IOException {
assertNotNull(deserialize(serialize(tdigest)));
@SuppressWarnings("unchecked")
private <T extends TDigest> void assertSerializesAndDeserializes(T tdigest) throws TDigestSerializerException {
assertNotNull(tDigestSerializer.deserialize(tDigestSerializer.serialize(tdigest)));

final Random gen = new Random();
for (int i = 0; i < 100000; i++) {
tdigest.add(gen.nextDouble());
}
T roundTrip = deserialize(serialize(tdigest));
T roundTrip = (T) tDigestSerializer.deserialize(tDigestSerializer.serialize(tdigest));

assertTDigestEquals(tdigest, roundTrip);
}

private static byte[] serialize(Serializable obj) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(5120);
try (ObjectOutputStream out = new ObjectOutputStream(baos)){
out.writeObject(obj);
return baos.toByteArray();
}
}

private static <T> T deserialize(byte[] objectData) throws IOException {
try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(objectData))) {
//noinspection unchecked
return (T) in.readObject();
} catch (ClassCastException | ClassNotFoundException | IOException e) {
throw new IOException(e);
}
}

private void assertTDigestEquals(TDigest t1, TDigest t2) {
assertEquals(t1.getMin(), t2.getMin(), 0);
assertEquals(t1.getMax(), t2.getMax(), 0);
Expand Down
4 changes: 4 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
<packaging>pom</packaging>
<version>3.3-SNAPSHOT</version>
<name>T-digest Parent</name>

<properties>
<java.version>1.7</java.version>
</properties>

<modules>
<module>core</module>
Expand Down