Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[DRAFT] Early stages / PoC of Distributed Data Movement with Gobblin-on-Temporal #3789

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public class State implements WritableShim {

private static final Joiner LIST_JOINER = Joiner.on(",");
private static final Splitter LIST_SPLITTER = Splitter.on(",").trimResults().omitEmptyStrings();
private static final JsonParser JSON_PARSER = new JsonParser();

private String id;

Expand All @@ -62,8 +63,6 @@ public class State implements WritableShim {
@Getter
private Properties specProperties;

private final JsonParser jsonParser = new JsonParser();

public State() {
this.specProperties = new Properties();
this.commonProperties = new Properties();
Expand Down Expand Up @@ -476,7 +475,7 @@ public boolean getPropAsBoolean(String key, boolean def) {
* @return {@link JsonArray} value associated with the key
*/
public JsonArray getPropAsJsonArray(String key) {
JsonElement jsonElement = this.jsonParser.parse(getProp(key));
JsonElement jsonElement = this.JSON_PARSER.parse(getProp(key));
Preconditions.checkArgument(jsonElement.isJsonArray(),
"Value for key " + key + " is malformed, it must be a JsonArray: " + jsonElement);
return jsonElement.getAsJsonArray();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.StringWriter;

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonWriter;

import org.apache.gobblin.source.extractor.Extractor;
import org.apache.gobblin.source.extractor.Watermark;
Expand Down Expand Up @@ -365,6 +367,56 @@ public int hashCode() {
return result;
}

/**
 * Renders this work unit as a tab-indented JSON document by delegating to {@link #toJson(JsonWriter)}.
 *
 * @return the text accumulated in the backing buffer; an {@link IOException} raised while writing is
 *         swallowed, in which case whatever had been written up to that point is returned
 */
public String toJsonString() {
  StringWriter buffer = new StringWriter();
  try (JsonWriter writer = new JsonWriter(buffer)) {
    writer.setIndent("\t");
    toJson(writer);
  } catch (IOException ignored) {
    // Ignored
  }
  return buffer.toString();
}

/**
 * Streams this work unit into the supplied writer as a single JSON object.
 *
 * <p>Layout: {@code id}, a {@code properties} object, and an {@code extract} object holding
 * {@code extractId}, {@code extractProperties} and, only when the extract has a previous table state,
 * {@code extractPrevTableState} (itself holding {@code prevStateId} and {@code prevStateProperties}).
 *
 * @param jsonWriter destination writer; it is not closed by this method
 * @throws IOException if the writer fails
 */
public void toJson(JsonWriter jsonWriter) throws IOException {
  jsonWriter.beginObject();

  jsonWriter.name("id").value(this.getId());
  writePropertiesObject(jsonWriter, "properties", this);

  jsonWriter.name("extract").beginObject();
  jsonWriter.name("extractId").value(this.getExtract().getId());
  writePropertiesObject(jsonWriter, "extractProperties", this.getExtract());

  State previousTableState = this.getExtract().getPreviousTableState();
  if (previousTableState != null) {
    jsonWriter.name("extractPrevTableState").beginObject();
    jsonWriter.name("prevStateId").value(previousTableState.getId());
    writePropertiesObject(jsonWriter, "prevStateProperties", previousTableState);
    jsonWriter.endObject();
  }
  jsonWriter.endObject(); // closes "extract"

  jsonWriter.endObject();
}

/** Writes {@code name} followed by an object containing one member per property name of {@code state}. */
private static void writePropertiesObject(JsonWriter jsonWriter, String name, State state) throws IOException {
  jsonWriter.name(name).beginObject();
  for (String propertyName : state.getPropertyNames()) {
    jsonWriter.name(propertyName).value(state.getProp(propertyName));
  }
  jsonWriter.endObject();
}

public String getOutputFilePath() {
// Search for the properties in the workunit.
// This search for the property first in State and then in the Extract of this workunit.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

package org.apache.gobblin.data.management.copy;

import com.google.common.cache.Cache;
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import java.util.Map;

Expand All @@ -34,8 +34,10 @@

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.cache.Cache;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.gson.stream.JsonWriter;

import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
Expand Down Expand Up @@ -132,6 +134,51 @@ public CopyableFile(FileStatus origin, Path destination, OwnerAndPermission dest
this.datasetOutputPath = datasetOutputPath;
}


/**
 * Convenience overload of {@link #toJsonString(boolean)} that asks for the additional metadata to be included.
 *
 * @return the JSON text produced by {@code toJsonString(true)}
 */
public String toJsonString() {
  return this.toJsonString(true);
}

/**
 * Renders this copyable file as a tab-indented JSON document by delegating to
 * {@link #toJson(JsonWriter, boolean)}.
 *
 * @param includeMetadata forwarded to {@code toJson}; controls whether the {@code metadata} object is emitted
 * @return the text accumulated in the backing buffer; an {@link IOException} raised while writing is
 *         swallowed, in which case whatever had been written up to that point is returned
 */
public String toJsonString(boolean includeMetadata) {
  StringWriter buffer = new StringWriter();
  try (JsonWriter writer = new JsonWriter(buffer)) {
    writer.setIndent("\t");
    toJson(writer, includeMetadata);
  } catch (IOException ignored) {
    // Ignored
  }
  return buffer.toString();
}

/**
 * Streams a subset of this copyable file's fields into the supplied writer as a single JSON object.
 *
 * <p>Always written: {@code file set}, {@code origin}, {@code destination},
 * {@code destinationOwnerAndPermission} and {@code datasetOutputPath}. A {@code metadata} object is appended
 * only when {@code includeMetadata} is set and the additional metadata map is non-null.
 *
 * <p>NOTE(review): origin, destination, destinationOwnerAndPermission and datasetOutputPath are dereferenced
 * without a null check — confirm none of them can be null when this is called.
 *
 * @param jsonWriter destination writer; it is not closed by this method
 * @param includeMetadata whether to emit the additional metadata entries
 * @throws IOException if the writer fails
 */
public void toJson(JsonWriter jsonWriter, boolean includeMetadata) throws IOException {
  jsonWriter.beginObject();

  jsonWriter.name("file set").value(this.getFileSet());
  jsonWriter.name("origin").value(this.getOrigin().toString());
  jsonWriter.name("destination").value(this.getDestination().toString());
  jsonWriter.name("destinationOwnerAndPermission").value(this.getDestinationOwnerAndPermission().toString());
  // TODO:
  // this.ancestorsOwnerAndPermission
  // this.checksum
  // this.preserve
  // this.dataFileVersionStrategy
  // this.originTimestamp
  // this.upstreamTimestamp
  jsonWriter.name("datasetOutputPath").value(this.getDatasetOutputPath().toString());

  if (includeMetadata) {
    Map<String, String> metadata = this.getAdditionalMetadata();
    if (metadata != null) {
      jsonWriter.name("metadata").beginObject();
      for (Map.Entry<String, String> metadataEntry : metadata.entrySet()) {
        jsonWriter.name(metadataEntry.getKey()).value(metadataEntry.getValue());
      }
      jsonWriter.endObject();
    }
  }

  jsonWriter.endObject();
}

/**
* Set file system based source and destination dataset for this {@link CopyableFile}
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,9 @@ public JobContext(Properties jobProps, Logger logger, SharedResourcesBroker<Gobb
"A job must have a job name specified by job.name");

this.jobName = JobState.getJobNameFromProps(jobProps);
this.jobId = jobProps.containsKey(ConfigurationKeys.JOB_ID_KEY) ? jobProps.getProperty(ConfigurationKeys.JOB_ID_KEY)
: JobLauncherUtils.newJobId(this.jobName);
this.jobId = JobState.getJobIdFromProps(jobProps);
jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, this.jobId); // in case not yet directly defined as such
this.jobSequence = Long.toString(Id.Job.parse(this.jobId).getSequence());
jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, this.jobId);

this.jobBroker = instanceBroker.newSubscopedBuilder(new JobScopeInstance(this.jobName, this.jobId))
.withOverridingConfig(ConfigUtils.propertiesToConfig(jobProps)).build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import org.apache.gobblin.source.extractor.JobCommitPolicy;
import org.apache.gobblin.source.workunit.WorkUnit;
import org.apache.gobblin.util.ImmutableProperties;
import org.apache.gobblin.util.JobLauncherUtils;


/**
Expand Down Expand Up @@ -172,6 +173,11 @@ public static String getJobNameFromProps(Properties props) {
return props.getProperty(ConfigurationKeys.JOB_NAME_KEY);
}

/**
 * Resolves the job ID for the given job properties.
 *
 * <p>Returns the value stored under {@link ConfigurationKeys#JOB_ID_KEY} when there is one; otherwise a new
 * ID is generated from the job name via {@code JobLauncherUtils.newJobId}. The supplied {@code props} are
 * not modified.
 *
 * <p>The lookup is a single {@link Properties#getProperty(String)} call rather than
 * {@code containsKey} + {@code getProperty}: {@code containsKey} ignores the defaults list that
 * {@code getProperty} consults, and a key mapped to a non-String value would pass {@code containsKey}
 * while {@code getProperty} yields {@code null}.
 *
 * @param props job properties to read the job ID (or, failing that, the job name) from
 * @return the configured job ID, or a newly generated one when none is configured
 */
public static String getJobIdFromProps(Properties props) {
  String configuredJobId = props.getProperty(ConfigurationKeys.JOB_ID_KEY);
  return configuredJobId != null ? configuredJobId
      : JobLauncherUtils.newJobId(JobState.getJobNameFromProps(props));
}

public static String getJobGroupFromState(State state) {
return state.getProp(ConfigurationKeys.JOB_GROUP_KEY);
}
Expand Down Expand Up @@ -684,10 +690,14 @@ public int hashCode() {

@Override
public String toString() {
return toJsonString(false);
}

public String toJsonString(boolean includeProperties) {
StringWriter stringWriter = new StringWriter();
try (JsonWriter jsonWriter = new JsonWriter(stringWriter)) {
jsonWriter.setIndent("\t");
this.toJson(jsonWriter, false);
this.toJson(jsonWriter, includeProperties);
} catch (IOException ioe) {
// Ignored
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -384,6 +385,21 @@ public int hashCode() {
return result;
}

/**
 * Convenience overload of {@link #toJsonString(boolean)} that passes {@code true} for
 * {@code includeProperties}.
 *
 * @return the JSON text produced by {@code toJsonString(true)}
 */
public String toJsonString() {
  return this.toJsonString(true);
}

/**
 * Renders this task state as a tab-indented JSON document by delegating to
 * {@code toJson(JsonWriter, boolean)}.
 *
 * @param includeProperties forwarded unchanged to {@code toJson}
 * @return the text accumulated in the backing buffer; an {@link IOException} raised while writing is
 *         swallowed, in which case whatever had been written up to that point is returned
 */
public String toJsonString(boolean includeProperties) {
  StringWriter buffer = new StringWriter();
  try (JsonWriter writer = new JsonWriter(buffer)) {
    writer.setIndent("\t");
    toJson(writer, includeProperties);
  } catch (IOException ignored) {
    // Ignored
  }
  return buffer.toString();
}

/**
* Convert this {@link TaskState} to a json document.
*
Expand Down
1 change: 1 addition & 0 deletions gobblin-temporal/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies {
compile project(":gobblin-api")
compile project(":gobblin-cluster")
compile project(":gobblin-core")
compile project(":gobblin-data-management")
compile project(":gobblin-metrics-libs:gobblin-metrics")
compile project(":gobblin-metastore")
compile project(":gobblin-runtime")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,18 @@ public interface GobblinTemporalConfigurationKeys {

String PREFIX = "gobblin.temporal.";

String WORKER_CLASS = PREFIX + "worker";
String WORKER_CLASS = PREFIX + "worker.class";
String DEFAULT_WORKER_CLASS = HelloWorldWorker.class.getName();
String GOBBLIN_TEMPORAL_NAMESPACE = PREFIX + "namespace";
String DEFAULT_GOBBLIN_TEMPORAL_NAMESPACE = PREFIX + "namespace";

String GOBBLIN_TEMPORAL_TASK_QUEUE = PREFIX + "task.queue.name";
String DEFAULT_GOBBLIN_TEMPORAL_TASK_QUEUE = "GobblinTemporalTaskQueue";
String GOBBLIN_TEMPORAL_JOB_LAUNCHER = PREFIX + "job.launcher";
String DEFAULT_GOBBLIN_TEMPORAL_JOB_LAUNCHER = HelloWorldJobLauncher.class.getName();
String GOBBLIN_TEMPORAL_JOB_LAUNCHER_PREFIX = PREFIX + "job.launcher.";
String GOBBLIN_TEMPORAL_JOB_LAUNCHER_CLASS = GOBBLIN_TEMPORAL_JOB_LAUNCHER_PREFIX + "class";
String DEFAULT_GOBBLIN_TEMPORAL_JOB_LAUNCHER_CLASS = HelloWorldJobLauncher.class.getName();

String GOBBLIN_TEMPORAL_JOB_LAUNCHER_ARG_PREFIX = GOBBLIN_TEMPORAL_JOB_LAUNCHER_PREFIX + "arg.";

/**
* Number of worker processes to spin up per task runner
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,21 @@

package org.apache.gobblin.temporal.cluster;

import java.util.Arrays;

import com.typesafe.config.Config;

import io.temporal.client.WorkflowClient;
import io.temporal.worker.Worker;
import io.temporal.worker.WorkerFactory;
import io.temporal.worker.WorkerOptions;

import org.apache.gobblin.temporal.GobblinTemporalConfigurationKeys;
import org.apache.gobblin.util.ConfigUtils;


public abstract class AbstractTemporalWorker {
/** Basic boilerplate for a temporal "worker" to register its activity and workflow capabilities and listen on a particular queue */
public abstract class AbstractTemporalWorker implements TemporalWorker {
private final WorkflowClient workflowClient;
private final String queueName;
private final WorkerFactory workerFactory;
Expand All @@ -42,10 +46,13 @@ public AbstractTemporalWorker(Config cfg, WorkflowClient client) {

// Create a Worker factory that can be used to create Workers that poll specific Task Queues.
workerFactory = WorkerFactory.newInstance(workflowClient);

stashWorkerConfig(cfg);
}

@Override
public void start() {
Worker worker = workerFactory.newWorker(queueName);
Worker worker = workerFactory.newWorker(queueName, createWorkerOptions());
// This Worker hosts both Workflow and Activity implementations.
// Workflows are stateful, so you need to supply a type to create instances.
worker.registerWorkflowImplementationTypes(getWorkflowImplClasses());
Expand All @@ -55,16 +62,25 @@ public void start() {
workerFactory.start();
}

/** Shuts down this worker by shutting down the {@link WorkerFactory} it was created with. */
@Override
public void shutdown() {
  this.workerFactory.shutdown();
}

/**
 * Supplies the {@link WorkerOptions} that {@link #start()} passes to the worker factory when creating the
 * worker. Subclasses may override to customize them.
 *
 * @return {@code null} in this base implementation
 */
protected WorkerOptions createWorkerOptions() {
return null;
}

/** @return workflow types for *implementation* classes (not interface) */
protected abstract Class<?>[] getWorkflowImplClasses();

/** @return activity instances; NOTE: activities must be stateless and thread-safe, so a shared instance is used. */
protected abstract Object[] getActivityImplInstances();

/**
 * Stashes {@code cfg} with {@link WorkerConfig} in association with this worker's class, every workflow
 * implementation class, and the class of every activity implementation instance.
 *
 * <p>NOTE(review): this runs from the constructor yet calls the overridable
 * {@link #getWorkflowImplClasses()} and {@link #getActivityImplInstances()}; subclass fields are presumably
 * not yet initialized at that point — confirm implementations do not depend on them.
 *
 * @param cfg the worker configuration to stash
 */
private final void stashWorkerConfig(Config cfg) {
  // stash in association with...
  WorkerConfig.forWorker(this.getClass(), cfg); // the worker itself
  for (Class<?> workflowImplClass : getWorkflowImplClasses()) { // its workflow impls
    WorkerConfig.withImpl(workflowImplClass, cfg);
  }
  for (Object activityImpl : getActivityImplInstances()) { // its activity impls
    WorkerConfig.withImpl(activityImpl.getClass(), cfg);
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public class GobblinTemporalTaskRunner implements StandardMetricsBridge {
protected final String temporalQueueName;
private final boolean isMetricReportingFailureFatal;
private final boolean isEventReportingFailureFatal;
private final List<AbstractTemporalWorker> workers;
private final List<TemporalWorker> workers;

public GobblinTemporalTaskRunner(String applicationName,
String applicationId,
Expand Down Expand Up @@ -234,7 +234,7 @@ public void start()
}
}

private AbstractTemporalWorker initiateWorker() throws Exception{
private TemporalWorker initiateWorker() throws Exception {
logger.info("Starting Temporal Worker");

String connectionUri = clusterConfig.getString(GobblinTemporalConfigurationKeys.TEMPORAL_CONNECTION_STRING);
Expand All @@ -246,8 +246,8 @@ private AbstractTemporalWorker initiateWorker() throws Exception{

String workerClassName = ConfigUtils.getString(clusterConfig,
GobblinTemporalConfigurationKeys.WORKER_CLASS, GobblinTemporalConfigurationKeys.DEFAULT_WORKER_CLASS);
AbstractTemporalWorker worker = GobblinConstructorUtils.invokeLongestConstructor(
(Class<AbstractTemporalWorker>) Class.forName(workerClassName), clusterConfig, client);
TemporalWorker worker = GobblinConstructorUtils.invokeLongestConstructor(
(Class<TemporalWorker>)Class.forName(workerClassName), clusterConfig, client);
worker.start();
logger.info("A new worker is started.");
return worker;
Expand Down Expand Up @@ -286,9 +286,7 @@ public synchronized void stop() {
this.containerMetrics.get().stopMetricsReporting();
}

for (AbstractTemporalWorker worker : workers) {
worker.shutdown();
}
workers.forEach(TemporalWorker::shutdown);

logger.info("All services are stopped.");

Expand Down
Loading