Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FSTORE-1580] OnlineFS Observability #396

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions java/beam/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

<properties>
<beam.version>2.48.0</beam.version>
<kafka.version>3.4.0</kafka.version>
</properties>

<dependencies>
Expand Down Expand Up @@ -43,13 +42,6 @@
<artifactId>beam-sdks-java-io-kafka</artifactId>
<version>${beam.version}</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version}</version>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,12 @@
package com.logicalclocks.hsfs.beam.engine;

import com.google.common.base.Strings;
import com.logicalclocks.hsfs.FeatureGroupBase;
import com.logicalclocks.hsfs.FeatureStoreException;
import com.logicalclocks.hsfs.StorageConnector;
import com.logicalclocks.hsfs.beam.StreamFeatureGroup;
import com.logicalclocks.hsfs.metadata.DatasetApi;
import com.logicalclocks.hsfs.engine.EngineBase;
import com.logicalclocks.hsfs.engine.FeatureGroupUtils;
import com.logicalclocks.hsfs.metadata.HopsworksInternalClient;
import com.logicalclocks.hsfs.engine.KafkaEngine;
import org.apache.avro.Schema;

import java.io.FileOutputStream;
Expand All @@ -35,7 +33,9 @@

public class BeamEngine extends EngineBase {
private static BeamEngine INSTANCE = null;

private FeatureGroupUtils featureGroupUtils = new FeatureGroupUtils();
private final KafkaEngine kafkaEngine;

public static synchronized BeamEngine getInstance() throws FeatureStoreException {
if (INSTANCE == null) {
Expand All @@ -45,6 +45,7 @@ public static synchronized BeamEngine getInstance() throws FeatureStoreException
}

// Private: BeamEngine is a singleton, obtained via getInstance().
private BeamEngine() throws FeatureStoreException {
// Kafka-specific configuration logic is delegated to a shared KafkaEngine,
// which calls back into this engine (e.g. for addFile) via the reference passed here.
kafkaEngine = new KafkaEngine(this);
}

public BeamProducer insertStream(StreamFeatureGroup streamFeatureGroup, Map<String, String> writeOptions)
Expand All @@ -57,7 +58,7 @@ public BeamProducer insertStream(StreamFeatureGroup streamFeatureGroup, Map<Stri
Schema deserializedEncodedSchema = new Schema.Parser().parse(streamFeatureGroup.getEncodedAvroSchema());

return new BeamProducer(streamFeatureGroup.getOnlineTopicName(),
getKafkaConfig(streamFeatureGroup, writeOptions),
kafkaEngine.getKafkaConfig(streamFeatureGroup, writeOptions, KafkaEngine.ConfigType.KAFKA),
streamFeatureGroup.getDeserializedAvroSchema(), deserializedEncodedSchema, complexFeatureSchemas,
streamFeatureGroup.getPrimaryKeys(), streamFeatureGroup);
}
Expand All @@ -77,24 +78,4 @@ public String addFile(String filePath) throws IOException, FeatureStoreException
}
return targetPath;
}

// Builds the Kafka client configuration for writing to a feature group's online topic.
// NOTE(review): this override is removed by the PR in favor of the shared
// KafkaEngine.getKafkaConfig(...) implementation.
@Override
public Map<String, String> getKafkaConfig(FeatureGroupBase featureGroup, Map<String, String> writeOptions)
throws FeatureStoreException, IOException {
// Use the external Kafka endpoint unless we are running inside Hopsworks
// (REST endpoint system property set) or the caller explicitly opted into
// the internal listener via the "internal_kafka" write option.
boolean external = !(System.getProperties().containsKey(HopsworksInternalClient.REST_ENDPOINT_SYS)
|| (writeOptions != null
&& Boolean.parseBoolean(writeOptions.getOrDefault("internal_kafka", "false"))));

// Fetch the Kafka storage connector and materialize the TLS stores locally,
// rewriting the connector paths to the downloaded copies.
StorageConnector.KafkaConnector storageConnector =
storageConnectorApi.getKafkaStorageConnector(featureGroup.getFeatureStore(), external);
storageConnector.setSslTruststoreLocation(addFile(storageConnector.getSslTruststoreLocation()));
storageConnector.setSslKeystoreLocation(addFile(storageConnector.getSslKeystoreLocation()));

Map<String, String> config = storageConnector.kafkaOptions();

// Caller-supplied options take precedence over connector defaults.
if (writeOptions != null) {
config.putAll(writeOptions);
}
return config;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@
package com.logicalclocks.hsfs.flink.engine;

import com.google.common.base.Strings;
import com.logicalclocks.hsfs.FeatureGroupBase;
import com.logicalclocks.hsfs.FeatureStoreException;
import com.logicalclocks.hsfs.StorageConnector;
import com.logicalclocks.hsfs.engine.EngineBase;
import com.logicalclocks.hsfs.engine.KafkaEngine;
import com.logicalclocks.hsfs.flink.StreamFeatureGroup;

import com.logicalclocks.hsfs.metadata.HopsworksInternalClient;
import lombok.Getter;

import org.apache.avro.generic.GenericRecord;
Expand Down Expand Up @@ -76,19 +74,21 @@ public static synchronized FlinkEngine getInstance() throws FeatureStoreExceptio
.stringType()
.defaultValue("material_passwd")
.withDescription("path to material_passwd");
private final KafkaEngine kafkaEngine;

// Private: FlinkEngine is a singleton, obtained via getInstance().
private FlinkEngine() throws FeatureStoreException {
streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
// Configure the streamExecutionEnvironment
// Object reuse avoids per-record deep copies between chained operators.
streamExecutionEnvironment.getConfig().enableObjectReuse();
// Kafka-specific configuration logic is delegated to a shared KafkaEngine,
// which calls back into this engine (e.g. for addFile) via the reference passed here.
kafkaEngine = new KafkaEngine(this);
}

public DataStreamSink<?> writeDataStream(StreamFeatureGroup streamFeatureGroup, DataStream<?> dataStream,
Map<String, String> writeOptions) throws FeatureStoreException, IOException {

DataStream<Object> genericDataStream = (DataStream<Object>) dataStream;
Properties properties = new Properties();
properties.putAll(getKafkaConfig(streamFeatureGroup, writeOptions));
properties.putAll(kafkaEngine.getKafkaConfig(streamFeatureGroup, writeOptions, KafkaEngine.ConfigType.KAFKA));

KafkaSink<GenericRecord> sink = KafkaSink.<GenericRecord>builder()
.setBootstrapServers(properties.getProperty("bootstrap.servers"))
Expand Down Expand Up @@ -128,27 +128,6 @@ public String addFile(String filePath) throws IOException {
return targetPath;
}

// Builds the Kafka client configuration for writing to a feature group's online topic.
// NOTE(review): this override is removed by the PR in favor of the shared
// KafkaEngine.getKafkaConfig(...) implementation.
@Override
public Map<String, String> getKafkaConfig(FeatureGroupBase featureGroup, Map<String, String> writeOptions)
throws FeatureStoreException, IOException {
// Use the external Kafka endpoint unless we are running inside Hopsworks
// (REST endpoint system property set) or the caller explicitly opted into
// the internal listener via the "internal_kafka" write option.
boolean external = !(System.getProperties().containsKey(HopsworksInternalClient.REST_ENDPOINT_SYS)
|| (writeOptions != null
&& Boolean.parseBoolean(writeOptions.getOrDefault("internal_kafka", "false"))));

// Fetch the Kafka storage connector and materialize the TLS stores locally,
// rewriting the connector paths to the downloaded copies.
StorageConnector.KafkaConnector storageConnector =
storageConnectorApi.getKafkaStorageConnector(featureGroup.getFeatureStore(), external);
storageConnector.setSslTruststoreLocation(addFile(storageConnector.getSslTruststoreLocation()));
storageConnector.setSslKeystoreLocation(addFile(storageConnector.getSslKeystoreLocation()));

Map<String, String> config = storageConnector.kafkaOptions();

// Caller-supplied options take precedence over connector defaults.
if (writeOptions != null) {
config.putAll(writeOptions);
}
// Flink's KafkaSink manages its own delivery guarantees; idempotence is
// disabled here explicitly for the producer.
config.put("enable.idempotence", "false");
return config;
}

/**
 * Resolves the truststore location from the Flink configuration.
 *
 * @return the configured truststore path, or the option's default if unset
 */
public String getTrustStorePath() {
  final String resolvedPath = flinkConfig.getString(trustStorePath);
  return resolvedPath;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,31 @@

package com.logicalclocks.hsfs;

import com.fasterxml.jackson.annotation.JsonIgnore;
import java.io.IOException;
import java.text.ParseException;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.avro.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.logicalclocks.hsfs.constructor.QueryBase;
import com.logicalclocks.hsfs.engine.FeatureGroupEngineBase;
import com.logicalclocks.hsfs.engine.FeatureGroupUtils;
import com.logicalclocks.hsfs.metadata.IngestionRunApi;
import com.logicalclocks.hsfs.metadata.Statistics;
import com.logicalclocks.hsfs.metadata.Subject;
import com.logicalclocks.hsfs.metadata.User;

import lombok.Getter;
import lombok.Setter;

import org.apache.avro.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.text.ParseException;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;


public abstract class FeatureGroupBase<T> {

Expand Down Expand Up @@ -543,5 +544,12 @@ public Schema getDeserializedAvroSchema() throws FeatureStoreException, IOExcept
return utils.getDeserializedAvroSchema(getAvroSchema());
}

/**
 * Fetches the most recent ingestion run recorded for this feature group.
 *
 * @return the latest {@code IngestionRun} reported by the backend
 * @throws IOException if the metadata request fails
 * @throws FeatureStoreException if the backend rejects the request
 */
@JsonIgnore
public IngestionRun getLatestIngestionRun() throws IOException, FeatureStoreException {
  // Collections.singletonMap replaces the previous double-brace HashMap
  // initialization, which created an anonymous inner class holding a hidden
  // reference to this FeatureGroupBase instance.
  return new IngestionRunApi().getIngestionRun(this, Collections.singletonMap("filter_by", "LATEST"));
}

}
57 changes: 57 additions & 0 deletions java/hsfs/src/main/java/com/logicalclocks/hsfs/IngestionRun.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright (c) 2024. Hopsworks AB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and limitations under the License.
*
*/

package com.logicalclocks.hsfs;

import com.logicalclocks.hsfs.metadata.RestDto;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;

/**
 * DTO representing a single online ingestion run of a feature group.
 *
 * <p>{@code startingOffsets}/{@code endingOffsets} bound the run and are writable by
 * the client; the remaining progress fields are read-only and populated by the backend.
 */
@NoArgsConstructor
@AllArgsConstructor
public class IngestionRun extends RestDto<IngestionRun> {

  // Unique identifier of the ingestion run.
  @Getter @Setter
  private Integer id;

  // Kafka offsets at which the run starts.
  @Getter @Setter
  private String startingOffsets;

  // Kafka offsets at which the run is expected to end.
  @Getter @Setter
  private String endingOffsets;

  // Read-only: offsets processed so far.
  @Getter
  private String currentOffsets;

  // Read-only: total number of entries in the run.
  @Getter
  private Integer totalEntries;

  // Read-only: number of entries processed so far.
  @Getter
  private Integer processedEntries;

  /**
   * Creates a run bounded by the given offset ranges; progress fields remain unset.
   */
  public IngestionRun(String startingOffsets, String endingOffsets) {
    this.startingOffsets = startingOffsets;
    this.endingOffsets = endingOffsets;
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,15 @@

package com.logicalclocks.hsfs.engine;

import com.logicalclocks.hsfs.FeatureGroupBase;
import com.logicalclocks.hsfs.FeatureStoreException;
import com.logicalclocks.hsfs.metadata.StorageConnectorApi;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Map;

/**
 * Common base for the per-runtime engines (Beam, Flink, Spark, ...).
 *
 * <p>Declares the operations every engine must provide; Kafka-related helpers
 * use {@link #storageConnectorApi} to resolve connector metadata.
 */
public abstract class EngineBase {

protected static final Logger LOGGER = LoggerFactory.getLogger(EngineBase.class);

// Shared API client for fetching storage connector metadata from Hopsworks.
public StorageConnectorApi storageConnectorApi = new StorageConnectorApi();

/**
 * Materializes the given remote file locally and returns the local path.
 * Implementations differ per runtime (e.g. distributed cache vs. local download).
 */
public abstract String addFile(String filePath) throws IOException, FeatureStoreException;

/**
 * Builds the Kafka client configuration for the given feature group,
 * merging in any caller-supplied write options.
 */
public abstract Map<String, String> getKafkaConfig(FeatureGroupBase featureGroup, Map<String, String> writeOptions)
throws FeatureStoreException, IOException;
}
Loading
Loading