numaproj · dpadhiar · Sep 19, 2023 · Sep 18, 2023 · Sep 18, 2023 · Sep 18, 2023
@@ -107,6 +107,10 @@ func ValidatePipeline(pl *dfv1.Pipeline) error {
 				return fmt.Errorf("invalid user-defined source vertex %q, only one of 'http', 'kafka', 'nats', 'redisStreams', 'generator' and 'udSource' can be specified", k)
 			}
 		}
+
+		if findForests(s, pl) {
+			return fmt.Errorf("invalid pipeline, cannot be disjointed")
+		}
 	}
 
 	for k, t := range udTransformers {
@@ -472,3 +476,37 @@ func toVerticesMappedByFrom(edges []dfv1.Edge, verticesByName map[string]*dfv1.A
 	}
 	return mappedEdges, nil
 }
+
+// findForests reports whether the pipeline looks disconnected when explored
+// from vtx. It collects every vertex name reachable from vtx (the helper
+// follows edges in both directions) and compares that count with the number
+// of vertices declared in the pipeline spec; a mismatch is reported as true.
+func findForests(vtx dfv1.AbstractVertex, pl *dfv1.Pipeline) bool {
+	reached := make(map[string]struct{})
+	findForestHelper(vtx.Name, reached, pl)
+
+	// Any difference between the two counts is treated as a forest.
+	total := len(pl.Spec.Vertices)
+	return len(reached) != total
+}
+
+// findForestHelper records vtxName in visited and then recursively visits
+// every vertex that shares an edge with it, whichever way the edge points.
+// When it returns, visited holds the name of each vertex reached from vtxName.
+//
+// visited is mutated in place and doubles as the recursion guard.
+func findForestHelper(vtxName string, visited map[string]struct{}, pl *dfv1.Pipeline) {
+	visited[vtxName] = struct{}{}
+
+	// Gather the vertices on the far side of every edge touching vtxName. A
+	// set is used so that a vertex joined by several edges is listed once.
+	neighbors := make(map[string]struct{})
+	for _, e := range pl.GetToEdges(vtxName) {
+		neighbors[e.To] = struct{}{}
+	}
+	for _, e := range pl.GetFromEdges(vtxName) {
+		neighbors[e.From] = struct{}{}
+	}
+
+	// Recurse only into vertices not seen yet, so self-loops and cycles
+	// terminate.
+	for name := range neighbors {
+		if _, seen := visited[name]; !seen {
+			findForestHelper(name, visited, pl)
+		}
+	}
+}
@@ -163,6 +163,57 @@ var (
 			},
 		},
 	}
+
+	// testForestPipeline is a fixture with six vertices and four edges that
+	// form two chains sharing no vertex or edge:
+	// input -> p1 -> output and input-1 -> p2 -> output-1.
+	testForestPipeline = &dfv1.Pipeline{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-pl",
+			Namespace: "test-ns",
+		},
+		Spec: dfv1.PipelineSpec{
+			Vertices: []dfv1.AbstractVertex{
+				{
+					Name: "input",
+					Source: &dfv1.Source{
+						UDTransformer: &dfv1.UDTransformer{
+							Builtin: &dfv1.Transformer{Name: "filter"},
+						}},
+				},
+				{
+					Name: "input-1",
+					Source: &dfv1.Source{
+						UDTransformer: &dfv1.UDTransformer{
+							Builtin: &dfv1.Transformer{Name: "filter"},
+						}},
+				},
+				{
+					Name: "p1",
+					UDF: &dfv1.UDF{
+						Builtin: &dfv1.Function{Name: "cat"},
+					},
+				},
+				{
+					Name: "p2",
+					UDF: &dfv1.UDF{
+						Builtin: &dfv1.Function{Name: "cat"},
+					},
+				},
+				{
+					Name: "output",
+					Sink: &dfv1.Sink{},
+				},
+				{
+					Name: "output-1",
+					Sink: &dfv1.Sink{},
+				},
+			},
+			Edges: []dfv1.Edge{
+				{From: "input", To: "p1"},
+				{From: "p1", To: "output"},
+				{From: "input-1", To: "p2"},
+				{From: "p2", To: "output-1"},
+			},
+		},
+	}
 )
 
 func TestValidatePipeline(t *testing.T) {
@@ -273,6 +324,42 @@ func TestValidatePipeline(t *testing.T) {
 		assert.Contains(t, err.Error(), "can not specify both builtin function, and a customized image")
 	})
 
+	t.Run("forest - two pipelines with 1 source/sink", func(t *testing.T) {
+		testObj := testForestPipeline.DeepCopy()
+		err := ValidatePipeline(testObj)
+		assert.Error(t, err)
+		assert.Contains(t, err.Error(), "invalid pipeline")
+	})
+
+	t.Run("forest - second pipeline has no sink", func(t *testing.T) {
+		testObj := testForestPipeline.DeepCopy()
+		testObj.Spec.Vertices[5].Sink = nil
+		testObj.Spec.Vertices[5].UDF = &dfv1.UDF{}
+		err := ValidatePipeline(testObj)
+		assert.Error(t, err)
+		assert.Contains(t, err.Error(), "invalid vertex")
+	})
+
+	t.Run("forest - two pipelines with multiple sources/sinks", func(t *testing.T) {
+		testObj := testForestPipeline.DeepCopy()
+		testObj.Spec.Vertices = append(testObj.Spec.Vertices, dfv1.AbstractVertex{Name: "input-2", Source: &dfv1.Source{}})
+		testObj.Spec.Vertices = append(testObj.Spec.Vertices, dfv1.AbstractVertex{Name: "output-2", Sink: &dfv1.Sink{}})
+		testObj.Spec.Edges = append(testObj.Spec.Edges, dfv1.Edge{From: "input-2", To: "p1"})
+		testObj.Spec.Edges = append(testObj.Spec.Edges, dfv1.Edge{From: "p2", To: "output-2"})
+		err := ValidatePipeline(testObj)
+		assert.Error(t, err)
+		assert.Contains(t, err.Error(), "invalid pipeline")
+	})
+
+	t.Run("forest - pipelines have cycles", func(t *testing.T) {
+		testObj := testForestPipeline.DeepCopy()
+		testObj.Spec.Edges = append(testObj.Spec.Edges, dfv1.Edge{From: "p1", To: "p1"})
+		testObj.Spec.Edges = append(testObj.Spec.Edges, dfv1.Edge{From: "p2", To: "p2"})
+		err := ValidatePipeline(testObj)
+		assert.Error(t, err)
+		assert.Contains(t, err.Error(), "invalid pipeline")
+	})
+
 	t.Run("edge - invalid vertex name", func(t *testing.T) {
 		testObj := testPipeline.DeepCopy()
 		testObj.Spec.Edges = append(testObj.Spec.Edges, dfv1.Edge{From: "a", To: "b"})