From dab92a5f60c7d01f67250fad5fdc52bcaa2785c5 Mon Sep 17 00:00:00 2001
From: roryqi
Date: Fri, 27 Sep 2024 15:17:03 +0800
Subject: [PATCH 01/15] [#4886] feat(server,core): Support listing roles by object (#5023)

### What changes were proposed in this pull request?

Support listing the roles bound to a metadata object.

### Why are the changes needed?

Fix: #4886

### Does this PR introduce _any_ user-facing change?

I will add the documentation later.

### How was this patch tested?

Added new unit tests.
---
 .../java/org/apache/gravitino/Catalog.java    |   9 +
 .../java/org/apache/gravitino/Metalake.java   |   9 +
 .../java/org/apache/gravitino/Schema.java     |   9 +
 .../authorization/SupportsRoles.java          |  36 +++
 .../org/apache/gravitino/file/Fileset.java    |   9 +
 .../org/apache/gravitino/messaging/Topic.java |   9 +
 .../java/org/apache/gravitino/rel/Table.java  |   9 +
 .../gravitino/client/BaseSchemaCatalog.java   |  16 +-
 .../gravitino/client/GenericFileset.java      |  16 +-
 .../gravitino/client/GenericSchema.java       |  16 +-
 .../apache/gravitino/client/GenericTopic.java |  16 +-
 .../gravitino/client/GravitinoMetalake.java   |  19 +-
 .../client/MetadataObjectRoleOperations.java  |  56 ++++
 .../gravitino/client/RelationalTable.java     |  16 +-
 .../gravitino/client/TestSupportRoles.java    | 256 ++++++++++++++++++
 .../test/authorization/AccessControlIT.java   |  53 ++++
 .../gravitino/SupportsRelationOperations.java |  23 +-
 .../AccessControlDispatcher.java              |  14 +
 .../authorization/AccessControlManager.java   |  10 +-
 .../authorization/FutureGrantManager.java     |   2 +-
 .../gravitino/authorization/RoleManager.java  |  33 +++
 .../hook/AccessControlHookDispatcher.java     |   8 +
 .../storage/relational/JDBCBackend.java       |   6 +-
 .../relational/RelationalEntityStore.java     |   6 +-
 .../relational/service/RoleMetaService.java   |  96 ++++---
 .../TestAccessControlManager.java             |  29 ++
 .../service/TestRoleMetaService.java          |   2 +-
 .../rest/MetadataObjectRoleOperations.java    |  89 ++++++
 .../TestMetadataObjectRoleOperations.java     | 146 ++++++++++
 .../server/web/rest/TestRoleOperations.java   |  34 +--
 30 files changed, 968 insertions(+), 84 deletions(-)
 create mode 100644 api/src/main/java/org/apache/gravitino/authorization/SupportsRoles.java
 create mode 100644 clients/client-java/src/main/java/org/apache/gravitino/client/MetadataObjectRoleOperations.java
 create mode 100644 clients/client-java/src/test/java/org/apache/gravitino/client/TestSupportRoles.java
 create mode 100644 server/src/main/java/org/apache/gravitino/server/web/rest/MetadataObjectRoleOperations.java
 create mode 100644 server/src/test/java/org/apache/gravitino/server/web/rest/TestMetadataObjectRoleOperations.java

diff --git a/api/src/main/java/org/apache/gravitino/Catalog.java b/api/src/main/java/org/apache/gravitino/Catalog.java
index 052a04d9484..431a798d51d 100644
--- a/api/src/main/java/org/apache/gravitino/Catalog.java
+++ b/api/src/main/java/org/apache/gravitino/Catalog.java
@@ -21,6 +21,7 @@
 import java.util.Locale;
 import java.util.Map;
 import org.apache.gravitino.annotation.Evolving;
+import org.apache.gravitino.authorization.SupportsRoles;
 import org.apache.gravitino.file.FilesetCatalog;
 import org.apache.gravitino.messaging.TopicCatalog;
 import org.apache.gravitino.rel.TableCatalog;
@@ -181,4 +182,12 @@ default TopicCatalog asTopicCatalog() throws UnsupportedOperationException {
   default SupportsTags supportsTags() throws UnsupportedOperationException {
     throw new UnsupportedOperationException("Catalog does not support tag operations");
   }
+
+  /**
+   * @return the {@link SupportsRoles} if the catalog supports role operations.
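+   *     <p>For illustration only, a hypothetical caller (the catalog name here is an assumed
+   *     example, not part of this patch) could list the role names bound to a catalog with:
+   *     <pre>{@code
+   *     Catalog catalog = metalake.loadCatalog("catalog1");
+   *     String[] roleNames = catalog.supportsRoles().listBindingRoleNames();
+   *     }</pre>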
+ * @throws UnsupportedOperationException if the catalog does not support role operations. + */ + default SupportsRoles supportsRoles() throws UnsupportedOperationException { + throw new UnsupportedOperationException("Catalog does not support role operations"); + } } diff --git a/api/src/main/java/org/apache/gravitino/Metalake.java b/api/src/main/java/org/apache/gravitino/Metalake.java index 6b4ac76ba3c..fb6fdbee094 100644 --- a/api/src/main/java/org/apache/gravitino/Metalake.java +++ b/api/src/main/java/org/apache/gravitino/Metalake.java @@ -20,6 +20,7 @@ import java.util.Map; import org.apache.gravitino.annotation.Evolving; +import org.apache.gravitino.authorization.SupportsRoles; /** * The interface of a metalake. The metalake is the top level entity in the Apache Gravitino system, @@ -50,4 +51,12 @@ public interface Metalake extends Auditable { * @return The properties of the metalake. */ Map properties(); + + /** + * @return the {@link SupportsRoles} if the metalake supports role operations. + * @throws UnsupportedOperationException if the metalake does not support role operations. + */ + default SupportsRoles supportsRoles() { + throw new UnsupportedOperationException("Metalake does not support role operations."); + } } diff --git a/api/src/main/java/org/apache/gravitino/Schema.java b/api/src/main/java/org/apache/gravitino/Schema.java index 872b0a25e33..7cedf94f694 100644 --- a/api/src/main/java/org/apache/gravitino/Schema.java +++ b/api/src/main/java/org/apache/gravitino/Schema.java @@ -22,6 +22,7 @@ import java.util.Map; import javax.annotation.Nullable; import org.apache.gravitino.annotation.Evolving; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.tag.SupportsTags; /** @@ -56,4 +57,12 @@ default Map properties() { default SupportsTags supportsTags() { throw new UnsupportedOperationException("Schema does not support tag operations."); } + + /** + * @return the {@link SupportsRoles} if the schema supports role operations. + * @throws UnsupportedOperationException if the schema does not support role operations. + */ + default SupportsRoles supportsRoles() { + throw new UnsupportedOperationException("Schema does not support role operations."); + } } diff --git a/api/src/main/java/org/apache/gravitino/authorization/SupportsRoles.java b/api/src/main/java/org/apache/gravitino/authorization/SupportsRoles.java new file mode 100644 index 00000000000..e83a7e20ecc --- /dev/null +++ b/api/src/main/java/org/apache/gravitino/authorization/SupportsRoles.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.authorization; + +import org.apache.gravitino.annotation.Evolving; + +/** + * Interface for supporting list role names for objects. 
This interface will be mixed with metadata + * objects to provide listing role operations. + */ +@Evolving +public interface SupportsRoles { + + /** + * List all the role names associated with this metadata object. + * + * @return The role name list associated with this metadata object. + */ + String[] listBindingRoleNames(); +} diff --git a/api/src/main/java/org/apache/gravitino/file/Fileset.java b/api/src/main/java/org/apache/gravitino/file/Fileset.java index ccff039da92..97afcc650f1 100644 --- a/api/src/main/java/org/apache/gravitino/file/Fileset.java +++ b/api/src/main/java/org/apache/gravitino/file/Fileset.java @@ -24,6 +24,7 @@ import org.apache.gravitino.Auditable; import org.apache.gravitino.Namespace; import org.apache.gravitino.annotation.Evolving; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.tag.SupportsTags; /** @@ -114,4 +115,12 @@ default Map properties() { default SupportsTags supportsTags() { throw new UnsupportedOperationException("Fileset does not support tag operations."); } + + /** + * @return The {@link SupportsRoles} if the fileset supports role operations. + * @throws UnsupportedOperationException If the fileset does not support role operations. + */ + default SupportsRoles supportsRoles() { + throw new UnsupportedOperationException("Fileset does not support role operations."); + } } diff --git a/api/src/main/java/org/apache/gravitino/messaging/Topic.java b/api/src/main/java/org/apache/gravitino/messaging/Topic.java index 78607f4865d..7162c45d2b0 100644 --- a/api/src/main/java/org/apache/gravitino/messaging/Topic.java +++ b/api/src/main/java/org/apache/gravitino/messaging/Topic.java @@ -24,6 +24,7 @@ import org.apache.gravitino.Auditable; import org.apache.gravitino.Namespace; import org.apache.gravitino.annotation.Evolving; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.tag.SupportsTags; /** @@ -58,4 +59,12 @@ default Map properties() { default SupportsTags supportsTags() { throw new UnsupportedOperationException("Topic does not support tag operations."); } + + /** + * @return the {@link SupportsRoles} if the topic supports role operations. + * @throws UnsupportedOperationException if the topic does not support role operations. + */ + default SupportsRoles supportsRoles() { + throw new UnsupportedOperationException("Topic does not support role operations."); + } } diff --git a/api/src/main/java/org/apache/gravitino/rel/Table.java b/api/src/main/java/org/apache/gravitino/rel/Table.java index c6bafb97a43..8bb9e3c1206 100644 --- a/api/src/main/java/org/apache/gravitino/rel/Table.java +++ b/api/src/main/java/org/apache/gravitino/rel/Table.java @@ -24,6 +24,7 @@ import org.apache.gravitino.Auditable; import org.apache.gravitino.Namespace; import org.apache.gravitino.annotation.Evolving; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.rel.expressions.distributions.Distribution; import org.apache.gravitino.rel.expressions.distributions.Distributions; import org.apache.gravitino.rel.expressions.sorts.SortOrder; @@ -103,4 +104,12 @@ default SupportsPartitions supportPartitions() throws UnsupportedOperationExcept default SupportsTags supportsTags() { throw new UnsupportedOperationException("Table does not support tag operations."); } + + /** + * @return The {@link SupportsRoles} if the table supports role operations. + * @throws UnsupportedOperationException If the table does not support role operations. 
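+   *     <p>For illustration only, a hypothetical sketch (the identifiers are assumed examples,
+   *     not part of this patch) of listing the role names bound to a table:
+   *     <pre>{@code
+   *     Table table = catalog.asTableCatalog().loadTable(NameIdentifier.of("schema1", "table1"));
+   *     String[] roleNames = table.supportsRoles().listBindingRoleNames();
+   *     }</pre>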
+ */ + default SupportsRoles supportsRoles() { + throw new UnsupportedOperationException("Table does not support role operations."); + } } diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/BaseSchemaCatalog.java b/clients/client-java/src/main/java/org/apache/gravitino/client/BaseSchemaCatalog.java index 7d46af3a5a2..9359ea439b0 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/BaseSchemaCatalog.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/BaseSchemaCatalog.java @@ -31,6 +31,7 @@ import org.apache.gravitino.Schema; import org.apache.gravitino.SchemaChange; import org.apache.gravitino.SupportsSchemas; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.dto.AuditDTO; import org.apache.gravitino.dto.CatalogDTO; import org.apache.gravitino.dto.requests.SchemaCreateRequest; @@ -53,7 +54,7 @@ * create, load, alter and drop a schema with specified identifier. */ abstract class BaseSchemaCatalog extends CatalogDTO - implements Catalog, SupportsSchemas, SupportsTags { + implements Catalog, SupportsSchemas, SupportsTags, SupportsRoles { /** The REST client to send the requests. */ protected final RESTClient restClient; @@ -61,6 +62,7 @@ abstract class BaseSchemaCatalog extends CatalogDTO private final Namespace catalogNamespace; private final MetadataObjectTagOperations objectTagOperations; + private final MetadataObjectRoleOperations objectRoleOperations; BaseSchemaCatalog( Namespace catalogNamespace, @@ -84,6 +86,8 @@ abstract class BaseSchemaCatalog extends CatalogDTO MetadataObjects.of(null, this.name(), MetadataObject.Type.CATALOG); this.objectTagOperations = new MetadataObjectTagOperations(catalogNamespace.level(0), metadataObject, restClient); + this.objectRoleOperations = + new MetadataObjectRoleOperations(catalogNamespace.level(0), metadataObject, restClient); } @Override @@ -96,6 +100,11 @@ public SupportsTags supportsTags() throws UnsupportedOperationException { return this; } + @Override + public SupportsRoles supportsRoles() throws UnsupportedOperationException { + return this; + } + /** * List all the schemas under the given catalog namespace. * @@ -239,6 +248,11 @@ public String[] associateTags(String[] tagsToAdd, String[] tagsToRemove) { return objectTagOperations.associateTags(tagsToAdd, tagsToRemove); } + @Override + public String[] listBindingRoleNames() { + return objectRoleOperations.listBindingRoleNames(); + } + /** * Get the namespace of the current catalog, which is "metalake". * diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/GenericFileset.java b/clients/client-java/src/main/java/org/apache/gravitino/client/GenericFileset.java index 32e1d7392e2..68eda6985ab 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/GenericFileset.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/GenericFileset.java @@ -26,6 +26,7 @@ import org.apache.gravitino.MetadataObject; import org.apache.gravitino.MetadataObjects; import org.apache.gravitino.Namespace; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.dto.file.FilesetDTO; import org.apache.gravitino.exceptions.NoSuchTagException; import org.apache.gravitino.file.Fileset; @@ -33,11 +34,12 @@ import org.apache.gravitino.tag.Tag; /** Represents a generic fileset. 
*/ -class GenericFileset implements Fileset, SupportsTags { +class GenericFileset implements Fileset, SupportsTags, SupportsRoles { private final FilesetDTO filesetDTO; private final MetadataObjectTagOperations objectTagOperations; + private final MetadataObjectRoleOperations objectRoleOperations; GenericFileset(FilesetDTO filesetDTO, RESTClient restClient, Namespace filesetNs) { this.filesetDTO = filesetDTO; @@ -46,6 +48,8 @@ class GenericFileset implements Fileset, SupportsTags { MetadataObject filesetObject = MetadataObjects.of(filesetFullName, MetadataObject.Type.FILESET); this.objectTagOperations = new MetadataObjectTagOperations(filesetNs.level(0), filesetObject, restClient); + this.objectRoleOperations = + new MetadataObjectRoleOperations(filesetNs.level(0), filesetObject, restClient); } @Override @@ -84,6 +88,11 @@ public SupportsTags supportsTags() { return this; } + @Override + public SupportsRoles supportsRoles() { + return this; + } + @Override public String[] listTags() { return objectTagOperations.listTags(); @@ -104,6 +113,11 @@ public String[] associateTags(String[] tagsToAdd, String[] tagsToRemove) { return objectTagOperations.associateTags(tagsToAdd, tagsToRemove); } + @Override + public String[] listBindingRoleNames() { + return objectRoleOperations.listBindingRoleNames(); + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/GenericSchema.java b/clients/client-java/src/main/java/org/apache/gravitino/client/GenericSchema.java index e595a53ab11..22af2e3a271 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/GenericSchema.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/GenericSchema.java @@ -23,23 +23,27 @@ import org.apache.gravitino.MetadataObject; import org.apache.gravitino.MetadataObjects; import org.apache.gravitino.Schema; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.dto.SchemaDTO; import org.apache.gravitino.exceptions.NoSuchTagException; import org.apache.gravitino.tag.SupportsTags; import org.apache.gravitino.tag.Tag; /** Represents a generic schema. 
*/ -class GenericSchema implements Schema, SupportsTags { +class GenericSchema implements Schema, SupportsTags, SupportsRoles { private final SchemaDTO schemaDTO; private final MetadataObjectTagOperations objectTagOperations; + private final MetadataObjectRoleOperations objectRoleOperations; GenericSchema(SchemaDTO schemaDTO, RESTClient restClient, String metalake, String catalog) { this.schemaDTO = schemaDTO; MetadataObject schemaObject = MetadataObjects.of(catalog, schemaDTO.name(), MetadataObject.Type.SCHEMA); this.objectTagOperations = new MetadataObjectTagOperations(metalake, schemaObject, restClient); + this.objectRoleOperations = + new MetadataObjectRoleOperations(metalake, schemaObject, restClient); } @Override @@ -47,6 +51,11 @@ public SupportsTags supportsTags() { return this; } + @Override + public SupportsRoles supportsRoles() { + return this; + } + @Override public String name() { return schemaDTO.name(); @@ -87,6 +96,11 @@ public String[] associateTags(String[] tagsToAdd, String[] tagsToRemove) { return objectTagOperations.associateTags(tagsToAdd, tagsToRemove); } + @Override + public String[] listBindingRoleNames() { + return objectRoleOperations.listBindingRoleNames(); + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/GenericTopic.java b/clients/client-java/src/main/java/org/apache/gravitino/client/GenericTopic.java index 55edfdd54bf..0048d489ce4 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/GenericTopic.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/GenericTopic.java @@ -25,6 +25,7 @@ import org.apache.gravitino.MetadataObject; import org.apache.gravitino.MetadataObjects; import org.apache.gravitino.Namespace; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.dto.messaging.TopicDTO; import org.apache.gravitino.exceptions.NoSuchTagException; import org.apache.gravitino.messaging.Topic; @@ -32,11 +33,12 @@ import org.apache.gravitino.tag.Tag; /** Represents a generic topic. 
*/ -class GenericTopic implements Topic, SupportsTags { +class GenericTopic implements Topic, SupportsTags, SupportsRoles { private final TopicDTO topicDTO; private final MetadataObjectTagOperations objectTagOperations; + private final MetadataObjectRoleOperations objectRoleOperations; GenericTopic(TopicDTO topicDTO, RESTClient restClient, Namespace topicNs) { this.topicDTO = topicDTO; @@ -45,6 +47,8 @@ class GenericTopic implements Topic, SupportsTags { MetadataObject topicObject = MetadataObjects.of(topicFullName, MetadataObject.Type.TOPIC); this.objectTagOperations = new MetadataObjectTagOperations(topicNs.level(0), topicObject, restClient); + this.objectRoleOperations = + new MetadataObjectRoleOperations(topicNs.level(0), topicObject, restClient); } @Override @@ -72,6 +76,11 @@ public SupportsTags supportsTags() { return this; } + @Override + public SupportsRoles supportsRoles() { + return this; + } + @Override public String[] listTags() { return objectTagOperations.listTags(); @@ -92,6 +101,11 @@ public String[] associateTags(String[] tagsToAdd, String[] tagsToRemove) { return objectTagOperations.associateTags(tagsToAdd, tagsToRemove); } + @Override + public String[] listBindingRoleNames() { + return objectRoleOperations.listBindingRoleNames(); + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoMetalake.java b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoMetalake.java index 58973b4cf63..8f98b6fd3c8 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoMetalake.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoMetalake.java @@ -32,12 +32,14 @@ import org.apache.gravitino.Catalog; import org.apache.gravitino.CatalogChange; import org.apache.gravitino.MetadataObject; +import org.apache.gravitino.MetadataObjects; import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.SupportsCatalogs; import org.apache.gravitino.authorization.Group; import org.apache.gravitino.authorization.Owner; import org.apache.gravitino.authorization.Role; import org.apache.gravitino.authorization.SecurableObject; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.authorization.User; import org.apache.gravitino.dto.AuditDTO; import org.apache.gravitino.dto.MetalakeDTO; @@ -93,7 +95,8 @@ * catalogs as sub-level metadata collections. With {@link GravitinoMetalake}, users can list, * create, load, alter and drop a catalog with specified identifier. 
*/ -public class GravitinoMetalake extends MetalakeDTO implements SupportsCatalogs, TagOperations { +public class GravitinoMetalake extends MetalakeDTO + implements SupportsCatalogs, TagOperations, SupportsRoles { private static final String API_METALAKES_CATALOGS_PATH = "api/metalakes/%s/catalogs/%s"; private static final String API_PERMISSION_PATH = "api/metalakes/%s/permissions/%s"; private static final String API_METALAKES_USERS_PATH = "api/metalakes/%s/users/%s"; @@ -105,6 +108,7 @@ public class GravitinoMetalake extends MetalakeDTO implements SupportsCatalogs, private static final String BLANK_PLACEHOLDER = ""; private final RESTClient restClient; + private final MetadataObjectRoleOperations metadataObjectRoleOperations; GravitinoMetalake( String name, @@ -114,6 +118,9 @@ public class GravitinoMetalake extends MetalakeDTO implements SupportsCatalogs, RESTClient restClient) { super(name, comment, properties, auditDTO); this.restClient = restClient; + this.metadataObjectRoleOperations = + new MetadataObjectRoleOperations( + name, MetadataObjects.of(null, name, MetadataObject.Type.METALAKE), restClient); } /** @@ -308,6 +315,11 @@ public void testConnection( ErrorHandlers.catalogErrorHandler().accept(resp); } + @Override + public SupportsRoles supportsRoles() { + return this; + } + /* * List all the tag names under a metalake. * @@ -896,6 +908,11 @@ public void setOwner(MetadataObject object, String ownerName, Owner.Type ownerTy resp.validate(); } + @Override + public String[] listBindingRoleNames() { + return metadataObjectRoleOperations.listBindingRoleNames(); + } + static class Builder extends MetalakeDTO.Builder { private RESTClient restClient; diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/MetadataObjectRoleOperations.java b/clients/client-java/src/main/java/org/apache/gravitino/client/MetadataObjectRoleOperations.java new file mode 100644 index 00000000000..54a6634355f --- /dev/null +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/MetadataObjectRoleOperations.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.client; + +import java.util.Collections; +import java.util.Locale; +import org.apache.gravitino.MetadataObject; +import org.apache.gravitino.authorization.SupportsRoles; +import org.apache.gravitino.dto.responses.NameListResponse; + +class MetadataObjectRoleOperations implements SupportsRoles { + + private final RESTClient restClient; + + private final String roleRequestPath; + + MetadataObjectRoleOperations( + String metalakeName, MetadataObject metadataObject, RESTClient restClient) { + this.restClient = restClient; + this.roleRequestPath = + String.format( + "api/metalakes/%s/objects/%s/%s/roles", + metalakeName, + metadataObject.type().name().toLowerCase(Locale.ROOT), + metadataObject.fullName()); + } + + @Override + public String[] listBindingRoleNames() { + NameListResponse resp = + restClient.get( + roleRequestPath, + NameListResponse.class, + Collections.emptyMap(), + ErrorHandlers.roleErrorHandler()); + resp.validate(); + + return resp.getNames(); + } +} diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/RelationalTable.java b/clients/client-java/src/main/java/org/apache/gravitino/client/RelationalTable.java index af7e094b114..83634295f95 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/RelationalTable.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/RelationalTable.java @@ -32,6 +32,7 @@ import org.apache.gravitino.MetadataObject; import org.apache.gravitino.MetadataObjects; import org.apache.gravitino.Namespace; +import org.apache.gravitino.authorization.SupportsRoles; import org.apache.gravitino.dto.rel.TableDTO; import org.apache.gravitino.dto.rel.partitions.PartitionDTO; import org.apache.gravitino.dto.requests.AddPartitionsRequest; @@ -55,7 +56,7 @@ import org.apache.gravitino.tag.Tag; /** Represents a relational table. */ -class RelationalTable implements Table, SupportsPartitions, SupportsTags { +class RelationalTable implements Table, SupportsPartitions, SupportsTags, SupportsRoles { private static final Joiner DOT_JOINER = Joiner.on("."); @@ -66,6 +67,7 @@ class RelationalTable implements Table, SupportsPartitions, SupportsTags { private final Namespace namespace; private final MetadataObjectTagOperations objectTagOperations; + private final MetadataObjectRoleOperations objectRoleOperations; /** * Creates a new RelationalTable. 
@@ -94,6 +96,8 @@ private RelationalTable(Namespace namespace, TableDTO tableDTO, RESTClient restC MetadataObjects.parse(tableFullName(namespace, tableDTO.name()), MetadataObject.Type.TABLE); this.objectTagOperations = new MetadataObjectTagOperations(namespace.level(0), tableObject, restClient); + this.objectRoleOperations = + new MetadataObjectRoleOperations(namespace.level(0), tableObject, restClient); } /** @@ -284,6 +288,11 @@ public SupportsTags supportsTags() { return this; } + @Override + public SupportsRoles supportsRoles() { + return this; + } + private static String tableFullName(Namespace tableNS, String tableName) { return DOT_JOINER.join(tableNS.level(1), tableNS.level(2), tableName); } @@ -307,4 +316,9 @@ public Tag getTag(String name) throws NoSuchTagException { public String[] associateTags(String[] tagsToAdd, String[] tagsToRemove) { return objectTagOperations.associateTags(tagsToAdd, tagsToRemove); } + + @Override + public String[] listBindingRoleNames() { + return objectRoleOperations.listBindingRoleNames(); + } } diff --git a/clients/client-java/src/test/java/org/apache/gravitino/client/TestSupportRoles.java b/clients/client-java/src/test/java/org/apache/gravitino/client/TestSupportRoles.java new file mode 100644 index 00000000000..b22d5b21b41 --- /dev/null +++ b/clients/client-java/src/test/java/org/apache/gravitino/client/TestSupportRoles.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.client; + +import static org.apache.hc.core5.http.HttpStatus.SC_INTERNAL_SERVER_ERROR; +import static org.apache.hc.core5.http.HttpStatus.SC_NOT_FOUND; +import static org.apache.hc.core5.http.HttpStatus.SC_OK; + +import com.fasterxml.jackson.core.JsonProcessingException; +import java.util.Collections; +import java.util.Locale; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.MetadataObject; +import org.apache.gravitino.MetadataObjects; +import org.apache.gravitino.Metalake; +import org.apache.gravitino.Namespace; +import org.apache.gravitino.Schema; +import org.apache.gravitino.authorization.SupportsRoles; +import org.apache.gravitino.dto.AuditDTO; +import org.apache.gravitino.dto.SchemaDTO; +import org.apache.gravitino.dto.file.FilesetDTO; +import org.apache.gravitino.dto.messaging.TopicDTO; +import org.apache.gravitino.dto.rel.ColumnDTO; +import org.apache.gravitino.dto.rel.TableDTO; +import org.apache.gravitino.dto.responses.ErrorResponse; +import org.apache.gravitino.dto.responses.NameListResponse; +import org.apache.gravitino.exceptions.NotFoundException; +import org.apache.gravitino.file.Fileset; +import org.apache.gravitino.messaging.Topic; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.rel.types.Types; +import org.apache.hc.core5.http.Method; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class TestSupportRoles extends TestBase { + private static final String METALAKE_NAME = "metalake"; + + private static Catalog relationalCatalog; + + private static Catalog filesetCatalog; + + private static Catalog messagingCatalog; + + private static Schema genericSchema; + + private static Table relationalTable; + + private static Fileset genericFileset; + + private static Topic genericTopic; + private static Metalake metalake; + + @BeforeAll + public static void setUp() throws Exception { + TestBase.setUp(); + metalake = TestGravitinoMetalake.createMetalake(client, METALAKE_NAME); + + relationalCatalog = + new RelationalCatalog( + Namespace.of(METALAKE_NAME), + "catalog1", + Catalog.Type.RELATIONAL, + "test", + "comment", + Collections.emptyMap(), + AuditDTO.builder().build(), + client.restClient()); + + filesetCatalog = + new FilesetCatalog( + Namespace.of(METALAKE_NAME), + "catalog2", + Catalog.Type.FILESET, + "test", + "comment", + Collections.emptyMap(), + AuditDTO.builder().build(), + client.restClient()); + + messagingCatalog = + new MessagingCatalog( + Namespace.of(METALAKE_NAME), + "catalog3", + Catalog.Type.MESSAGING, + "test", + "comment", + Collections.emptyMap(), + AuditDTO.builder().build(), + client.restClient()); + + genericSchema = + new GenericSchema( + SchemaDTO.builder() + .withName("schema1") + .withComment("comment1") + .withProperties(Collections.emptyMap()) + .withAudit(AuditDTO.builder().withCreator("test").build()) + .build(), + client.restClient(), + METALAKE_NAME, + "catalog1"); + + relationalTable = + RelationalTable.from( + Namespace.of(METALAKE_NAME, "catalog1", "schema1"), + TableDTO.builder() + .withName("table1") + .withComment("comment1") + .withColumns( + new ColumnDTO[] { + ColumnDTO.builder() + .withName("col1") + .withDataType(Types.IntegerType.get()) + .build() + }) + .withProperties(Collections.emptyMap()) + .withAudit(AuditDTO.builder().withCreator("test").build()) + .build(), + client.restClient()); + + genericFileset = + new GenericFileset( + FilesetDTO.builder() + .name("fileset1") + 
.comment("comment1") + .type(Fileset.Type.EXTERNAL) + .storageLocation("s3://bucket/path") + .properties(Collections.emptyMap()) + .audit(AuditDTO.builder().withCreator("test").build()) + .build(), + client.restClient(), + Namespace.of(METALAKE_NAME, "catalog1", "schema1")); + + genericTopic = + new GenericTopic( + TopicDTO.builder() + .withName("topic1") + .withComment("comment1") + .withProperties(Collections.emptyMap()) + .withAudit(AuditDTO.builder().withCreator("test").build()) + .build(), + client.restClient(), + Namespace.of(METALAKE_NAME, "catalog1", "schema1")); + } + + @Test + public void testListRolesForMetalake() throws JsonProcessingException { + testListRoles( + metalake.supportsRoles(), + MetadataObjects.of(null, metalake.name(), MetadataObject.Type.METALAKE)); + } + + @Test + public void testListRolesForCatalog() throws JsonProcessingException { + testListRoles( + relationalCatalog.supportsRoles(), + MetadataObjects.of(null, relationalCatalog.name(), MetadataObject.Type.CATALOG)); + + testListRoles( + filesetCatalog.supportsRoles(), + MetadataObjects.of(null, filesetCatalog.name(), MetadataObject.Type.CATALOG)); + + testListRoles( + messagingCatalog.supportsRoles(), + MetadataObjects.of(null, messagingCatalog.name(), MetadataObject.Type.CATALOG)); + } + + @Test + public void testListRolesForSchema() throws JsonProcessingException { + testListRoles( + genericSchema.supportsRoles(), + MetadataObjects.of("catalog1", genericSchema.name(), MetadataObject.Type.SCHEMA)); + } + + @Test + public void testListRolesForTable() throws JsonProcessingException { + testListRoles( + relationalTable.supportsRoles(), + MetadataObjects.of("catalog1.schema1", relationalTable.name(), MetadataObject.Type.TABLE)); + } + + @Test + public void testListRolesForFileset() throws JsonProcessingException { + testListRoles( + genericFileset.supportsRoles(), + MetadataObjects.of("catalog1.schema1", genericFileset.name(), MetadataObject.Type.FILESET)); + } + + @Test + public void testListRolesForTopic() throws JsonProcessingException { + testListRoles( + genericTopic.supportsRoles(), + MetadataObjects.of("catalog1.schema1", genericTopic.name(), MetadataObject.Type.TOPIC)); + } + + private void testListRoles(SupportsRoles supportsRoles, MetadataObject metadataObject) + throws JsonProcessingException { + String path = + "/api/metalakes/" + + METALAKE_NAME + + "/objects/" + + metadataObject.type().name().toLowerCase(Locale.ROOT) + + "/" + + metadataObject.fullName() + + "/roles"; + + String[] roles = new String[] {"role1", "role2"}; + NameListResponse resp = new NameListResponse(roles); + buildMockResource(Method.GET, path, null, resp, SC_OK); + + String[] actualTags = supportsRoles.listBindingRoleNames(); + Assertions.assertArrayEquals(roles, actualTags); + + // Return empty list + NameListResponse resp1 = new NameListResponse(new String[0]); + buildMockResource(Method.GET, path, null, resp1, SC_OK); + + String[] actualRoles1 = supportsRoles.listBindingRoleNames(); + Assertions.assertArrayEquals(new String[0], actualRoles1); + + // Test throw NotFoundException + ErrorResponse errorResp = + ErrorResponse.notFound(NotFoundException.class.getSimpleName(), "mock error"); + buildMockResource(Method.GET, path, null, errorResp, SC_NOT_FOUND); + + Throwable ex = + Assertions.assertThrows(NotFoundException.class, supportsRoles::listBindingRoleNames); + Assertions.assertTrue(ex.getMessage().contains("mock error")); + + // Test throw internal error + ErrorResponse errorResp1 = ErrorResponse.internalError("mock error"); 
+    buildMockResource(Method.GET, path, null, errorResp1, SC_INTERNAL_SERVER_ERROR);
+
+    Throwable ex1 =
+        Assertions.assertThrows(RuntimeException.class, supportsRoles::listBindingRoleNames);
+    Assertions.assertTrue(ex1.getMessage().contains("mock error"));
+  }
+}
diff --git a/clients/client-java/src/test/java/org/apache/gravitino/client/integration/test/authorization/AccessControlIT.java b/clients/client-java/src/test/java/org/apache/gravitino/client/integration/test/authorization/AccessControlIT.java
index 965c31fdf45..d9b85bf0dfc 100644
--- a/clients/client-java/src/test/java/org/apache/gravitino/client/integration/test/authorization/AccessControlIT.java
+++ b/clients/client-java/src/test/java/org/apache/gravitino/client/integration/test/authorization/AccessControlIT.java
@@ -26,7 +26,10 @@
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
+import org.apache.gravitino.Catalog;
 import org.apache.gravitino.Configs;
+import org.apache.gravitino.NameIdentifier;
+import org.apache.gravitino.Schema;
 import org.apache.gravitino.auth.AuthConstants;
 import org.apache.gravitino.authorization.Group;
 import org.apache.gravitino.authorization.Privilege;
@@ -42,6 +45,7 @@
 import org.apache.gravitino.exceptions.NoSuchRoleException;
 import org.apache.gravitino.exceptions.NoSuchUserException;
 import org.apache.gravitino.exceptions.UserAlreadyExistsException;
+import org.apache.gravitino.file.Fileset;
 import org.apache.gravitino.integration.test.util.AbstractIT;
 import org.apache.gravitino.utils.RandomNameUtils;
 import org.junit.jupiter.api.Assertions;
@@ -61,6 +65,15 @@ public static void startIntegrationTest() throws Exception {
     registerCustomConfigs(configs);
     AbstractIT.startIntegrationTest();
     metalake = client.createMetalake(metalakeName, "metalake comment", Collections.emptyMap());
+
+    Catalog filesetCatalog =
+        metalake.createCatalog(
+            "fileset_catalog", Catalog.Type.FILESET, "hadoop", "comment", Collections.emptyMap());
+    NameIdentifier fileIdent = NameIdentifier.of("fileset_schema", "fileset");
+    filesetCatalog.asSchemas().createSchema("fileset_schema", "comment", Collections.emptyMap());
+    filesetCatalog
+        .asFilesetCatalog()
+        .createFileset(fileIdent, "comment", Fileset.Type.EXTERNAL, "tmp", Collections.emptyMap());
   }
 
   @Test
@@ -187,6 +200,46 @@ void testManageRoles() {
     Assertions.assertEquals(
         Lists.newArrayList(anotherRoleName, roleName), Arrays.asList(roleNames));
 
+    // List roles by the object (metalake)
+    roleNames = metalake.listBindingRoleNames();
+    Arrays.sort(roleNames);
+    Assertions.assertEquals(
+        Lists.newArrayList(anotherRoleName, roleName), Arrays.asList(roleNames));
+
+    String testObjectRole = "testObjectRole";
+    SecurableObject anotherCatalogObject =
+        SecurableObjects.ofCatalog(
+            "fileset_catalog", Lists.newArrayList(Privileges.UseCatalog.allow()));
+    SecurableObject schemaObject =
+        SecurableObjects.ofSchema(
+            anotherCatalogObject,
+            "fileset_schema",
+            Lists.newArrayList(Privileges.UseSchema.allow()));
+    SecurableObject filesetObject =
+        SecurableObjects.ofFileset(
+            schemaObject, "fileset", Lists.newArrayList(Privileges.ReadFileset.allow()));
+
+    metalake.createRole(
+        testObjectRole,
+        properties,
+        Lists.newArrayList(anotherCatalogObject, schemaObject, filesetObject));
+
+    // List roles by the object (catalog)
+    Catalog catalog = metalake.loadCatalog("fileset_catalog");
+    roleNames = catalog.supportsRoles().listBindingRoleNames();
+    Assertions.assertEquals(Lists.newArrayList(testObjectRole), Arrays.asList(roleNames));
+
+    // List roles by the object (schema)
+    Schema schema = catalog.asSchemas().loadSchema("fileset_schema");
+    roleNames = schema.supportsRoles().listBindingRoleNames();
+    Assertions.assertEquals(Lists.newArrayList(testObjectRole), Arrays.asList(roleNames));
+
+    // List roles by the object (fileset)
+    Fileset fileset =
+        catalog.asFilesetCatalog().loadFileset(NameIdentifier.of("fileset_schema", "fileset"));
+    roleNames = fileset.supportsRoles().listBindingRoleNames();
+    Assertions.assertEquals(Lists.newArrayList(testObjectRole), Arrays.asList(roleNames));
+
     // Verify the object
     Assertions.assertEquals(1, role.securableObjects().size());
     createdObject = role.securableObjects().get(0);
diff --git a/core/src/main/java/org/apache/gravitino/SupportsRelationOperations.java b/core/src/main/java/org/apache/gravitino/SupportsRelationOperations.java
index 617f72ab95d..d203b94deb4 100644
--- a/core/src/main/java/org/apache/gravitino/SupportsRelationOperations.java
+++ b/core/src/main/java/org/apache/gravitino/SupportsRelationOperations.java
@@ -40,7 +40,7 @@ enum Type {
   }
 
   /**
-   * List the entities according to a give entity in a specific relation.
+   * List the entities according to a given entity in a specific relation.
    *
    * @param relType The type of relation.
    * @param nameIdentifier The given entity identifier
   * @param identType The given entity type.
   * @return The list of entities
   * @throws IOException When occurs storage issues, it will throw IOException.
   */
+  default List listEntitiesByRelation(
+      Type relType, NameIdentifier nameIdentifier, Entity.EntityType identType) throws IOException {
+    return listEntitiesByRelation(relType, nameIdentifier, identType, true /* allFields*/);
+  }
+
+  /**
+   * List the entities according to a given entity in a specific relation.
+   *
+   * @param relType The type of relation.
+   * @param nameIdentifier The given entity identifier
+   * @param identType The given entity type.
+   * @param allFields Some fields may have a relatively high acquisition cost, so the EntityStore
+   *     provides an optional setting to avoid fetching these high-cost fields, to improve
+   *     performance. If true, the method fetches all the fields; otherwise, it fetches all the
+   *     fields except the high-cost ones.
+   * @return The list of entities
+   * @throws IOException If a storage issue occurs.
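+   *     <p>For illustration only: the role listing added in this patch calls this method with
+   *     {@code allFields} disabled, because a role's securable objects are its high-cost fields.
+   *     A hypothetical invocation (the {@code store} and {@code catalogIdent} variables are
+   *     assumed examples) looks like:
+   *     <pre>{@code
+   *     List<? extends Entity> roles =
+   *         store.relationOperations()
+   *             .listEntitiesByRelation(
+   *                 SupportsRelationOperations.Type.METADATA_OBJECT_ROLE_REL,
+   *                 catalogIdent,
+   *                 Entity.EntityType.CATALOG,
+   *                 false); // allFields = false, skip fetching securable objects
+   *     }</pre>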
+   */
   List listEntitiesByRelation(
-      Type relType, NameIdentifier nameIdentifier, Entity.EntityType identType) throws IOException;
+      Type relType, NameIdentifier nameIdentifier, Entity.EntityType identType, boolean allFields)
+      throws IOException;
 
   /**
    * insert a relation between two entities
diff --git a/core/src/main/java/org/apache/gravitino/authorization/AccessControlDispatcher.java b/core/src/main/java/org/apache/gravitino/authorization/AccessControlDispatcher.java
index 95cb304de26..3214c187fc4 100644
--- a/core/src/main/java/org/apache/gravitino/authorization/AccessControlDispatcher.java
+++ b/core/src/main/java/org/apache/gravitino/authorization/AccessControlDispatcher.java
@@ -20,8 +20,10 @@
 
 import java.util.List;
 import java.util.Map;
+import org.apache.gravitino.MetadataObject;
 import org.apache.gravitino.exceptions.GroupAlreadyExistsException;
 import org.apache.gravitino.exceptions.NoSuchGroupException;
+import org.apache.gravitino.exceptions.NoSuchMetadataObjectException;
 import org.apache.gravitino.exceptions.NoSuchMetalakeException;
 import org.apache.gravitino.exceptions.NoSuchRoleException;
 import org.apache.gravitino.exceptions.NoSuchUserException;
@@ -246,4 +248,16 @@ Role createRole(
    * @throws NoSuchMetalakeException If the Metalake with the given name does not exist.
    */
   String[] listRoleNames(String metalake) throws NoSuchMetalakeException;
+
+  /**
+   * Lists the role names associated with the metadata object.
+   *
+   * @param metalake The Metalake of the Role.
+   * @param object The metadata object to query.
+   * @return The role list.
+   * @throws NoSuchMetalakeException If the Metalake with the given name does not exist.
+   * @throws NoSuchMetadataObjectException If the Metadata object with the given name does not
+   *     exist.
+   */
+  String[] listRoleNamesByObject(String metalake, MetadataObject object)
+      throws NoSuchMetalakeException, NoSuchMetadataObjectException;
 }
diff --git a/core/src/main/java/org/apache/gravitino/authorization/AccessControlManager.java b/core/src/main/java/org/apache/gravitino/authorization/AccessControlManager.java
index 8872afade70..c2f2976aa3c 100644
--- a/core/src/main/java/org/apache/gravitino/authorization/AccessControlManager.java
+++ b/core/src/main/java/org/apache/gravitino/authorization/AccessControlManager.java
@@ -18,14 +18,15 @@
  */
 package org.apache.gravitino.authorization;
 
-import com.google.common.annotations.VisibleForTesting;
 import java.util.List;
 import java.util.Map;
 import org.apache.gravitino.Config;
 import org.apache.gravitino.Configs;
 import org.apache.gravitino.EntityStore;
+import org.apache.gravitino.MetadataObject;
 import org.apache.gravitino.exceptions.GroupAlreadyExistsException;
 import org.apache.gravitino.exceptions.NoSuchGroupException;
+import org.apache.gravitino.exceptions.NoSuchMetadataObjectException;
 import org.apache.gravitino.exceptions.NoSuchMetalakeException;
 import org.apache.gravitino.exceptions.NoSuchRoleException;
 import org.apache.gravitino.exceptions.NoSuchUserException;
@@ -148,8 +149,9 @@ public String[] listRoleNames(String metalake) throws NoSuchMetalakeException {
     return roleManager.listRoleNames(metalake);
   }
 
-  @VisibleForTesting
-  RoleManager getRoleManager() {
-    return roleManager;
+  @Override
+  public String[] listRoleNamesByObject(String metalake, MetadataObject object)
+      throws NoSuchMetalakeException, NoSuchMetadataObjectException {
+    return roleManager.listRoleNamesByObject(metalake, object);
   }
 }
diff --git a/core/src/main/java/org/apache/gravitino/authorization/FutureGrantManager.java
b/core/src/main/java/org/apache/gravitino/authorization/FutureGrantManager.java index c24817ea5eb..b838e195686 100644 --- a/core/src/main/java/org/apache/gravitino/authorization/FutureGrantManager.java +++ b/core/src/main/java/org/apache/gravitino/authorization/FutureGrantManager.java @@ -20,6 +20,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import java.io.IOException; import java.util.List; import java.util.Map; @@ -37,7 +38,6 @@ import org.apache.gravitino.meta.GroupEntity; import org.apache.gravitino.meta.RoleEntity; import org.apache.gravitino.meta.UserEntity; -import org.glassfish.jersey.internal.guava.Sets; /** * FutureGrantManager is responsible for granting privileges to future object. When you grant a diff --git a/core/src/main/java/org/apache/gravitino/authorization/RoleManager.java b/core/src/main/java/org/apache/gravitino/authorization/RoleManager.java index 8b195894f4a..dc675fdcef5 100644 --- a/core/src/main/java/org/apache/gravitino/authorization/RoleManager.java +++ b/core/src/main/java/org/apache/gravitino/authorization/RoleManager.java @@ -27,15 +27,19 @@ import org.apache.gravitino.Entity; import org.apache.gravitino.EntityAlreadyExistsException; import org.apache.gravitino.EntityStore; +import org.apache.gravitino.MetadataObject; import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.Namespace; +import org.apache.gravitino.SupportsRelationOperations; import org.apache.gravitino.exceptions.NoSuchEntityException; +import org.apache.gravitino.exceptions.NoSuchMetadataObjectException; import org.apache.gravitino.exceptions.NoSuchMetalakeException; import org.apache.gravitino.exceptions.NoSuchRoleException; import org.apache.gravitino.exceptions.RoleAlreadyExistsException; import org.apache.gravitino.meta.AuditInfo; import org.apache.gravitino.meta.RoleEntity; import org.apache.gravitino.storage.IdGenerator; +import org.apache.gravitino.utils.MetadataObjectUtil; import org.apache.gravitino.utils.PrincipalUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -148,6 +152,35 @@ String[] listRoleNames(String metalake) { } } + String[] listRoleNamesByObject(String metalake, MetadataObject object) { + try { + AuthorizationUtils.checkMetalakeExists(metalake); + + return store.relationOperations() + .listEntitiesByRelation( + SupportsRelationOperations.Type.METADATA_OBJECT_ROLE_REL, + MetadataObjectUtil.toEntityIdent(metalake, object), + MetadataObjectUtil.toEntityType(object), + false /* allFields */) + .stream() + .map(entity -> ((RoleEntity) entity).name()) + .toArray(String[]::new); + + } catch (NoSuchEntityException nse) { + LOG.error("Metadata object {} (type {}) doesn't exist", object.fullName(), object.type()); + throw new NoSuchMetadataObjectException( + "Metadata object %s (type %s) doesn't exist", object.fullName(), object.type()); + } catch (IOException ioe) { + LOG.error( + "Listing roles under metalake {} by object full name {} and type {} failed due to storage issues", + metalake, + object.fullName(), + object.type(), + ioe); + throw new RuntimeException(ioe); + } + } + private RoleEntity getRoleEntity(NameIdentifier identifier) { try { return store.get(identifier, Entity.EntityType.ROLE, RoleEntity.class); diff --git a/core/src/main/java/org/apache/gravitino/hook/AccessControlHookDispatcher.java b/core/src/main/java/org/apache/gravitino/hook/AccessControlHookDispatcher.java index 7882e9c8a5e..65ed2c9da09 100644 --- 
a/core/src/main/java/org/apache/gravitino/hook/AccessControlHookDispatcher.java +++ b/core/src/main/java/org/apache/gravitino/hook/AccessControlHookDispatcher.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.gravitino.Entity; import org.apache.gravitino.GravitinoEnv; +import org.apache.gravitino.MetadataObject; import org.apache.gravitino.authorization.AccessControlDispatcher; import org.apache.gravitino.authorization.AuthorizationUtils; import org.apache.gravitino.authorization.Group; @@ -32,6 +33,7 @@ import org.apache.gravitino.authorization.User; import org.apache.gravitino.exceptions.GroupAlreadyExistsException; import org.apache.gravitino.exceptions.NoSuchGroupException; +import org.apache.gravitino.exceptions.NoSuchMetadataObjectException; import org.apache.gravitino.exceptions.NoSuchMetalakeException; import org.apache.gravitino.exceptions.NoSuchRoleException; import org.apache.gravitino.exceptions.NoSuchUserException; @@ -162,4 +164,10 @@ public boolean deleteRole(String metalake, String role) throws NoSuchMetalakeExc public String[] listRoleNames(String metalake) throws NoSuchMetalakeException { return dispatcher.listRoleNames(metalake); } + + @Override + public String[] listRoleNamesByObject(String metalake, MetadataObject object) + throws NoSuchMetalakeException, NoSuchMetadataObjectException { + return dispatcher.listRoleNamesByObject(metalake, object); + } } diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/JDBCBackend.java b/core/src/main/java/org/apache/gravitino/storage/relational/JDBCBackend.java index 2b9a6d0e4ed..42b079234a8 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/JDBCBackend.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/JDBCBackend.java @@ -369,9 +369,7 @@ public List associateTagsWithMetadataObject( @Override public List listEntitiesByRelation( - SupportsRelationOperations.Type relType, - NameIdentifier nameIdentifier, - Entity.EntityType identType) { + Type relType, NameIdentifier nameIdentifier, Entity.EntityType identType, boolean allFields) { switch (relType) { case OWNER_REL: List list = Lists.newArrayList(); @@ -382,7 +380,7 @@ public List listEntitiesByRelation( case METADATA_OBJECT_ROLE_REL: return (List) RoleMetaService.getInstance() - .listRolesByMetadataObjectIdentAndType(nameIdentifier, identType); + .listRolesByMetadataObjectIdentAndType(nameIdentifier, identType, allFields); case ROLE_GROUP_REL: if (identType == Entity.EntityType.ROLE) { return (List) GroupMetaService.getInstance().listGroupsByRoleIdent(nameIdentifier); diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/RelationalEntityStore.java b/core/src/main/java/org/apache/gravitino/storage/relational/RelationalEntityStore.java index c95db1a0710..a337e7a785e 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/RelationalEntityStore.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/RelationalEntityStore.java @@ -188,11 +188,9 @@ public List associateTagsWithMetadataObject( @Override public List listEntitiesByRelation( - SupportsRelationOperations.Type relType, - NameIdentifier nameIdentifier, - Entity.EntityType identType) + Type relType, NameIdentifier nameIdentifier, Entity.EntityType identType, boolean allFields) throws IOException { - return backend.listEntitiesByRelation(relType, nameIdentifier, identType); + return backend.listEntitiesByRelation(relType, nameIdentifier, identType, allFields); } @Override diff --git 
a/core/src/main/java/org/apache/gravitino/storage/relational/service/RoleMetaService.java b/core/src/main/java/org/apache/gravitino/storage/relational/service/RoleMetaService.java
index 1e914f59ad4..915a1495022 100644
--- a/core/src/main/java/org/apache/gravitino/storage/relational/service/RoleMetaService.java
+++ b/core/src/main/java/org/apache/gravitino/storage/relational/service/RoleMetaService.java
@@ -57,21 +57,6 @@ public static RoleMetaService getInstance() {
 
   private RoleMetaService() {}
 
-  private RolePO getRolePOByMetalakeIdAndName(Long metalakeId, String roleName) {
-    RolePO rolePO =
-        SessionUtils.getWithoutCommit(
-            RoleMetaMapper.class,
-            mapper -> mapper.selectRoleMetaByMetalakeIdAndName(metalakeId, roleName));
-
-    if (rolePO == null) {
-      throw new NoSuchEntityException(
-          NoSuchEntityException.NO_SUCH_ENTITY_MESSAGE,
-          Entity.EntityType.ROLE.name().toLowerCase(),
-          roleName);
-    }
-    return rolePO;
-  }
-
   public Long getRoleIdByMetalakeIdAndName(Long metalakeId, String roleName) {
     Long roleId =
         SessionUtils.getWithoutCommit(
@@ -93,7 +78,7 @@ public List<RolePO> listRolesByUserId(Long userId) {
   }
 
   public List<RoleEntity> listRolesByMetadataObjectIdentAndType(
-      NameIdentifier metadataObjectIdent, Entity.EntityType metadataObjectType) {
+      NameIdentifier metadataObjectIdent, Entity.EntityType metadataObjectType, boolean allFields) {
     String metalake = NameIdentifierUtil.getMetalake(metadataObjectIdent);
     long metalakeId = MetalakeMetaService.getInstance().getMetalakeIdByName(metalake);
     MetadataObject metadataObject =
@@ -109,35 +94,18 @@ public List<RoleEntity> listRolesByMetadataObjectIdentAndType(
             metadataObjectId, metadataObject.type().name()));
     return rolePOs.stream()
         .map(
-            po ->
-                POConverters.fromRolePO(
-                    po, listSecurableObjects(po), AuthorizationUtils.ofRoleNamespace(metalake)))
+            po -> {
+              if (allFields) {
+                return POConverters.fromRolePO(
+                    po, listSecurableObjects(po), AuthorizationUtils.ofRoleNamespace(metalake));
+              } else {
+                return POConverters.fromRolePO(
+                    po, Collections.emptyList(), AuthorizationUtils.ofRoleNamespace(metalake));
+              }
+            })
        .collect(Collectors.toList());
   }
 
-  private List<SecurableObject> listSecurableObjects(RolePO po) {
-    List<SecurableObjectPO> securableObjectPOs = listSecurableObjectsByRoleId(po.getRoleId());
-    List<SecurableObject> securableObjects = Lists.newArrayList();
-
-    for (SecurableObjectPO securableObjectPO : securableObjectPOs) {
-      String fullName =
-          MetadataObjectService.getMetadataObjectFullName(
-              securableObjectPO.getType(), securableObjectPO.getMetadataObjectId());
-      if (fullName != null) {
-        securableObjects.add(
-            POConverters.fromSecurableObjectPO(
-                fullName, securableObjectPO, getType(securableObjectPO.getType())));
-      } else {
-        LOG.info(
-            "The securable object {} {} may be deleted",
-            securableObjectPO.getMetadataObjectId(),
-            securableObjectPO.getType());
-      }
-    }
-
-    return securableObjects;
-  }
-
   public List<RolePO> listRolesByGroupId(Long groupId) {
     return SessionUtils.getWithoutCommit(
         RoleMetaMapper.class, mapper -> mapper.listRolesByGroupId(groupId));
@@ -234,7 +202,7 @@ public boolean deleteRole(NameIdentifier identifier) {
     return true;
   }
 
-  private List<SecurableObjectPO> listSecurableObjectsByRoleId(Long roleId) {
+  private static List<SecurableObjectPO> listSecurableObjectsByRoleId(Long roleId) {
     return SessionUtils.getWithoutCommit(
         SecurableObjectMapper.class, mapper -> mapper.listSecurableObjectsByRoleId(roleId));
   }
@@ -291,11 +259,49 @@ public int deleteRoleMetasByLegacyTimeline(long legacyTimeline, int limit) {
         + securableObjectsCount[0];
   }
 
-  private MetadataObject.Type getType(String type) {
+  private static List<SecurableObject> listSecurableObjects(RolePO po) {
+    List<SecurableObjectPO> securableObjectPOs = listSecurableObjectsByRoleId(po.getRoleId());
+    List<SecurableObject> securableObjects = Lists.newArrayList();
+
+    for (SecurableObjectPO securableObjectPO : securableObjectPOs) {
+      String fullName =
+          MetadataObjectService.getMetadataObjectFullName(
+              securableObjectPO.getType(), securableObjectPO.getMetadataObjectId());
+      if (fullName != null) {
+        securableObjects.add(
+            POConverters.fromSecurableObjectPO(
+                fullName, securableObjectPO, getType(securableObjectPO.getType())));
+      } else {
+        LOG.warn(
+            "The securable object {} {} may be deleted",
+            securableObjectPO.getMetadataObjectId(),
+            securableObjectPO.getType());
+      }
+    }
+
+    return securableObjects;
+  }
+
+  private static RolePO getRolePOByMetalakeIdAndName(Long metalakeId, String roleName) {
+    RolePO rolePO =
+        SessionUtils.getWithoutCommit(
+            RoleMetaMapper.class,
+            mapper -> mapper.selectRoleMetaByMetalakeIdAndName(metalakeId, roleName));
+
+    if (rolePO == null) {
+      throw new NoSuchEntityException(
+          NoSuchEntityException.NO_SUCH_ENTITY_MESSAGE,
+          Entity.EntityType.ROLE.name().toLowerCase(),
+          roleName);
+    }
+    return rolePO;
+  }
+
+  private static MetadataObject.Type getType(String type) {
     return MetadataObject.Type.valueOf(type);
   }
 
-  private String getEntityType(SecurableObject securableObject) {
+  private static String getEntityType(SecurableObject securableObject) {
     return securableObject.type().name();
   }
 }
diff --git a/core/src/test/java/org/apache/gravitino/authorization/TestAccessControlManager.java b/core/src/test/java/org/apache/gravitino/authorization/TestAccessControlManager.java
index 6dfaf54fecf..b299c15ef97 100644
--- a/core/src/test/java/org/apache/gravitino/authorization/TestAccessControlManager.java
+++ b/core/src/test/java/org/apache/gravitino/authorization/TestAccessControlManager.java
@@ -115,6 +115,7 @@ public class TestAccessControlManager {
   public static void setUp() throws Exception {
     File dbDir = new File(DB_DIR);
     dbDir.mkdirs();
+
     Mockito.when(config.get(SERVICE_ADMINS)).thenReturn(Lists.newArrayList("admin1", "admin2"));
     Mockito.when(config.get(ENTITY_STORE)).thenReturn(RELATIONAL_ENTITY_STORE);
     Mockito.when(config.get(ENTITY_RELATIONAL_STORE)).thenReturn(DEFAULT_ENTITY_RELATIONAL_STORE);
@@ -125,10 +126,12 @@ public static void setUp() throws Exception {
     Mockito.when(config.get(STORE_DELETE_AFTER_TIME)).thenReturn(20 * 60 * 1000L);
     Mockito.when(config.get(VERSION_RETENTION_COUNT)).thenReturn(1L);
     Mockito.when(config.get(CATALOG_CACHE_EVICTION_INTERVAL_MS)).thenReturn(1000L);
+
     Mockito.doReturn(100000L).when(config).get(TREE_LOCK_MAX_NODE_IN_MEMORY);
     Mockito.doReturn(1000L).when(config).get(TREE_LOCK_MIN_NODE_IN_MEMORY);
     Mockito.doReturn(36000L).when(config).get(TREE_LOCK_CLEAN_INTERVAL);
     FieldUtils.writeField(GravitinoEnv.getInstance(), "lockManager", new LockManager(config), true);
+
     entityStore = EntityStoreFactory.createEntityStore(config);
     entityStore.initialize(config);
 
@@ -146,6 +149,7 @@ public static void setUp() throws Exception {
             AuditInfo.builder().withCreator("test").withCreateTime(Instant.now()).build())
         .build();
     entityStore.put(catalogEntity, true);
+
     CatalogEntity anotherCatalogEntity =
         CatalogEntity.builder()
             .withId(4L)
@@ -421,6 +425,31 @@ public void testListRoles() {
     String[] actualRoles = accessControlManager.listRoleNames("metalake_list");
     Arrays.sort(actualRoles);
     Assertions.assertArrayEquals(new String[] {"testList1", "testList2"}, actualRoles);
+
+    accessControlManager.deleteRole("metalake_list", "testList1");
+
accessControlManager.deleteRole("metalake_list", "testList2"); + } + + @Test + public void testListRolesByObject() { + Map props = ImmutableMap.of("k1", "v1"); + SecurableObject catalogObject = + SecurableObjects.ofCatalog("catalog", Lists.newArrayList(Privileges.UseCatalog.allow())); + + accessControlManager.createRole( + "metalake_list", "testList1", props, Lists.newArrayList(catalogObject)); + + accessControlManager.createRole( + "metalake_list", "testList2", props, Lists.newArrayList(catalogObject)); + + // Test to list roles + String[] listedRoles = + accessControlManager.listRoleNamesByObject("metalake_list", catalogObject); + Arrays.sort(listedRoles); + Assertions.assertArrayEquals(new String[] {"testList1", "testList2"}, listedRoles); + + accessControlManager.deleteRole("metalake_list", "testList1"); + accessControlManager.deleteRole("metalake_list", "testList2"); } private void testProperties(Map expectedProps, Map testProps) { diff --git a/core/src/test/java/org/apache/gravitino/storage/relational/service/TestRoleMetaService.java b/core/src/test/java/org/apache/gravitino/storage/relational/service/TestRoleMetaService.java index 4a781f01861..1f818b11253 100644 --- a/core/src/test/java/org/apache/gravitino/storage/relational/service/TestRoleMetaService.java +++ b/core/src/test/java/org/apache/gravitino/storage/relational/service/TestRoleMetaService.java @@ -441,7 +441,7 @@ void listRolesBySecurableObject() throws IOException { List roleEntities = roleMetaService.listRolesByMetadataObjectIdentAndType( - catalog.nameIdentifier(), catalog.type()); + catalog.nameIdentifier(), catalog.type(), true); roleEntities.sort(Comparator.comparing(RoleEntity::name)); Assertions.assertEquals(Lists.newArrayList(role1, role2), roleEntities); } diff --git a/server/src/main/java/org/apache/gravitino/server/web/rest/MetadataObjectRoleOperations.java b/server/src/main/java/org/apache/gravitino/server/web/rest/MetadataObjectRoleOperations.java new file mode 100644 index 00000000000..ad27b22a3eb --- /dev/null +++ b/server/src/main/java/org/apache/gravitino/server/web/rest/MetadataObjectRoleOperations.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.server.web.rest; + +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; +import java.util.Locale; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.Response; +import org.apache.gravitino.GravitinoEnv; +import org.apache.gravitino.MetadataObject; +import org.apache.gravitino.MetadataObjects; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.authorization.AccessControlDispatcher; +import org.apache.gravitino.dto.responses.NameListResponse; +import org.apache.gravitino.lock.LockType; +import org.apache.gravitino.lock.TreeLockUtils; +import org.apache.gravitino.metrics.MetricNames; +import org.apache.gravitino.server.authorization.NameBindings; +import org.apache.gravitino.server.web.Utils; +import org.apache.gravitino.utils.MetadataObjectUtil; + +@NameBindings.AccessControlInterfaces +@Path("/metalakes/{metalake}/objects/{type}/{fullName}/roles") +public class MetadataObjectRoleOperations { + + private final AccessControlDispatcher accessControlDispatcher; + + @Context private HttpServletRequest httpRequest; + + public MetadataObjectRoleOperations() { + // Because accessControlManager may be null when Gravitino doesn't enable authorization, + // and Jersey injection doesn't support null values, MetadataObjectRoleOperations retrieves + // the accessControlDispatcher from GravitinoEnv + // instead of relying on injection here. + this.accessControlDispatcher = GravitinoEnv.getInstance().accessControlDispatcher(); + } + + @GET + @Produces("application/vnd.gravitino.v1+json") + @Timed(name = "list-role-by-object." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "list-role-by-object", absolute = true) + public Response listRoles( + @PathParam("metalake") String metalake, + @PathParam("type") String type, + @PathParam("fullName") String fullName) { + try { + MetadataObject object = + MetadataObjects.parse( + fullName, MetadataObject.Type.valueOf(type.toUpperCase(Locale.ROOT))); + + NameIdentifier identifier = MetadataObjectUtil.toEntityIdent(metalake, object); + return Utils.doAs( + httpRequest, + () -> + TreeLockUtils.doWithTreeLock( + identifier, + LockType.READ, + () -> { + String[] names = + accessControlDispatcher.listRoleNamesByObject(metalake, object); + return Utils.ok(new NameListResponse(names)); + })); + } catch (Exception e) { + return ExceptionHandlers.handleRoleException(OperationType.LIST, "", metalake, e); + } + } +} diff --git a/server/src/test/java/org/apache/gravitino/server/web/rest/TestMetadataObjectRoleOperations.java b/server/src/test/java/org/apache/gravitino/server/web/rest/TestMetadataObjectRoleOperations.java new file mode 100644 index 00000000000..19c545a08af --- /dev/null +++ b/server/src/test/java/org/apache/gravitino/server/web/rest/TestMetadataObjectRoleOperations.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.server.web.rest; + +import static org.apache.gravitino.Configs.TREE_LOCK_CLEAN_INTERVAL; +import static org.apache.gravitino.Configs.TREE_LOCK_MAX_NODE_IN_MEMORY; +import static org.apache.gravitino.Configs.TREE_LOCK_MIN_NODE_IN_MEMORY; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.core.Application; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.commons.lang3.reflect.FieldUtils; +import org.apache.gravitino.Config; +import org.apache.gravitino.GravitinoEnv; +import org.apache.gravitino.authorization.AccessControlManager; +import org.apache.gravitino.dto.responses.ErrorConstants; +import org.apache.gravitino.dto.responses.ErrorResponse; +import org.apache.gravitino.dto.responses.NameListResponse; +import org.apache.gravitino.exceptions.NoSuchMetalakeException; +import org.apache.gravitino.lock.LockManager; +import org.apache.gravitino.rest.RESTUtils; +import org.glassfish.hk2.utilities.binding.AbstractBinder; +import org.glassfish.jersey.server.ResourceConfig; +import org.glassfish.jersey.test.JerseyTest; +import org.glassfish.jersey.test.TestProperties; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +public class TestMetadataObjectRoleOperations extends JerseyTest { + + private static final AccessControlManager manager = mock(AccessControlManager.class); + + private static class MockServletRequestFactory extends ServletRequestFactoryBase { + @Override + public HttpServletRequest get() { + HttpServletRequest request = mock(HttpServletRequest.class); + when(request.getRemoteUser()).thenReturn(null); + return request; + } + } + + @BeforeAll + public static void setup() throws IllegalAccessException { + Config config = mock(Config.class); + Mockito.doReturn(100000L).when(config).get(TREE_LOCK_MAX_NODE_IN_MEMORY); + Mockito.doReturn(1000L).when(config).get(TREE_LOCK_MIN_NODE_IN_MEMORY); + Mockito.doReturn(36000L).when(config).get(TREE_LOCK_CLEAN_INTERVAL); + FieldUtils.writeField(GravitinoEnv.getInstance(), "lockManager", new LockManager(config), true); + FieldUtils.writeField(GravitinoEnv.getInstance(), "accessControlDispatcher", manager, true); + } + + @Override + protected Application configure() { + try { + forceSet( + TestProperties.CONTAINER_PORT, String.valueOf(RESTUtils.findAvailablePort(2000, 3000))); + } catch (IOException e) { + throw new RuntimeException(e); + } + + ResourceConfig resourceConfig = new ResourceConfig(); + resourceConfig.register(MetadataObjectRoleOperations.class); + resourceConfig.register( + new AbstractBinder() { + @Override + protected void configure() { + bindFactory(MockServletRequestFactory.class).to(HttpServletRequest.class); + } + }); + + return resourceConfig; + } + + @Test + public void testListRoleNames() { + 
when(manager.listRoleNamesByObject(any(), any())).thenReturn(new String[] {"role"}); + + Response resp = + target("/metalakes/metalake1/objects/metalake/metalake1/roles/") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); + + NameListResponse listResponse = resp.readEntity(NameListResponse.class); + Assertions.assertEquals(0, listResponse.getCode()); + + Assertions.assertEquals(1, listResponse.getNames().length); + Assertions.assertEquals("role", listResponse.getNames()[0]); + + // Test to throw NoSuchMetalakeException + doThrow(new NoSuchMetalakeException("mock error")) + .when(manager) + .listRoleNamesByObject(any(), any()); + Response resp1 = + target("/metalakes/metalake1/objects/metalake/metalake1/roles/") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + + Assertions.assertEquals(Response.Status.NOT_FOUND.getStatusCode(), resp1.getStatus()); + + ErrorResponse errorResponse = resp1.readEntity(ErrorResponse.class); + Assertions.assertEquals(ErrorConstants.NOT_FOUND_CODE, errorResponse.getCode()); + Assertions.assertEquals(NoSuchMetalakeException.class.getSimpleName(), errorResponse.getType()); + + // Test to throw internal RuntimeException + doThrow(new RuntimeException("mock error")).when(manager).listRoleNamesByObject(any(), any()); + Response resp3 = + target("/metalakes/metalake1/objects/metalake/metalake1/roles") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + + Assertions.assertEquals( + Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), resp3.getStatus()); + + ErrorResponse errorResponse2 = resp3.readEntity(ErrorResponse.class); + Assertions.assertEquals(ErrorConstants.INTERNAL_ERROR_CODE, errorResponse2.getCode()); + Assertions.assertEquals(RuntimeException.class.getSimpleName(), errorResponse2.getType()); + } +} diff --git a/server/src/test/java/org/apache/gravitino/server/web/rest/TestRoleOperations.java b/server/src/test/java/org/apache/gravitino/server/web/rest/TestRoleOperations.java index eb365d1ac69..a2f0c4847d6 100644 --- a/server/src/test/java/org/apache/gravitino/server/web/rest/TestRoleOperations.java +++ b/server/src/test/java/org/apache/gravitino/server/web/rest/TestRoleOperations.java @@ -334,23 +334,6 @@ public void testGetRole() { Assertions.assertEquals(RuntimeException.class.getSimpleName(), errorResponse2.getType()); } - private Role buildRole(String role) { - SecurableObject catalog = - SecurableObjects.ofCatalog("catalog", Lists.newArrayList(Privileges.UseCatalog.allow())); - SecurableObject anotherSecurableObject = - SecurableObjects.ofCatalog( - "another_catalog", Lists.newArrayList(Privileges.CreateSchema.deny())); - - return RoleEntity.builder() - .withId(1L) - .withName(role) - .withProperties(Collections.emptyMap()) - .withSecurableObjects(Lists.newArrayList(catalog, anotherSecurableObject)) - .withAuditInfo( - AuditInfo.builder().withCreator("creator").withCreateTime(Instant.now()).build()) - .build(); - } - @Test public void testDeleteRole() { when(manager.deleteRole(any(), any())).thenReturn(true); @@ -502,4 +485,21 @@ public void testListRoleNames() { Assertions.assertEquals(ErrorConstants.INTERNAL_ERROR_CODE, errorResponse2.getCode()); Assertions.assertEquals(RuntimeException.class.getSimpleName(), errorResponse2.getType()); } + + private Role buildRole(String role) { + SecurableObject catalog = + 
SecurableObjects.ofCatalog("catalog", Lists.newArrayList(Privileges.UseCatalog.allow())); + SecurableObject anotherSecurableObject = + SecurableObjects.ofCatalog( + "another_catalog", Lists.newArrayList(Privileges.CreateSchema.deny())); + + return RoleEntity.builder() + .withId(1L) + .withName(role) + .withProperties(Collections.emptyMap()) + .withSecurableObjects(Lists.newArrayList(catalog, anotherSecurableObject)) + .withAuditInfo( + AuditInfo.builder().withCreator("creator").withCreateTime(Instant.now()).build()) + .build(); + } } From 3c20d95065ea5d0139f6413b457221673e1d55ce Mon Sep 17 00:00:00 2001 From: Qiang-Liu Date: Fri, 27 Sep 2024 15:49:28 +0800 Subject: [PATCH 02/15] [#4545] improvement(paimon-catalog): reduce catalog-lakehouse-paimon libs size from 222MB to 75MB (#4547) ### What changes were proposed in this pull request? remove some unnecessary dependencies ### Why are the changes needed? reduce catalog-lakehouse-paimon libs Fix: #4545 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed --------- Co-authored-by: fanng --- .../catalog-lakehouse-paimon/build.gradle.kts | 62 ++++++++++++++++--- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 3974fba61f3..f30fd888005 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -30,40 +30,80 @@ val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val paimonVersion: String = libs.versions.paimon.get() dependencies { - implementation(project(":api")) - implementation(project(":common")) - implementation(project(":core")) + implementation(project(":api")) { + exclude("*") + } + implementation(project(":common")) { + exclude("*") + } + implementation(project(":core")) { + exclude("*") + } implementation(libs.bundles.paimon) { exclude("com.sun.jersey") exclude("javax.servlet") + exclude("org.apache.curator") + exclude("org.apache.hive") + exclude("org.apache.hbase") + exclude("org.apache.zookeeper") + exclude("org.eclipse.jetty.aggregate:jetty-all") + exclude("org.mortbay.jetty") + exclude("org.mortbay.jetty:jetty") + exclude("org.mortbay.jetty:jetty-util") + exclude("org.mortbay.jetty:jetty-sslengine") + exclude("it.unimi.dsi") + exclude("com.ververica") + exclude("org.apache.hadoop") + exclude("org.apache.commons") + exclude("org.xerial.snappy") + exclude("com.github.luben") + exclude("com.google.protobuf") + exclude("joda-time") + exclude("org.apache.parquet:parquet-jackson") + exclude("org.apache.parquet:parquet-format-structures") + exclude("org.apache.parquet:parquet-encoding") + exclude("org.apache.parquet:parquet-common") + exclude("org.apache.parquet:parquet-hadoop") + exclude("org.apache.paimon:paimon-codegen-loader") + exclude("org.apache.paimon:paimon-shade-caffeine-2") + exclude("org.apache.paimon:paimon-shade-guava-30") } implementation(libs.bundles.log4j) implementation(libs.commons.lang3) - implementation(libs.caffeine) implementation(libs.guava) implementation(libs.hadoop2.common) { exclude("com.github.spotbugs") exclude("com.sun.jersey") exclude("javax.servlet") + exclude("org.apache.curator") + exclude("org.apache.zookeeper") + exclude("org.mortbay.jetty") } implementation(libs.hadoop2.hdfs) { + exclude("*") + } + implementation(libs.hadoop2.hdfs.client) { exclude("com.sun.jersey") exclude("javax.servlet") + exclude("org.fusesource.leveldbjni") + 
exclude("org.mortbay.jetty") } implementation(libs.hadoop2.mapreduce.client.core) { - exclude("com.sun.jersey") - exclude("javax.servlet") + exclude("*") } - annotationProcessor(libs.lombok) compileOnly(libs.lombok) testImplementation(project(":clients:client-java")) testImplementation(project(":integration-test-common", "testArtifacts")) testImplementation(project(":server")) - testImplementation(project(":server-common")) + testImplementation(project(":server-common")) { + exclude("org.mortbay.jetty") + exclude("com.sun.jersey.contribs") + } testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { exclude("org.apache.hadoop") + exclude("org.rocksdb") } testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") { exclude("org.apache.avro") @@ -94,7 +134,11 @@ tasks { val copyCatalogLibs by registering(Copy::class) { dependsOn("jar", "runtimeJars") - from("build/libs") + from("build/libs") { + exclude("guava-*.jar") + exclude("log4j-*.jar") + exclude("slf4j-*.jar") + } into("$rootDir/distribution/package/catalogs/lakehouse-paimon/libs") } From 78b5116cc12fc22f6aa19b0c31af68833a91a2ca Mon Sep 17 00:00:00 2001 From: yangyuxia <13853186257@139.com> Date: Fri, 27 Sep 2024 17:20:51 +0800 Subject: [PATCH 03/15] fix(authorization-ranger):the descr field of ranger table x_group is not null (#4975) ### What changes were proposed in this pull request? When calling the ranger CREATE_GROUP api, the desc field cannot be null ### Why are the changes needed? When calling the ranger CREATE_GROUP api, ranger response is error. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? yes, by hand. --------- Co-authored-by: yangyx <360508847@qq.com> --- .../authorization/ranger/RangerAuthorizationPlugin.java | 3 ++- .../gravitino/authorization/ranger/reference/VXGroup.java | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/authorizations/authorization-ranger/src/main/java/org/apache/gravitino/authorization/ranger/RangerAuthorizationPlugin.java b/authorizations/authorization-ranger/src/main/java/org/apache/gravitino/authorization/ranger/RangerAuthorizationPlugin.java index 3b503a3f91c..021fca2a996 100644 --- a/authorizations/authorization-ranger/src/main/java/org/apache/gravitino/authorization/ranger/RangerAuthorizationPlugin.java +++ b/authorizations/authorization-ranger/src/main/java/org/apache/gravitino/authorization/ranger/RangerAuthorizationPlugin.java @@ -383,7 +383,8 @@ public Boolean onUserAcquired(User user) throws RuntimeException { @Override public Boolean onGroupAdded(Group group) throws RuntimeException { - return rangerClient.createGroup(VXGroup.builder().withName(group.name()).build()); + return rangerClient.createGroup( + VXGroup.builder().withName(group.name()).withDescription(group.name()).build()); } @Override diff --git a/authorizations/authorization-ranger/src/main/java/org/apache/gravitino/authorization/ranger/reference/VXGroup.java b/authorizations/authorization-ranger/src/main/java/org/apache/gravitino/authorization/ranger/reference/VXGroup.java index f409cdeb045..3a58f5c95a0 100644 --- a/authorizations/authorization-ranger/src/main/java/org/apache/gravitino/authorization/ranger/reference/VXGroup.java +++ b/authorizations/authorization-ranger/src/main/java/org/apache/gravitino/authorization/ranger/reference/VXGroup.java @@ -91,6 +91,11 @@ public Builder withName(String name) { return this; } + public Builder withDescription(String description) { + vxGroup.description = description; + return this; + 
} + public VXGroup build() { return vxGroup; } From 517f66c775ff20f052a32bbd91933c7e2091323d Mon Sep 17 00:00:00 2001 From: Qiang-Liu Date: Fri, 27 Sep 2024 19:09:43 +0800 Subject: [PATCH 04/15] [#4873] feat(core): support list group (#4879) ### What changes were proposed in this pull request? support list group ### Why are the changes needed? support list group Fix: #4873 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? UT --------- Co-authored-by: Rory --- .../gravitino/client/GravitinoClient.java | 20 +++ .../gravitino/client/GravitinoMetalake.java | 39 +++++ .../gravitino/client/TestUserGroup.java | 52 +++++++ .../test/authorization/AccessControlIT.java | 30 ++++ .../dto/responses/GroupListResponse.java | 74 ++++++++++ .../gravitino/dto/util/DTOConverters.java | 13 ++ .../AccessControlDispatcher.java | 18 +++ .../authorization/AccessControlManager.java | 10 ++ .../authorization/UserGroupManager.java | 61 +++++--- .../hook/AccessControlHookDispatcher.java | 10 ++ .../storage/relational/JDBCBackend.java | 2 + .../relational/mapper/GroupMetaMapper.java | 9 ++ .../mapper/GroupMetaSQLProviderFactory.java | 11 +- .../base/GroupMetaBaseSQLProvider.java | 36 +++++ .../provider/h2/GroupMetaH2Provider.java | 52 +++++++ .../GroupMetaPostgreSQLProvider.java | 26 ++++ .../relational/po/ExtendedGroupPO.java | 59 ++++++++ .../relational/service/GroupMetaService.java | 32 +++++ .../relational/utils/POConverters.java | 54 ++++++- .../TestAccessControlManager.java | 22 +++ .../service/TestGroupMetaService.java | 84 ++++++++++- .../server/web/rest/GroupOperations.java | 32 +++++ .../server/web/rest/TestGroupOperations.java | 134 +++++++++++++++--- 23 files changed, 837 insertions(+), 43 deletions(-) create mode 100644 common/src/main/java/org/apache/gravitino/dto/responses/GroupListResponse.java create mode 100644 core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/h2/GroupMetaH2Provider.java create mode 100644 core/src/main/java/org/apache/gravitino/storage/relational/po/ExtendedGroupPO.java diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoClient.java b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoClient.java index a7656fd02de..a8a46ff8f47 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoClient.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoClient.java @@ -227,6 +227,26 @@ public Group getGroup(String group) throws NoSuchGroupException, NoSuchMetalakeE return getMetalake().getGroup(group); } + /** + * List the groups. + * + * @return The Group list + * @throws NoSuchMetalakeException If the Metalake with the given name does not exist. + */ + public Group[] listGroups() throws NoSuchMetalakeException { + return getMetalake().listGroups(); + } + + /** + * List the group names. + * + * @return The group names list. + * @throws NoSuchMetalakeException If the Metalake with the given name does not exist. + */ + public String[] listGroupNames() throws NoSuchMetalakeException { + return getMetalake().listGroupNames(); + } + /** * Gets a Role. 
* diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoMetalake.java b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoMetalake.java index 8f98b6fd3c8..4905681b7e5 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoMetalake.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoMetalake.java @@ -62,6 +62,7 @@ import org.apache.gravitino.dto.responses.DropResponse; import org.apache.gravitino.dto.responses.EntityListResponse; import org.apache.gravitino.dto.responses.ErrorResponse; +import org.apache.gravitino.dto.responses.GroupListResponse; import org.apache.gravitino.dto.responses.GroupResponse; import org.apache.gravitino.dto.responses.NameListResponse; import org.apache.gravitino.dto.responses.OwnerResponse; @@ -635,6 +636,44 @@ public Group getGroup(String group) throws NoSuchGroupException, NoSuchMetalakeE return resp.getGroup(); } + /** + * Lists the groups + * + * @return The Group list + * @throws NoSuchMetalakeException If the Metalake with the given name does not exist. + */ + public Group[] listGroups() throws NoSuchMetalakeException { + Map params = new HashMap<>(); + params.put("details", "true"); + + GroupListResponse resp = + restClient.get( + String.format(API_METALAKES_GROUPS_PATH, name(), BLANK_PLACEHOLDER), + params, + GroupListResponse.class, + Collections.emptyMap(), + ErrorHandlers.groupErrorHandler()); + resp.validate(); + return resp.getGroups(); + } + + /** + * Lists the group names + * + * @return The Group Name List + * @throws NoSuchMetalakeException If the Metalake with the given name does not exist. + */ + public String[] listGroupNames() throws NoSuchMetalakeException { + NameListResponse resp = + restClient.get( + String.format(API_METALAKES_GROUPS_PATH, name(), BLANK_PLACEHOLDER), + NameListResponse.class, + Collections.emptyMap(), + ErrorHandlers.groupErrorHandler()); + resp.validate(); + return resp.getNames(); + } + /** * Gets a Role. 
* diff --git a/clients/client-java/src/test/java/org/apache/gravitino/client/TestUserGroup.java b/clients/client-java/src/test/java/org/apache/gravitino/client/TestUserGroup.java index 67a3035ed8b..ff98b2ca6c7 100644 --- a/clients/client-java/src/test/java/org/apache/gravitino/client/TestUserGroup.java +++ b/clients/client-java/src/test/java/org/apache/gravitino/client/TestUserGroup.java @@ -23,8 +23,10 @@ import static javax.servlet.http.HttpServletResponse.SC_OK; import static org.apache.hc.core5.http.HttpStatus.SC_SERVER_ERROR; +import com.fasterxml.jackson.core.JsonProcessingException; import java.time.Instant; import java.util.Collections; +import java.util.HashMap; import java.util.Map; import org.apache.gravitino.authorization.Group; import org.apache.gravitino.authorization.User; @@ -35,6 +37,7 @@ import org.apache.gravitino.dto.requests.GroupAddRequest; import org.apache.gravitino.dto.requests.UserAddRequest; import org.apache.gravitino.dto.responses.ErrorResponse; +import org.apache.gravitino.dto.responses.GroupListResponse; import org.apache.gravitino.dto.responses.GroupResponse; import org.apache.gravitino.dto.responses.MetalakeResponse; import org.apache.gravitino.dto.responses.NameListResponse; @@ -327,6 +330,55 @@ public void testRemoveGroups() throws Exception { Assertions.assertThrows(RuntimeException.class, () -> gravitinoClient.removeGroup(groupName)); } + @Test + public void testListGroupNames() throws JsonProcessingException { + String groupPath = withSlash(String.format(API_METALAKES_GROUPS_PATH, metalakeName, "")); + NameListResponse listResponse = new NameListResponse(new String[] {"group1", "group2"}); + buildMockResource(Method.GET, groupPath, null, listResponse, SC_OK); + Assertions.assertArrayEquals( + new String[] {"group1", "group2"}, gravitinoClient.listGroupNames()); + ErrorResponse errRespNoMetaLake = + ErrorResponse.notFound(NoSuchMetalakeException.class.getSimpleName(), "metalake not found"); + buildMockResource(Method.GET, groupPath, null, errRespNoMetaLake, SC_NOT_FOUND); + Exception ex = + Assertions.assertThrows( + NoSuchMetalakeException.class, () -> gravitinoClient.listGroupNames()); + Assertions.assertEquals("metalake not found", ex.getMessage()); + + // Test RuntimeException + ErrorResponse errResp = ErrorResponse.internalError("internal error"); + buildMockResource(Method.GET, groupPath, null, errResp, SC_SERVER_ERROR); + + Assertions.assertThrows(RuntimeException.class, () -> gravitinoClient.listGroupNames()); + } + + @Test + public void testListGroups() throws JsonProcessingException { + String groupPath = withSlash(String.format(API_METALAKES_GROUPS_PATH, metalakeName, "")); + GroupDTO group1 = mockGroupDTO("group1"); + GroupDTO group2 = mockGroupDTO("group2"); + GroupDTO group3 = mockGroupDTO("group3"); + Map params = new HashMap<>(); + GroupListResponse listResponse = new GroupListResponse(new GroupDTO[] {group1, group2, group3}); + buildMockResource(Method.GET, groupPath, params, null, listResponse, SC_OK); + + Group[] groups = gravitinoClient.listGroups(); + Assertions.assertEquals(3, groups.length); + assertGroup(group1, groups[0]); + assertGroup(group2, groups[1]); + assertGroup(group3, groups[2]); + ErrorResponse errResNoMetaLake = + ErrorResponse.notFound(NoSuchMetalakeException.class.getSimpleName(), "metalake not found"); + buildMockResource(Method.GET, groupPath, params, null, errResNoMetaLake, SC_NOT_FOUND); + Exception ex = + Assertions.assertThrows(NoSuchMetalakeException.class, () -> gravitinoClient.listGroups()); + 
Assertions.assertEquals("metalake not found", ex.getMessage()); + // Test RuntimeException + ErrorResponse errResp = ErrorResponse.internalError("internal error"); + buildMockResource(Method.GET, groupPath, params, null, errResp, SC_SERVER_ERROR); + Assertions.assertThrows(RuntimeException.class, () -> gravitinoClient.listGroups()); + } + private UserDTO mockUserDTO(String name) { return UserDTO.builder() .withName(name) diff --git a/clients/client-java/src/test/java/org/apache/gravitino/client/integration/test/authorization/AccessControlIT.java b/clients/client-java/src/test/java/org/apache/gravitino/client/integration/test/authorization/AccessControlIT.java index d9b85bf0dfc..76cd938faae 100644 --- a/clients/client-java/src/test/java/org/apache/gravitino/client/integration/test/authorization/AccessControlIT.java +++ b/clients/client-java/src/test/java/org/apache/gravitino/client/integration/test/authorization/AccessControlIT.java @@ -146,8 +146,38 @@ void testManageGroups() { // Get a not-existed group Assertions.assertThrows(NoSuchGroupException.class, () -> metalake.getGroup("not-existed")); + Map properties = Maps.newHashMap(); + properties.put("k1", "v1"); + SecurableObject metalakeObject = + SecurableObjects.ofMetalake( + metalakeName, Lists.newArrayList(Privileges.CreateCatalog.allow())); + + // Test the group with the role + metalake.createRole("role2", properties, Lists.newArrayList(metalakeObject)); + metalake.grantRolesToGroup(Lists.newArrayList("role2"), groupName); + + // List groups + String anotherGroup = "group2#456"; + metalake.addGroup(anotherGroup); + String[] groupNames = metalake.listGroupNames(); + Arrays.sort(groupNames); + Assertions.assertEquals(Lists.newArrayList(groupName, anotherGroup), Arrays.asList(groupNames)); + + List groups = + Arrays.stream(metalake.listGroups()) + .sorted(Comparator.comparing(Group::name)) + .collect(Collectors.toList()); + Assertions.assertEquals( + Lists.newArrayList(groupName, anotherGroup), + groups.stream().map(Group::name).collect(Collectors.toList())); + Assertions.assertEquals(Lists.newArrayList("role2"), groups.get(0).roles()); + Assertions.assertTrue(metalake.removeGroup(groupName)); Assertions.assertFalse(metalake.removeGroup(groupName)); + + // clean up + metalake.removeGroup(anotherGroup); + metalake.deleteRole("role2"); } @Test diff --git a/common/src/main/java/org/apache/gravitino/dto/responses/GroupListResponse.java b/common/src/main/java/org/apache/gravitino/dto/responses/GroupListResponse.java new file mode 100644 index 00000000000..271fb9a92ba --- /dev/null +++ b/common/src/main/java/org/apache/gravitino/dto/responses/GroupListResponse.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.dto.responses; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import java.util.Arrays; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.dto.authorization.GroupDTO; + +/** Represents a response for a list of groups. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = true) +public class GroupListResponse extends BaseResponse { + + @JsonProperty("groups") + private final GroupDTO[] groups; + + /** + * Constructor for GroupListResponse. + * + * @param groups The array of group DTOs. + */ + public GroupListResponse(GroupDTO[] groups) { + super(0); + this.groups = groups; + } + + /** Default constructor for GroupListResponse. (Used for Jackson deserialization.) */ + public GroupListResponse() { + super(); + this.groups = null; + } + + /** + * Validates the response data. + * + * @throws IllegalArgumentException if groups is null, or if any group's name is blank or its audit info is null. + */ + @Override + public void validate() throws IllegalArgumentException { + super.validate(); + + Preconditions.checkArgument(groups != null, "groups must not be null"); + Arrays.stream(groups) + .forEach( + group -> { + Preconditions.checkArgument( + StringUtils.isNotBlank(group.name()), "group 'name' must not be blank"); + Preconditions.checkArgument( + group.auditInfo() != null, "group 'auditInfo' must not be null"); + }); + } +} diff --git a/common/src/main/java/org/apache/gravitino/dto/util/DTOConverters.java index 8e706c139e9..38224493b71 100644 --- a/common/src/main/java/org/apache/gravitino/dto/util/DTOConverters.java +++ b/common/src/main/java/org/apache/gravitino/dto/util/DTOConverters.java @@ -691,6 +691,19 @@ public static UserDTO[] toDTOs(User[] users) { return Arrays.stream(users).map(DTOConverters::toDTO).toArray(UserDTO[]::new); } + /** + * Converts an array of Groups to an array of GroupDTOs. + * + * @param groups The groups to be converted. + * @return The array of GroupDTOs. + */ + public static GroupDTO[] toDTOs(Group[] groups) { + if (ArrayUtils.isEmpty(groups)) { + return new GroupDTO[0]; + } + return Arrays.stream(groups).map(DTOConverters::toDTO).toArray(GroupDTO[]::new); + } + /** * Converts a DistributionDTO to a Distribution. * diff --git a/core/src/main/java/org/apache/gravitino/authorization/AccessControlDispatcher.java index 3214c187fc4..a3919512b87 100644 --- a/core/src/main/java/org/apache/gravitino/authorization/AccessControlDispatcher.java +++ b/core/src/main/java/org/apache/gravitino/authorization/AccessControlDispatcher.java @@ -129,6 +129,24 @@ Group addGroup(String metalake, String group) Group getGroup(String metalake, String group) throws NoSuchGroupException, NoSuchMetalakeException; + /** + * List groups. + * + * @param metalake The Metalake of the Group. + * @return The list of groups + * @throws NoSuchMetalakeException If the Metalake with the given name does not exist. + */ + Group[] listGroups(String metalake); + + /** + * List group names. + * + * @param metalake The Metalake of the Group. + * @return The list of group names + * @throws NoSuchMetalakeException If the Metalake with the given name does not exist. + */ + String[] listGroupNames(String metalake); + + /** + * Grant roles to a user.
* diff --git a/core/src/main/java/org/apache/gravitino/authorization/AccessControlManager.java b/core/src/main/java/org/apache/gravitino/authorization/AccessControlManager.java index c2f2976aa3c..75b0f9f1e53 100644 --- a/core/src/main/java/org/apache/gravitino/authorization/AccessControlManager.java +++ b/core/src/main/java/org/apache/gravitino/authorization/AccessControlManager.java @@ -95,6 +95,16 @@ public Group getGroup(String metalake, String group) return userGroupManager.getGroup(metalake, group); } + @Override + public Group[] listGroups(String metalake) throws NoSuchMetalakeException { + return userGroupManager.listGroups(metalake); + } + + @Override + public String[] listGroupNames(String metalake) throws NoSuchMetalakeException { + return userGroupManager.listGroupNames(metalake); + } + @Override public User grantRolesToUser(String metalake, List roles, String user) throws NoSuchUserException, NoSuchRoleException, NoSuchMetalakeException { diff --git a/core/src/main/java/org/apache/gravitino/authorization/UserGroupManager.java b/core/src/main/java/org/apache/gravitino/authorization/UserGroupManager.java index be1b687f3e4..cd852ab66a7 100644 --- a/core/src/main/java/org/apache/gravitino/authorization/UserGroupManager.java +++ b/core/src/main/java/org/apache/gravitino/authorization/UserGroupManager.java @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.Collections; import org.apache.gravitino.Entity; +import org.apache.gravitino.Entity.EntityType; import org.apache.gravitino.EntityAlreadyExistsException; import org.apache.gravitino.EntityStore; import org.apache.gravitino.Namespace; @@ -124,23 +125,6 @@ User[] listUsers(String metalake) { return listUsersInternal(metalake, true /* allFields */); } - private User[] listUsersInternal(String metalake, boolean allFields) { - try { - AuthorizationUtils.checkMetalakeExists(metalake); - - Namespace namespace = AuthorizationUtils.ofUserNamespace(metalake); - return store - .list(namespace, UserEntity.class, Entity.EntityType.USER, allFields) - .toArray(new User[0]); - } catch (NoSuchEntityException e) { - LOG.error("Metalake {} does not exist", metalake, e); - throw new NoSuchMetalakeException(METALAKE_DOES_NOT_EXIST_MSG, metalake); - } catch (IOException ioe) { - LOG.error("Listing user under metalake {} failed due to storage issues", metalake, ioe); - throw new RuntimeException(ioe); - } - } - Group addGroup(String metalake, String group) throws GroupAlreadyExistsException { try { AuthorizationUtils.checkMetalakeExists(metalake); @@ -197,4 +181,47 @@ Group getGroup(String metalake, String group) { throw new RuntimeException(ioe); } } + + Group[] listGroups(String metalake) { + return listGroupInternal(metalake, true); + } + + String[] listGroupNames(String metalake) { + return Arrays.stream(listGroupInternal(metalake, false)) + .map(Group::name) + .toArray(String[]::new); + } + + private User[] listUsersInternal(String metalake, boolean allFields) { + try { + AuthorizationUtils.checkMetalakeExists(metalake); + + Namespace namespace = AuthorizationUtils.ofUserNamespace(metalake); + return store + .list(namespace, UserEntity.class, Entity.EntityType.USER, allFields) + .toArray(new User[0]); + } catch (NoSuchEntityException e) { + LOG.error("Metalake {} does not exist", metalake, e); + throw new NoSuchMetalakeException(METALAKE_DOES_NOT_EXIST_MSG, metalake); + } catch (IOException ioe) { + LOG.error("Listing user under metalake {} failed due to storage issues", metalake, ioe); + throw new RuntimeException(ioe); + } + } + + 
private Group[] listGroupInternal(String metalake, boolean allFields) { + try { + AuthorizationUtils.checkMetalakeExists(metalake); + Namespace namespace = AuthorizationUtils.ofGroupNamespace(metalake); + return store + .list(namespace, GroupEntity.class, EntityType.GROUP, allFields) + .toArray(new Group[0]); + } catch (NoSuchEntityException e) { + LOG.error("Metalake {} does not exist", metalake, e); + throw new NoSuchMetalakeException(METALAKE_DOES_NOT_EXIST_MSG, metalake); + } catch (IOException ioe) { + LOG.error("Listing group under metalake {} failed due to storage issues", metalake, ioe); + throw new RuntimeException(ioe); + } + } } diff --git a/core/src/main/java/org/apache/gravitino/hook/AccessControlHookDispatcher.java b/core/src/main/java/org/apache/gravitino/hook/AccessControlHookDispatcher.java index 65ed2c9da09..e16974764b1 100644 --- a/core/src/main/java/org/apache/gravitino/hook/AccessControlHookDispatcher.java +++ b/core/src/main/java/org/apache/gravitino/hook/AccessControlHookDispatcher.java @@ -98,6 +98,16 @@ public Group getGroup(String metalake, String group) return dispatcher.getGroup(metalake, group); } + @Override + public Group[] listGroups(String metalake) throws NoSuchMetalakeException { + return dispatcher.listGroups(metalake); + } + + @Override + public String[] listGroupNames(String metalake) throws NoSuchMetalakeException { + return dispatcher.listGroupNames(metalake); + } + @Override public User grantRolesToUser(String metalake, List roles, String user) throws NoSuchUserException, NoSuchRoleException, NoSuchMetalakeException { diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/JDBCBackend.java b/core/src/main/java/org/apache/gravitino/storage/relational/JDBCBackend.java index 42b079234a8..c1f72c36098 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/JDBCBackend.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/JDBCBackend.java @@ -108,6 +108,8 @@ public List list( return (List) UserMetaService.getInstance().listUsersByNamespace(namespace, allFields); case ROLE: return (List) RoleMetaService.getInstance().listRolesByNamespace(namespace); + case GROUP: + return (List) GroupMetaService.getInstance().listGroupsByNamespace(namespace, allFields); default: throw new UnsupportedEntityTypeException( "Unsupported entity type: %s for list operation", entityType); diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/GroupMetaMapper.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/GroupMetaMapper.java index 5743095dd72..ae554a2a436 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/GroupMetaMapper.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/GroupMetaMapper.java @@ -20,6 +20,7 @@ package org.apache.gravitino.storage.relational.mapper; import java.util.List; +import org.apache.gravitino.storage.relational.po.ExtendedGroupPO; import org.apache.gravitino.storage.relational.po.GroupPO; import org.apache.ibatis.annotations.DeleteProvider; import org.apache.ibatis.annotations.InsertProvider; @@ -51,6 +52,14 @@ Long selectGroupIdBySchemaIdAndName( GroupPO selectGroupMetaByMetalakeIdAndName( @Param("metalakeId") Long metalakeId, @Param("groupName") String name); + @SelectProvider(type = GroupMetaSQLProviderFactory.class, method = "listGroupPOsByMetalake") + List listGroupPOsByMetalake(@Param("metalakeName") String metalakeName); + + @SelectProvider( + type = GroupMetaSQLProviderFactory.class, + method = 
"listExtendedGroupPOsByMetalakeId") + List listExtendedGroupPOsByMetalakeId(Long metalakeId); + @InsertProvider(type = GroupMetaSQLProviderFactory.class, method = "insertGroupMeta") void insertGroupMeta(@Param("groupMeta") GroupPO groupPO); diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/GroupMetaSQLProviderFactory.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/GroupMetaSQLProviderFactory.java index 75841e7bd90..591ac2e9a1c 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/GroupMetaSQLProviderFactory.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/GroupMetaSQLProviderFactory.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.gravitino.storage.relational.JDBCBackend.JDBCBackendType; import org.apache.gravitino.storage.relational.mapper.provider.base.GroupMetaBaseSQLProvider; +import org.apache.gravitino.storage.relational.mapper.provider.h2.GroupMetaH2Provider; import org.apache.gravitino.storage.relational.mapper.provider.postgresql.GroupMetaPostgreSQLProvider; import org.apache.gravitino.storage.relational.po.GroupPO; import org.apache.gravitino.storage.relational.session.SqlSessionFactoryHelper; @@ -47,8 +48,6 @@ public static GroupMetaBaseSQLProvider getProvider() { static class GroupMetaMySQLProvider extends GroupMetaBaseSQLProvider {} - static class GroupMetaH2Provider extends GroupMetaBaseSQLProvider {} - public static String selectGroupIdBySchemaIdAndName( @Param("metalakeId") Long metalakeId, @Param("groupName") String name) { return getProvider().selectGroupIdBySchemaIdAndName(metalakeId, name); @@ -84,6 +83,14 @@ public static String listGroupsByRoleId(@Param("roleId") Long roleId) { return getProvider().listGroupsByRoleId(roleId); } + public static String listGroupPOsByMetalake(@Param("metalakeName") String metalakeName) { + return getProvider().listGroupPOsByMetalake(metalakeName); + } + + public static String listExtendedGroupPOsByMetalakeId(@Param("metalakeId") Long metalakeId) { + return getProvider().listExtendedGroupPOsByMetalakeId(metalakeId); + } + public static String deleteGroupMetasByLegacyTimeline( @Param("legacyTimeline") Long legacyTimeline, @Param("limit") int limit) { return getProvider().deleteGroupMetasByLegacyTimeline(legacyTimeline, limit); diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/GroupMetaBaseSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/GroupMetaBaseSQLProvider.java index 0c26d74885a..a52e1b86144 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/GroupMetaBaseSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/GroupMetaBaseSQLProvider.java @@ -20,7 +20,9 @@ import static org.apache.gravitino.storage.relational.mapper.GroupMetaMapper.GROUP_TABLE_NAME; import static org.apache.gravitino.storage.relational.mapper.RoleMetaMapper.GROUP_ROLE_RELATION_TABLE_NAME; +import static org.apache.gravitino.storage.relational.mapper.RoleMetaMapper.ROLE_TABLE_NAME; +import org.apache.gravitino.storage.relational.mapper.MetalakeMetaMapper; import org.apache.gravitino.storage.relational.po.GroupPO; import org.apache.ibatis.annotations.Param; @@ -34,6 +36,40 @@ public String selectGroupIdBySchemaIdAndName( + " AND deleted_at = 0"; } + public String listGroupPOsByMetalake(@Param("metalakeName") String metalakeName) { + return "SELECT gt.group_id as groupId, 
gt.group_name as groupName, gt.metalake_id as metalakeId," + + " gt.audit_info as auditInfo, gt.current_version as currentVersion, gt.last_version as lastVersion," + + " gt.deleted_at as deletedAt FROM " + + GROUP_TABLE_NAME + + " gt JOIN " + + MetalakeMetaMapper.TABLE_NAME + + " mt ON gt.metalake_id = mt.metalake_id WHERE mt.metalake_name = #{metalakeName}" + + " AND gt.deleted_at = 0 AND mt.deleted_at = 0"; + } + + public String listExtendedGroupPOsByMetalakeId(Long metalakeId) { + return "SELECT gt.group_id as groupId, gt.group_name as groupName," + + " gt.metalake_id as metalakeId," + + " gt.audit_info as auditInfo," + + " gt.current_version as currentVersion, gt.last_version as lastVersion," + + " gt.deleted_at as deletedAt," + + " JSON_ARRAYAGG(rot.role_name) as roleNames," + + " JSON_ARRAYAGG(rot.role_id) as roleIds" + + " FROM " + + GROUP_TABLE_NAME + + " gt LEFT OUTER JOIN " + + GROUP_ROLE_RELATION_TABLE_NAME + + " rt ON rt.group_id = gt.group_id" + + " LEFT OUTER JOIN " + + ROLE_TABLE_NAME + + " rot ON rot.role_id = rt.role_id" + + " WHERE " + + " gt.deleted_at = 0 AND" + + " (rot.deleted_at = 0 OR rot.deleted_at is NULL) AND" + + " (rt.deleted_at = 0 OR rt.deleted_at is NULL) AND gt.metalake_id = #{metalakeId}" + + " GROUP BY gt.group_id"; + } + public String selectGroupMetaByMetalakeIdAndName( @Param("metalakeId") Long metalakeId, @Param("groupName") String name) { return "SELECT group_id as groupId, group_name as groupName," diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/h2/GroupMetaH2Provider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/h2/GroupMetaH2Provider.java new file mode 100644 index 00000000000..175d9d8ae9a --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/h2/GroupMetaH2Provider.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.storage.relational.mapper.provider.h2; + +import static org.apache.gravitino.storage.relational.mapper.GroupMetaMapper.GROUP_TABLE_NAME; +import static org.apache.gravitino.storage.relational.mapper.RoleMetaMapper.GROUP_ROLE_RELATION_TABLE_NAME; +import static org.apache.gravitino.storage.relational.mapper.RoleMetaMapper.ROLE_TABLE_NAME; + +import org.apache.gravitino.storage.relational.mapper.provider.base.GroupMetaBaseSQLProvider; +import org.apache.ibatis.annotations.Param; + +public class GroupMetaH2Provider extends GroupMetaBaseSQLProvider { + @Override + public String listExtendedGroupPOsByMetalakeId(@Param("metalakeId") Long metalakeId) { + return "SELECT gt.group_id as groupId, gt.group_name as groupName," + + " gt.metalake_id as metalakeId," + + " gt.audit_info as auditInfo," + + " gt.current_version as currentVersion, gt.last_version as lastVersion," + + " gt.deleted_at as deletedAt," + + " '[' || GROUP_CONCAT('\"' || rot.role_name || '\"') || ']' as roleNames," + + " '[' || GROUP_CONCAT('\"' || rot.role_id || '\"') || ']' as roleIds" + + " FROM " + + GROUP_TABLE_NAME + + " gt LEFT OUTER JOIN " + + GROUP_ROLE_RELATION_TABLE_NAME + + " rt ON rt.group_id = gt.group_id" + + " LEFT OUTER JOIN " + + ROLE_TABLE_NAME + + " rot ON rot.role_id = rt.role_id" + + " WHERE " + + " gt.deleted_at = 0 AND" + + " (rot.deleted_at = 0 OR rot.deleted_at is NULL) AND" + + " (rt.deleted_at = 0 OR rt.deleted_at is NULL) AND gt.metalake_id = #{metalakeId}" + + " GROUP BY gt.group_id"; + } +} diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/GroupMetaPostgreSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/GroupMetaPostgreSQLProvider.java index 4dddcad42be..51cf47bf7d7 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/GroupMetaPostgreSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/GroupMetaPostgreSQLProvider.java @@ -19,6 +19,8 @@ package org.apache.gravitino.storage.relational.mapper.provider.postgresql; import static org.apache.gravitino.storage.relational.mapper.GroupMetaMapper.GROUP_TABLE_NAME; +import static org.apache.gravitino.storage.relational.mapper.RoleMetaMapper.GROUP_ROLE_RELATION_TABLE_NAME; +import static org.apache.gravitino.storage.relational.mapper.RoleMetaMapper.ROLE_TABLE_NAME; import org.apache.gravitino.storage.relational.mapper.provider.base.GroupMetaBaseSQLProvider; import org.apache.gravitino.storage.relational.po.GroupPO; @@ -66,4 +68,28 @@ public String insertGroupMetaOnDuplicateKeyUpdate(GroupPO groupPO) { + " last_version = #{groupMeta.lastVersion}," + " deleted_at = #{groupMeta.deletedAt}"; } + + @Override + public String listExtendedGroupPOsByMetalakeId(Long metalakeId) { + return "SELECT gt.group_id as groupId, gt.group_name as groupName," + + " gt.metalake_id as metalakeId," + + " gt.audit_info as auditInfo," + + " gt.current_version as currentVersion, gt.last_version as lastVersion," + + " gt.deleted_at as deletedAt," + + " JSON_AGG(rot.role_name) as roleNames," + + " JSON_AGG(rot.role_id) as roleIds" + + " FROM " + + GROUP_TABLE_NAME + + " gt LEFT OUTER JOIN " + + GROUP_ROLE_RELATION_TABLE_NAME + + " rt ON rt.group_id = gt.group_id" + + " LEFT OUTER JOIN " + + ROLE_TABLE_NAME + + " rot ON rot.role_id = rt.role_id" + + " WHERE " + + " gt.deleted_at = 0 AND" + + " (rot.deleted_at = 0 OR rot.deleted_at is NULL) AND" + + " 
(rt.deleted_at = 0 OR rt.deleted_at is NULL) AND gt.metalake_id = #{metalakeId}" + " GROUP BY gt.group_id"; + } } diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/po/ExtendedGroupPO.java b/core/src/main/java/org/apache/gravitino/storage/relational/po/ExtendedGroupPO.java new file mode 100644 index 00000000000..390a0039833 --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/storage/relational/po/ExtendedGroupPO.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.storage.relational.po; + +import java.util.Objects; + +/** + * ExtendedGroupPO adds extra roleNames and roleIds to GroupPO. This PO is only used for reading + * data from multiple joined tables; it is never written to the database, so we don't need the + * inner Builder class. + */ +public class ExtendedGroupPO extends GroupPO { + + private String roleNames; + private String roleIds; + + public String getRoleNames() { + return roleNames; + } + + public String getRoleIds() { + return roleIds; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof ExtendedGroupPO)) { + return false; + } + ExtendedGroupPO that = (ExtendedGroupPO) o; + return Objects.equals(getRoleIds(), that.getRoleIds()) + && Objects.equals(getRoleNames(), that.getRoleNames()); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), getRoleIds(), getRoleNames()); + } +} diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/service/GroupMetaService.java b/core/src/main/java/org/apache/gravitino/storage/relational/service/GroupMetaService.java index 2ffc10dac59..4329b3a0a10 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/service/GroupMetaService.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/service/GroupMetaService.java @@ -32,6 +32,7 @@ import org.apache.gravitino.Entity; import org.apache.gravitino.HasIdentifier; import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Namespace; import org.apache.gravitino.authorization.AuthorizationUtils; import org.apache.gravitino.exceptions.NoSuchEntityException; import org.apache.gravitino.meta.GroupEntity; @@ -39,6 +40,7 @@ import org.apache.gravitino.storage.relational.mapper.GroupMetaMapper; import org.apache.gravitino.storage.relational.mapper.GroupRoleRelMapper; import org.apache.gravitino.storage.relational.mapper.OwnerMetaMapper; +import org.apache.gravitino.storage.relational.po.ExtendedGroupPO; import org.apache.gravitino.storage.relational.po.GroupPO; import org.apache.gravitino.storage.relational.po.GroupRoleRelPO; import org.apache.gravitino.storage.relational.po.RolePO; @@ -249,6 +251,36 @@ public GroupEntity
updateGroup( return newEntity; } + public List listGroupsByNamespace(Namespace namespace, boolean allFields) { + AuthorizationUtils.checkGroupNamespace(namespace); + String metalakeName = namespace.level(0); + + if (allFields) { + Long metalakeId = MetalakeMetaService.getInstance().getMetalakeIdByName(metalakeName); + List groupPOs = + SessionUtils.getWithoutCommit( + GroupMetaMapper.class, mapper -> mapper.listExtendedGroupPOsByMetalakeId(metalakeId)); + return groupPOs.stream() + .map( + po -> + POConverters.fromExtendedGroupPO( + po, AuthorizationUtils.ofGroupNamespace(metalakeName))) + .collect(Collectors.toList()); + } else { + List groupPOs = + SessionUtils.getWithoutCommit( + GroupMetaMapper.class, mapper -> mapper.listGroupPOsByMetalake(metalakeName)); + return groupPOs.stream() + .map( + po -> + POConverters.fromGroupPO( + po, + Collections.emptyList(), + AuthorizationUtils.ofGroupNamespace(metalakeName))) + .collect(Collectors.toList()); + } + } + public int deleteGroupMetasByLegacyTimeline(long legacyTimeline, int limit) { int[] groupDeletedCount = new int[] {0}; int[] groupRoleRelDeletedCount = new int[] {0}; diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java b/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java index da1f3d06a3b..f6392127b36 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java @@ -49,6 +49,7 @@ import org.apache.gravitino.meta.TopicEntity; import org.apache.gravitino.meta.UserEntity; import org.apache.gravitino.storage.relational.po.CatalogPO; +import org.apache.gravitino.storage.relational.po.ExtendedGroupPO; import org.apache.gravitino.storage.relational.po.ExtendedUserPO; import org.apache.gravitino.storage.relational.po.FilesetPO; import org.apache.gravitino.storage.relational.po.FilesetVersionPO; @@ -733,7 +734,7 @@ public static UserEntity fromUserPO(UserPO userPO, List rolePOs, Namespa /** * Convert {@link ExtendedUserPO} to {@link UserEntity} * - * @param userPO CombinedUserPo object to be converted + * @param userPO ExtendedUserPO object to be converted * @param namespace Namespace object to be associated with the user * @return UserEntity object from ExtendedUserPO object */ @@ -814,6 +815,57 @@ public static GroupEntity fromGroupPO( } } + /** + * Convert {@link ExtendedGroupPO} to {@link GroupEntity} + * + * @param groupPO ExtendedGroupPO object to be converted + * @param namespace Namespace object to be associated with the group + * @return GroupEntity object from ExtendedGroupPO object + */ + public static GroupEntity fromExtendedGroupPO(ExtendedGroupPO groupPO, Namespace namespace) { + try { + GroupEntity.Builder builder = + GroupEntity.builder() + .withId(groupPO.getGroupId()) + .withName(groupPO.getGroupName()) + .withNamespace(namespace) + .withAuditInfo( + JsonUtils.anyFieldMapper().readValue(groupPO.getAuditInfo(), AuditInfo.class)); + + if (StringUtils.isNotBlank(groupPO.getRoleNames())) { + List roleNamesFromJson = + JsonUtils.anyFieldMapper().readValue(groupPO.getRoleNames(), List.class); + List roleNames = + roleNamesFromJson.stream().filter(StringUtils::isNotBlank).collect(Collectors.toList()); + if (!roleNames.isEmpty()) { + builder.withRoleNames(roleNames); + } + } + + if (StringUtils.isNotBlank(groupPO.getRoleIds())) { + // Different JSON AGG functions in different backends produce data of different types, so we + // can only use Object.
PostSQL produces the data with type Long. H2 produces + // the data with type String. + List roleIdsFromJson = + JsonUtils.anyFieldMapper().readValue(groupPO.getRoleIds(), List.class); + List roleIds = + roleIdsFromJson.stream() + .filter(Objects::nonNull) + .map(String::valueOf) + .filter(StringUtils::isNotBlank) + .map(Long::valueOf) + .collect(Collectors.toList()); + + if (!roleIds.isEmpty()) { + builder.withRoleIds(roleIds); + } + } + return builder.build(); + } catch (JsonProcessingException e) { + throw new RuntimeException("Failed to deserialize json object:", e); + } + } + /** * Initialize UserRoleRelPO * diff --git a/core/src/test/java/org/apache/gravitino/authorization/TestAccessControlManager.java b/core/src/test/java/org/apache/gravitino/authorization/TestAccessControlManager.java index b299c15ef97..a2b6dace1d7 100644 --- a/core/src/test/java/org/apache/gravitino/authorization/TestAccessControlManager.java +++ b/core/src/test/java/org/apache/gravitino/authorization/TestAccessControlManager.java @@ -298,6 +298,28 @@ public void testGetGroup() { Assertions.assertTrue(exception.getMessage().contains("Group not-exist does not exist")); } + @Test + public void testListGroupss() { + accessControlManager.addGroup("metalake_list", "testList1"); + accessControlManager.addGroup("metalake_list", "testList2"); + + // Test to list groups + String[] expectGroupNames = new String[] {"testList1", "testList2"}; + String[] actualGroupNames = accessControlManager.listGroupNames("metalake_list"); + Arrays.sort(actualGroupNames); + Assertions.assertArrayEquals(expectGroupNames, actualGroupNames); + Group[] groups = accessControlManager.listGroups("metalake_list"); + Arrays.sort(groups, Comparator.comparing(Group::name)); + Assertions.assertArrayEquals( + expectGroupNames, Arrays.stream(groups).map(Group::name).toArray(String[]::new)); + + // Test with NoSuchMetalakeException + Assertions.assertThrows( + NoSuchMetalakeException.class, () -> accessControlManager.listGroupNames("no-exist")); + Assertions.assertThrows( + NoSuchMetalakeException.class, () -> accessControlManager.listGroups("no-exist")); + } + @Test public void testRemoveGroup() { accessControlManager.addGroup(METALAKE, "testRemove"); diff --git a/core/src/test/java/org/apache/gravitino/storage/relational/service/TestGroupMetaService.java b/core/src/test/java/org/apache/gravitino/storage/relational/service/TestGroupMetaService.java index 22246ba0cf3..77cd9d110bc 100644 --- a/core/src/test/java/org/apache/gravitino/storage/relational/service/TestGroupMetaService.java +++ b/core/src/test/java/org/apache/gravitino/storage/relational/service/TestGroupMetaService.java @@ -27,6 +27,7 @@ import java.sql.SQLException; import java.sql.Statement; import java.time.Instant; +import java.util.Comparator; import java.util.List; import java.util.Optional; import java.util.function.Function; @@ -119,6 +120,77 @@ void getGroupByIdentifier() throws IOException { Sets.newHashSet(group2.roleNames()), Sets.newHashSet(actualGroup.roleNames())); } + @Test + void testListGroups() throws IOException { + AuditInfo auditInfo = + AuditInfo.builder().withCreator("creator").withCreateTime(Instant.now()).build(); + BaseMetalake metalake = + createBaseMakeLake(RandomIdGenerator.INSTANCE.nextId(), metalakeName, auditInfo); + backend.insert(metalake, false); + + CatalogEntity catalog = + createCatalog( + RandomIdGenerator.INSTANCE.nextId(), Namespace.of(metalakeName), "catalog", auditInfo); + backend.insert(catalog, false); + + GroupEntity group1 = + createGroupEntity( + 
RandomIdGenerator.INSTANCE.nextId(), + AuthorizationUtils.ofGroupNamespace(metalakeName), + "group1", + auditInfo); + + RoleEntity role1 = + createRoleEntity( + RandomIdGenerator.INSTANCE.nextId(), + AuthorizationUtils.ofRoleNamespace("metalake"), + "role1", + auditInfo, + "catalog"); + backend.insert(role1, false); + + RoleEntity role2 = + createRoleEntity( + RandomIdGenerator.INSTANCE.nextId(), + AuthorizationUtils.ofRoleNamespace("metalake"), + "role2", + auditInfo, + "catalog"); + backend.insert(role2, false); + + GroupEntity group2 = + createGroupEntity( + RandomIdGenerator.INSTANCE.nextId(), + AuthorizationUtils.ofGroupNamespace("metalake"), + "group2", + auditInfo, + Lists.newArrayList(role1.name(), role2.name()), + Lists.newArrayList(role1.id(), role2.id())); + + backend.insert(group1, false); + backend.insert(group2, false); + + GroupMetaService groupMetaService = GroupMetaService.getInstance(); + List actualGroups = + groupMetaService.listGroupsByNamespace( + AuthorizationUtils.ofGroupNamespace(metalakeName), true); + actualGroups.sort(Comparator.comparing(GroupEntity::name)); + List expectGroups = Lists.newArrayList(group1, group2); + Assertions.assertEquals(expectGroups.size(), actualGroups.size()); + for (int index = 0; index < expectGroups.size(); index++) { + Assertions.assertEquals(expectGroups.get(index).name(), actualGroups.get(index).name()); + if (expectGroups.get(index).roleNames() == null) { + Assertions.assertNull(actualGroups.get(index).roleNames()); + } else { + Assertions.assertEquals( + expectGroups.get(index).roleNames().size(), actualGroups.get(index).roleNames().size()); + for (String roleName : expectGroups.get(index).roleNames()) { + Assertions.assertTrue(actualGroups.get(index).roleNames().contains(roleName)); + } + } + } + } + @Test void insertGroup() throws IOException { AuditInfo auditInfo = @@ -243,7 +315,7 @@ void insertGroup() throws IOException { GroupEntity group3Overwrite = createGroupEntity( group1.id(), - AuthorizationUtils.ofUserNamespace(metalakeName), + AuthorizationUtils.ofGroupNamespace(metalakeName), "group3Overwrite", auditInfo, Lists.newArrayList(role3.name()), @@ -260,7 +332,7 @@ void insertGroup() throws IOException { GroupEntity group4Overwrite = createGroupEntity( group1.id(), - AuthorizationUtils.ofUserNamespace(metalakeName), + AuthorizationUtils.ofGroupNamespace(metalakeName), "group4Overwrite", auditInfo); Assertions.assertDoesNotThrow(() -> groupMetaService.insertGroup(group4Overwrite, true)); @@ -779,7 +851,7 @@ void deleteGroupMetasByLegacyTimeline() throws IOException { GroupEntity group1 = createGroupEntity( RandomIdGenerator.INSTANCE.nextId(), - AuthorizationUtils.ofUserNamespace(metalakeName), + AuthorizationUtils.ofGroupNamespace(metalakeName), "group1", auditInfo, Lists.newArrayList(role1.name(), role2.name()), @@ -787,7 +859,7 @@ void deleteGroupMetasByLegacyTimeline() throws IOException { GroupEntity group2 = createGroupEntity( RandomIdGenerator.INSTANCE.nextId(), - AuthorizationUtils.ofUserNamespace(metalakeName), + AuthorizationUtils.ofGroupNamespace(metalakeName), "group2", auditInfo, Lists.newArrayList(role1.name(), role2.name()), @@ -795,7 +867,7 @@ void deleteGroupMetasByLegacyTimeline() throws IOException { GroupEntity group3 = createGroupEntity( RandomIdGenerator.INSTANCE.nextId(), - AuthorizationUtils.ofUserNamespace(metalakeName), + AuthorizationUtils.ofGroupNamespace(metalakeName), "group3", auditInfo, Lists.newArrayList(role1.name(), role2.name()), @@ -803,7 +875,7 @@ void deleteGroupMetasByLegacyTimeline() 
throws IOException { GroupEntity group4 = createGroupEntity( RandomIdGenerator.INSTANCE.nextId(), - AuthorizationUtils.ofUserNamespace(metalakeName), + AuthorizationUtils.ofGroupNamespace(metalakeName), "group4", auditInfo, Lists.newArrayList(role1.name(), role2.name()), diff --git a/server/src/main/java/org/apache/gravitino/server/web/rest/GroupOperations.java b/server/src/main/java/org/apache/gravitino/server/web/rest/GroupOperations.java index 537bafb9e78..12cf769932e 100644 --- a/server/src/main/java/org/apache/gravitino/server/web/rest/GroupOperations.java +++ b/server/src/main/java/org/apache/gravitino/server/web/rest/GroupOperations.java @@ -22,19 +22,24 @@ import com.codahale.metrics.annotation.Timed; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.DELETE; +import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.PathParam; import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import org.apache.gravitino.GravitinoEnv; import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Namespace; import org.apache.gravitino.authorization.AccessControlDispatcher; import org.apache.gravitino.authorization.AuthorizationUtils; import org.apache.gravitino.dto.requests.GroupAddRequest; +import org.apache.gravitino.dto.responses.GroupListResponse; import org.apache.gravitino.dto.responses.GroupResponse; +import org.apache.gravitino.dto.responses.NameListResponse; import org.apache.gravitino.dto.responses.RemoveResponse; import org.apache.gravitino.dto.util.DTOConverters; import org.apache.gravitino.lock.LockType; @@ -134,4 +139,31 @@ public Response removeGroup( return ExceptionHandlers.handleGroupException(OperationType.REMOVE, group, metalake, e); } } + + @GET + @Produces("application/vnd.gravitino.v1+json") + @Timed(name = "list-group." 
+ MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "list-group", absolute = true) + public Response listGroups( + @PathParam("metalake") String metalake, + @QueryParam("details") @DefaultValue("false") boolean verbose) { + LOG.info("Received list groups request."); + try { + return Utils.doAs( + httpRequest, + () -> { + if (verbose) { + return Utils.ok( + new GroupListResponse( + DTOConverters.toDTOs(accessControlManager.listGroups(metalake)))); + } else { + return Utils.ok(new NameListResponse(accessControlManager.listGroupNames(metalake))); + } + }); + + } catch (Exception e) { + return ExceptionHandlers.handleGroupException( + OperationType.LIST, Namespace.empty().toString(), metalake, e); + } + } } diff --git a/server/src/test/java/org/apache/gravitino/server/web/rest/TestGroupOperations.java b/server/src/test/java/org/apache/gravitino/server/web/rest/TestGroupOperations.java index c3b34bc6bff..77f0cf97988 100644 --- a/server/src/test/java/org/apache/gravitino/server/web/rest/TestGroupOperations.java +++ b/server/src/test/java/org/apache/gravitino/server/web/rest/TestGroupOperations.java @@ -34,6 +34,7 @@ import javax.ws.rs.core.Application; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.Status; import org.apache.commons.lang3.reflect.FieldUtils; import org.apache.gravitino.Config; import org.apache.gravitino.GravitinoEnv; @@ -43,7 +44,9 @@ import org.apache.gravitino.dto.requests.GroupAddRequest; import org.apache.gravitino.dto.responses.ErrorConstants; import org.apache.gravitino.dto.responses.ErrorResponse; +import org.apache.gravitino.dto.responses.GroupListResponse; import org.apache.gravitino.dto.responses.GroupResponse; +import org.apache.gravitino.dto.responses.NameListResponse; import org.apache.gravitino.dto.responses.RemoveResponse; import org.apache.gravitino.exceptions.GroupAlreadyExistsException; import org.apache.gravitino.exceptions.NoSuchGroupException; @@ -119,7 +122,7 @@ public void testAddGroup() { .accept("application/vnd.gravitino.v1+json") .post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE)); - Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); + Assertions.assertEquals(Status.OK.getStatusCode(), resp.getStatus()); Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp.getMediaType()); GroupResponse groupResponse = resp.readEntity(GroupResponse.class); @@ -138,7 +141,7 @@ public void testAddGroup() { .accept("application/vnd.gravitino.v1+json") .post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE)); - Assertions.assertEquals(Response.Status.NOT_FOUND.getStatusCode(), resp1.getStatus()); + Assertions.assertEquals(Status.NOT_FOUND.getStatusCode(), resp1.getStatus()); Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp1.getMediaType()); ErrorResponse errorResponse = resp1.readEntity(ErrorResponse.class); @@ -153,7 +156,7 @@ public void testAddGroup() { .accept("application/vnd.gravitino.v1+json") .post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE)); - Assertions.assertEquals(Response.Status.CONFLICT.getStatusCode(), resp2.getStatus()); + Assertions.assertEquals(Status.CONFLICT.getStatusCode(), resp2.getStatus()); ErrorResponse errorResponse1 = resp2.readEntity(ErrorResponse.class); Assertions.assertEquals(ErrorConstants.ALREADY_EXISTS_CODE, errorResponse1.getCode()); @@ -168,8 +171,7 @@ public void testAddGroup() { .accept("application/vnd.gravitino.v1+json") .post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE)); - 
Assertions.assertEquals( - Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), resp3.getStatus()); + Assertions.assertEquals(Status.INTERNAL_SERVER_ERROR.getStatusCode(), resp3.getStatus()); ErrorResponse errorResponse2 = resp3.readEntity(ErrorResponse.class); Assertions.assertEquals(ErrorConstants.INTERNAL_ERROR_CODE, errorResponse2.getCode()); @@ -241,14 +243,103 @@ public void testGetGroup() { Assertions.assertEquals(RuntimeException.class.getSimpleName(), errorResponse2.getType()); } - private Group buildGroup(String group) { - return GroupEntity.builder() - .withId(1L) - .withName(group) - .withRoleNames(Collections.emptyList()) - .withAuditInfo( - AuditInfo.builder().withCreator("creator").withCreateTime(Instant.now()).build()) - .build(); + @Test + public void testListGroupNames() { + when(manager.listGroupNames(any())).thenReturn(new String[] {"group"}); + + Response resp = + target("/metalakes/metalake1/groups/") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); + + NameListResponse listResponse = resp.readEntity(NameListResponse.class); + Assertions.assertEquals(0, listResponse.getCode()); + + Assertions.assertEquals(1, listResponse.getNames().length); + Assertions.assertEquals("group", listResponse.getNames()[0]); + + // Test to throw NoSuchMetalakeException + doThrow(new NoSuchMetalakeException("mock error")).when(manager).listGroupNames(any()); + Response resp1 = + target("/metalakes/metalake1/groups/") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + + Assertions.assertEquals(Response.Status.NOT_FOUND.getStatusCode(), resp1.getStatus()); + + ErrorResponse errorResponse = resp1.readEntity(ErrorResponse.class); + Assertions.assertEquals(ErrorConstants.NOT_FOUND_CODE, errorResponse.getCode()); + Assertions.assertEquals(NoSuchMetalakeException.class.getSimpleName(), errorResponse.getType()); + + // Test to throw internal RuntimeException + doThrow(new RuntimeException("mock error")).when(manager).listGroupNames(any()); + Response resp3 = + target("/metalakes/metalake1/groups") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + + Assertions.assertEquals( + Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), resp3.getStatus()); + + ErrorResponse errorResponse2 = resp3.readEntity(ErrorResponse.class); + Assertions.assertEquals(ErrorConstants.INTERNAL_ERROR_CODE, errorResponse2.getCode()); + Assertions.assertEquals(RuntimeException.class.getSimpleName(), errorResponse2.getType()); + } + + @Test + public void testListGroups() { + Group group = buildGroup("group"); + when(manager.listGroups(any())).thenReturn(new Group[] {group}); + + Response resp = + target("/metalakes/metalake1/groups/") + .queryParam("details", "true") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); + + GroupListResponse listResponse = resp.readEntity(GroupListResponse.class); + Assertions.assertEquals(0, listResponse.getCode()); + + Assertions.assertEquals(1, listResponse.getGroups().length); + Assertions.assertEquals(group.name(), listResponse.getGroups()[0].name()); + Assertions.assertEquals(group.roles(), listResponse.getGroups()[0].roles()); + + // Test to throw NoSuchMetalakeException + doThrow(new NoSuchMetalakeException("mock 
error")).when(manager).listGroups(any()); + Response resp1 = + target("/metalakes/metalake1/groups/") + .queryParam("details", "true") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + + Assertions.assertEquals(Response.Status.NOT_FOUND.getStatusCode(), resp1.getStatus()); + + ErrorResponse errorResponse = resp1.readEntity(ErrorResponse.class); + Assertions.assertEquals(ErrorConstants.NOT_FOUND_CODE, errorResponse.getCode()); + Assertions.assertEquals(NoSuchMetalakeException.class.getSimpleName(), errorResponse.getType()); + + // Test to throw internal RuntimeException + doThrow(new RuntimeException("mock error")).when(manager).listGroups(any()); + Response resp3 = + target("/metalakes/metalake1/groups") + .queryParam("details", "true") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .get(); + + Assertions.assertEquals( + Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), resp3.getStatus()); + + ErrorResponse errorResponse2 = resp3.readEntity(ErrorResponse.class); + Assertions.assertEquals(ErrorConstants.INTERNAL_ERROR_CODE, errorResponse2.getCode()); + Assertions.assertEquals(RuntimeException.class.getSimpleName(), errorResponse2.getType()); } @Test @@ -261,7 +352,7 @@ public void testRemoveGroup() { .accept("application/vnd.gravitino.v1+json") .delete(); - Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); + Assertions.assertEquals(Status.OK.getStatusCode(), resp.getStatus()); RemoveResponse removeResponse = resp.readEntity(RemoveResponse.class); Assertions.assertEquals(0, removeResponse.getCode()); Assertions.assertTrue(removeResponse.removed()); @@ -274,7 +365,7 @@ public void testRemoveGroup() { .accept("application/vnd.gravitino.v1+json") .delete(); - Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp2.getStatus()); + Assertions.assertEquals(Status.OK.getStatusCode(), resp2.getStatus()); RemoveResponse removeResponse2 = resp2.readEntity(RemoveResponse.class); Assertions.assertEquals(0, removeResponse2.getCode()); Assertions.assertFalse(removeResponse2.removed()); @@ -286,11 +377,20 @@ public void testRemoveGroup() { .accept("application/vnd.gravitino.v1+json") .delete(); - Assertions.assertEquals( - Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), resp3.getStatus()); + Assertions.assertEquals(Status.INTERNAL_SERVER_ERROR.getStatusCode(), resp3.getStatus()); ErrorResponse errorResponse = resp3.readEntity(ErrorResponse.class); Assertions.assertEquals(ErrorConstants.INTERNAL_ERROR_CODE, errorResponse.getCode()); Assertions.assertEquals(RuntimeException.class.getSimpleName(), errorResponse.getType()); } + + private Group buildGroup(String group) { + return GroupEntity.builder() + .withId(1L) + .withName(group) + .withRoleNames(Collections.emptyList()) + .withAuditInfo( + AuditInfo.builder().withCreator("creator").withCreateTime(Instant.now()).build()) + .build(); + } } From ee6dee617348216edd40dadf5a99b9bb8ad98316 Mon Sep 17 00:00:00 2001 From: Qi Yu Date: Fri, 27 Sep 2024 21:29:22 +0800 Subject: [PATCH 05/15] [#5005] improvement(docs): Add a document about how to create Hive S3 tables through Gravitino. (#5006) ### What changes were proposed in this pull request? Add a document about how to create Hive S3 tables through Gravitino ### Why are the changes needed? To enhance the user experience. Fix: #5005 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? 
N/A

---
 docs/apache-hive-catalog.md  |   6 +
 docs/hive-catalog-with-s3.md | 216 +++++++++++++++++++++++++++++++++++
 2 files changed, 222 insertions(+)
 create mode 100644 docs/hive-catalog-with-s3.md

diff --git a/docs/apache-hive-catalog.md b/docs/apache-hive-catalog.md
index 8dd6ed09467..6197c476cb8 100644
--- a/docs/apache-hive-catalog.md
+++ b/docs/apache-hive-catalog.md
@@ -197,3 +197,9 @@ As Gravitino has a separate interface for updating the comment of a table, the H
 :::note
 Support for altering partitions is under development.
 :::
+
+## Hive catalog with S3 storage
+
+To create a Hive catalog with S3 storage, you can refer to the [Hive catalog with S3](./hive-catalog-with-s3.md) documentation. No special configurations are required for the Hive catalog to work with S3 storage.
+The only difference is the storage location of the files, which is in S3. You can use `location` to specify the S3 path for the database or table.
+
diff --git a/docs/hive-catalog-with-s3.md b/docs/hive-catalog-with-s3.md
new file mode 100644
index 00000000000..2275fc30112
--- /dev/null
+++ b/docs/hive-catalog-with-s3.md
@@ -0,0 +1,216 @@
+---
+title: "Hive catalog with S3"
+slug: /hive-catalog
+date: 2024-9-24
+keyword: Hive catalog cloud storage S3
+license: "This software is licensed under the Apache License version 2."
+---
+
+## Introduction
+
+Since Hive 2.x, Hive has supported S3 as a storage backend, enabling users to store and manage data in Amazon S3 directly through Hive. Gravitino enhances this capability by supporting the Hive catalog with S3, allowing users to efficiently manage the storage locations of files located in S3. This integration simplifies data operations and enables seamless access to S3 data from Hive queries.
+
+The following sections will guide you through the necessary steps to configure the Hive catalog to utilize S3 as a storage backend, including configuration details and examples for creating databases and tables.
+
+## Hive metastore configuration
+
+To use the Hive catalog with S3, you must configure your Hive metastore to recognize S3 as a storage backend. The following example illustrates the required changes in the `hive-site.xml` configuration file:
+
+### Example Configuration Changes
+
+Below are the essential properties to add or modify in the `hive-site.xml` file to support S3:
+
+```xml
+<property>
+  <name>fs.s3a.access.key</name>
+  <value>S3_ACCESS_KEY_ID</value>
+</property>
+
+<property>
+  <name>fs.s3a.secret.key</name>
+  <value>S3_SECRET_KEY_ID</value>
+</property>
+
+<property>
+  <name>fs.s3a.endpoint</name>
+  <value>S3_ENDPOINT_ID</value>
+</property>
+
+<property>
+  <name>hive.metastore.warehouse.dir</name>
+  <value>S3_BUCKET_PATH</value>
+</property>
+```
+
+### Adding Required JARs
+
+After updating the `hive-site.xml`, you need to ensure that the necessary S3-related JARs are included in the Hive classpath. You can do this by executing the following command:
+
+```shell
+cp ${HADOOP_HOME}/share/hadoop/tools/lib/*aws* ${HIVE_HOME}/lib
+```
+
+Alternatively, you can download the required JARs from the Maven repository and place them in the Hive classpath. It is crucial to verify that the JARs are compatible with the version of Hadoop you are using to avoid any compatibility issues.
+
+### Restart Hive metastore
+
+Once all configurations have been correctly set, restart the Hive cluster to apply the changes. This step is essential to ensure that the new configurations take effect and that the Hive services can communicate with S3.
+
+## Creating Tables or Databases with S3 Storage using Gravitino
+
+Assuming you have already set up a Hive catalog with Gravitino, you can proceed to create tables or databases using S3 storage. For more information on catalog operations, refer to [Catalog operations](./manage-relational-metadata-using-gravitino.md#catalog-operations).
+
+### Example: Creating a Database with S3 Storage
+
+The following is an example of how to create a database in S3 using Gravitino:
+
+```shell
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+  "name": "hive_schema",
+  "comment": "comment",
+  "properties": {
+    "location": "s3a://bucket-name/path"
+  }
+}' http://localhost:8090/api/metalakes/metalake/catalogs/catalog/schemas
+```
+
+```java
+GravitinoClient gravitinoClient = GravitinoClient
+    .builder("http://localhost:8090")
+    .withMetalake("metalake")
+    .build();
+
+// Assuming you have just created a Hive catalog named `catalog`
+Catalog catalog = gravitinoClient.loadCatalog("catalog");
+
+SupportsSchemas supportsSchemas = catalog.asSchemas();
+
+Map<String, String> schemaProperties = ImmutableMap.<String, String>builder()
+    .put("location", "s3a://bucket-name/path")
+    .build();
+Schema schema = supportsSchemas.createSchema("hive_schema",
+    "This is a schema",
+    schemaProperties
+);
+// ...
+```
+
+After creating the database, you can proceed to create tables under this schema using S3 storage. For further details on table operations, please refer to [Table operations](./manage-relational-metadata-using-gravitino.md#table-operations).
+
+## Access tables with S3 storage by Hive CLI
+
+Assuming you have already created a table in the section [Creating Tables or Databases with S3 Storage using Gravitino](#creating-tables-or-databases-with-s3-storage-using-gravitino), let’s say the table is named `hive_table`. You can access the database/table and view its details using the Hive CLI as follows:
+
+```shell
+hive> show create database hive_schema;
+OK
+CREATE DATABASE `hive_schema`
+COMMENT
+  'comment'
+LOCATION
+  's3a://my-test-bucket/test-1727168792125'
+WITH DBPROPERTIES (
+  'gravitino.identifier'='gravitino.v1.uid2173913050348296645',
+  'key1'='val1',
+  'key2'='val2')
+Time taken: 0.019 seconds, Fetched: 9 row(s)
+hive> use hive_schema;
+OK
+Time taken: 0.019 seconds
+hive> show create table hive_table;
+OK
+CREATE TABLE `hive_table`(
+  `hive_col_name1` tinyint COMMENT 'col_1_comment',
+  `hive_col_name2` date COMMENT 'col_2_comment',
+  `hive_col_name3` string COMMENT 'col_3_comment')
+COMMENT 'table_comment'
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  's3a://my-test-bucket/test-1727168821335/hive_table'
+TBLPROPERTIES (
+  'EXTERNAL'='FALSE',
+  'gravitino.identifier'='gravitino.v1.uid292928775813252841',
+  'key1'='val1',
+  'key2'='val2',
+  'transient_lastDdlTime'='1727168821')
+Time taken: 0.071 seconds, Fetched: 19 row(s)
+hive> insert into hive_table values(1, '2022-11-12', 'hello');
+Query ID = root_20240924091305_58ab83c7-7091-4cc7-a0d9-fa44945f45c6
+Total jobs = 3
+Launching Job 1 out of 3
+Number of reduce tasks is set to 0 since there's no reduce operator
+Job running in-process (local Hadoop)
+2024-09-24 09:13:08,381 Stage-1 map = 100%, reduce = 0%
+Ended Job = job_local1096072998_0001
+Stage-4 is selected by condition resolver.
+Stage-3 is filtered out by condition resolver.
+Stage-5 is filtered out by condition resolver.
+Loading data to table hive_schema.hive_table
+MapReduce Jobs Launched:
+Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS
+Total MapReduce CPU Time Spent: 0 msec
+OK
+Time taken: 2.843 seconds
+hive> select * from hive_table;
+OK
+1	2022-11-12	hello
+Time taken: 0.116 seconds, Fetched: 1 row(s)
+```
+
+These commands show the details of the database `hive_schema` and the table `hive_table`, including their locations in S3 and the associated properties.
+
+## Accessing Tables with S3 Storage via Spark
+
+To access S3-stored tables using Spark, you need to configure the SparkSession appropriately. Below is an example of how to set up the SparkSession with the necessary S3 configurations:
+
+```java
+  SparkSession sparkSession =
+      SparkSession.builder()
+          .master("local[1]")
+          .appName("Hive Catalog integration test")
+          .config("hive.metastore.uris", HIVE_METASTORE_URIS)
+          .config("spark.hadoop.fs.s3a.access.key", accessKey)
+          .config("spark.hadoop.fs.s3a.secret.key", secretKey)
+          .config("spark.hadoop.fs.s3a.endpoint", getS3Endpoint)
+          .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
+          .config("spark.hadoop.fs.s3a.path.style.access", "true")
+          .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
+          .config(
+              "spark.hadoop.fs.s3a.aws.credentials.provider",
+              "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
+          .config("spark.sql.storeAssignmentPolicy", "LEGACY")
+          .config("mapreduce.input.fileinputformat.input.dir.recursive", "true")
+          .enableHiveSupport()
+          .getOrCreate();
+
+  sparkSession.sql("...");
+```
+
+:::note
+Please ensure that the necessary S3-related JAR files are included in the Spark classpath. If the JARs are missing, Spark will not be able to access the S3 storage.
+:::
+
+By following these instructions, you can effectively manage and access your S3-stored data through both Hive CLI and Spark, leveraging the capabilities of Gravitino for optimal data management.
\ No newline at end of file

From 5f97eabe6b5e19b2d41ce4bed49492c7f2ac9153 Mon Sep 17 00:00:00 2001
From: Jerry Shao
Date: Fri, 27 Sep 2024 21:55:37 +0800
Subject: [PATCH 06/15] [MINOR] improve(CI): Increase the Python timeout minutes to 45 minutes (#5037)

### What changes were proposed in this pull request?

Change the Python CI time to 45 minutes to reduce the chances of CI failure.

### Why are the changes needed?

The current Python CI running time can hit the timeout and fail the CI, so the timeout is increased to mitigate these failures.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

CI.
--- .github/workflows/python-integration-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-integration-test.yml b/.github/workflows/python-integration-test.yml index e29ed3fc46a..546aa928584 100644 --- a/.github/workflows/python-integration-test.yml +++ b/.github/workflows/python-integration-test.yml @@ -48,7 +48,7 @@ jobs: needs: changes if: needs.changes.outputs.source_changes == 'true' runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 45 strategy: matrix: # Integration test for AMD64 architecture @@ -92,4 +92,4 @@ jobs: distribution/package/logs/gravitino-server.out distribution/package/logs/gravitino-server.log catalogs/**/*.log - catalogs/**/*.tar \ No newline at end of file + catalogs/**/*.tar From 046d112fb511aa4b3b01ef9c4014e5af36a460f8 Mon Sep 17 00:00:00 2001 From: lsyulong Date: Sun, 29 Sep 2024 11:11:02 +0800 Subject: [PATCH 07/15] [#4956] fix(docs): Correct array and union and struct mapping document for Gravitino (#5039) ### What changes were proposed in this pull request? Fix the data type mapping in some catalog doc ### Why are the changes needed? Datatype in some catalog doc should be correct Fix: #4956 --- docs/apache-hive-catalog.md | 4 +-- docs/lakehouse-iceberg-catalog.md | 4 +-- docs/lakehouse-paimon-catalog.md | 44 +++++++++++++++---------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/apache-hive-catalog.md b/docs/apache-hive-catalog.md index 6197c476cb8..cc5c01fecb0 100644 --- a/docs/apache-hive-catalog.md +++ b/docs/apache-hive-catalog.md @@ -119,10 +119,10 @@ The following table lists the data types mapped from the Hive catalog to Graviti | `interval_year_month` | `interval_year` | 0.2.0 | | `interval_day_time` | `interval_day` | 0.2.0 | | `binary` | `binary` | 0.2.0 | -| `array` | `array` | 0.2.0 | +| `array` | `list` | 0.2.0 | | `map` | `map` | 0.2.0 | | `struct` | `struct` | 0.2.0 | -| `uniontype` | `uniontype` | 0.2.0 | +| `uniontype` | `union` | 0.2.0 | :::info Since 0.6.0, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type from the Hive catalog. diff --git a/docs/lakehouse-iceberg-catalog.md b/docs/lakehouse-iceberg-catalog.md index 7552e0f195a..1f77eb9eddd 100644 --- a/docs/lakehouse-iceberg-catalog.md +++ b/docs/lakehouse-iceberg-catalog.md @@ -287,7 +287,7 @@ Apache Iceberg doesn't support Gravitino `EvenDistribution` type. |-----------------------------|-----------------------------| | `Struct` | `Struct` | | `Map` | `Map` | -| `Array` | `Array` | +| `List` | `Array` | | `Boolean` | `Boolean` | | `Integer` | `Integer` | | `Long` | `Long` | @@ -300,7 +300,7 @@ Apache Iceberg doesn't support Gravitino `EvenDistribution` type. 
| `TimestampType withoutZone` | `TimestampType withoutZone` | | `Decimal` | `Decimal` | | `Fixed` | `Fixed` | -| `BinaryType` | `Binary` | +| `Binary` | `Binary` | | `UUID` | `UUID` | :::info diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md index 6eabd3e8fcd..fa1e04523a4 100644 --- a/docs/lakehouse-paimon-catalog.md +++ b/docs/lakehouse-paimon-catalog.md @@ -127,28 +127,28 @@ Paimon Table primary key constraint should not be same with partition fields, th ### Table column types -| Gravitino Type | Apache Paimon Type | -|-------------------------------|--------------------------------| -| `Sturct` | `Row` | -| `Map` | `Map` | -| `Array` | `Array` | -| `Boolean` | `Boolean` | -| `Byte` | `TinyInt` | -| `Short` | `SmallInt` | -| `Integer` | `Int` | -| `Long` | `BigInt` | -| `Float` | `Float` | -| `Double` | `Double` | -| `Decimal` | `Decimal` | -| `String` | `VarChar(Integer.MAX_VALUE)` | -| `VarChar` | `VarChar` | -| `FixedChar` | `Char` | -| `Date` | `Date` | -| `Time` | `Time` | -| `TimestampType withZone` | `LocalZonedTimestamp` | -| `TimestampType withoutZone` | `Timestamp` | -| `Binary` | `Binary` | -| `Fixed` | `VarBinary` | +| Gravitino Type | Apache Paimon Type | +|-----------------------------|--------------------------------| +| `Struct` | `Row` | +| `Map` | `Map` | +| `List` | `Array` | +| `Boolean` | `Boolean` | +| `Byte` | `TinyInt` | +| `Short` | `SmallInt` | +| `Integer` | `Int` | +| `Long` | `BigInt` | +| `Float` | `Float` | +| `Double` | `Double` | +| `Decimal` | `Decimal` | +| `String` | `VarChar(Integer.MAX_VALUE)` | +| `VarChar` | `VarChar` | +| `FixedChar` | `Char` | +| `Date` | `Date` | +| `Time` | `Time` | +| `TimestampType withZone` | `LocalZonedTimestamp` | +| `TimestampType withoutZone` | `Timestamp` | +| `Binary` | `Binary` | +| `Fixed` | `VarBinary` | :::info Gravitino doesn't support Paimon `MultisetType` type. From 3c5d20f88a07dde00f758cb513ff951da896db91 Mon Sep 17 00:00:00 2001 From: FANNG Date: Sun, 29 Sep 2024 14:04:32 +0800 Subject: [PATCH 08/15] [#4959] feat(Iceberg): support Iceberg REST extend API (#4987) ### What changes were proposed in this pull request? support Iceberg REST extend API ### Why are the changes needed? Fix: #4959 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? 1. add new operation classes with new URI PATH to `org.apache.gravitino.iceberg.service.rest2` 2. config `gravitino.iceberg-rest.extension-packages` to `org.apache.gravitino.iceberg.service.rest2` 3. start Gravitino IcebergRESTServer 4. check new URI is accessable. 
--- docs/iceberg-rest-service.md | 8 +- .../iceberg/common/IcebergConfig.java | 12 +++ .../apache/gravitino/iceberg/RESTService.java | 12 ++- .../test/TestIcebergExtendAPI.java | 91 +++++++++++++++++++ .../service/extension/HelloOperations.java | 49 ++++++++++ .../service/extension/HelloResponse.java | 41 +++++++++ .../integration/test/util/ITUtils.java | 10 ++ 7 files changed, 221 insertions(+), 2 deletions(-) create mode 100644 iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/TestIcebergExtendAPI.java create mode 100644 iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/extension/HelloOperations.java create mode 100644 iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/extension/HelloResponse.java diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md index a5760118cc1..0e6fecd19ad 100644 --- a/docs/iceberg-rest-service.md +++ b/docs/iceberg-rest-service.md @@ -75,7 +75,6 @@ Please note that, it only takes affect in `gravitino.conf`, you don't need to sp | `gravitino.iceberg-rest.responseHeaderSize` | The maximum size of an HTTP response. | `131072` | No | 0.2.0 | | `gravitino.iceberg-rest.customFilters` | Comma-separated list of filter class names to apply to the APIs. | (none) | No | 0.4.0 | - The filter in `customFilters` should be a standard javax servlet filter. You can also specify filter parameters by setting configuration entries in the style `gravitino.iceberg-rest..param.=`. @@ -302,6 +301,13 @@ Gravitino provides a pluggable metrics store interface to store and delete Icebe | `gravitino.iceberg-rest.metricsStoreRetainDays` | The days to retain Iceberg metrics in store, the value not greater than 0 means retain forever. | -1 | No | 0.4.0 | | `gravitino.iceberg-rest.metricsQueueCapacity` | The size of queue to store metrics temporally before storing to the persistent storage. Metrics will be dropped when queue is full. | 1000 | No | 0.4.0 | +### Misc configurations + +| Configuration item | Description | Default value | Required | Since Version | +|---------------------------------------------|--------------------------------------------------------------|---------------|----------|---------------| +| `gravitino.iceberg-rest.extension-packages` | Comma-separated list of Iceberg REST API packages to expand. 
| (none) | No | 0.7.0 | + + ## Starting the Iceberg REST server To start as an auxiliary service with Gravitino server: diff --git a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java index 64d327c7880..fd7b52050c3 100644 --- a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java +++ b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java @@ -19,7 +19,10 @@ package org.apache.gravitino.iceberg.common; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; +import java.util.Collections; +import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Config; @@ -35,6 +38,7 @@ public class IcebergConfig extends Config implements OverwriteDefaultConfig { public static final String ICEBERG_CONFIG_PREFIX = "gravitino.iceberg-rest."; + @VisibleForTesting public static final String ICEBERG_EXTENSION_PACKAGES = "extension-packages"; public static final int DEFAULT_ICEBERG_REST_SERVICE_HTTP_PORT = 9001; public static final int DEFAULT_ICEBERG_REST_SERVICE_HTTPS_PORT = 9433; @@ -221,6 +225,14 @@ public class IcebergConfig extends Config implements OverwriteDefaultConfig { .stringConf() .create(); + public static final ConfigEntry> REST_API_EXTENSION_PACKAGES = + new ConfigBuilder(ICEBERG_EXTENSION_PACKAGES) + .doc("Comma-separated list of Iceberg REST API packages to expand") + .version(ConfigConstants.VERSION_0_7_0) + .stringConf() + .toSequence() + .createWithDefault(Collections.emptyList()); + public String getJdbcDriver() { return get(JDBC_DRIVER); } diff --git a/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/RESTService.java b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/RESTService.java index 027b2614dfd..0592cfd9421 100644 --- a/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/RESTService.java +++ b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/RESTService.java @@ -18,6 +18,8 @@ */ package org.apache.gravitino.iceberg; +import com.google.common.collect.Lists; +import java.util.List; import java.util.Map; import javax.servlet.Servlet; import org.apache.gravitino.GravitinoEnv; @@ -47,6 +49,8 @@ public class RESTService implements GravitinoAuxiliaryService { public static final String SERVICE_NAME = "iceberg-rest"; public static final String ICEBERG_SPEC = "/iceberg/*"; + private static final String ICEBERG_REST_SPEC_PACKAGE = + "org.apache.gravitino.iceberg.service.rest"; private IcebergCatalogWrapperManager icebergCatalogWrapperManager; private IcebergMetricsManager icebergMetricsManager; @@ -58,7 +62,7 @@ private void initServer(IcebergConfig icebergConfig) { server.initialize(serverConfig, SERVICE_NAME, false /* shouldEnableUI */); ResourceConfig config = new ResourceConfig(); - config.packages("org.apache.gravitino.iceberg.service.rest"); + config.packages(getIcebergRESTPackages(icebergConfig)); config.register(IcebergObjectMapperProvider.class).register(JacksonFeature.class); config.register(IcebergExceptionMapper.class); @@ -127,4 +131,10 @@ public void join() { server.join(); } } + + private String[] getIcebergRESTPackages(IcebergConfig icebergConfig) { + List packages = Lists.newArrayList(ICEBERG_REST_SPEC_PACKAGE); + 
packages.addAll(icebergConfig.get(IcebergConfig.REST_API_EXTENSION_PACKAGES)); + return packages.toArray(new String[0]); + } } diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/TestIcebergExtendAPI.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/TestIcebergExtendAPI.java new file mode 100644 index 00000000000..9d975a6f9c5 --- /dev/null +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/TestIcebergExtendAPI.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.iceberg.integration.test; + +import java.util.HashMap; +import java.util.Map; +import org.apache.gravitino.iceberg.common.IcebergConfig; +import org.apache.gravitino.iceberg.integration.test.util.IcebergRESTServerManager; +import org.apache.gravitino.iceberg.service.extension.HelloOperations; +import org.apache.gravitino.iceberg.service.extension.HelloResponse; +import org.apache.gravitino.server.web.JettyServerConfig; +import org.apache.iceberg.rest.ErrorHandlers; +import org.apache.iceberg.rest.HTTPClient; +import org.apache.iceberg.rest.RESTClient; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; +import org.junit.jupiter.api.condition.EnabledIf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; + +@TestInstance(Lifecycle.PER_CLASS) +// We couldn't add REST extension package jar in deploy mode, so just test embedded mode. 
+@EnabledIf("org.apache.gravitino.integration.test.util.ITUtils#isEmbedded") +public class TestIcebergExtendAPI { + public static final Logger LOG = LoggerFactory.getLogger(TestIcebergExtendAPI.class); + private IcebergRESTServerManager icebergRESTServerManager; + private String uri; + + @BeforeAll + void initIcebergTestEnv() throws Exception { + this.icebergRESTServerManager = IcebergRESTServerManager.create(); + registerIcebergExtensionPackages(); + icebergRESTServerManager.startIcebergRESTServer(); + this.uri = String.format("http://127.0.0.1:%d/iceberg", getServerPort()); + LOG.info("Gravitino Iceberg REST server started, uri: {}", uri); + } + + @AfterAll + void stopIcebergTestEnv() { + icebergRESTServerManager.stopIcebergRESTServer(); + } + + @Test + void testExtendAPI() { + RESTClient client = HTTPClient.builder(ImmutableMap.of()).uri(uri).build(); + HelloResponse helloResponse = + client.get( + HelloOperations.HELLO_URI_PATH, + HelloResponse.class, + new HashMap(), + ErrorHandlers.defaultErrorHandler()); + Assertions.assertEquals(HelloOperations.HELLO_MSG, helloResponse.msg()); + } + + private void registerIcebergExtensionPackages() { + Map config = + ImmutableMap.of( + IcebergConfig.ICEBERG_CONFIG_PREFIX + IcebergConfig.ICEBERG_EXTENSION_PACKAGES, + HelloOperations.class.getPackage().getName()); + icebergRESTServerManager.registerCustomConfigs(config); + } + + private int getServerPort() { + JettyServerConfig jettyServerConfig = + JettyServerConfig.fromConfig( + icebergRESTServerManager.getServerConfig(), IcebergConfig.ICEBERG_CONFIG_PREFIX); + return jettyServerConfig.getHttpPort(); + } +} diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/extension/HelloOperations.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/extension/HelloOperations.java new file mode 100644 index 00000000000..bac8b30a098 --- /dev/null +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/extension/HelloOperations.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.iceberg.service.extension; + +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.Consumes; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.gravitino.iceberg.service.IcebergRestUtils; + +@Path("/hello") +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public class HelloOperations { + public static final String HELLO_URI_PATH = "hello"; + + public static final String HELLO_MSG = "hello"; + + @SuppressWarnings("UnusedVariable") + @Context + private HttpServletRequest httpRequest; + + @GET + @Produces(MediaType.APPLICATION_JSON) + public Response hello() { + return IcebergRestUtils.ok(new HelloResponse(HELLO_MSG)); + } +} diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/extension/HelloResponse.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/extension/HelloResponse.java new file mode 100644 index 00000000000..5db68f7756c --- /dev/null +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/extension/HelloResponse.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.iceberg.service.extension; + +import org.apache.iceberg.rest.RESTResponse; + +public class HelloResponse implements RESTResponse { + + private String msg; + + // Required for Jackson deserialization + public HelloResponse() {} + + public HelloResponse(String msg) { + this.msg = msg; + } + + @Override + public void validate() throws IllegalArgumentException {} + + public String msg() { + return msg; + } +} diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/ITUtils.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/ITUtils.java index 596b0c23c4f..e5454199f8a 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/ITUtils.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/ITUtils.java @@ -30,6 +30,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Properties; import java.util.stream.Collectors; import org.apache.commons.lang3.ArrayUtils; @@ -175,5 +176,14 @@ public static void assertPartition(Partition expected, Partition actual) { } } + public static boolean isEmbedded() { + String mode = + System.getProperty(TEST_MODE) == null + ? 
EMBEDDED_TEST_MODE + : System.getProperty(ITUtils.TEST_MODE); + + return Objects.equals(mode, ITUtils.EMBEDDED_TEST_MODE); + } + private ITUtils() {} } From a5e564497da0415e79366dbb74354395bdfff0d3 Mon Sep 17 00:00:00 2001 From: roryqi Date: Sun, 29 Sep 2024 17:56:47 +0800 Subject: [PATCH 09/15] [#5033] improvement(docs): Add the documents about access control (#5035) ### What changes were proposed in this pull request? Add the documents about access control. ### Why are the changes needed? Fix: #5033 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Just docs. --- docs/open-api/groups.yaml | 79 ++++++++++++ docs/open-api/openapi.yaml | 24 +++- docs/open-api/permissions.yaml | 152 ++++++++++++++++++++--- docs/open-api/roles.yaml | 69 +++++++++++ docs/open-api/tags.yaml | 21 +--- docs/open-api/users.yaml | 79 ++++++++++++ docs/security/access-control.md | 211 +++++++++++++++++++++++++++++++- 7 files changed, 598 insertions(+), 37 deletions(-) diff --git a/docs/open-api/groups.yaml b/docs/open-api/groups.yaml index a89370cc1ca..e2e8108dafa 100644 --- a/docs/open-api/groups.yaml +++ b/docs/open-api/groups.yaml @@ -23,6 +23,42 @@ paths: parameters: - $ref: "./openapi.yaml#/components/parameters/metalake" + get: + tags: + - access control + summary: List groups (names) + operationId: listGroups + parameters: + - $ref: "#/components/parameters/details" + responses: + "200": + description: Returns the list of groups if {details} is true, otherwise returns the list of group name + content: + application/vnd.gravitino.v1+json: + schema: + oneOf: + - $ref: "#/components/responses/GroupListResponse" + - $ref: "./openapi.yaml#/components/schemas/NameListResponse" + examples: + GroupListResponse: + $ref: "#/components/examples/GroupListResponse" + NameListResponse: + $ref: "#/components/examples/NameListResponse" + "400": + $ref: "./openapi.yaml#/components/responses/BadRequestErrorResponse" + "404": + description: Not Found - The specified metalake does not exist + content: + application/vnd.gravitino.v1+json: + schema: + $ref: "./openapi.yaml#/components/schemas/ErrorModel" + examples: + NoSuchMetalakeException: + $ref: "./metalakes.yaml#/components/examples/NoSuchMetalakeException" + + "5xx": + $ref: "./openapi.yaml#/components/responses/ServerErrorResponse" + post: tags: - access control @@ -108,6 +144,15 @@ paths: $ref: "./openapi.yaml#/components/responses/ServerErrorResponse" components: + parameters: + details: + name: details + in: query + description: Include detailed information about the group + required: false + schema: + type: boolean + default: false schemas: Group: @@ -148,12 +193,33 @@ components: group: $ref: "#/components/schemas/Group" + GroupListResponse: + type: object + properties: + code: + type: integer + format: int32 + description: Status code of the response + enum: + - 0 + groups: + type: array + description: A list of group objects + items: + $ref: "#/components/schemas/Group" + examples: GroupAddRequest: value: { "name": "group1", } + NameListResponse: + value: { + "code": 0, + "names": [ "group1", "group2" ] + } + GroupResponse: value: { "code": 0, @@ -167,6 +233,19 @@ components: } } + GroupListResponse: + value: { + "code": 0, + "groups": [{ + "name": "group1", + "roles": [], + "audit": { + "creator": "gravitino", + "createTime": "2023-12-08T06:41:25.595Z" + }, + }] + } + GroupAlreadyExistsException: value: { "code": 1004, diff --git a/docs/open-api/openapi.yaml b/docs/open-api/openapi.yaml index 2c8ab1bfefd..0ce8f23a845 100644 --- 
a/docs/open-api/openapi.yaml
+++ b/docs/open-api/openapi.yaml
@@ -65,6 +65,9 @@ paths:
   /metalakes/{metalake}/objects/{metadataObjectType}/{metadataObjectFullName}/tags:
     $ref: "./tags.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1objects~1%7BmetadataObjectType%7D~1%7BmetadataObjectFullName%7D~1tags"
 
+  /metalakes/{metalake}/objects/{metadataObjectType}/{metadataObjectFullName}/roles:
+    $ref: "./roles.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1objects~1%7BmetadataObjectType%7D~1%7BmetadataObjectFullName%7D~1roles"
+
   /metalakes/{metalake}/objects/{metadataObjectType}/{metadataObjectFullName}/tags/{tag}:
     $ref: "./tags.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1objects~1%7BmetadataObjectType%7D~1%7BmetadataObjectFullName%7D~1tags~1%7Btag%7D"
 
@@ -144,6 +147,12 @@ paths:
   /metalakes/{metalake}/permissions/groups/{group}/revoke:
     $ref: "./permissions.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1permissions~1groups~1%7Bgroup%7D~1revoke"
 
+  /metalakes/{metalake}/permissions/roles/{role}/{metadataObjectType}/{metadataObjectFullName}/grant:
+    $ref: "./permissions.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1permissions~1roles~1%7Brole%7D~1%7BmetadataObjectType%7D~1%7BmetadataObjectFullName%7D~1grant"
+
+  /metalakes/{metalake}/permissions/roles/{role}/{metadataObjectType}/{metadataObjectFullName}/revoke:
+    $ref: "./permissions.yaml#/paths/~1metalakes~1%7Bmetalake%7D~1permissions~1roles~1%7Brole%7D~1%7BmetadataObjectType%7D~1%7BmetadataObjectFullName%7D~1revoke"
+
 components:
 
   schemas:
@@ -208,8 +217,21 @@ components:
       type: string
       description: The name of the entity
 
-  responses:
+    NameListResponse:
+      type: object
+      properties:
+        code:
+          type: integer
+          format: int32
+          description: Status code of the response
+          enum:
+            - 0
+        names:
+          type: array
+          items:
+            type: string
 
+  responses:
     EntityListResponse:
       description: A list of entities
       content:
diff --git a/docs/open-api/permissions.yaml b/docs/open-api/permissions.yaml
index 6aa2b151870..1a19a9e2be3 100644
--- a/docs/open-api/permissions.yaml
+++ b/docs/open-api/permissions.yaml
@@ -207,23 +207,108 @@ paths:
       "5xx":
         $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
 
+  /metalakes/{metalake}/permissions/roles/{role}/{metadataObjectType}/{metadataObjectFullName}/grant:
+    parameters:
+      - $ref: "./openapi.yaml#/components/parameters/metalake"
+      - $ref: "./openapi.yaml#/components/parameters/role"
+      - $ref: "./openapi.yaml#/components/parameters/metadataObjectType"
+      - $ref: "./openapi.yaml#/components/parameters/metadataObjectFullName"
+
+    put:
+      tags:
+        - access control
+      summary: Grant privileges to a role
+      operationId: grantPrivilegeToRole
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/PrivilegeGrantRequest"
+            examples:
+              PrivilegeGrantRequest:
+                $ref: "#/components/examples/PrivilegeGrantRequest"
+
+      responses:
+        "200":
+          description: Returns the granted role object
+          content:
+            application/vnd.gravitino.v1+json:
+              schema:
+                $ref: "./roles.yaml#/components/responses/RoleResponse"
+              examples:
+                RoleResponse:
+                  $ref: "./roles.yaml#/components/examples/RoleResponse"
+
+        "404":
+          description: Not Found - The specified metadata object or role does not exist in the specified metalake
+          content:
+            application/vnd.gravitino.v1+json:
+              schema:
+                $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+              examples:
+                NoSuchMetalakeException:
+                  $ref: "./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+                NoSuchMetadataObjectException:
+                  $ref: "./roles.yaml#/components/examples/NoSuchMetadataObjectException"
+                NoSuchRoleException:
+                  $ref: "./roles.yaml#/components/examples/NoSuchRoleException"
+
+        "5xx":
+          $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+  /metalakes/{metalake}/permissions/roles/{role}/{metadataObjectType}/{metadataObjectFullName}/revoke:
+    parameters:
+      - $ref: "./openapi.yaml#/components/parameters/metalake"
+      - $ref: "./openapi.yaml#/components/parameters/role"
+      - $ref: "./openapi.yaml#/components/parameters/metadataObjectType"
+      - $ref: "./openapi.yaml#/components/parameters/metadataObjectFullName"
+
+    put:
+      tags:
+        - access control
+      summary: Revoke privileges from a role
+      operationId: revokePrivilegeFromRole
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/PrivilegeRevokeRequest"
+            examples:
+              PrivilegeRevokeRequest:
+                $ref: "#/components/examples/PrivilegeRevokeRequest"
+
+      responses:
+        "200":
+          description: Returns the revoked role object
+          content:
+            application/vnd.gravitino.v1+json:
+              schema:
+                $ref: "./roles.yaml#/components/responses/RoleResponse"
+              examples:
+                RoleResponse:
+                  $ref: "./roles.yaml#/components/examples/RoleResponse"
+
+        "404":
+          description: Not Found - The specified metadata object or role does not exist in the specified metalake
+          content:
+            application/vnd.gravitino.v1+json:
+              schema:
+                $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+              examples:
+                NoSuchMetalakeException:
+                  $ref: "./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+                NoSuchMetadataObjectException:
+                  $ref: "./roles.yaml#/components/examples/NoSuchMetadataObjectException"
+                NoSuchRoleException:
+                  $ref: "./roles.yaml#/components/examples/NoSuchRoleException"
+
+        "5xx":
+          $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"
+
+
 components:
 
   schemas:
 
-    User:
-      type: object
-      required:
-        - name
-      properties:
-        name:
-          type: string
-          description: The name of the user
-        roles:
-          type: array
-          items:
-            type: string
-          description: The roles of the user
-
     RoleGrantRequest:
       type: object
       required:
@@ -246,6 +331,27 @@ components:
           items:
            type: string
 
+    PrivilegeGrantRequest:
+      type: object
+      required:
+        - privileges
+      properties:
+        privileges:
+          type: array
+          description: The privileges to grant
+          items:
+            $ref: "./roles.yaml#/components/schemas/Privilege"
+
+    PrivilegeRevokeRequest:
+      type: object
+      required:
+        - privileges
+      properties:
+        privileges:
+          type: array
+          description: The privileges to revoke
+          items:
+            $ref: "./roles.yaml#/components/schemas/Privilege"
 
   examples:
 
@@ -257,4 +363,22 @@ components:
     RoleRevokeRequest:
       value: {
         "roleNames": [ "role1" ],
+      }
+
+    PrivilegeGrantRequest:
+      value: {
+        "privileges": [
+          {
+            "name": "SELECT_TABLE",
+            "condition": "ALLOW"
+          } ]
+      }
+
+    PrivilegeRevokeRequest:
+      value: {
+        "privileges": [
+          {
+            "name": "SELECT_TABLE",
+            "condition": "ALLOW"
+          } ]
       }
\ No newline at end of file
diff --git a/docs/open-api/roles.yaml b/docs/open-api/roles.yaml
index e5636bb334f..8bc452a2082 100644
--- a/docs/open-api/roles.yaml
+++ b/docs/open-api/roles.yaml
@@ -23,6 +23,33 @@ paths:
     parameters:
       - $ref: "./openapi.yaml#/components/parameters/metalake"
 
+    get:
+      tags:
+        - access control
+      summary: List role names
+      operationId: listRoles
+      responses:
+        "200":
+          description: Returns the list of role names
+          content:
+            application/vnd.gravitino.v1+json:
+              schema:
+                $ref: "./openapi.yaml#/components/schemas/NameListResponse"
+              examples:
+                NameListResponse:
+                  $ref: "#/components/examples/NameListResponse"
+        "400":
+          $ref: "./openapi.yaml#/components/responses/BadRequestErrorResponse"
+        "404":
description: Not Found - The specified metalake does not exist
+          content:
+            application/vnd.gravitino.v1+json:
+              schema:
+                $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+              examples:
+                NoSuchMetalakeException:
+                  $ref: "./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+
     post:
       tags:
         - access control
@@ -118,6 +145,42 @@ paths:
       "5xx":
         $ref: "./openapi.yaml#/components/responses/ServerErrorResponse"

+  /metalakes/{metalake}/objects/{metadataObjectType}/{metadataObjectFullName}/roles:
+    parameters:
+      - $ref: "./openapi.yaml#/components/parameters/metalake"
+      - $ref: "./openapi.yaml#/components/parameters/metadataObjectType"
+      - $ref: "./openapi.yaml#/components/parameters/metadataObjectFullName"
+
+    get:
+      tags:
+        - access control
+      summary: List role names for metadata object
+      operationId: listRolesForObject
+      responses:
+        "200":
+          description: Returns the list of role names for the metadata object
+          content:
+            application/vnd.gravitino.v1+json:
+              schema:
+                $ref: "./openapi.yaml#/components/schemas/NameListResponse"
+              examples:
+                NameListResponse:
+                  $ref: "#/components/examples/NameListResponse"
+        "400":
+          $ref: "./openapi.yaml#/components/responses/BadRequestErrorResponse"
+        "404":
+          description: Not Found - The specified metalake does not exist
+          content:
+            application/vnd.gravitino.v1+json:
+              schema:
+                $ref: "./openapi.yaml#/components/schemas/ErrorModel"
+              examples:
+                NoSuchMetalakeException:
+                  $ref: "./metalakes.yaml#/components/examples/NoSuchMetalakeException"
+                NoSuchMetadataObjectException:
+                  $ref: "#/components/examples/NoSuchMetadataObjectException"
+
+
 components:

   schemas:

@@ -306,4 +369,10 @@ components:
         "org.apache.gravitino.exceptions.NoSuchUserException: Metadata object does not exist",
         "..."
       ]
+    }
+
+    NameListResponse:
+      value: {
+        "code": 0,
+        "names": [ "role1", "role2" ]
 }
\ No newline at end of file
diff --git a/docs/open-api/tags.yaml b/docs/open-api/tags.yaml
index 9419b8f6eb6..42d45c2a1d7 100644
--- a/docs/open-api/tags.yaml
+++ b/docs/open-api/tags.yaml
@@ -36,7 +36,7 @@ paths:
           application/vnd.gravitino.v1+json:
             schema:
               oneOf:
-                - $ref: "#/components/responses/NameListResponse"
+                - $ref: "./openapi.yaml#/components/schemas/NameListResponse"
                 - $ref: "#/components/responses/TagListResponse"
             examples:
               NameListResponse:
@@ -197,7 +197,7 @@ paths:
           application/vnd.gravitino.v1+json:
             schema:
               oneOf:
-                - $ref: "#/components/responses/NameListResponse"
+                - $ref: "./openapi.yaml#/components/schemas/NameListResponse"
                 - $ref: "#/components/responses/TagListResponse"
             examples:
               NameListResponse:
@@ -228,7 +228,7 @@ paths:
         content:
           application/vnd.gravitino.v1+json:
             schema:
-              $ref: "#/components/responses/NameListResponse"
+              $ref: "./openapi.yaml#/components/schemas/NameListResponse"
             examples:
               NameListResponse:
                 $ref: "#/components/examples/NameListResponse"
@@ -517,21 +517,6 @@ components:
         nullable: true

   responses:
-
-    NameListResponse:
-      type: object
-      properties:
-        code:
-          type: integer
-          format: int32
-          description: Status code of the response
-          enum:
-            - 0
-        names:
-          type: array
-          items:
-            type: string
-
     TagListResponse:
       type: object
       properties:
diff --git a/docs/open-api/users.yaml b/docs/open-api/users.yaml
index 7b439cadffb..40e3a25cb0f 100644
--- a/docs/open-api/users.yaml
+++ b/docs/open-api/users.yaml
@@ -23,6 +23,42 @@ paths:
     parameters:
       - $ref: "./openapi.yaml#/components/parameters/metalake"

+    get:
+      tags:
+        - access control
+      summary: List users
+      operationId: listUsers
+      parameters:
+        - $ref: "#/components/parameters/details"
+      responses:
+
"200": + description: Returns the list of users if {details} is true, otherwise returns the list of user name + content: + application/vnd.gravitino.v1+json: + schema: + oneOf: + - $ref: "#/components/responses/UserListResponse" + - $ref: "./openapi.yaml#/components/schemas/NameListResponse" + examples: + UserListResponse: + $ref: "#/components/examples/UserListResponse" + NameListResponse: + $ref: "#/components/examples/NameListResponse" + "400": + $ref: "./openapi.yaml#/components/responses/BadRequestErrorResponse" + "404": + description: Not Found - The specified metalake does not exist + content: + application/vnd.gravitino.v1+json: + schema: + $ref: "./openapi.yaml#/components/schemas/ErrorModel" + examples: + NoSuchMetalakeException: + $ref: "./metalakes.yaml#/components/examples/NoSuchMetalakeException" + + "5xx": + $ref: "./openapi.yaml#/components/responses/ServerErrorResponse" + post: tags: - access control @@ -108,6 +144,15 @@ paths: $ref: "./openapi.yaml#/components/responses/ServerErrorResponse" components: + parameters: + details: + name: details + in: query + description: Include detailed information about the user + required: false + schema: + type: boolean + default: false schemas: User: @@ -148,12 +193,33 @@ components: user: $ref: "#/components/schemas/User" + UserListResponse: + type: object + properties: + code: + type: integer + format: int32 + description: Status code of the response + enum: + - 0 + users: + type: array + description: A list of user objects + items: + $ref: "#/components/schemas/User" + examples: UserAddRequest: value: { "name": "user1", } + NameListResponse: + value: { + "code": 0, + "names": [ "user1", "user2" ] + } + UserResponse: value: { "code": 0, @@ -167,6 +233,19 @@ components: } } + UserListResponse: + value: { + "code": 0, + "users": [ { + "name": "user1", + "roles": [ ], + "audit": { + "creator": "gravitino", + "createTime": "2023-12-08T06:41:25.595Z" + }, + } ] + } + UserAlreadyExistsException: value: { "code": 1004, diff --git a/docs/security/access-control.md b/docs/security/access-control.md index 91eb53725d5..9d98c0a3701 100644 --- a/docs/security/access-control.md +++ b/docs/security/access-control.md @@ -152,16 +152,16 @@ You can also create a dedicated role for your business by API or the client. ### Permission privileges -| Name | Supports Securable Object | Operation | -|---------------|---------------------------|------------------------| -| MANAGE_GRANTS | Metalake | grant or revoke a role | +| Name | Supports Securable Object | Operation | +|---------------|---------------------------|---------------------------------------------------------------------------------------------------------------| +| MANAGE_GRANTS | Metalake | Manages roles granted to or revoked from the user or group, and privilege granted to or revoked from the role | ### Catalog privileges | Name | Supports Securable Object | Operation | |----------------|---------------------------|------------------| | CREATE_CATALOG | Metalake | Create a catalog | -| USE_CATALOG | Metalake, Catalog | | +| USE_CATALOG | Metalake, Catalog | Use a catalog | :::info @@ -282,6 +282,35 @@ User user = +### List users + +You can list the created users in a metalake. +Returns the list of users if details is true, otherwise returns the list of user name. 
+
+
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" http://localhost:8090/api/metalakes/test/users/
+
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" http://localhost:8090/api/metalakes/test/users/?details=true
+```
+
+
+
+```java
+GravitinoClient client = ...
+String[] usernames = client.listUserNames();
+
+User[] users = client.listUsers();
+```
+
+
+
 ### Get a user

 You can get a user by its name.
@@ -358,6 +387,35 @@ Group group =

+### List groups
+
+You can list the created groups in a metalake.
+Returns the list of groups if `details` is true, otherwise returns the list of group names.
+
+
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" http://localhost:8090/api/metalakes/test/groups/
+
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" http://localhost:8090/api/metalakes/test/groups/?details=true
+```
+
+
+
+```java
+GravitinoClient client = ...
+String[] groupNames = client.listGroupNames();
+
+Group[] groups = client.listGroups();
+```
+
+
+
 ### Get a group

 You can get a group by its name.
@@ -457,6 +515,60 @@ Role role =

+### List roles
+
+You can list the created roles in a metalake.
+
+
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" http://localhost:8090/api/metalakes/test/roles/
+```
+
+
+
+```java
+GravitinoClient client = ...
+String[] roleNames = client.listRoleNames();
+```
+
+
+
+### List roles for the metadata object
+
+You can list the binding roles for a metadata object in a metalake.
+
+The request path for REST API is `/api/metalakes/{metalake}/objects/{metadataObjectType}/{metadataObjectName}/roles`.
+
+
+
+```shell
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+http://localhost:8090/api/metalakes/test/objects/catalog/catalog1/roles
+
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+http://localhost:8090/api/metalakes/test/objects/schema/catalog1.schema1/roles
+```
+
+
+
+```java
+Catalog catalog1 = ...
+String[] catalogRoles = catalog1.supportsRoles().listBindingRoleNames();
+
+Schema schema1 = ...
+String[] schemaRoles = schema1.supportsRoles().listBindingRoleNames();
+```
+
+
+
 ### Get a role

 You can get a role by its name.
@@ -507,6 +619,97 @@ boolean deleted =

 ## Permission Operation

+### Grant privileges to a role
+
+You can grant specific privileges to a role.
+The request path for REST API is `/api/metalakes/{metalake}/permissions/roles/{role}/{metadataObjectType}/{metadataObjectName}/grant`.
+
+
+
+```shell
+curl -X PUT -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+  "privileges": [
+    {
+      "name": "SELECT_TABLE",
+      "condition": "ALLOW"
+    }]
+}' http://localhost:8090/api/metalakes/test/permissions/roles/role1/schema/catalog1.schema1/grant
+
+curl -X PUT -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+  "privileges": [
+    {
+      "name": "SELECT_TABLE",
+      "condition": "ALLOW"
+    }]
+}' http://localhost:8090/api/metalakes/test/permissions/roles/role1/table/catalog1.schema1.table1/grant
+```
+
+
+
+```java
+GravitinoClient client = ...
+
+// Grant the privilege allowing `SELECT_TABLE` for the `schema` to `role1`
+MetadataObject schema = ...
+Role role = client.grantPrivilegesToRole("role1", schema, Lists.newArrayList(Privileges.SelectTable.allow()));
+
+// Grant the privilege allowing `SELECT_TABLE` for the `table` to `role1`
+MetadataObject table = ...
+Role role = client.grantPrivilegesToRole("role1", table, Lists.newArrayList(Privileges.SelectTable.allow()));
+```
+
+
+
+### Revoke privileges from a role
+
+You can revoke specific privileges from a role.
+The request path for REST API is `/api/metalakes/{metalake}/permissions/roles/{role}/{metadataObjectType}/{metadataObjectName}/revoke`.
+
+
+
+```shell
+curl -X PUT -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+  "privileges": [
+    {
+      "name": "SELECT_TABLE",
+      "condition": "ALLOW"
+    }]
+}' http://localhost:8090/api/metalakes/test/permissions/roles/role1/schema/catalog1.schema1/revoke
+
+curl -X PUT -H "Accept: application/vnd.gravitino.v1+json" \
+-H "Content-Type: application/json" -d '{
+  "privileges": [
+    {
+      "name": "SELECT_TABLE",
+      "condition": "ALLOW"
+    }]
+}' http://localhost:8090/api/metalakes/test/permissions/roles/role1/table/catalog1.schema1.table1/revoke
+```
+
+
+
+```java
+GravitinoClient client = ...
+
+// Revoke the privilege allowing `SELECT_TABLE` for the `schema` from `role1`
+MetadataObject schema = ...
+Role role = client.revokePrivilegesFromRole("role1", schema, Lists.newArrayList(Privileges.SelectTable.allow()));
+
+// Revoke the privilege allowing `SELECT_TABLE` for the `table` from `role1`
+MetadataObject table = ...
+Role role = client.revokePrivilegesFromRole("role1", table, Lists.newArrayList(Privileges.SelectTable.allow()));
+
+```
+
+
 ### Grant roles to a user

 You can grant specific roles to a user.

From 9d0f58c2ef17b81259d4e5e43d840fe84cb42a81 Mon Sep 17 00:00:00 2001
From: roryqi
Date: Sun, 29 Sep 2024 19:14:15 +0800
Subject: [PATCH 10/15] [#5045] improvement(docs): Change the version of OpenAPI (#5046)

### What changes were proposed in this pull request?

Change the version of OpenAPI

### Why are the changes needed?

Fix: #5045

### Does this PR introduce _any_ user-facing change?

No need.

### How was this patch tested?

No need.
---
 docs/open-api/openapi.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/open-api/openapi.yaml b/docs/open-api/openapi.yaml
index 0ce8f23a845..0b16270c126 100644
--- a/docs/open-api/openapi.yaml
+++ b/docs/open-api/openapi.yaml
@@ -22,7 +22,7 @@ info:
   license:
     name: Apache 2.0
     url: https://www.apache.org/licenses/LICENSE-2.0.html
-  version: 0.4.0
+  version: 0.7.0-incubating-SNAPSHOT
   description: |
     Defines the specification for the first version of the Gravitino REST API.

From 328efb8e2208813e3538befd2098f64a3f7306ac Mon Sep 17 00:00:00 2001
From: Qi Yu
Date: Sun, 29 Sep 2024 23:19:48 +0800
Subject: [PATCH 11/15] [#4988] fix(doris-catalog): Fix the missing distribution information when loading Doris tables (#4991)

### What changes were proposed in this pull request?

Loading distribution information when obtaining Doris tables.

### Why are the changes needed?

It's a bug to be fixed.

Fix: #4988

### Does this PR introduce _any_ user-facing change?

N/A.

### How was this patch tested?

IT.
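For illustration, a minimal sketch of the user-visible effect with the Java client; the metalake, catalog, schema, and table names below are hypothetical, not taken from this patch:

```java
// Assumes a metalake "test" that contains a JDBC Doris catalog "doris_catalog".
GravitinoClient client =
    GravitinoClient.builder("http://localhost:8090").withMetalake("test").build();

Table table =
    client
        .loadCatalog("doris_catalog")
        .asTableCatalog()
        .loadTable(NameIdentifier.of("schema1", "table1"));

// Before this fix the loaded Doris table dropped its DISTRIBUTED BY information;
// now the strategy, bucket number, and distribution columns are populated.
Distribution distribution = table.distribution();
System.out.println(distribution.strategy()); // e.g. HASH
System.out.println(distribution.number());   // e.g. 8 (bucket count)
```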
--- .../expressions/distributions/Strategy.java | 26 ++++++---- .../catalog/jdbc/JdbcCatalogOperations.java | 1 + .../gravitino/catalog/jdbc/JdbcTable.java | 1 + .../jdbc/operation/JdbcTableOperations.java | 23 ++++++++- .../doris/operation/DorisTableOperations.java | 19 ++++++- .../catalog/doris/utils/DorisUtils.java | 44 +++++++++++++++++ .../integration/test/CatalogDorisIT.java | 43 ++++++++++++++++ .../operation/TestDorisTableOperations.java | 49 +++++++++++++++++++ .../gravitino/dto/rel/PartitionUtils.java | 2 +- docs/jdbc-doris-catalog.md | 9 ++++ ...age-relational-metadata-using-gravitino.md | 8 +-- ...ioning-distribution-sort-order-indexes.md} | 20 ++++---- .../lakehouse-iceberg/00000_create_table.txt | 2 +- 13 files changed, 219 insertions(+), 28 deletions(-) rename docs/{table-partitioning-bucketing-sort-order-indexes.md => table-partitioning-distribution-sort-order-indexes.md} (91%) diff --git a/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Strategy.java b/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Strategy.java index 3e2aa7f4806..76b01fead45 100644 --- a/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Strategy.java +++ b/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Strategy.java @@ -59,15 +59,23 @@ public enum Strategy { * @return The distribution strategy. */ public static Strategy getByName(String name) { - for (Strategy strategy : Strategy.values()) { - if (strategy.name().equalsIgnoreCase(name)) { - return strategy; - } + String upperName = name.toUpperCase(); + switch (upperName) { + case "NONE": + return NONE; + case "HASH": + return HASH; + case "RANGE": + return RANGE; + case "EVEN": + case "RANDOM": + return EVEN; + default: + throw new IllegalArgumentException( + "Invalid distribution strategy: " + + name + + ". Valid values are: " + + Arrays.toString(Strategy.values())); } - throw new IllegalArgumentException( - "Invalid distribution strategy: " - + name - + ". 
Valid values are: " - + Arrays.toString(Strategy.values())); } } diff --git a/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/JdbcCatalogOperations.java b/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/JdbcCatalogOperations.java index cbbbd469e37..aacd9f881eb 100644 --- a/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/JdbcCatalogOperations.java +++ b/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/JdbcCatalogOperations.java @@ -350,6 +350,7 @@ public Table loadTable(NameIdentifier tableIdent) throws NoSuchTableException { .withAuditInfo(load.auditInfo()) .withComment(comment) .withProperties(properties) + .withDistribution(load.distribution()) .withIndexes(load.index()) .withPartitioning(load.partitioning()) .withDatabaseName(databaseName) diff --git a/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/JdbcTable.java b/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/JdbcTable.java index 54a22186664..f1008f2d039 100644 --- a/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/JdbcTable.java +++ b/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/JdbcTable.java @@ -97,6 +97,7 @@ protected JdbcTable internalBuild() { jdbcTable.auditInfo = auditInfo; jdbcTable.columns = columns; jdbcTable.partitioning = partitioning; + jdbcTable.distribution = distribution; jdbcTable.sortOrders = sortOrders; jdbcTable.indexes = indexes; jdbcTable.proxyPlugin = proxyPlugin; diff --git a/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/operation/JdbcTableOperations.java b/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/operation/JdbcTableOperations.java index 2688c6aa192..e65926fd0c2 100644 --- a/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/operation/JdbcTableOperations.java +++ b/catalogs/catalog-jdbc-common/src/main/java/org/apache/gravitino/catalog/jdbc/operation/JdbcTableOperations.java @@ -49,6 +49,7 @@ import org.apache.gravitino.rel.TableChange; import org.apache.gravitino.rel.expressions.Expression; import org.apache.gravitino.rel.expressions.distributions.Distribution; +import org.apache.gravitino.rel.expressions.distributions.Distributions; import org.apache.gravitino.rel.expressions.literals.Literals; import org.apache.gravitino.rel.expressions.transforms.Transform; import org.apache.gravitino.rel.expressions.transforms.Transforms; @@ -204,11 +205,15 @@ public JdbcTable load(String databaseName, String tableName) throws NoSuchTableE Transform[] tablePartitioning = getTablePartitioning(connection, databaseName, tableName); jdbcTableBuilder.withPartitioning(tablePartitioning); - // 5.Get table properties + // 5.Get distribution information + Distribution distribution = getDistributionInfo(connection, databaseName, tableName); + jdbcTableBuilder.withDistribution(distribution); + + // 6.Get table properties Map tableProperties = getTableProperties(connection, tableName); jdbcTableBuilder.withProperties(tableProperties); - // 6.Leave the information to the bottom layer to append the table + // 7.Leave the information to the bottom layer to append the table correctJdbcTableFields(connection, databaseName, tableName, jdbcTableBuilder); return jdbcTableBuilder.withTableOperation(this).build(); @@ -236,6 +241,20 @@ protected Transform[] getTablePartitioning( return Transforms.EMPTY_TRANSFORM; } + /** + 
* Get the distribution information of the table, including the distribution type and the fields + * + * @param connection jdbc connection. + * @param databaseName database name. + * @param tableName table name. + * @return Returns the distribution information of the table. + * @throws SQLException if an error occurs while getting the distribution information. + */ + protected Distribution getDistributionInfo( + Connection connection, String databaseName, String tableName) throws SQLException { + return Distributions.NONE; + } + protected boolean getAutoIncrementInfo(ResultSet resultSet) throws SQLException { return resultSet.getBoolean("IS_AUTOINCREMENT"); } diff --git a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java index 27b2e9c6831..479e3e5dca6 100644 --- a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java +++ b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java @@ -221,7 +221,7 @@ private static void validateDistribution(Distribution distribution, JdbcColumn[] Preconditions.checkArgument( Strategy.HASH == distribution.strategy() || Strategy.EVEN == distribution.strategy(), - "Doris only supports HASH or EVEN distribution strategy"); + "Doris only supports HASH or EVEN(RANDOM) distribution strategy"); if (distribution.strategy() == Strategy.HASH) { // Check if the distribution column exists @@ -235,6 +235,10 @@ private static void validateDistribution(Distribution distribution, JdbcColumn[] "Distribution column " + expression + " does not exist in the table columns")); + } else if (distribution.strategy() == Strategy.EVEN) { + Preconditions.checkArgument( + distribution.expressions().length == 0, + "Doris does not support distribution column in EVEN distribution strategy"); } } @@ -806,4 +810,17 @@ static String deleteIndexDefinition( } return "DROP INDEX " + deleteIndex.getName(); } + + @Override + protected Distribution getDistributionInfo( + Connection connection, String databaseName, String tableName) throws SQLException { + + String showCreateTableSql = String.format("SHOW CREATE TABLE `%s`", tableName); + try (Statement statement = connection.createStatement(); + ResultSet result = statement.executeQuery(showCreateTableSql)) { + result.next(); + String createTableSyntax = result.getString("Create Table"); + return DorisUtils.extractDistributionInfoFromSql(createTableSyntax); + } + } } diff --git a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java index a3d2ccc915c..e0543b8eee2 100644 --- a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java +++ b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java @@ -22,10 +22,15 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; +import org.apache.gravitino.rel.expressions.NamedReference; +import org.apache.gravitino.rel.expressions.distributions.Distribution; +import 
org.apache.gravitino.rel.expressions.distributions.Distributions.DistributionImpl;
+import org.apache.gravitino.rel.expressions.distributions.Strategy;
 import org.apache.gravitino.rel.expressions.literals.Literal;
 import org.apache.gravitino.rel.expressions.literals.Literals;
 import org.apache.gravitino.rel.expressions.transforms.Transform;
@@ -40,6 +45,11 @@ public final class DorisUtils {
   private static final Logger LOGGER = LoggerFactory.getLogger(DorisUtils.class);
   private static final Pattern PARTITION_INFO_PATTERN =
       Pattern.compile("PARTITION BY \\b(LIST|RANGE)\\b\\((.+)\\)");
+
+  private static final Pattern DISTRIBUTION_INFO_PATTERN =
+      Pattern.compile(
+          "DISTRIBUTED BY\\s+(HASH|RANDOM)\\s*(\\(([^)]+)\\))?\\s*(BUCKETS\\s+(\\d+))?");
+
   private static final String LIST_PARTITION = "LIST";
   private static final String RANGE_PARTITION = "RANGE";

@@ -176,4 +186,38 @@ private static String generateListPartitionSqlValues(ListPartition listPartition
     }
     return String.format("IN (%s)", listValues.build().stream().collect(Collectors.joining(",")));
   }
+
+  public static Distribution extractDistributionInfoFromSql(String createTableSql) {
+    Matcher matcher = DISTRIBUTION_INFO_PATTERN.matcher(createTableSql.trim());
+    if (matcher.find()) {
+      String distributionType = matcher.group(1);
+
+      // For Random distribution, no need to specify distribution columns.
+      String distributionColumns = matcher.group(3);
+      String[] columns =
+          Objects.equals(distributionColumns, null)
+              ? new String[] {}
+              : Arrays.stream(distributionColumns.split(","))
+                  .map(String::trim)
+                  .map(f -> f.substring(1, f.length() - 1))
+                  .toArray(String[]::new);
+
+      // Default bucket number is 1; the BUCKETS clause (group 5) is optional.
+      int bucketNum = 1;
+      if (matcher.group(5) != null) {
+        bucketNum = Integer.parseInt(matcher.group(5));
+      }
+
+      return new DistributionImpl.Builder()
+          .withStrategy(Strategy.getByName(distributionType))
+          .withNumber(bucketNum)
+          .withExpressions(
+              Arrays.stream(columns)
+                  .map(col -> NamedReference.field(new String[] {col}))
+                  .toArray(NamedReference[]::new))
+          .build();
+    }
+
+    throw new RuntimeException("Failed to extract distribution info in sql: " + createTableSql);
+  }
 }
diff --git a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
index 98478ad2347..d50020c8156 100644
--- a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
+++ b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
@@ -55,6 +55,7 @@
 import org.apache.gravitino.rel.Table;
 import org.apache.gravitino.rel.TableCatalog;
 import org.apache.gravitino.rel.TableChange;
+import org.apache.gravitino.rel.expressions.Expression;
 import org.apache.gravitino.rel.expressions.NamedReference;
 import org.apache.gravitino.rel.expressions.distributions.Distribution;
 import org.apache.gravitino.rel.expressions.distributions.Distributions;
@@ -73,6 +74,7 @@
 import org.apache.gravitino.utils.RandomNameUtils;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
@@ -895,4 +897,45 @@ void testNonPartitionedTable() {
     assertThrows(
         UnsupportedOperationException.class, () -> tablePartitionOperations.dropPartition("p1"));
   }
+
@Test + void testAllDistribution() { + Distribution[] distributions = + new Distribution[] { + Distributions.even(1, Expression.EMPTY_EXPRESSION), + Distributions.hash(1, NamedReference.field(DORIS_COL_NAME1)), + Distributions.even(10, Expression.EMPTY_EXPRESSION), + Distributions.hash(0, NamedReference.field(DORIS_COL_NAME1)), + Distributions.hash(11, NamedReference.field(DORIS_COL_NAME1)), + Distributions.hash( + 12, NamedReference.field(DORIS_COL_NAME1), NamedReference.field(DORIS_COL_NAME2)) + }; + + for (Distribution distribution : distributions) { + String tableName = GravitinoITUtils.genRandomName("test_distribution_table"); + NameIdentifier tableIdentifier = NameIdentifier.of(schemaName, tableName); + Column[] columns = createColumns(); + Index[] indexes = Indexes.EMPTY_INDEXES; + Map properties = createTableProperties(); + Transform[] partitioning = Transforms.EMPTY_TRANSFORM; + TableCatalog tableCatalog = catalog.asTableCatalog(); + tableCatalog.createTable( + tableIdentifier, + columns, + table_comment, + properties, + partitioning, + distribution, + null, + indexes); + // load table + Table loadTable = tableCatalog.loadTable(tableIdentifier); + + Assertions.assertEquals(distribution.strategy(), loadTable.distribution().strategy()); + Assertions.assertArrayEquals( + distribution.expressions(), loadTable.distribution().expressions()); + + tableCatalog.dropTable(tableIdentifier); + } + } } diff --git a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/operation/TestDorisTableOperations.java b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/operation/TestDorisTableOperations.java index 07e6209d9e0..8bc082073aa 100644 --- a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/operation/TestDorisTableOperations.java +++ b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/operation/TestDorisTableOperations.java @@ -38,6 +38,7 @@ import org.apache.gravitino.catalog.jdbc.operation.JdbcTablePartitionOperations; import org.apache.gravitino.integration.test.util.GravitinoITUtils; import org.apache.gravitino.rel.TableChange; +import org.apache.gravitino.rel.expressions.Expression; import org.apache.gravitino.rel.expressions.NamedReference; import org.apache.gravitino.rel.expressions.distributions.Distribution; import org.apache.gravitino.rel.expressions.distributions.Distributions; @@ -94,6 +95,54 @@ private static Map createProperties() { return properties; } + @Test + void testAllDistribution() { + Distribution[] distributions = + new Distribution[] { + Distributions.even(DEFAULT_BUCKET_SIZE, Expression.EMPTY_EXPRESSION), + Distributions.hash(DEFAULT_BUCKET_SIZE, NamedReference.field("col_1")), + Distributions.even(10, Expression.EMPTY_EXPRESSION), + Distributions.hash(0, NamedReference.field("col_1")), + Distributions.hash(11, NamedReference.field("col_1")), + Distributions.hash(12, NamedReference.field("col_1"), NamedReference.field("col_2")) + }; + + for (Distribution distribution : distributions) { + String tableName = GravitinoITUtils.genRandomName("doris_basic_test_table"); + String tableComment = "test_comment"; + List columns = new ArrayList<>(); + JdbcColumn col_1 = + JdbcColumn.builder().withName("col_1").withType(INT).withComment("id").build(); + columns.add(col_1); + JdbcColumn col_2 = + JdbcColumn.builder().withName("col_2").withType(VARCHAR_255).withComment("col_2").build(); + columns.add(col_2); + JdbcColumn col_3 = + 
JdbcColumn.builder().withName("col_3").withType(VARCHAR_255).withComment("col_3").build(); + columns.add(col_3); + Map properties = new HashMap<>(); + Index[] indexes = new Index[] {}; + + // create table + TABLE_OPERATIONS.create( + databaseName, + tableName, + columns.toArray(new JdbcColumn[0]), + tableComment, + createProperties(), + null, + distribution, + indexes); + JdbcTable load = TABLE_OPERATIONS.load(databaseName, tableName); + assertionsTableInfo( + tableName, tableComment, columns, properties, indexes, Transforms.EMPTY_TRANSFORM, load); + + Assertions.assertEquals(distribution.strategy(), load.distribution().strategy()); + Assertions.assertArrayEquals(distribution.expressions(), load.distribution().expressions()); + TABLE_OPERATIONS.drop(databaseName, tableName); + } + } + @Test public void testBasicTableOperation() { String tableName = GravitinoITUtils.genRandomName("doris_basic_test_table"); diff --git a/common/src/main/java/org/apache/gravitino/dto/rel/PartitionUtils.java b/common/src/main/java/org/apache/gravitino/dto/rel/PartitionUtils.java index dcc60a2b44a..603a15818c2 100644 --- a/common/src/main/java/org/apache/gravitino/dto/rel/PartitionUtils.java +++ b/common/src/main/java/org/apache/gravitino/dto/rel/PartitionUtils.java @@ -46,7 +46,7 @@ public static void validateFieldExistence(ColumnDTO[] columns, String[] fieldNam .filter(c -> c.name().equalsIgnoreCase(fieldName[0])) .collect(Collectors.toList()); Preconditions.checkArgument( - partitionColumn.size() == 1, "partition field %s not found in table", fieldName[0]); + partitionColumn.size() == 1, "Field '%s' not found in table", fieldName[0]); // TODO: should validate nested fieldName after column type support namedStruct } diff --git a/docs/jdbc-doris-catalog.md b/docs/jdbc-doris-catalog.md index 560f0baaead..7a20ddf2193 100644 --- a/docs/jdbc-doris-catalog.md +++ b/docs/jdbc-doris-catalog.md @@ -161,6 +161,15 @@ Note that although Gravitino supports several partitioning strategies, Apache Do The `fieldName` specified in the partitioning attributes must be the name of columns defined in the table. ::: +### Table distribution + +Users can also specify the distribution strategy when creating tables in the Doris catalog. Currently, the Doris catalog supports the following distribution strategies: +- `HASH` +- `RANDOM` + +For the `RANDOM` distribution strategy, Gravitino uses the `EVEN` to represent it. More information about the distribution strategy defined in Gravitino can be found [here](./table-partitioning-distribution-sort-order-indexes.md#table-distribution). + + ### Table operations Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational-metadata-using-gravitino.md#table-operations) for more details. 
diff --git a/docs/manage-relational-metadata-using-gravitino.md b/docs/manage-relational-metadata-using-gravitino.md
index a5d4d44ece6..cdef8a680f5 100644
--- a/docs/manage-relational-metadata-using-gravitino.md
+++ b/docs/manage-relational-metadata-using-gravitino.md
@@ -836,14 +836,14 @@ The following is the table property that Gravitino supports:
 | `jdbc-postgresql` | [PostgreSQL table property](./jdbc-postgresql-catalog.md#table-properties) | [PostgreSQL type mapping](./jdbc-postgresql-catalog.md#table-column-types) |
 | `doris`           | [Doris table property](./jdbc-doris-catalog.md#table-properties)           | [Doris type mapping](./jdbc-doris-catalog.md#table-column-types)           |

-#### Table partitioning, bucketing, sort ordering and indexes
+#### Table partitioning, distribution, sort ordering and indexes

 In addition to the basic settings, Gravitino supports the following features:

-| Feature             | Description | Java doc |
-|---------------------|-------------|----------|
+| Feature             | Description | Java doc |
+|---------------------|-------------|----------|
 | Table partitioning  | Equal to `PARTITION BY` in Apache Hive, It is a partitioning strategy that is used to split a table into parts based on partition keys. Some table engine may not support this feature | [Partition](pathname:///docs/0.6.0-incubating/api/java/org/apache/gravitino/dto/rel/partitioning/Partitioning.html) |
-| Table bucketing     | Equal to `CLUSTERED BY` in Apache Hive, Bucketing a.k.a (Clustering) is a technique to split the data into more manageable files/parts, (By specifying the number of buckets to create). The value of the bucketing column will be hashed by a user-defined number into buckets. | [Distribution](pathname:///docs/0.6.0-incubating/api/java/org/apache/gravitino/rel/expressions/distributions/Distribution.html) |
+| Table distribution  | Equal to `CLUSTERED BY` in Apache Hive, distribution, a.k.a. clustering, is a technique to split the data into more manageable files/parts by specifying the number of buckets to create. The value of the distribution column will be hashed into buckets by a user-defined number. | [Distribution](pathname:///docs/0.6.0-incubating/api/java/org/apache/gravitino/rel/expressions/distributions/Distribution.html) |
 | Table sort ordering | Equal to `SORTED BY` in Apache Hive, sort ordering is a method to sort the data in specific ways such as by a column or a function, and then store table data. it will highly improve the query performance under certain scenarios. | [SortOrder](pathname:///docs/0.6.0-incubating/api/java/org/apache/gravitino/rel/expressions/sorts/SortOrder.html) |
 | Table indexes       | Equal to `KEY/INDEX` in MySQL , unique key enforces uniqueness of values in one or more columns within a table.
It ensures that no two rows have identical values in specified columns, thereby facilitating data integrity and enabling efficient data retrieval and manipulation operations. | [Index](pathname:///docs/0.6.0-incubating/api/java/org/apache/gravitino/rel/indexes/Index.html) |
diff --git a/docs/table-partitioning-bucketing-sort-order-indexes.md b/docs/table-partitioning-distribution-sort-order-indexes.md
similarity index 91%
rename from docs/table-partitioning-bucketing-sort-order-indexes.md
rename to docs/table-partitioning-distribution-sort-order-indexes.md
index 1e744b564e1..4ffc56f0215 100644
--- a/docs/table-partitioning-bucketing-sort-order-indexes.md
+++ b/docs/table-partitioning-distribution-sort-order-indexes.md
@@ -1,6 +1,6 @@
 ---
-title: "Table partitioning, bucketing and sort ordering and indexes"
-slug: /table-partitioning-bucketing-sort-order-indexes
+title: "Table partitioning, distribution, sort ordering and indexes"
+slug: /table-partitioning-distribution-sort-order-indexes
 date: 2023-12-25
 keyword: Table Partition Bucket Distribute Sort By
 license: This software is licensed under the Apache License version 2.
@@ -45,19 +45,19 @@ For function partitioning, you should provide the function name and the function

 Once a partitioned table is created, you can [manage its partitions using Gravitino](./manage-table-partition-using-gravitino.md).

-## Table bucketing
+## Table distribution

-To create a bucketed table, you should use the following three components to construct a valid bucketed table.
+To create a distributed (bucketed) table, you should use the following three components to construct a valid distributed table.

 - Strategy. It defines how Gravitino distributes table data across partitions.

-| Bucket strategy | Description                                                                                                                | JSON    | Java             |
-|-----------------|----------------------------------------------------------------------------------------------------------------------------|---------|------------------|
-| hash            | Bucket table using hash. Gravitino distributes table data into buckets based on the hash value of the key.                | `hash`  | `Strategy.HASH`  |
-| range           | Bucket table using range. Gravitino distributes table data into buckets based on a specified range or interval of values. | `range` | `Strategy.RANGE` |
-| even            | Bucket table using even. Gravitino distributes table data, ensuring an equal distribution of data.                        | `even`  | `Strategy.EVEN`  |
+| Distribution strategy | Description                                                                                                                                                                        | JSON    | Java             |
+|-----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|------------------|
+| hash                  | Distribute the table using hash. Gravitino distributes table data into buckets based on the hash value of the key.                                                                | `hash`  | `Strategy.HASH`  |
+| range                 | Distribute the table using range. Gravitino distributes table data into buckets based on a specified range or interval of values.                                                 | `range` | `Strategy.RANGE` |
+| even                  | Distribute the table using even. Gravitino distributes table data, ensuring an equal distribution of data. Currently, Gravitino uses `even` to implement the Doris `random` distribution | `even`  | `Strategy.EVEN`  |

-- number. It defines how many buckets you use to bucket the table.
+- number. It defines how many buckets you use to distribute the table.

- funcArgs. It defines the arguments of the strategy, the argument must be an [expression](./expression.md).
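To make the three components concrete, a small sketch of building each strategy with the Java API; the column names `user_id` and `ts` are placeholders:

```java
// Hash: distribute rows into 4 buckets by the hash of the `user_id` column.
Distribution byHash = Distributions.hash(4, NamedReference.field("user_id"));

// Even: 8 buckets with no distribution columns (maps to Doris RANDOM).
Distribution byEven = Distributions.even(8, Expression.EMPTY_EXPRESSION);

// Range: the generic factory with an explicit strategy and 16 buckets.
Distribution byRange = Distributions.of(Strategy.RANGE, 16, NamedReference.field("ts"));
```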
diff --git a/trino-connector/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00000_create_table.txt b/trino-connector/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00000_create_table.txt
index 2cea55a45b2..6656e9660a7 100644
--- a/trino-connector/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00000_create_table.txt
+++ b/trino-connector/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00000_create_table.txt
@@ -24,7 +24,7 @@ WITH (
    sorted_by = ARRAY['salary']
 )"

-    partition field salary_wrong_name not found in table
+    Field 'salary_wrong_name' not found in table

 CREATE TABLE

From 2e5bdbf3bb82016ea882805be9801ccc91b18390 Mon Sep 17 00:00:00 2001
From: Qian Xia
Date: Mon, 30 Sep 2024 14:08:17 +0800
Subject: [PATCH 12/15] [#5042] fix(ui): show the expand arrow when reloading a tree node (#5043)

### What changes were proposed in this pull request?

Show the expand arrow when reloading a tree node.

(Screenshots omitted.)

### Why are the changes needed?

Show the correct info after reloading a tree node.

Fix: #5042

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

manually
---
 .../app/metalakes/metalake/MetalakeTree.js    |  2 +-
 web/web/src/lib/store/metalakes/index.js      | 26 +++++++++++++++++--
 web/web/src/lib/utils/index.js                |  4 ++-
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/web/web/src/app/metalakes/metalake/MetalakeTree.js b/web/web/src/app/metalakes/metalake/MetalakeTree.js
index cf1cca7ee93..58b7c138bfb 100644
--- a/web/web/src/app/metalakes/metalake/MetalakeTree.js
+++ b/web/web/src/app/metalakes/metalake/MetalakeTree.js
@@ -111,7 +111,7 @@ const MetalakeTree = props => {
         break
       }
       default:
-        dispatch(setIntoTreeNodeWithFetch({ key: nodeProps.data.key }))
+        dispatch(setIntoTreeNodeWithFetch({ key: nodeProps.data.key, reload: true }))
     }
   }
diff --git a/web/web/src/lib/store/metalakes/index.js b/web/web/src/lib/store/metalakes/index.js
index b0cc84c499b..5dd55501001 100644
--- a/web/web/src/lib/store/metalakes/index.js
+++ b/web/web/src/lib/store/metalakes/index.js
@@ -96,7 +96,7 @@ export const updateMetalake = createAsyncThunk('appMetalakes/updateMetalake', as

 export const setIntoTreeNodeWithFetch = createAsyncThunk(
   'appMetalakes/setIntoTreeNodeWithFetch',
-  async ({ key }, { getState, dispatch }) => {
+  async ({ key, reload }, { getState, dispatch }) => {
     let result = {
       key,
       data: [],
@@ -137,20 +137,42 @@ export const setIntoTreeNodeWithFetch = createAsyncThunk(
       }

       const { identifiers = [] } = res
+      const expandedKeys = getState().metalakes.expandedNodes
+      const loadedNodes = getState().metalakes.loadedNodes
+      let reloadedEpxpandedKeys = []
+      let reloadedKeys = []

       result.data = identifiers.map(schemaItem => {
+        if (reload) {
+          if (expandedKeys.includes(`{{${metalake}}}{{${catalog}}}{{${type}}}{{${schemaItem.name}}}`)) {
+            reloadedEpxpandedKeys.push(`{{${metalake}}}{{${catalog}}}{{${type}}}{{${schemaItem.name}}}`)
+          }
+          if (loadedNodes.includes(`{{${metalake}}}{{${catalog}}}{{${type}}}{{${schemaItem.name}}}`)) {
+            reloadedKeys.push(`{{${metalake}}}{{${catalog}}}{{${type}}}{{${schemaItem.name}}}`)
+          }
+        }
+
         return {
           ...schemaItem,
           node: 'schema',
           id: `{{${metalake}}}{{${catalog}}}{{${type}}}{{${schemaItem.name}}}`,
           key: `{{${metalake}}}{{${catalog}}}{{${type}}}{{${schemaItem.name}}}`,
           path: `?${new URLSearchParams({ metalake, catalog, type, schema: schemaItem.name }).toString()}`,
+          isLeaf: reload ?
false : undefined, name: schemaItem.name, title: schemaItem.name, tables: [], children: [] } }) + if (reloadedEpxpandedKeys.length > 0) { + const epxpanded = expandedKeys.filter(key => !reloadedEpxpandedKeys.includes(key)) + dispatch(resetExpandNode(epxpanded)) + } + if (reloadedKeys.length > 0) { + const loaded = loadedNodes.filter(key => !reloadedKeys.includes(key)) + dispatch(setLoadedNodes(loaded)) + } } else if (pathArr.length === 4 && type === 'relational') { const [err, res] = await to(getTablesApi({ metalake, catalog, schema })) @@ -933,7 +955,7 @@ export const appMetalakesSlice = createSlice({ state.expandedNodes = expandedNodes }, resetExpandNode(state, action) { - state.expandedNodes = [] + state.expandedNodes = action.payload || [] }, resetTableData(state, action) { state.tableData = [] diff --git a/web/web/src/lib/utils/index.js b/web/web/src/lib/utils/index.js index bcb04c66238..1524f1a23a7 100644 --- a/web/web/src/lib/utils/index.js +++ b/web/web/src/lib/utils/index.js @@ -144,12 +144,14 @@ export const updateTreeData = (list = [], key, children = []) => { if (node.key === key) { return { ...node, + isLeaf: children?.length === 0, children } } - if (node.children) { + if (node.children && node.children.length > 0) { return { ...node, + isLeaf: node.children.length === 0, children: updateTreeData(node.children, key, children) } } From f600041b4d7f3e35fb33d44d570a0d82ba467228 Mon Sep 17 00:00:00 2001 From: roryqi Date: Mon, 30 Sep 2024 19:23:17 +0800 Subject: [PATCH 13/15] [#5054] improvement(api,server): Add the check of privileges (#5053) ### What changes were proposed in this pull request? Add the check of privileges ### Why are the changes needed? Fix: #5054 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add some UTs --------- Co-authored-by: Jerry Shao --- .../gravitino/authorization/Privilege.java | 10 + .../gravitino/authorization/Privileges.java | 111 ++++++++++ .../authorization/TestSecurableObjects.java | 191 ++++++++++++++++++ .../dto/authorization/PrivilegeDTO.java | 10 + .../gravitino/dto/util/DTOConverters.java | 15 ++ .../authorization/AuthorizationUtils.java | 133 ++++++++++-- .../server/web/rest/RoleOperations.java | 98 +++------ .../server/web/rest/TestRoleOperations.java | 74 ++++++- 8 files changed, 547 insertions(+), 95 deletions(-) diff --git a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java index fbfde267151..3ca4107a12d 100644 --- a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java +++ b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java @@ -18,6 +18,7 @@ */ package org.apache.gravitino.authorization; +import org.apache.gravitino.MetadataObject; import org.apache.gravitino.annotation.Unstable; /** @@ -39,6 +40,15 @@ public interface Privilege { */ Condition condition(); + /** + * If the privilege can bind to a securable object, then this method will return true, otherwise + * false. + * + * @param type The securable object type. + * @return It will return true if the privilege can bind to a securable object, otherwise false. + */ + boolean canBindTo(MetadataObject.Type type); + /** The name of this privilege. */ enum Name { /** The privilege to create a catalog. 
*/ diff --git a/api/src/main/java/org/apache/gravitino/authorization/Privileges.java b/api/src/main/java/org/apache/gravitino/authorization/Privileges.java index ef9e441b322..5255bce1c5a 100644 --- a/api/src/main/java/org/apache/gravitino/authorization/Privileges.java +++ b/api/src/main/java/org/apache/gravitino/authorization/Privileges.java @@ -18,11 +18,37 @@ */ package org.apache.gravitino.authorization; +import com.google.common.collect.Sets; import java.util.Objects; +import java.util.Set; +import org.apache.gravitino.MetadataObject; /** The helper class for {@link Privilege}. */ public class Privileges { + private static final Set TABLE_SUPPORTED_TYPES = + Sets.immutableEnumSet( + MetadataObject.Type.METALAKE, + MetadataObject.Type.CATALOG, + MetadataObject.Type.SCHEMA, + MetadataObject.Type.TABLE); + private static final Set TOPIC_SUPPORTED_TYPES = + Sets.immutableEnumSet( + MetadataObject.Type.METALAKE, + MetadataObject.Type.CATALOG, + MetadataObject.Type.SCHEMA, + MetadataObject.Type.TOPIC); + private static final Set SCHEMA_SUPPORTED_TYPES = + Sets.immutableEnumSet( + MetadataObject.Type.METALAKE, MetadataObject.Type.CATALOG, MetadataObject.Type.SCHEMA); + + private static final Set FILESET_SUPPORTED_TYPES = + Sets.immutableEnumSet( + MetadataObject.Type.METALAKE, + MetadataObject.Type.CATALOG, + MetadataObject.Type.SCHEMA, + MetadataObject.Type.FILESET); + /** * Returns the Privilege with allow condition from the string representation. * @@ -241,6 +267,11 @@ public static CreateCatalog allow() { public static CreateCatalog deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return type == MetadataObject.Type.METALAKE; + } } /** The privilege to use a catalog. */ @@ -263,6 +294,11 @@ public static UseCatalog allow() { public static UseCatalog deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return type == MetadataObject.Type.METALAKE || type == MetadataObject.Type.CATALOG; + } } /** The privilege to use a schema. */ @@ -283,6 +319,11 @@ public static UseSchema allow() { public static UseSchema deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return SCHEMA_SUPPORTED_TYPES.contains(type); + } } /** The privilege to create a schema. */ @@ -305,6 +346,11 @@ public static CreateSchema allow() { public static CreateSchema deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return type == MetadataObject.Type.METALAKE || type == MetadataObject.Type.CATALOG; + } } /** The privilege to create a table. */ @@ -327,6 +373,11 @@ public static CreateTable allow() { public static CreateTable deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return SCHEMA_SUPPORTED_TYPES.contains(type); + } } /** The privilege to select data from a table. */ @@ -349,6 +400,11 @@ public static SelectTable allow() { public static SelectTable deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return TABLE_SUPPORTED_TYPES.contains(type); + } } /** The privilege to execute SQL `ALTER`, `INSERT`, `UPDATE`, or `DELETE` for a table. 
*/ @@ -371,6 +427,11 @@ public static ModifyTable allow() { public static ModifyTable deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return TABLE_SUPPORTED_TYPES.contains(type); + } } /** The privilege to create a fileset. */ @@ -393,6 +454,11 @@ public static CreateFileset allow() { public static CreateFileset deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return SCHEMA_SUPPORTED_TYPES.contains(type); + } } /** The privilege to read a fileset. */ @@ -415,6 +481,11 @@ public static ReadFileset allow() { public static ReadFileset deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return FILESET_SUPPORTED_TYPES.contains(type); + } } /** The privilege to write a fileset. */ @@ -437,6 +508,11 @@ public static WriteFileset allow() { public static WriteFileset deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return FILESET_SUPPORTED_TYPES.contains(type); + } } /** The privilege to create a topic. */ @@ -459,6 +535,11 @@ public static CreateTopic allow() { public static CreateTopic deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return SCHEMA_SUPPORTED_TYPES.contains(type); + } } /** The privilege to consume from a topic. */ @@ -481,6 +562,11 @@ public static ConsumeTopic allow() { public static ConsumeTopic deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return TOPIC_SUPPORTED_TYPES.contains(type); + } } /** The privilege to produce to a topic. */ @@ -503,6 +589,11 @@ public static ProduceTopic allow() { public static ProduceTopic deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return TOPIC_SUPPORTED_TYPES.contains(type); + } } /** The privilege to manage users. */ @@ -525,6 +616,11 @@ public static ManageUsers allow() { public static ManageUsers deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return type == MetadataObject.Type.METALAKE; + } } /** The privilege to manage groups. */ @@ -547,6 +643,11 @@ public static ManageGroups allow() { public static ManageGroups deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return type == MetadataObject.Type.METALAKE; + } } /** The privilege to create a role. */ @@ -569,6 +670,11 @@ public static CreateRole allow() { public static CreateRole deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return type == MetadataObject.Type.METALAKE; + } } /** The privilege to grant or revoke a role for the user or the group. 
*/ @@ -591,5 +697,10 @@ public static ManageGrants allow() { public static ManageGrants deny() { return DENY_INSTANCE; } + + @Override + public boolean canBindTo(MetadataObject.Type type) { + return type == MetadataObject.Type.METALAKE; + } } } diff --git a/api/src/test/java/org/apache/gravitino/authorization/TestSecurableObjects.java b/api/src/test/java/org/apache/gravitino/authorization/TestSecurableObjects.java index 82374f676e3..f3066666d9c 100644 --- a/api/src/test/java/org/apache/gravitino/authorization/TestSecurableObjects.java +++ b/api/src/test/java/org/apache/gravitino/authorization/TestSecurableObjects.java @@ -157,4 +157,195 @@ public void testSecurableObjects() { Lists.newArrayList(Privileges.UseSchema.allow()))); Assertions.assertTrue(e.getMessage().contains("the length of names is 3")); } + + @Test + public void testPrivileges() { + Privilege createCatalog = Privileges.CreateCatalog.allow(); + Privilege useCatalog = Privileges.UseCatalog.allow(); + Privilege createSchema = Privileges.CreateSchema.allow(); + Privilege useSchema = Privileges.UseSchema.allow(); + Privilege createTable = Privileges.CreateTable.allow(); + Privilege selectTable = Privileges.SelectTable.allow(); + Privilege modifyTable = Privileges.ModifyTable.allow(); + Privilege createFileset = Privileges.CreateFileset.allow(); + Privilege readFileset = Privileges.ReadFileset.allow(); + Privilege writeFileset = Privileges.WriteFileset.allow(); + Privilege createTopic = Privileges.CreateTopic.allow(); + Privilege consumeTopic = Privileges.ConsumeTopic.allow(); + Privilege produceTopic = Privileges.ProduceTopic.allow(); + Privilege createRole = Privileges.CreateRole.allow(); + Privilege manageUsers = Privileges.ManageUsers.allow(); + Privilege manageGroups = Privileges.ManageGroups.allow(); + Privilege manageGrants = Privileges.ManageGrants.allow(); + + // Test create catalog + Assertions.assertTrue(createCatalog.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertFalse(createCatalog.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertFalse(createCatalog.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(createCatalog.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(createCatalog.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(createCatalog.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(createCatalog.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(createCatalog.canBindTo(MetadataObject.Type.COLUMN)); + + // Test use catalog + Assertions.assertTrue(useCatalog.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(useCatalog.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertFalse(useCatalog.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(useCatalog.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(useCatalog.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(useCatalog.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(useCatalog.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(useCatalog.canBindTo(MetadataObject.Type.COLUMN)); + + // Test create schema + Assertions.assertTrue(createSchema.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(createSchema.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertFalse(createSchema.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(createSchema.canBindTo(MetadataObject.Type.TABLE)); + 
Assertions.assertFalse(createSchema.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(createSchema.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(createSchema.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(createSchema.canBindTo(MetadataObject.Type.COLUMN)); + + // Test use schema + Assertions.assertTrue(useSchema.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(useSchema.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(useSchema.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(useSchema.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(useSchema.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(useSchema.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(useSchema.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(useSchema.canBindTo(MetadataObject.Type.COLUMN)); + + // Test create table + Assertions.assertTrue(createTable.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(createTable.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(createTable.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(createTable.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(createTable.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(createTable.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(createTable.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(createTable.canBindTo(MetadataObject.Type.COLUMN)); + + // Test select table + Assertions.assertTrue(selectTable.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(selectTable.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(selectTable.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertTrue(selectTable.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(selectTable.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(selectTable.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(selectTable.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(selectTable.canBindTo(MetadataObject.Type.COLUMN)); + + // Test modify table + Assertions.assertTrue(modifyTable.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(modifyTable.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(modifyTable.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertTrue(modifyTable.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(modifyTable.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(modifyTable.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(modifyTable.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(modifyTable.canBindTo(MetadataObject.Type.COLUMN)); + + // Test create topic + Assertions.assertTrue(createTopic.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(createTopic.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(createTopic.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(createTopic.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(createTopic.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(createTopic.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(createTopic.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(createTopic.canBindTo(MetadataObject.Type.COLUMN)); + + // Test consume topic + 
Assertions.assertTrue(consumeTopic.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(consumeTopic.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(consumeTopic.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(consumeTopic.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertTrue(consumeTopic.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(consumeTopic.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(consumeTopic.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(consumeTopic.canBindTo(MetadataObject.Type.COLUMN)); + + // Test produce topic + Assertions.assertTrue(produceTopic.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(produceTopic.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(produceTopic.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(produceTopic.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertTrue(produceTopic.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(produceTopic.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(produceTopic.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(produceTopic.canBindTo(MetadataObject.Type.COLUMN)); + + // Test create fileset + Assertions.assertTrue(createFileset.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(createFileset.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(createFileset.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(createFileset.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(createFileset.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertFalse(createFileset.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(createFileset.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(createFileset.canBindTo(MetadataObject.Type.COLUMN)); + + // Test read fileset + Assertions.assertTrue(readFileset.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(readFileset.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(readFileset.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(readFileset.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(readFileset.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertTrue(readFileset.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(readFileset.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(readFileset.canBindTo(MetadataObject.Type.COLUMN)); + + // Test write fileset + Assertions.assertTrue(writeFileset.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertTrue(writeFileset.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertTrue(writeFileset.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(writeFileset.canBindTo(MetadataObject.Type.TABLE)); + Assertions.assertFalse(writeFileset.canBindTo(MetadataObject.Type.TOPIC)); + Assertions.assertTrue(writeFileset.canBindTo(MetadataObject.Type.FILESET)); + Assertions.assertFalse(writeFileset.canBindTo(MetadataObject.Type.ROLE)); + Assertions.assertFalse(writeFileset.canBindTo(MetadataObject.Type.COLUMN)); + + // Test create role + Assertions.assertTrue(createRole.canBindTo(MetadataObject.Type.METALAKE)); + Assertions.assertFalse(createRole.canBindTo(MetadataObject.Type.CATALOG)); + Assertions.assertFalse(createRole.canBindTo(MetadataObject.Type.SCHEMA)); + Assertions.assertFalse(createRole.canBindTo(MetadataObject.Type.TABLE)); + 
+    Assertions.assertFalse(createRole.canBindTo(MetadataObject.Type.TOPIC));
+    Assertions.assertFalse(createRole.canBindTo(MetadataObject.Type.FILESET));
+    Assertions.assertFalse(createRole.canBindTo(MetadataObject.Type.ROLE));
+    Assertions.assertFalse(createRole.canBindTo(MetadataObject.Type.COLUMN));
+
+    // Test manage users
+    Assertions.assertTrue(manageUsers.canBindTo(MetadataObject.Type.METALAKE));
+    Assertions.assertFalse(manageUsers.canBindTo(MetadataObject.Type.CATALOG));
+    Assertions.assertFalse(manageUsers.canBindTo(MetadataObject.Type.SCHEMA));
+    Assertions.assertFalse(manageUsers.canBindTo(MetadataObject.Type.TABLE));
+    Assertions.assertFalse(manageUsers.canBindTo(MetadataObject.Type.TOPIC));
+    Assertions.assertFalse(manageUsers.canBindTo(MetadataObject.Type.FILESET));
+    Assertions.assertFalse(manageUsers.canBindTo(MetadataObject.Type.ROLE));
+    Assertions.assertFalse(manageUsers.canBindTo(MetadataObject.Type.COLUMN));
+
+    // Test manage groups
+    Assertions.assertTrue(manageGroups.canBindTo(MetadataObject.Type.METALAKE));
+    Assertions.assertFalse(manageGroups.canBindTo(MetadataObject.Type.CATALOG));
+    Assertions.assertFalse(manageGroups.canBindTo(MetadataObject.Type.SCHEMA));
+    Assertions.assertFalse(manageGroups.canBindTo(MetadataObject.Type.TABLE));
+    Assertions.assertFalse(manageGroups.canBindTo(MetadataObject.Type.TOPIC));
+    Assertions.assertFalse(manageGroups.canBindTo(MetadataObject.Type.FILESET));
+    Assertions.assertFalse(manageGroups.canBindTo(MetadataObject.Type.ROLE));
+    Assertions.assertFalse(manageGroups.canBindTo(MetadataObject.Type.COLUMN));
+
+    // Test manage grants
+    Assertions.assertTrue(manageGrants.canBindTo(MetadataObject.Type.METALAKE));
+    Assertions.assertFalse(manageGrants.canBindTo(MetadataObject.Type.CATALOG));
+    Assertions.assertFalse(manageGrants.canBindTo(MetadataObject.Type.SCHEMA));
+    Assertions.assertFalse(manageGrants.canBindTo(MetadataObject.Type.TABLE));
+    Assertions.assertFalse(manageGrants.canBindTo(MetadataObject.Type.TOPIC));
+    Assertions.assertFalse(manageGrants.canBindTo(MetadataObject.Type.FILESET));
+    Assertions.assertFalse(manageGrants.canBindTo(MetadataObject.Type.ROLE));
+    Assertions.assertFalse(manageGrants.canBindTo(MetadataObject.Type.COLUMN));
+  }
 }
diff --git a/common/src/main/java/org/apache/gravitino/dto/authorization/PrivilegeDTO.java b/common/src/main/java/org/apache/gravitino/dto/authorization/PrivilegeDTO.java
index d31b693838c..e8554c015ad 100644
--- a/common/src/main/java/org/apache/gravitino/dto/authorization/PrivilegeDTO.java
+++ b/common/src/main/java/org/apache/gravitino/dto/authorization/PrivilegeDTO.java
@@ -20,6 +20,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.base.Preconditions;
+import org.apache.gravitino.MetadataObject;
 import org.apache.gravitino.authorization.Privilege;
 import org.apache.gravitino.authorization.Privileges;
@@ -65,6 +66,15 @@ public Condition condition() {
     return condition;
   }
 
+  @Override
+  public boolean canBindTo(MetadataObject.Type type) {
+    if (Condition.ALLOW.equals(condition)) {
+      return Privileges.allow(name).canBindTo(type);
+    } else {
+      return Privileges.deny(name).canBindTo(type);
+    }
+  }
+
   /** @return the builder for creating a new instance of PrivilegeDTO.
*/ public static Builder builder() { return new Builder(); diff --git a/common/src/main/java/org/apache/gravitino/dto/util/DTOConverters.java b/common/src/main/java/org/apache/gravitino/dto/util/DTOConverters.java index 38224493b71..adc1f5f03e0 100644 --- a/common/src/main/java/org/apache/gravitino/dto/util/DTOConverters.java +++ b/common/src/main/java/org/apache/gravitino/dto/util/DTOConverters.java @@ -32,6 +32,7 @@ import org.apache.gravitino.authorization.Group; import org.apache.gravitino.authorization.Owner; import org.apache.gravitino.authorization.Privilege; +import org.apache.gravitino.authorization.Privileges; import org.apache.gravitino.authorization.Role; import org.apache.gravitino.authorization.SecurableObject; import org.apache.gravitino.authorization.User; @@ -1003,4 +1004,18 @@ public static Transform fromDTO(Partitioning partitioning) { throw new IllegalArgumentException("Unsupported partitioning: " + partitioning.strategy()); } } + + /** + * Converts a Privilege DTO to a Privilege + * + * @param privilegeDTO The privilege DTO to be converted. + * @return The privilege. + */ + public static Privilege fromPrivilegeDTO(PrivilegeDTO privilegeDTO) { + if (privilegeDTO.condition().equals(Privilege.Condition.ALLOW)) { + return Privileges.allow(privilegeDTO.name()); + } else { + return Privileges.deny(privilegeDTO.name()); + } + } } diff --git a/core/src/main/java/org/apache/gravitino/authorization/AuthorizationUtils.java b/core/src/main/java/org/apache/gravitino/authorization/AuthorizationUtils.java index c182980b8ec..5a4a62cd60e 100644 --- a/core/src/main/java/org/apache/gravitino/authorization/AuthorizationUtils.java +++ b/core/src/main/java/org/apache/gravitino/authorization/AuthorizationUtils.java @@ -18,11 +18,14 @@ */ package org.apache.gravitino.authorization; -import com.google.common.collect.Lists; +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.common.collect.Sets; import java.io.IOException; import java.util.List; import java.util.Set; import java.util.function.Consumer; +import java.util.function.Supplier; import org.apache.gravitino.Catalog; import org.apache.gravitino.Entity; import org.apache.gravitino.EntityStore; @@ -33,6 +36,10 @@ import org.apache.gravitino.catalog.CatalogManager; import org.apache.gravitino.connector.BaseCatalog; import org.apache.gravitino.connector.authorization.AuthorizationPlugin; +import org.apache.gravitino.dto.authorization.PrivilegeDTO; +import org.apache.gravitino.dto.util.DTOConverters; +import org.apache.gravitino.exceptions.NoSuchCatalogException; +import org.apache.gravitino.exceptions.NoSuchMetadataObjectException; import org.apache.gravitino.exceptions.NoSuchMetalakeException; import org.apache.gravitino.utils.MetadataObjectUtil; import org.apache.gravitino.utils.NameIdentifierUtil; @@ -47,15 +54,15 @@ public class AuthorizationUtils { static final String ROLE_DOES_NOT_EXIST_MSG = "Role %s does not exist in th metalake %s"; private static final Logger LOG = LoggerFactory.getLogger(AuthorizationUtils.class); private static final String METALAKE_DOES_NOT_EXIST_MSG = "Metalake %s does not exist"; - - private static final List pluginNotSupportsPrivileges = - Lists.newArrayList( - Privilege.Name.CREATE_CATALOG, - Privilege.Name.USE_CATALOG, - Privilege.Name.MANAGE_GRANTS, - Privilege.Name.MANAGE_USERS, - Privilege.Name.MANAGE_GROUPS, - Privilege.Name.CREATE_ROLE); + private static final Set FILESET_PRIVILEGES = + Sets.immutableEnumSet( + Privilege.Name.CREATE_FILESET, 
+          Privilege.Name.WRITE_FILESET, Privilege.Name.READ_FILESET);
+  private static final Set TABLE_PRIVILEGES =
+      Sets.immutableEnumSet(
+          Privilege.Name.CREATE_TABLE, Privilege.Name.MODIFY_TABLE, Privilege.Name.SELECT_TABLE);
+  private static final Set TOPIC_PRIVILEGES =
+      Sets.immutableEnumSet(
+          Privilege.Name.CREATE_TOPIC, Privilege.Name.PRODUCE_TOPIC, Privilege.Name.CONSUME_TOPIC);
 
   private AuthorizationUtils() {}
 
@@ -195,11 +202,10 @@ private static void callAuthorizationPluginImpl(
   }
 
   public static boolean needApplyAuthorizationPluginAllCatalogs(SecurableObject securableObject) {
-    // TODO: Add `supportsSecurableObjects` method for every privilege to simplify this code
     if (securableObject.type() == MetadataObject.Type.METALAKE) {
       List privileges = securableObject.privileges();
       for (Privilege privilege : privileges) {
-        if (!pluginNotSupportsPrivileges.contains(privilege.name())) {
+        if (privilege.canBindTo(MetadataObject.Type.CATALOG)) {
           return true;
         }
       }
@@ -207,6 +213,109 @@ public static boolean needApplyAuthorizationPluginAllCatalogs(SecurableObject se
     return false;
   }
 
+  // Check every securable object whether exists and is imported.
+  public static void checkSecurableObject(String metalake, MetadataObject object) {
+    NameIdentifier identifier = MetadataObjectUtil.toEntityIdent(metalake, object);
+
+    Supplier exceptionToThrowSupplier =
+        () ->
+            new NoSuchMetadataObjectException(
+                "Securable object %s doesn't exist", object.fullName());
+
+    switch (object.type()) {
+      case METALAKE:
+        check(
+            GravitinoEnv.getInstance().metalakeDispatcher().metalakeExists(identifier),
+            exceptionToThrowSupplier);
+        break;
+
+      case CATALOG:
+        check(
+            GravitinoEnv.getInstance().catalogDispatcher().catalogExists(identifier),
+            exceptionToThrowSupplier);
+        break;
+
+      case SCHEMA:
+        check(
+            GravitinoEnv.getInstance().schemaDispatcher().schemaExists(identifier),
+            exceptionToThrowSupplier);
+        break;
+
+      case FILESET:
+        check(
+            GravitinoEnv.getInstance().filesetDispatcher().filesetExists(identifier),
+            exceptionToThrowSupplier);
+        break;
+
+      case TABLE:
+        check(
+            GravitinoEnv.getInstance().tableDispatcher().tableExists(identifier),
+            exceptionToThrowSupplier);
+        break;
+
+      case TOPIC:
+        check(
+            GravitinoEnv.getInstance().topicDispatcher().topicExists(identifier),
+            exceptionToThrowSupplier);
+        break;
+
+      default:
+        throw new IllegalArgumentException(
+            String.format("Doesn't support the type %s", object.type()));
+    }
+  }
+
+  public static void checkPrivilege(
+      PrivilegeDTO privilegeDTO, MetadataObject object, String metalake) {
+    Privilege privilege = DTOConverters.fromPrivilegeDTO(privilegeDTO);
+    if (!privilege.canBindTo(object.type())) {
+      throw new IllegalArgumentException(
+          String.format(
+              "Securable object %s type %s doesn't support binding privilege %s",
+              object.fullName(), object.type(), privilege));
+    }
+
+    if (object.type() == MetadataObject.Type.CATALOG
+        || object.type() == MetadataObject.Type.SCHEMA) {
+      NameIdentifier identifier = MetadataObjectUtil.toEntityIdent(metalake, object);
+      NameIdentifier catalogIdent = NameIdentifierUtil.getCatalogIdentifier(identifier);
+      try {
+        if (FILESET_PRIVILEGES.contains(privilege.name())) {
+          checkCatalogType(catalogIdent, Catalog.Type.FILESET, privilege);
+        }
+
+        if (TABLE_PRIVILEGES.contains(privilege.name())) {
+          checkCatalogType(catalogIdent, Catalog.Type.RELATIONAL, privilege);
+        }
+
+        if (TOPIC_PRIVILEGES.contains(privilege.name())) {
+          checkCatalogType(catalogIdent, Catalog.Type.MESSAGING, privilege);
+        }
+      } catch (NoSuchCatalogException ne) {
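+        // The catalog backing this securable object is gone, so report the
+        // securable object itself as missing.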
+        throw new NoSuchMetadataObjectException(
+            "Securable object %s doesn't exist", object.fullName());
+      }
+    }
+  }
+
+  private static void check(
+      final boolean expression, Supplier exceptionToThrowSupplier) {
+    if (!expression) {
+      throw checkNotNull(exceptionToThrowSupplier).get();
+    }
+  }
+
+  private static void checkCatalogType(
+      NameIdentifier catalogIdent, Catalog.Type type, Privilege privilege) {
+    Catalog catalog = GravitinoEnv.getInstance().catalogDispatcher().loadCatalog(catalogIdent);
+    if (catalog.type() != type) {
+      throw new IllegalArgumentException(
+          String.format(
+              "Catalog %s type %s doesn't support privilege %s",
+              catalogIdent, catalog.type(), privilege));
+    }
+  }
+
   private static boolean needApplyAuthorizationPluginAllCatalogs(MetadataObject.Type type) {
     return type == MetadataObject.Type.METALAKE;
   }
diff --git a/server/src/main/java/org/apache/gravitino/server/web/rest/RoleOperations.java b/server/src/main/java/org/apache/gravitino/server/web/rest/RoleOperations.java
index b006471e383..9810ad759e3 100644
--- a/server/src/main/java/org/apache/gravitino/server/web/rest/RoleOperations.java
+++ b/server/src/main/java/org/apache/gravitino/server/web/rest/RoleOperations.java
@@ -20,8 +20,10 @@
 import com.codahale.metrics.annotation.ResponseMetered;
 import com.codahale.metrics.annotation.Timed;
+import com.google.common.collect.Sets;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Set;
 import java.util.stream.Collectors;
 import javax.servlet.http.HttpServletRequest;
 import javax.ws.rs.DELETE;
@@ -34,20 +36,20 @@
 import javax.ws.rs.core.Response;
 import org.apache.gravitino.GravitinoEnv;
 import org.apache.gravitino.MetadataObject;
+import org.apache.gravitino.MetadataObjects;
 import org.apache.gravitino.NameIdentifier;
 import org.apache.gravitino.authorization.AccessControlDispatcher;
 import org.apache.gravitino.authorization.AuthorizationUtils;
 import org.apache.gravitino.authorization.Privilege;
-import org.apache.gravitino.authorization.Privileges;
 import org.apache.gravitino.authorization.SecurableObject;
 import org.apache.gravitino.authorization.SecurableObjects;
+import org.apache.gravitino.dto.authorization.PrivilegeDTO;
 import org.apache.gravitino.dto.authorization.SecurableObjectDTO;
 import org.apache.gravitino.dto.requests.RoleCreateRequest;
 import org.apache.gravitino.dto.responses.DeleteResponse;
 import org.apache.gravitino.dto.responses.NameListResponse;
 import org.apache.gravitino.dto.responses.RoleResponse;
 import org.apache.gravitino.dto.util.DTOConverters;
-import org.apache.gravitino.exceptions.NoSuchMetadataObjectException;
 import org.apache.gravitino.lock.LockType;
 import org.apache.gravitino.lock.TreeLockUtils;
 import org.apache.gravitino.metrics.MetricNames;
@@ -118,12 +120,29 @@ public Response getRole(@PathParam("metalake") String metalake, @PathParam("role
   @ResponseMetered(name = "create-role", absolute = true)
   public Response createRole(@PathParam("metalake") String metalake, RoleCreateRequest request) {
     try {
+
       return Utils.doAs(
           httpRequest,
           () -> {
+            Set metadataObjects = Sets.newHashSet();
             for (SecurableObjectDTO object : request.getSecurableObjects()) {
-              checkSecurableObject(metalake, object);
+              MetadataObject metadataObject =
+                  MetadataObjects.parse(object.getFullName(), object.type());
+              if (metadataObjects.contains(metadataObject)) {
+                throw new IllegalArgumentException(
+                    String.format(
+                        "Doesn't support specifying duplicated securable objects %s type %s",
+                        object.fullName(), object.type()));
+              } else {
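+                // First occurrence of this securable object in the request;
+                // record it so later duplicates are rejected above.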
metadataObjects.add(metadataObject); + } + + for (Privilege privilege : object.privileges()) { + AuthorizationUtils.checkPrivilege((PrivilegeDTO) privilege, object, metalake); + } + AuthorizationUtils.checkSecurableObject(metalake, object); } + List securableObjects = Arrays.stream(request.getSecurableObjects()) .map( @@ -133,15 +152,9 @@ public Response createRole(@PathParam("metalake") String metalake, RoleCreateReq securableObjectDTO.type(), securableObjectDTO.privileges().stream() .map( - privilege -> { - if (privilege - .condition() - .equals(Privilege.Condition.ALLOW)) { - return Privileges.allow(privilege.name()); - } else { - return Privileges.deny(privilege.name()); - } - }) + privilege -> + DTOConverters.fromPrivilegeDTO( + (PrivilegeDTO) privilege)) .collect(Collectors.toList()))) .collect(Collectors.toList()); @@ -190,65 +203,4 @@ public Response deleteRole( return ExceptionHandlers.handleRoleException(OperationType.DELETE, role, metalake, e); } } - - // Check every securable object whether exists and is imported. - static void checkSecurableObject(String metalake, SecurableObjectDTO object) { - NameIdentifier identifier; - - // Securable object ignores the metalake namespace, so we should add it back. - if (object.type() == MetadataObject.Type.METALAKE) { - identifier = NameIdentifier.parse(object.fullName()); - } else { - identifier = NameIdentifier.parse(String.format("%s.%s", metalake, object.fullName())); - } - - String existErrMsg = "Securable object %s doesn't exist"; - - switch (object.type()) { - case METALAKE: - if (!GravitinoEnv.getInstance().metalakeDispatcher().metalakeExists(identifier)) { - throw new NoSuchMetadataObjectException(existErrMsg, object.fullName()); - } - - break; - - case CATALOG: - if (!GravitinoEnv.getInstance().catalogDispatcher().catalogExists(identifier)) { - throw new NoSuchMetadataObjectException(existErrMsg, object.fullName()); - } - - break; - - case SCHEMA: - if (!GravitinoEnv.getInstance().schemaDispatcher().schemaExists(identifier)) { - throw new NoSuchMetadataObjectException(existErrMsg, object.fullName()); - } - - break; - - case FILESET: - if (!GravitinoEnv.getInstance().filesetDispatcher().filesetExists(identifier)) { - throw new NoSuchMetadataObjectException(existErrMsg, object.fullName()); - } - - break; - case TABLE: - if (!GravitinoEnv.getInstance().tableDispatcher().tableExists(identifier)) { - throw new NoSuchMetadataObjectException(existErrMsg, object.fullName()); - } - - break; - - case TOPIC: - if (!GravitinoEnv.getInstance().topicDispatcher().topicExists(identifier)) { - throw new NoSuchMetadataObjectException(existErrMsg, object.fullName()); - } - - break; - - default: - throw new IllegalArgumentException( - String.format("Doesn't support the type %s", object.type())); - } - } } diff --git a/server/src/test/java/org/apache/gravitino/server/web/rest/TestRoleOperations.java b/server/src/test/java/org/apache/gravitino/server/web/rest/TestRoleOperations.java index a2f0c4847d6..5767464894a 100644 --- a/server/src/test/java/org/apache/gravitino/server/web/rest/TestRoleOperations.java +++ b/server/src/test/java/org/apache/gravitino/server/web/rest/TestRoleOperations.java @@ -24,6 +24,7 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; import static org.mockito.Mockito.when; import com.google.common.collect.Lists; @@ -39,6 +40,7 @@ import org.apache.gravitino.Config; import 
org.apache.gravitino.GravitinoEnv; import org.apache.gravitino.authorization.AccessControlManager; +import org.apache.gravitino.authorization.AuthorizationUtils; import org.apache.gravitino.authorization.Privileges; import org.apache.gravitino.authorization.Role; import org.apache.gravitino.authorization.SecurableObject; @@ -257,6 +259,58 @@ public void testCreateRole() { ErrorResponse errorResponse2 = resp3.readEntity(ErrorResponse.class); Assertions.assertEquals(ErrorConstants.INTERNAL_ERROR_CODE, errorResponse2.getCode()); Assertions.assertEquals(RuntimeException.class.getSimpleName(), errorResponse2.getType()); + + // Test with wrong binding privileges + SecurableObject wrongPrivilegeObject = + SecurableObjects.ofCatalog("wrong", Lists.newArrayList(Privileges.CreateCatalog.allow())); + RoleCreateRequest wrongPriRequest = + new RoleCreateRequest( + "role", + Collections.emptyMap(), + new SecurableObjectDTO[] {DTOConverters.toDTO(wrongPrivilegeObject)}); + + Response wrongPrivilegeResp = + target("/metalakes/metalake1/roles") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .post(Entity.entity(wrongPriRequest, MediaType.APPLICATION_JSON_TYPE)); + + Assertions.assertEquals( + Response.Status.BAD_REQUEST.getStatusCode(), wrongPrivilegeResp.getStatus()); + + ErrorResponse wrongPriErrorResp = wrongPrivilegeResp.readEntity(ErrorResponse.class); + Assertions.assertEquals(ErrorConstants.ILLEGAL_ARGUMENTS_CODE, wrongPriErrorResp.getCode()); + Assertions.assertEquals( + IllegalArgumentException.class.getSimpleName(), wrongPriErrorResp.getType()); + + // Test with empty securable objects request + RoleCreateRequest emptyObjectRequest = + new RoleCreateRequest("role", Collections.emptyMap(), new SecurableObjectDTO[] {}); + + Role emptyObjectRole = + RoleEntity.builder() + .withId(1L) + .withName("empty") + .withProperties(Collections.emptyMap()) + .withSecurableObjects(Collections.emptyList()) + .withAuditInfo( + AuditInfo.builder().withCreator("creator").withCreateTime(Instant.now()).build()) + .build(); + reset(manager); + when(manager.createRole(any(), any(), any(), any())).thenReturn(emptyObjectRole); + + Response emptyObjectResp = + target("/metalakes/metalake1/roles") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .post(Entity.entity(emptyObjectRequest, MediaType.APPLICATION_JSON_TYPE)); + Assertions.assertEquals(Response.Status.OK.getStatusCode(), emptyObjectResp.getStatus()); + Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, emptyObjectResp.getMediaType()); + + RoleResponse emptyObjectResponse = emptyObjectResp.readEntity(RoleResponse.class); + Assertions.assertEquals(0, emptyObjectResponse.getCode()); + Role emptyRoleDTO = emptyObjectResponse.getRole(); + Assertions.assertEquals(emptyRoleDTO.name(), "empty"); } @Test @@ -384,11 +438,11 @@ public void testCheckSecurableObjects() { SecurableObjects.ofCatalog("catalog", Lists.newArrayList(Privileges.UseCatalog.allow())); when(catalogDispatcher.catalogExists(any())).thenReturn(true); Assertions.assertDoesNotThrow( - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(catalog))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(catalog))); when(catalogDispatcher.catalogExists(any())).thenReturn(false); Assertions.assertThrows( NoSuchMetadataObjectException.class, - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(catalog))); + () -> 
AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(catalog))); // check the schema SecurableObject schema = @@ -396,11 +450,11 @@ public void testCheckSecurableObjects() { catalog, "schema", Lists.newArrayList(Privileges.UseSchema.allow())); when(schemaDispatcher.schemaExists(any())).thenReturn(true); Assertions.assertDoesNotThrow( - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(schema))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(schema))); when(schemaDispatcher.schemaExists(any())).thenReturn(false); Assertions.assertThrows( NoSuchMetadataObjectException.class, - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(schema))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(schema))); // check the table SecurableObject table = @@ -408,11 +462,11 @@ public void testCheckSecurableObjects() { schema, "table", Lists.newArrayList(Privileges.SelectTable.allow())); when(tableDispatcher.tableExists(any())).thenReturn(true); Assertions.assertDoesNotThrow( - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(table))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(table))); when(tableDispatcher.tableExists(any())).thenReturn(false); Assertions.assertThrows( NoSuchMetadataObjectException.class, - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(table))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(table))); // check the topic SecurableObject topic = @@ -420,11 +474,11 @@ public void testCheckSecurableObjects() { schema, "topic", Lists.newArrayList(Privileges.ConsumeTopic.allow())); when(topicDispatcher.topicExists(any())).thenReturn(true); Assertions.assertDoesNotThrow( - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(topic))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(topic))); when(topicDispatcher.topicExists(any())).thenReturn(false); Assertions.assertThrows( NoSuchMetadataObjectException.class, - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(topic))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(topic))); // check the fileset SecurableObject fileset = @@ -432,11 +486,11 @@ public void testCheckSecurableObjects() { schema, "fileset", Lists.newArrayList(Privileges.ReadFileset.allow())); when(filesetDispatcher.filesetExists(any())).thenReturn(true); Assertions.assertDoesNotThrow( - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(fileset))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(fileset))); when(filesetDispatcher.filesetExists(any())).thenReturn(false); Assertions.assertThrows( NoSuchMetadataObjectException.class, - () -> RoleOperations.checkSecurableObject("metalake", DTOConverters.toDTO(fileset))); + () -> AuthorizationUtils.checkSecurableObject("metalake", DTOConverters.toDTO(fileset))); } @Test From 13d1684dd5c5a38b1f57a34851f24024718e3a64 Mon Sep 17 00:00:00 2001 From: theoryxu Date: Tue, 8 Oct 2024 16:39:13 +0800 Subject: [PATCH 14/15] [#4370]feat(iceberg): support view interface for Iceberg REST server (#4937) ### What changes were proposed in this pull request? support view interface for Iceberg REST server ### Why are the changes needed? Fix: #4370 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? 1. add UT 2. 
manual test

---------

Co-authored-by: theoryxu
---
 docs/iceberg-rest-service.md                  |  10 +-
 .../common/ops/IcebergCatalogWrapper.java     |  41 +++
 .../service/IcebergExceptionMapper.java       |   2 +
 .../service/rest/IcebergViewOperations.java   | 149 +++++++++
 .../rest/IcebergViewRenameOperations.java     |  62 ++++
 .../test/IcebergRESTJdbcCatalogIT.java        |   2 +
 .../test/IcebergRESTServiceBaseIT.java        |   8 +
 .../test/IcebergRESTServiceIT.java            | 139 ++++++++
 .../service/rest/IcebergRestTestUtil.java     |   4 +
 .../iceberg/service/rest/IcebergTestBase.java |  14 +
 .../rest/TestIcebergViewOperations.java       | 311 ++++++++++++++++++
 11 files changed, 741 insertions(+), 1 deletion(-)
 create mode 100644 iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/rest/IcebergViewOperations.java
 create mode 100644 iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/rest/IcebergViewRenameOperations.java
 create mode 100644 iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/TestIcebergViewOperations.java

diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md
index 0e6fecd19ad..393845d64f7 100644
--- a/docs/iceberg-rest-service.md
+++ b/docs/iceberg-rest-service.md
@@ -14,7 +14,6 @@ The Apache Gravitino Iceberg REST Server follows the [Apache Iceberg REST API sp
 - Supports the Apache Iceberg REST API defined in Iceberg 1.5, and supports all namespace and table interfaces. The following interfaces are not implemented yet:
   - token
-  - view
   - multi table transaction
   - pagination
 - Works as a catalog proxy, supporting `Hive` and `JDBC` as catalog backend.
@@ -214,6 +213,15 @@ You must download the corresponding JDBC driver to the `iceberg-rest-server/libs
 If you want to use a custom Iceberg Catalog as `catalog-backend`, you can add a corresponding jar file to the classpath and load a custom Iceberg Catalog implementation by specifying the `catalog-backend-impl` property.
 
+#### View support
+
+You can access the view interface if you use the JDBC catalog backend and set the `jdbc.schema-version` property to `V1`.
+
+| Configuration item                           | Description                                                                                | Default value | Required | Since Version |
+|----------------------------------------------|--------------------------------------------------------------------------------------------|---------------|----------|---------------|
+| `gravitino.iceberg-rest.jdbc.schema-version` | The schema version of the JDBC catalog backend. Set it to `V1` to enable view operations.  | (none)        | NO       | 0.7.0         |
+
+
 #### Multi catalog support
 
 The Gravitino Iceberg REST server supports multiple catalogs and offers a configuration-based catalog management system.
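As an illustration of the view support configuration above, a minimal JDBC-backed setup might look like the following sketch. Only `jdbc.schema-version` is introduced by this patch; the other property names are the standard Iceberg REST service settings, and the JDBC URL and credentials are placeholders:

```text
gravitino.iceberg-rest.catalog-backend = jdbc
gravitino.iceberg-rest.uri = jdbc:postgresql://127.0.0.1:5432/iceberg_catalog
gravitino.iceberg-rest.jdbc-user = iceberg
gravitino.iceberg-rest.jdbc-password = iceberg
gravitino.iceberg-rest.jdbc.schema-version = V1
```

With the server running, the new endpoints can be probed directly, e.g. `curl http://127.0.0.1:9001/iceberg/v1/namespaces/<namespace>/views` to list views (assuming the default host, port, and `/iceberg/` prefix).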
diff --git a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/ops/IcebergCatalogWrapper.java b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/ops/IcebergCatalogWrapper.java
index 0c7c2914b76..6ff4bf2ce03 100644
--- a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/ops/IcebergCatalogWrapper.java
+++ b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/ops/IcebergCatalogWrapper.java
@@ -43,9 +43,11 @@ import org.apache.iceberg.catalog.Namespace;
 import org.apache.iceberg.catalog.SupportsNamespaces;
 import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.catalog.ViewCatalog;
 import org.apache.iceberg.rest.CatalogHandlers;
 import org.apache.iceberg.rest.requests.CreateNamespaceRequest;
 import org.apache.iceberg.rest.requests.CreateTableRequest;
+import org.apache.iceberg.rest.requests.CreateViewRequest;
 import org.apache.iceberg.rest.requests.RegisterTableRequest;
 import org.apache.iceberg.rest.requests.RenameTableRequest;
 import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest;
@@ -55,6 +57,7 @@ import org.apache.iceberg.rest.responses.ListNamespacesResponse;
 import org.apache.iceberg.rest.responses.ListTablesResponse;
 import org.apache.iceberg.rest.responses.LoadTableResponse;
+import org.apache.iceberg.rest.responses.LoadViewResponse;
 import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -116,6 +119,13 @@ private void validateNamespace(Optional namespace) {
     }
   }
 
+  private ViewCatalog getViewCatalog() {
+    if (!(catalog instanceof ViewCatalog)) {
+      throw new UnsupportedOperationException(catalog.name() + " does not support views");
+    }
+    return (ViewCatalog) catalog;
+  }
+
   public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) {
     validateNamespace(Optional.of(request.namespace()));
     return CatalogHandlers.createNamespace(asNamespaceCatalog, request);
@@ -203,6 +213,37 @@ public LoadTableResponse updateTable(IcebergTableChange icebergTableChange) {
     return loadTable(icebergTableChange.getTableIdentifier());
   }
 
+  public LoadViewResponse createView(Namespace namespace, CreateViewRequest request) {
+    request.validate();
+    return CatalogHandlers.createView(getViewCatalog(), namespace, request);
+  }
+
+  public LoadViewResponse updateView(TableIdentifier viewIdentifier, UpdateTableRequest request) {
+    request.validate();
+    return CatalogHandlers.updateView(getViewCatalog(), viewIdentifier, request);
+  }
+
+  public LoadViewResponse loadView(TableIdentifier viewIdentifier) {
+    return CatalogHandlers.loadView(getViewCatalog(), viewIdentifier);
+  }
+
+  public void dropView(TableIdentifier viewIdentifier) {
+    CatalogHandlers.dropView(getViewCatalog(), viewIdentifier);
+  }
+
+  public void renameView(RenameTableRequest request) {
+    request.validate();
+    CatalogHandlers.renameView(getViewCatalog(), request);
+  }
+
+  public boolean existView(TableIdentifier viewIdentifier) {
+    return getViewCatalog().viewExists(viewIdentifier);
+  }
+
+  public ListTablesResponse listView(Namespace namespace) {
+    return CatalogHandlers.listViews(getViewCatalog(), namespace);
+  }
+
   @Override
   public void close() throws Exception {
     if (catalog instanceof AutoCloseable) {
diff --git a/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/IcebergExceptionMapper.java b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/IcebergExceptionMapper.java
index 95c7bf91ab9..f880f7f7a9f 100644 --- a/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/IcebergExceptionMapper.java +++ b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/IcebergExceptionMapper.java @@ -32,6 +32,7 @@ import org.apache.iceberg.exceptions.NoSuchIcebergTableException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.exceptions.NoSuchViewException; import org.apache.iceberg.exceptions.NotAuthorizedException; import org.apache.iceberg.exceptions.ServiceUnavailableException; import org.apache.iceberg.exceptions.UnprocessableEntityException; @@ -57,6 +58,7 @@ public class IcebergExceptionMapper implements ExceptionMapper { .put(NoSuchTableException.class, 404) .put(NoSuchIcebergTableException.class, 404) .put(UnsupportedOperationException.class, 406) + .put(NoSuchViewException.class, 404) .put(AlreadyExistsException.class, 409) .put(CommitFailedException.class, 409) .put(UnprocessableEntityException.class, 422) diff --git a/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/rest/IcebergViewOperations.java b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/rest/IcebergViewOperations.java new file mode 100644 index 00000000000..3e46257e22b --- /dev/null +++ b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/rest/IcebergViewOperations.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.iceberg.service.rest; + +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; +import javax.inject.Inject; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.Consumes; +import javax.ws.rs.DELETE; +import javax.ws.rs.GET; +import javax.ws.rs.HEAD; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.gravitino.iceberg.service.IcebergCatalogWrapperManager; +import org.apache.gravitino.iceberg.service.IcebergRestUtils; +import org.apache.gravitino.metrics.MetricNames; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.rest.RESTUtil; +import org.apache.iceberg.rest.requests.CreateViewRequest; +import org.apache.iceberg.rest.requests.UpdateTableRequest; +import org.apache.iceberg.rest.responses.ListTablesResponse; +import org.apache.iceberg.rest.responses.LoadViewResponse; + +@Path("/v1/{prefix:([^/]*/)?}namespaces/{namespace}/views") +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public class IcebergViewOperations { + + private IcebergCatalogWrapperManager icebergCatalogWrapperManager; + + @SuppressWarnings("UnusedVariable") + @Context + private HttpServletRequest httpRequest; + + @Inject + public IcebergViewOperations(IcebergCatalogWrapperManager icebergCatalogWrapperManager) { + this.icebergCatalogWrapperManager = icebergCatalogWrapperManager; + } + + @GET + @Produces(MediaType.APPLICATION_JSON) + @Timed(name = "list-view." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "list-view", absolute = true) + public Response listView( + @PathParam("prefix") String prefix, @PathParam("namespace") String namespace) { + ListTablesResponse response = + icebergCatalogWrapperManager.getOps(prefix).listView(RESTUtil.decodeNamespace(namespace)); + return IcebergRestUtils.ok(response); + } + + @POST + @Produces(MediaType.APPLICATION_JSON) + @Timed(name = "create-view." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "create-view", absolute = true) + public Response createView( + @PathParam("prefix") String prefix, + @PathParam("namespace") String namespace, + CreateViewRequest request) { + LoadViewResponse response = + icebergCatalogWrapperManager + .getOps(prefix) + .createView(RESTUtil.decodeNamespace(namespace), request); + return IcebergRestUtils.ok(response); + } + + @GET + @Path("{view}") + @Produces(MediaType.APPLICATION_JSON) + @Timed(name = "load-view." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "load-view", absolute = true) + public Response loadView( + @PathParam("prefix") String prefix, + @PathParam("namespace") String namespace, + @PathParam("view") String view) { + TableIdentifier viewIdentifier = TableIdentifier.of(RESTUtil.decodeNamespace(namespace), view); + LoadViewResponse response = + icebergCatalogWrapperManager.getOps(prefix).loadView(viewIdentifier); + return IcebergRestUtils.ok(response); + } + + @POST + @Path("{view}") + @Produces(MediaType.APPLICATION_JSON) + @Timed(name = "replace-view." 
+ MetricNames.HTTP_PROCESS_DURATION, absolute = true)
+  @ResponseMetered(name = "replace-view", absolute = true)
+  public Response replaceView(
+      @PathParam("prefix") String prefix,
+      @PathParam("namespace") String namespace,
+      @PathParam("view") String view,
+      UpdateTableRequest request) {
+    TableIdentifier viewIdentifier = TableIdentifier.of(RESTUtil.decodeNamespace(namespace), view);
+    LoadViewResponse response =
+        icebergCatalogWrapperManager.getOps(prefix).updateView(viewIdentifier, request);
+    return IcebergRestUtils.ok(response);
+  }
+
+  @DELETE
+  @Path("{view}")
+  @Produces(MediaType.APPLICATION_JSON)
+  @Timed(name = "drop-view." + MetricNames.HTTP_PROCESS_DURATION, absolute = true)
+  @ResponseMetered(name = "drop-view", absolute = true)
+  public Response dropView(
+      @PathParam("prefix") String prefix,
+      @PathParam("namespace") String namespace,
+      @PathParam("view") String view) {
+    TableIdentifier viewIdentifier = TableIdentifier.of(RESTUtil.decodeNamespace(namespace), view);
+    icebergCatalogWrapperManager.getOps(prefix).dropView(viewIdentifier);
+    return IcebergRestUtils.noContent();
+  }
+
+  @HEAD
+  @Path("{view}")
+  @Produces(MediaType.APPLICATION_JSON)
+  @Timed(name = "view-exists." + MetricNames.HTTP_PROCESS_DURATION, absolute = true)
+  @ResponseMetered(name = "view-exists", absolute = true)
+  public Response viewExists(
+      @PathParam("prefix") String prefix,
+      @PathParam("namespace") String namespace,
+      @PathParam("view") String view) {
+    TableIdentifier tableIdentifier = TableIdentifier.of(RESTUtil.decodeNamespace(namespace), view);
+    if (icebergCatalogWrapperManager.getOps(prefix).existView(tableIdentifier)) {
+      return IcebergRestUtils.noContent();
+    } else {
+      return IcebergRestUtils.notExists();
+    }
+  }
+}
diff --git a/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/rest/IcebergViewRenameOperations.java b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/rest/IcebergViewRenameOperations.java
new file mode 100644
index 00000000000..128689d33be
--- /dev/null
+++ b/iceberg/iceberg-rest-server/src/main/java/org/apache/gravitino/iceberg/service/rest/IcebergViewRenameOperations.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.gravitino.iceberg.service.rest; + +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; +import javax.inject.Inject; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.Consumes; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.gravitino.iceberg.service.IcebergCatalogWrapperManager; +import org.apache.gravitino.iceberg.service.IcebergRestUtils; +import org.apache.gravitino.metrics.MetricNames; +import org.apache.iceberg.rest.requests.RenameTableRequest; + +@Path("/v1/{prefix:([^/]*/)?}views/rename") +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public class IcebergViewRenameOperations { + + @SuppressWarnings("UnusedVariable") + @Context + private HttpServletRequest httpRequest; + + private IcebergCatalogWrapperManager icebergCatalogWrapperManager; + + @Inject + public IcebergViewRenameOperations(IcebergCatalogWrapperManager icebergCatalogWrapperManager) { + this.icebergCatalogWrapperManager = icebergCatalogWrapperManager; + } + + @POST + @Produces(MediaType.APPLICATION_JSON) + @Timed(name = "rename-view." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "rename-view", absolute = true) + public Response renameView(@PathParam("prefix") String prefix, RenameTableRequest request) { + icebergCatalogWrapperManager.getOps(prefix).renameView(request); + return IcebergRestUtils.noContent(); + } +} diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTJdbcCatalogIT.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTJdbcCatalogIT.java index 1dc758a15c3..d53f8022091 100644 --- a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTJdbcCatalogIT.java +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTJdbcCatalogIT.java @@ -68,6 +68,8 @@ public Map getCatalogConfig() { configMap.put( IcebergConfig.ICEBERG_CONFIG_PREFIX + IcebergConfig.JDBC_INIT_TABLES.getKey(), "true"); + configMap.put(IcebergConfig.ICEBERG_CONFIG_PREFIX + "jdbc.schema-version", "V1"); + configMap.put( IcebergConfig.ICEBERG_CONFIG_PREFIX + IcebergConfig.CATALOG_WAREHOUSE.getKey(), GravitinoITUtils.genRandomName( diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTServiceBaseIT.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTServiceBaseIT.java index e562e2783e4..0ba781cabd8 100644 --- a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTServiceBaseIT.java +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTServiceBaseIT.java @@ -76,6 +76,10 @@ boolean catalogTypeNotMemory() { return !catalogType.equals(IcebergCatalogBackend.MEMORY); } + boolean isSupportsViewCatalog() { + return !catalogType.equals(IcebergCatalogBackend.HIVE); + } + abstract void initEnv(); abstract Map getCatalogConfig(); @@ -175,6 +179,10 @@ protected Map getTableInfo(String tableName) { return convertToStringMap(sql("desc table extended " + tableName)); } + protected Map getViewInfo(String viewName) { + 
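+    // Views are described with plain "desc extended"; tables use "desc table extended"
+    // (see getTableInfo above).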
return convertToStringMap(sql("desc extended " + viewName)); + } + protected List getTableColumns(String tableName) { List objects = sql("desc table extended " + tableName); List columns = new ArrayList<>(); diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTServiceIT.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTServiceIT.java index eb196b3a444..9b4900f4d75 100644 --- a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTServiceIT.java +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/IcebergRESTServiceIT.java @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; @@ -557,4 +558,142 @@ void testRegisterTable() { result = convertToStringMap(sql("SELECT * FROM iceberg_rest_table_test.register_foo2")); Assertions.assertEquals(ImmutableMap.of("1", "a", "2", "b"), result); } + + @Test + @EnabledIf("isSupportsViewCatalog") + void testCreateViewAndDisplayView() { + String originTableName = "iceberg_rest_table_test.create_table_for_view_1"; + String viewName = "iceberg_rest_table_test.test_create_view"; + + sql( + String.format( + "CREATE TABLE %s ( id bigint, data string, ts timestamp) USING iceberg", + originTableName)); + sql(String.format("CREATE VIEW %s AS SELECT * FROM %s", viewName, originTableName)); + + Map viewInfo = getViewInfo(viewName); + Map m = + ImmutableMap.of( + "id", "bigint", + "data", "string", + "ts", "timestamp"); + + checkMapContains(m, viewInfo); + } + + @Test + @EnabledIf("isSupportsViewCatalog") + void testViewProperties() { + String originTableName = "iceberg_rest_table_test.create_table_for_view_2"; + String viewName = "iceberg_rest_table_test.test_create_view_with_properties"; + sql( + String.format( + "CREATE TABLE %s ( id bigint, data string, ts timestamp) USING iceberg", + originTableName)); + + // test create view with properties + sql( + String.format( + "CREATE VIEW %s TBLPROPERTIES ('key1' = 'val1') AS SELECT * FROM %s", + viewName, originTableName)); + + Map viewInfo = getViewInfo(viewName); + Assertions.assertTrue(viewInfo.getOrDefault("View Properties", "").contains("'key1' = 'val1'")); + Assertions.assertFalse( + viewInfo.getOrDefault("View Properties", "").contains("'key2' = 'val2'")); + + // test set properties + sql( + String.format( + "ALTER VIEW %s SET TBLPROPERTIES ('key1' = 'val1', 'key2' = 'val2')", viewName)); + + viewInfo = getViewInfo(viewName); + Assertions.assertTrue(viewInfo.getOrDefault("View Properties", "").contains("'key1' = 'val1'")); + Assertions.assertTrue(viewInfo.getOrDefault("View Properties", "").contains("'key2' = 'val2'")); + + // test unset properties + sql(String.format("ALTER VIEW %s UNSET TBLPROPERTIES ('key1', 'key2')", viewName)); + + viewInfo = getViewInfo(viewName); + Assertions.assertFalse( + viewInfo.getOrDefault("View Properties", "").contains("'key1' = 'val1'")); + Assertions.assertFalse( + viewInfo.getOrDefault("View Properties", "").contains("'key2' = 'val2'")); + } + + @Test + 
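+  // Guarded by isSupportsViewCatalog: the Hive catalog backend does not support views here.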
@EnabledIf("isSupportsViewCatalog") + void testDropView() { + String originTableName = "iceberg_rest_table_test.create_table_for_view_3"; + String viewName = "iceberg_rest_table_test.test_drop_view"; + + sql( + String.format( + "CREATE TABLE %s ( id bigint, data string, ts timestamp) USING iceberg", + originTableName)); + sql(String.format("CREATE VIEW %s AS SELECT * FROM %s", viewName, originTableName)); + sql(String.format("DROP VIEW %s", viewName)); + + Assertions.assertThrowsExactly(AnalysisException.class, () -> getViewInfo(viewName)); + Assertions.assertThrowsExactly( + NoSuchViewException.class, () -> sql(String.format("DROP VIEW %s", viewName))); + } + + @Test + @EnabledIf("isSupportsViewCatalog") + void testReplaceView() { + String originTableName = "iceberg_rest_table_test.create_table_for_view_4"; + String viewName = "iceberg_rest_table_test.test_replace_view"; + + sql( + String.format( + "CREATE TABLE %s (id bigint, data string, ts timestamp) USING iceberg", + originTableName)); + sql(String.format("CREATE VIEW %s AS SELECT * FROM %s", viewName, originTableName)); + sql( + String.format( + "CREATE OR REPLACE VIEW %s (updated_id COMMENT 'updated ID') TBLPROPERTIES ('key1' = 'new_val1') AS SELECT id FROM %s", + viewName, originTableName)); + + Map viewInfo = getViewInfo(viewName); + Assertions.assertTrue( + viewInfo.getOrDefault("View Properties", "").contains("'key1' = 'new_val1'")); + Assertions.assertTrue(viewInfo.containsKey("updated_id")); + } + + @Test + @EnabledIf("isSupportsViewCatalog") + void testShowAvailableViews() { + String originTableName = "iceberg_rest_table_test.create_table_for_view_5"; + String viewName1 = "iceberg_rest_table_test.show_available_views_1"; + String viewName2 = "iceberg_rest_table_test.show_available_views_2"; + + sql( + String.format( + "CREATE TABLE %s (id bigint, data string, ts timestamp) USING iceberg", + originTableName)); + sql(String.format("CREATE VIEW %s AS SELECT * FROM %s", viewName1, originTableName)); + sql(String.format("CREATE VIEW %s AS SELECT * FROM %s", viewName2, originTableName)); + + List views = sql("SHOW VIEWS IN iceberg_rest_table_test"); + Assertions.assertEquals(2, views.size()); + } + + @Test + @EnabledIf("isSupportsViewCatalog") + void testShowCreateStatementView() { + String originTableName = "iceberg_rest_table_test.create_table_for_view_6"; + String viewName = "iceberg_rest_table_test.show_create_statement_view"; + + sql( + String.format( + "CREATE TABLE %s (id bigint, data string, ts timestamp) USING iceberg", + originTableName)); + sql(String.format("CREATE VIEW %s AS SELECT * FROM %s", viewName, originTableName)); + + List result = sql(String.format("SHOW CREATE TABLE %s", viewName)); + Assertions.assertEquals(1, result.size()); + Assertions.assertTrue( + Arrays.stream(result.get(0)).findFirst().orElse("").toString().contains(viewName)); + } } diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/IcebergRestTestUtil.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/IcebergRestTestUtil.java index 8bccdab7c56..4fc645132e1 100644 --- a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/IcebergRestTestUtil.java +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/IcebergRestTestUtil.java @@ -44,7 +44,11 @@ public class IcebergRestTestUtil { public static final String UPDATE_NAMESPACE_POSTFIX = "properties"; public static final String TEST_NAMESPACE_NAME = 
"gravitino-test"; public static final String TABLE_PATH = NAMESPACE_PATH + "/" + TEST_NAMESPACE_NAME + "/tables"; + + public static final String VIEW_PATH = NAMESPACE_PATH + "/" + TEST_NAMESPACE_NAME + "/views"; public static final String RENAME_TABLE_PATH = V_1 + "/tables/rename"; + + public static final String RENAME_VIEW_PATH = V_1 + "/views/rename"; public static final String REPORT_METRICS_POSTFIX = "metrics"; public static final boolean DEBUG_SERVER_LOG_ENABLED = true; diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/IcebergTestBase.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/IcebergTestBase.java index 7d1d80b54e3..03d9a49eb28 100644 --- a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/IcebergTestBase.java +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/IcebergTestBase.java @@ -45,16 +45,30 @@ public Invocation.Builder getRenameTableClientBuilder() { return getIcebergClientBuilder(IcebergRestTestUtil.RENAME_TABLE_PATH, Optional.empty()); } + public Invocation.Builder getRenameViewClientBuilder() { + return getIcebergClientBuilder(IcebergRestTestUtil.RENAME_VIEW_PATH, Optional.empty()); + } + public Invocation.Builder getTableClientBuilder() { return getTableClientBuilder(Optional.empty()); } + public Invocation.Builder getViewClientBuilder() { + return getViewClientBuilder(Optional.empty()); + } + public Invocation.Builder getTableClientBuilder(Optional name) { String path = Joiner.on("/").skipNulls().join(IcebergRestTestUtil.TABLE_PATH, name.orElseGet(() -> null)); return getIcebergClientBuilder(path, Optional.empty()); } + public Invocation.Builder getViewClientBuilder(Optional name) { + String path = + Joiner.on("/").skipNulls().join(IcebergRestTestUtil.VIEW_PATH, name.orElseGet(() -> null)); + return getIcebergClientBuilder(path, Optional.empty()); + } + public Invocation.Builder getReportMetricsClientBuilder(String name) { String path = Joiner.on("/") diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/TestIcebergViewOperations.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/TestIcebergViewOperations.java new file mode 100644 index 00000000000..9ec2dc66f46 --- /dev/null +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/service/rest/TestIcebergViewOperations.java @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.gravitino.iceberg.service.rest;
+
+import com.google.common.collect.ImmutableSet;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import javax.ws.rs.client.Entity;
+import javax.ws.rs.core.Application;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.UpdateRequirements;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.rest.requests.CreateViewRequest;
+import org.apache.iceberg.rest.requests.ImmutableCreateViewRequest;
+import org.apache.iceberg.rest.requests.RenameTableRequest;
+import org.apache.iceberg.rest.requests.UpdateTableRequest;
+import org.apache.iceberg.rest.responses.ListTablesResponse;
+import org.apache.iceberg.rest.responses.LoadViewResponse;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.view.ImmutableSQLViewRepresentation;
+import org.apache.iceberg.view.ImmutableViewVersion;
+import org.apache.iceberg.view.ViewMetadata;
+import org.glassfish.jersey.server.ResourceConfig;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+public class TestIcebergViewOperations extends TestIcebergNamespaceOperations {
+  private static final Schema viewSchema =
+      new Schema(Types.NestedField.of(1, false, "foo_string", Types.StringType.get()));
+
+  private static final Schema newViewSchema =
+      new Schema(Types.NestedField.of(2, false, "foo_string1", Types.StringType.get()));
+
+  private static final String VIEW_QUERY = "select 1";
+
+  @Override
+  protected Application configure() {
+    ResourceConfig resourceConfig =
+        IcebergRestTestUtil.getIcebergResourceConfig(IcebergViewOperations.class);
+    // create namespace before each view test
+    resourceConfig.register(IcebergNamespaceOperations.class);
+    resourceConfig.register(IcebergViewRenameOperations.class);
+
+    return resourceConfig;
+  }
+
+  @ParameterizedTest
+  @ValueSource(strings = {"", IcebergRestTestUtil.PREFIX})
+  void testListViews(String prefix) {
+    setUrlPathWithPrefix(prefix);
+    verifyListViewFail(404);
+
+    verifyCreateNamespaceSucc(IcebergRestTestUtil.TEST_NAMESPACE_NAME);
+    verifyCreateViewSucc("list_foo1");
+    verifyCreateViewSucc("list_foo2");
+    verifyListViewSucc(ImmutableSet.of("list_foo1", "list_foo2"));
+  }
+
+  @Test
+  void testCreateView() {
+    verifyCreateViewFail("create_foo1", 404);
+
+    verifyCreateNamespaceSucc(IcebergRestTestUtil.TEST_NAMESPACE_NAME);
+
+    verifyCreateViewSucc("create_foo1");
+
+    verifyCreateViewFail("create_foo1", 409);
+    verifyCreateViewFail("", 400);
+  }
+
+  @Test
+  void testLoadView() {
+    verifyLoadViewFail("load_foo1", 404);
+
+    verifyCreateNamespaceSucc(IcebergRestTestUtil.TEST_NAMESPACE_NAME);
+    verifyCreateViewSucc("load_foo1");
+    verifyLoadViewSucc("load_foo1");
+
+    verifyLoadViewFail("load_foo2", 404);
+  }
+
+  @Test
+  void testReplaceView() {
+    verifyCreateNamespaceSucc(IcebergRestTestUtil.TEST_NAMESPACE_NAME);
+    verifyCreateViewSucc("replace_foo1");
+    ViewMetadata metadata = getViewMeta("replace_foo1");
+    verifyReplaceSucc("replace_foo1", metadata);
+
+    verifyDropViewSucc("replace_foo1");
+    verifyUpdateViewFail("replace_foo1", 404, metadata);
+
+    verifyDropNamespaceSucc(IcebergRestTestUtil.TEST_NAMESPACE_NAME);
+    verifyUpdateViewFail("replace_foo1", 404, metadata);
+  }
+
+  @Test
+  void testDropView() {
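+    // Dropping a view must return 404 both before the namespace exists and before the view exists.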
verifyDropViewFail("drop_foo1", 404);
+    verifyCreateNamespaceSucc(IcebergRestTestUtil.TEST_NAMESPACE_NAME);
+    verifyDropViewFail("drop_foo1", 404);
+
+    verifyCreateViewSucc("drop_foo1");
+    verifyDropViewSucc("drop_foo1");
+    verifyLoadViewFail("drop_foo1", 404);
+  }
+
+  @Test
+  void testViewExists() {
+    verifyViewExistsStatusCode("exists_foo2", 404);
+    verifyCreateNamespaceSucc(IcebergRestTestUtil.TEST_NAMESPACE_NAME);
+    verifyViewExistsStatusCode("exists_foo2", 404);
+
+    verifyCreateViewSucc("exists_foo1");
+    verifyViewExistsStatusCode("exists_foo1", 204);
+    verifyLoadViewSucc("exists_foo1");
+  }
+
+  @ParameterizedTest
+  @ValueSource(strings = {"", IcebergRestTestUtil.PREFIX})
+  void testRenameView(String prefix) {
+    setUrlPathWithPrefix(prefix);
+    // namespace does not exist yet
+    verifyRenameViewFail("rename_foo1", "rename_foo3", 404);
+
+    verifyCreateNamespaceSucc(IcebergRestTestUtil.TEST_NAMESPACE_NAME);
+    verifyCreateViewSucc("rename_foo1");
+    // rename
+    verifyRenameViewSucc("rename_foo1", "rename_foo2");
+    verifyLoadViewFail("rename_foo1", 404);
+    verifyLoadViewSucc("rename_foo2");
+
+    // source view does not exist
+    verifyRenameViewFail("rename_foo1", "rename_foo3", 404);
+
+    // destination view already exists
+    verifyCreateViewSucc("rename_foo3");
+    verifyRenameViewFail("rename_foo2", "rename_foo3", 409);
+  }
+
+  private Response doCreateView(String name) {
+    CreateViewRequest createViewRequest =
+        ImmutableCreateViewRequest.builder()
+            .name(name)
+            .schema(viewSchema)
+            .viewVersion(
+                ImmutableViewVersion.builder()
+                    .versionId(1)
+                    .timestampMillis(System.currentTimeMillis())
+                    .schemaId(1)
+                    .defaultNamespace(Namespace.of(IcebergRestTestUtil.TEST_NAMESPACE_NAME))
+                    .addRepresentations(
+                        ImmutableSQLViewRepresentation.builder()
+                            .sql(VIEW_QUERY)
+                            .dialect("spark")
+                            .build())
+                    .build())
+            .build();
+    return getViewClientBuilder()
+        .post(Entity.entity(createViewRequest, MediaType.APPLICATION_JSON_TYPE));
+  }
+
+  private Response doLoadView(String name) {
+    return getViewClientBuilder(Optional.of(name)).get();
+  }
+
+  private void verifyLoadViewSucc(String name) {
+    Response response = doLoadView(name);
+    Assertions.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
+
+    LoadViewResponse loadViewResponse = response.readEntity(LoadViewResponse.class);
+    Assertions.assertEquals(viewSchema.columns(), loadViewResponse.metadata().schema().columns());
+  }
+
+  private void verifyCreateViewFail(String name, int status) {
+    Response response = doCreateView(name);
+    Assertions.assertEquals(status, response.getStatus());
+  }
+
+  private void verifyCreateViewSucc(String name) {
+    Response response = doCreateView(name);
+    Assertions.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
+    LoadViewResponse loadViewResponse = response.readEntity(LoadViewResponse.class);
+    Schema schema = loadViewResponse.metadata().schema();
+    Assertions.assertEquals(schema.columns(), viewSchema.columns());
+  }
+
+  private void verifyLoadViewFail(String name, int status) {
+    Response response = doLoadView(name);
+    Assertions.assertEquals(status, response.getStatus());
+  }
+
+  private void verifyReplaceSucc(String name, ViewMetadata base) {
+    Response response = doReplaceView(name, base);
+    Assertions.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
+    LoadViewResponse loadViewResponse = response.readEntity(LoadViewResponse.class);
+    Assertions.assertEquals(
+        newViewSchema.columns(), loadViewResponse.metadata().schema().columns());
+  }
+
+  private Response 
doReplaceView(String name, ViewMetadata base) { + ViewMetadata.Builder builder = + ViewMetadata.buildFrom(base).setCurrentVersion(base.currentVersion(), newViewSchema); + ViewMetadata replacement = builder.build(); + UpdateTableRequest updateTableRequest = + UpdateTableRequest.create( + null, + UpdateRequirements.forReplaceView(base, replacement.changes()), + replacement.changes()); + return getViewClientBuilder(Optional.of(name)) + .post(Entity.entity(updateTableRequest, MediaType.APPLICATION_JSON_TYPE)); + } + + private ViewMetadata getViewMeta(String viewName) { + Response response = doLoadView(viewName); + LoadViewResponse loadViewResponse = response.readEntity(LoadViewResponse.class); + return loadViewResponse.metadata(); + } + + private void verifyUpdateViewFail(String name, int status, ViewMetadata base) { + Response response = doReplaceView(name, base); + Assertions.assertEquals(status, response.getStatus()); + } + + private void verifyDropViewSucc(String name) { + Response response = doDropView(name); + Assertions.assertEquals(Response.Status.NO_CONTENT.getStatusCode(), response.getStatus()); + } + + private Response doDropView(String name) { + return getViewClientBuilder(Optional.of(name)).delete(); + } + + private void verifyDropViewFail(String name, int status) { + Response response = doDropView(name); + Assertions.assertEquals(status, response.getStatus()); + } + + private void verifyViewExistsStatusCode(String name, int status) { + Response response = doViewExists(name); + Assertions.assertEquals(status, response.getStatus()); + } + + private Response doViewExists(String name) { + return getViewClientBuilder(Optional.of(name)).head(); + } + + private void verifyListViewFail(int status) { + Response response = doListView(); + Assertions.assertEquals(status, response.getStatus()); + } + + private Response doListView() { + return getViewClientBuilder().get(); + } + + private void verifyLisViewSucc(Set expectedTableNames) { + Response response = doListView(); + Assertions.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus()); + ListTablesResponse listTablesResponse = response.readEntity(ListTablesResponse.class); + Set tableNames = + listTablesResponse.identifiers().stream() + .map(identifier -> identifier.name()) + .collect(Collectors.toSet()); + Assertions.assertEquals(expectedTableNames, tableNames); + } + + private void verifyRenameViewFail(String source, String dest, int status) { + Response response = doRenameView(source, dest); + Assertions.assertEquals(status, response.getStatus()); + } + + private Response doRenameView(String source, String dest) { + RenameTableRequest renameTableRequest = + RenameTableRequest.builder() + .withSource( + TableIdentifier.of(Namespace.of(IcebergRestTestUtil.TEST_NAMESPACE_NAME), source)) + .withDestination( + TableIdentifier.of(Namespace.of(IcebergRestTestUtil.TEST_NAMESPACE_NAME), dest)) + .build(); + return getRenameViewClientBuilder() + .post(Entity.entity(renameTableRequest, MediaType.APPLICATION_JSON_TYPE)); + } + + private void verifyRenameViewSucc(String source, String dest) { + Response response = doRenameView(source, dest); + Assertions.assertEquals(Response.Status.NO_CONTENT.getStatusCode(), response.getStatus()); + } +} From dd1a9300747b3474d54623a6d23a9490a5f10ddf Mon Sep 17 00:00:00 2001 From: xloya <982052490@qq.com> Date: Tue, 8 Oct 2024 19:00:09 +0800 Subject: [PATCH 15/15] [#4280] improvement(PyGVFS): Refactor the `getFileLocation` logic in the Python GVFS (#5026) ### What changes were proposed in this pull 
request?

Refactor the logic of getting the file location in Python GVFS.

### Why are the changes needed?

Fix: #4280

### How was this patch tested?

Refactored the UTs; the preserved ITs still work well.
---
 .../gravitino/audit/fileset_data_operation.py |    8 +
 .../gravitino/filesystem/gvfs.py              |  631 ++++----
 .../tests/integration/test_gvfs_with_hdfs.py  |    4 +
 .../tests/unittests/test_gvfs_with_local.py   | 1356 ++++++++---------
 .../gravitino/audit/FilesetDataOperation.java |    4 +
 5 files changed, 991 insertions(+), 1012 deletions(-)

diff --git a/clients/client-python/gravitino/audit/fileset_data_operation.py b/clients/client-python/gravitino/audit/fileset_data_operation.py
index 5f7a5794b70..0428d7111a2 100644
--- a/clients/client-python/gravitino/audit/fileset_data_operation.py
+++ b/clients/client-python/gravitino/audit/fileset_data_operation.py
@@ -28,6 +28,14 @@ class FilesetDataOperation(Enum):
     """Opens a file.
     """
 
+    OPEN_AND_WRITE = "OPEN_AND_WRITE"
+    """Opens a file and writes to it.
+    """
+
+    OPEN_AND_APPEND = "OPEN_AND_APPEND"
+    """Opens a file and appends to it.
+    """
+
     APPEND = "APPEND"
     """Appends some content into a file.
     """
diff --git a/clients/client-python/gravitino/filesystem/gvfs.py b/clients/client-python/gravitino/filesystem/gvfs.py
index 4870ac505fa..8d98d0a0412 100644
--- a/clients/client-python/gravitino/filesystem/gvfs.py
+++ b/clients/client-python/gravitino/filesystem/gvfs.py
@@ -21,16 +21,19 @@
 import re
 
 import fsspec
-from cachetools import TTLCache
+from cachetools import TTLCache, LRUCache
 from fsspec import AbstractFileSystem
 from fsspec.implementations.local import LocalFileSystem
 from fsspec.implementations.arrow import ArrowFSWrapper
 from fsspec.utils import infer_storage_options
 from pyarrow.fs import HadoopFileSystem
 from readerwriterlock import rwlock
-from gravitino.api.catalog import Catalog
-from gravitino.api.fileset import Fileset
+from gravitino.audit.caller_context import CallerContext, CallerContextHolder
+from gravitino.audit.fileset_audit_constants import FilesetAuditConstants
+from gravitino.audit.fileset_data_operation import FilesetDataOperation
+from gravitino.audit.internal_client_type import InternalClientType
 from gravitino.auth.simple_auth_provider import SimpleAuthProvider
+from gravitino.catalog.fileset_catalog import FilesetCatalog
 from gravitino.client.gravitino_client import GravitinoClient
 from gravitino.exceptions.base import GravitinoRuntimeException
 from gravitino.filesystem.gvfs_config import GVFSConfig
@@ -44,39 +47,20 @@ class StorageType(Enum):
     LOCAL = "file"
 
 
-class FilesetContext:
-    """A context object that holds the information about the fileset and the file system which used in
+class FilesetContextPair:
+    """A context object that holds the information about the actual file location and the file system which is used in
     the GravitinoVirtualFileSystem's operations.
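+
+    A minimal usage sketch (illustrative only; `fs` stands for a
+    GravitinoVirtualFileSystem instance, the virtual path is an assumed
+    example, and real call sites also strip the storage protocol from the
+    location before handing it to the filesystem):
+
+        pair = fs._get_fileset_context(
+            "fileset/catalog/schema/fileset/sub/dir", FilesetDataOperation.LIST_STATUS
+        )
+        pair.filesystem().ls(pair.actual_file_location())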
""" - def __init__( - self, - name_identifier: NameIdentifier, - fileset: Fileset, - fs: AbstractFileSystem, - storage_type: StorageType, - actual_path: str, - ): - self._name_identifier = name_identifier - self._fileset = fileset - self._fs = fs - self._storage_type = storage_type - self._actual_path = actual_path - - def get_name_identifier(self): - return self._name_identifier + def __init__(self, actual_file_location: str, filesystem: AbstractFileSystem): + self._actual_file_location = actual_file_location + self._filesystem = filesystem - def get_fileset(self): - return self._fileset + def actual_file_location(self): + return self._actual_file_location - def get_fs(self): - return self._fs - - def get_actual_path(self): - return self._actual_path - - def get_storage_type(self): - return self._storage_type + def filesystem(self): + return self._filesystem class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem): @@ -136,6 +120,8 @@ def __init__( ) self._cache = TTLCache(maxsize=cache_size, ttl=cache_expired_time) self._cache_lock = rwlock.RWLockFair() + self._catalog_cache = LRUCache(maxsize=100) + self._catalog_cache_lock = rwlock.RWLockFair() super().__init__(**kwargs) @@ -160,28 +146,42 @@ def ls(self, path, detail=True, **kwargs): :param kwargs: Extra args :return If details is true, returns a list of file info dicts, else returns a list of file paths """ - context: FilesetContext = self._get_fileset_context(path) + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.LIST_STATUS + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + pre_process_path: str = self._pre_process_path(path) + identifier: NameIdentifier = self._extract_identifier(pre_process_path) + sub_path: str = self._get_sub_path_from_virtual_path( + identifier, pre_process_path + ) + storage_location: str = actual_path[: len(actual_path) - len(sub_path)] + # return entries with details if detail: - entries = [ - self._convert_actual_info(entry, context) - for entry in context.get_fs().ls( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ), - detail=True, + entries = context_pair.filesystem().ls( + self._strip_storage_protocol(storage_type, actual_path), + detail=True, + ) + virtual_entries = [ + self._convert_actual_info( + entry, storage_location, self._get_virtual_location(identifier) ) + for entry in entries ] - return entries - entries = [ - self._convert_actual_path(entry_path, context) - for entry_path in context.get_fs().ls( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ), - detail=False, + return virtual_entries + # only returns paths + entry_paths = context_pair.filesystem().ls( + self._strip_storage_protocol(storage_type, actual_path), + detail=False, + ) + virtual_entry_paths = [ + self._convert_actual_path( + entry_path, storage_location, self._get_virtual_location(identifier) ) + for entry_path in entry_paths ] - return entries + return virtual_entry_paths def info(self, path, **kwargs): """Get file info. 
@@ -189,13 +189,23 @@ def info(self, path, **kwargs): :param kwargs: Extra args :return A file info dict """ - context: FilesetContext = self._get_fileset_context(path) - actual_info: Dict = context.get_fs().info( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ) + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.GET_FILE_STATUS + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + pre_process_path: str = self._pre_process_path(path) + identifier: NameIdentifier = self._extract_identifier(pre_process_path) + sub_path: str = self._get_sub_path_from_virtual_path( + identifier, pre_process_path + ) + storage_location: str = actual_path[: len(actual_path) - len(sub_path)] + actual_info: Dict = context_pair.filesystem().info( + self._strip_storage_protocol(storage_type, actual_path) + ) + return self._convert_actual_info( + actual_info, storage_location, self._get_virtual_location(identifier) ) - return self._convert_actual_info(actual_info, context) def exists(self, path, **kwargs): """Check if a file or a directory exists. @@ -203,11 +213,13 @@ def exists(self, path, **kwargs): :param kwargs: Extra args :return If a file or directory exists, it returns True, otherwise False """ - context: FilesetContext = self._get_fileset_context(path) - return context.get_fs().exists( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ) + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.EXISTS + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + return context_pair.filesystem().exists( + self._strip_storage_protocol(storage_type, actual_path) ) def cp_file(self, path1, path2, **kwargs): @@ -225,24 +237,20 @@ def cp_file(self, path1, path2, **kwargs): f"Destination file path identifier: `{dst_identifier}` should be same with src file path " f"identifier: `{src_identifier}`." ) - src_context: FilesetContext = self._get_fileset_context(src_path) - if self._check_mount_single_file( - src_context.get_fileset(), - src_context.get_fs(), - src_context.get_storage_type(), - ): - raise GravitinoRuntimeException( - f"Cannot cp file of the fileset: {src_identifier} which only mounts to a single file." 
- ) - dst_context: FilesetContext = self._get_fileset_context(dst_path) + src_context_pair: FilesetContextPair = self._get_fileset_context( + src_path, FilesetDataOperation.COPY_FILE + ) + src_actual_path = src_context_pair.actual_file_location() + + dst_context_pair: FilesetContextPair = self._get_fileset_context( + dst_path, FilesetDataOperation.COPY_FILE + ) + dst_actual_path = dst_context_pair.actual_file_location() - src_context.get_fs().cp_file( - self._strip_storage_protocol( - src_context.get_storage_type(), src_context.get_actual_path() - ), - self._strip_storage_protocol( - dst_context.get_storage_type(), dst_context.get_actual_path() - ), + storage_type = self._recognize_storage_type(src_actual_path) + src_context_pair.filesystem().cp_file( + self._strip_storage_protocol(storage_type, src_actual_path), + self._strip_storage_protocol(storage_type, dst_actual_path), ) def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs): @@ -264,39 +272,31 @@ def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs): f"Destination file path identifier: `{dst_identifier}`" f" should be same with src file path identifier: `{src_identifier}`." ) - src_context: FilesetContext = self._get_fileset_context(src_path) - if self._check_mount_single_file( - src_context.get_fileset(), - src_context.get_fs(), - src_context.get_storage_type(), - ): - raise GravitinoRuntimeException( - f"Cannot cp file of the fileset: {src_identifier} which only mounts to a single file." - ) - dst_context: FilesetContext = self._get_fileset_context(dst_path) - if src_context.get_storage_type() == StorageType.HDFS: - src_context.get_fs().mv( - self._strip_storage_protocol( - src_context.get_storage_type(), src_context.get_actual_path() - ), - self._strip_storage_protocol( - dst_context.get_storage_type(), dst_context.get_actual_path() - ), + src_context_pair: FilesetContextPair = self._get_fileset_context( + src_path, FilesetDataOperation.RENAME + ) + src_actual_path = src_context_pair.actual_file_location() + storage_type = self._recognize_storage_type(src_actual_path) + dst_context_pair: FilesetContextPair = self._get_fileset_context( + dst_path, FilesetDataOperation.RENAME + ) + dst_actual_path = dst_context_pair.actual_file_location() + + if storage_type == StorageType.HDFS: + src_context_pair.filesystem().mv( + self._strip_storage_protocol(storage_type, src_actual_path), + self._strip_storage_protocol(storage_type, dst_actual_path), ) - elif src_context.get_storage_type() == StorageType.LOCAL: - src_context.get_fs().mv( - self._strip_storage_protocol( - src_context.get_storage_type(), src_context.get_actual_path() - ), - self._strip_storage_protocol( - dst_context.get_storage_type(), dst_context.get_actual_path() - ), + elif storage_type == StorageType.LOCAL: + src_context_pair.filesystem().mv( + self._strip_storage_protocol(storage_type, src_actual_path), + self._strip_storage_protocol(storage_type, dst_actual_path), recursive, maxdepth, ) else: raise GravitinoRuntimeException( - f"Storage type:{src_context.get_storage_type()} doesn't support now." + f"Storage type:{storage_type} doesn't support now." ) def _rm(self, path): @@ -311,11 +311,13 @@ def rm(self, path, recursive=False, maxdepth=None): When removing a directory, this parameter should be True. :param maxdepth: The maximum depth to remove the directory recursively. 
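+        Note: the virtual path is resolved through the server first, so the
+        server-side audit records this call as a DELETE operation.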
""" - context: FilesetContext = self._get_fileset_context(path) - context.get_fs().rm( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ), + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.DELETE + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + context_pair.filesystem().rm( + self._strip_storage_protocol(storage_type, actual_path), recursive, maxdepth, ) @@ -324,11 +326,13 @@ def rm_file(self, path): """Remove a file. :param path: Virtual fileset path """ - context: FilesetContext = self._get_fileset_context(path) - context.get_fs().rm_file( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ) + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.DELETE + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + context_pair.filesystem().rm_file( + self._strip_storage_protocol(storage_type, actual_path) ) def rmdir(self, path): @@ -337,11 +341,13 @@ def rmdir(self, path): And it will throw an exception if delete a directory which is non-empty for LocalFileSystem. :param path: Virtual fileset path """ - context: FilesetContext = self._get_fileset_context(path) - context.get_fs().rmdir( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ) + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.DELETE + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + context_pair.filesystem().rmdir( + self._strip_storage_protocol(storage_type, actual_path) ) def open( @@ -362,11 +368,19 @@ def open( :param kwargs: Extra args :return A file-like object from the filesystem """ - context: FilesetContext = self._get_fileset_context(path) - return context.get_fs().open( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ), + if mode in ("w", "wb"): + data_operation = FilesetDataOperation.OPEN_AND_WRITE + elif mode in ("a", "ab"): + data_operation = FilesetDataOperation.OPEN_AND_APPEND + else: + data_operation = FilesetDataOperation.OPEN + context_pair: FilesetContextPair = self._get_fileset_context( + path, data_operation + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + return context_pair.filesystem().open( + self._strip_storage_protocol(storage_type, actual_path), mode, block_size, cache_options, @@ -382,11 +396,13 @@ def mkdir(self, path, create_parents=True, **kwargs): :param create_parents: Create parent directories if missing when set to True :param kwargs: Extra args """ - context: FilesetContext = self._get_fileset_context(path) - context.get_fs().mkdir( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ), + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.MKDIRS + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + context_pair.filesystem().mkdir( + self._strip_storage_protocol(storage_type, actual_path), create_parents, **kwargs, ) @@ -396,11 +412,13 @@ def makedirs(self, path, exist_ok=True): :param path: Virtual fileset path :param exist_ok: Continue if a directory already exists """ - context: FilesetContext = 
self._get_fileset_context(path) - context.get_fs().makedirs( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ), + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.MKDIRS + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + context_pair.filesystem().makedirs( + self._strip_storage_protocol(storage_type, actual_path), exist_ok, ) @@ -410,15 +428,17 @@ def created(self, path): :param path: Virtual fileset path :return Created time(datetime.datetime) """ - context: FilesetContext = self._get_fileset_context(path) - if context.get_storage_type() == StorageType.LOCAL: - return context.get_fs().created( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ) + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.CREATED_TIME + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + if storage_type == StorageType.LOCAL: + return context_pair.filesystem().created( + self._strip_storage_protocol(storage_type, actual_path) ) raise GravitinoRuntimeException( - f"Storage type:{context.get_storage_type()} doesn't support now." + f"Storage type:{storage_type} doesn't support now." ) def modified(self, path): @@ -426,11 +446,13 @@ def modified(self, path): :param path: Virtual fileset path :return Modified time(datetime.datetime) """ - context: FilesetContext = self._get_fileset_context(path) - return context.get_fs().modified( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ) + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.MODIFIED_TIME + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + return context_pair.filesystem().modified( + self._strip_storage_protocol(storage_type, actual_path) ) def cat_file(self, path, start=None, end=None, **kwargs): @@ -441,11 +463,13 @@ def cat_file(self, path, start=None, end=None, **kwargs): :param kwargs: Extra args :return File content """ - context: FilesetContext = self._get_fileset_context(path) - return context.get_fs().cat_file( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ), + context_pair: FilesetContextPair = self._get_fileset_context( + path, FilesetDataOperation.CAT_FILE + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + return context_pair.filesystem().cat_file( + self._strip_storage_protocol(storage_type, actual_path), start, end, **kwargs, @@ -465,55 +489,67 @@ def get_file(self, rpath, lpath, callback=None, outfile=None, **kwargs): raise GravitinoRuntimeException( "Doesn't support copy a remote gvfs file to an another remote file." 
) - context: FilesetContext = self._get_fileset_context(rpath) - context.get_fs().get_file( - self._strip_storage_protocol( - context.get_storage_type(), context.get_actual_path() - ), + context_pair: FilesetContextPair = self._get_fileset_context( + rpath, FilesetDataOperation.GET_FILE + ) + actual_path = context_pair.actual_file_location() + storage_type = self._recognize_storage_type(actual_path) + context_pair.filesystem().get_file( + self._strip_storage_protocol(storage_type, actual_path), lpath, **kwargs, ) - def _convert_actual_path(self, path, context: FilesetContext): + def _convert_actual_path( + self, + actual_path: str, + storage_location: str, + virtual_location: str, + ): """Convert an actual path to a virtual path. The virtual path is like `fileset/{catalog}/{schema}/{fileset}/xxx`. - :param path: Actual path - :param context: Fileset context + :param actual_path: Actual path + :param storage_location: Storage location + :param virtual_location: Virtual location :return A virtual path """ - if context.get_storage_type() == StorageType.HDFS: - actual_prefix = infer_storage_options( - context.get_fileset().storage_location() - )["path"] - elif context.get_storage_type() == StorageType.LOCAL: - actual_prefix = context.get_fileset().storage_location()[ - len(f"{StorageType.LOCAL.value}:") : - ] + if storage_location.startswith(f"{StorageType.HDFS.value}://"): + actual_prefix = infer_storage_options(storage_location)["path"] + elif storage_location.startswith(f"{StorageType.LOCAL.value}:/"): + actual_prefix = storage_location[len(f"{StorageType.LOCAL.value}:") :] else: raise GravitinoRuntimeException( - f"Storage type:{context.get_storage_type()} doesn't support now." + f"Storage location:{storage_location} doesn't support now." ) - if not path.startswith(actual_prefix): + if not actual_path.startswith(actual_prefix): raise GravitinoRuntimeException( - f"Path {path} does not start with valid prefix {actual_prefix}." + f"Path {actual_path} does not start with valid prefix {actual_prefix}." ) - virtual_location = self._get_virtual_location(context.get_name_identifier()) + # if the storage location is end with "/", # we should truncate this to avoid replace issues. if actual_prefix.endswith(self.SLASH) and not virtual_location.endswith( self.SLASH ): - return f"{path.replace(actual_prefix[:-1], virtual_location)}" - return f"{path.replace(actual_prefix, virtual_location)}" + return f"{actual_path.replace(actual_prefix[:-1], virtual_location)}" + return f"{actual_path.replace(actual_prefix, virtual_location)}" - def _convert_actual_info(self, entry: Dict, context: FilesetContext): + def _convert_actual_info( + self, + entry: Dict, + storage_location: str, + virtual_location: str, + ): """Convert a file info from an actual entry to a virtual entry. 
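+        Only the entry name is rewritten (from the storage location prefix to
+        the virtual `fileset/...` prefix); size, type and mtime are copied as-is.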
:param entry: A dict of the actual file info - :param context: Fileset context + :param storage_location: Storage location + :param virtual_location: Virtual location :return A dict of the virtual file info """ - path = self._convert_actual_path(entry["name"], context) + path = self._convert_actual_path( + entry["name"], storage_location, virtual_location + ) return { "name": path, "size": entry["size"], @@ -521,78 +557,38 @@ def _convert_actual_info(self, entry: Dict, context: FilesetContext): "mtime": entry["mtime"], } - def _get_fileset_context(self, virtual_path: str): + def _get_fileset_context(self, virtual_path: str, operation: FilesetDataOperation): """Get a fileset context from the cache or the Gravitino server :param virtual_path: The virtual path - :return A fileset context + :param operation: The data operation + :return A fileset context pair """ virtual_path: str = self._pre_process_path(virtual_path) identifier: NameIdentifier = self._extract_identifier(virtual_path) - read_lock = self._cache_lock.gen_rlock() - try: - read_lock.acquire() - cache_value: Tuple[Fileset, AbstractFileSystem, StorageType] = ( - self._cache.get(identifier) - ) - if cache_value is not None: - actual_path = self._get_actual_path_by_ident( - identifier, - cache_value[0], - cache_value[1], - cache_value[2], - virtual_path, - ) - return FilesetContext( - identifier, - cache_value[0], - cache_value[1], - cache_value[2], - actual_path, - ) - finally: - read_lock.release() - - write_lock = self._cache_lock.gen_wlock() - try: - write_lock.acquire() - cache_value: Tuple[Fileset, AbstractFileSystem] = self._cache.get( - identifier - ) - if cache_value is not None: - actual_path = self._get_actual_path_by_ident( - identifier, - cache_value[0], - cache_value[1], - cache_value[2], - virtual_path, - ) - return FilesetContext( - identifier, - cache_value[0], - cache_value[1], - cache_value[2], - actual_path, - ) - fileset: Fileset = self._load_fileset_from_server(identifier) - storage_location = fileset.storage_location() - if storage_location.startswith(f"{StorageType.HDFS.value}://"): - fs = ArrowFSWrapper(HadoopFileSystem.from_uri(storage_location)) - storage_type = StorageType.HDFS - elif storage_location.startswith(f"{StorageType.LOCAL.value}:/"): - fs = LocalFileSystem() - storage_type = StorageType.LOCAL - else: - raise GravitinoRuntimeException( - f"Storage under the fileset: `{identifier}` doesn't support now." - ) - actual_path = self._get_actual_path_by_ident( - identifier, fileset, fs, storage_type, virtual_path + catalog_ident: NameIdentifier = NameIdentifier.of( + self._metalake, identifier.namespace().level(1) + ) + fileset_catalog = self._get_fileset_catalog(catalog_ident) + if fileset_catalog is None: + raise GravitinoRuntimeException( + f"Loaded fileset catalog: {catalog_ident} is null." 
) - self._cache[identifier] = (fileset, fs, storage_type) - context = FilesetContext(identifier, fileset, fs, storage_type, actual_path) - return context - finally: - write_lock.release() + sub_path: str = self._get_sub_path_from_virtual_path(identifier, virtual_path) + context = { + FilesetAuditConstants.HTTP_HEADER_FILESET_DATA_OPERATION: operation.name, + FilesetAuditConstants.HTTP_HEADER_INTERNAL_CLIENT_TYPE: InternalClientType.PYTHON_GVFS.name, + } + caller_context: CallerContext = CallerContext(context) + CallerContextHolder.set(caller_context) + actual_file_location: ( + str + ) = fileset_catalog.as_fileset_catalog().get_file_location( + NameIdentifier.of(identifier.namespace().level(2), identifier.name()), + sub_path, + ) + return FilesetContextPair( + actual_file_location, self._get_filesystem(actual_file_location) + ) def _extract_identifier(self, path): """Extract the fileset identifier from the path. @@ -613,63 +609,6 @@ def _extract_identifier(self, path): f"path: `{path}` doesn't contains valid identifier." ) - def _load_fileset_from_server(self, identifier: NameIdentifier) -> Fileset: - """Load the fileset from the server. - If the fileset is not found on the server, an `NoSuchFilesetException` exception will be raised. - :param identifier: The fileset identifier - :return The fileset - """ - catalog: Catalog = self._client.load_catalog(identifier.namespace().level(1)) - - return catalog.as_fileset_catalog().load_fileset( - NameIdentifier.of(identifier.namespace().level(2), identifier.name()) - ) - - def _get_actual_path_by_ident( - self, - identifier: NameIdentifier, - fileset: Fileset, - fs: AbstractFileSystem, - storage_type: StorageType, - virtual_path: str, - ): - """Get the actual path by the virtual path and the fileset. - :param identifier: The fileset identifier - :param fileset: The fileset - :param fs: The file system corresponding to the fileset storage location - :param storage_type: The storage type of the fileset storage location - :param virtual_path: The virtual fileset path - :return The actual path. - """ - virtual_location = self._get_virtual_location(identifier) - storage_location = fileset.storage_location() - if self._check_mount_single_file(fileset, fs, storage_type): - if virtual_path != virtual_location: - raise GravitinoRuntimeException( - f"Path: {virtual_path} should be same with the virtual location: {virtual_location}" - " when the fileset only mounts a single file." - ) - return storage_location - # if the storage location ends with "/", - # we should handle the conversion specially - if storage_location.endswith(self.SLASH): - sub_path = virtual_path[len(virtual_location) :] - # For example, if the virtual path is `gvfs://fileset/catalog/schema/test_fileset/ttt`, - # and the storage location is `hdfs://cluster:8020/user/`, - # we should replace `gvfs://fileset/catalog/schema/test_fileset` - # with `hdfs://localhost:8020/user` which truncates the tailing slash. - # If the storage location is `hdfs://cluster:8020/user`, - # we can replace `gvfs://fileset/catalog/schema/test_fileset` - # with `hdfs://localhost:8020/user` directly. 
- if sub_path.startswith(self.SLASH): - new_storage_location = storage_location[:-1] - else: - new_storage_location = storage_location - - # Replace virtual_location with the adjusted storage_location - return virtual_path.replace(virtual_location, new_storage_location, 1) - return virtual_path.replace(virtual_location, storage_location, 1) - @staticmethod def _get_virtual_location(identifier: NameIdentifier): """Get the virtual location of the fileset. @@ -682,20 +621,6 @@ def _get_virtual_location(identifier: NameIdentifier): f"/{identifier.name()}" ) - def _check_mount_single_file( - self, fileset: Fileset, fs: AbstractFileSystem, storage_type: StorageType - ): - """Check if the fileset is mounted a single file. - :param fileset: The fileset - :param fs: The file system corresponding to the fileset storage location - :param storage_type: The storage type of the fileset storage location - :return True the fileset is mounted a single file. - """ - result: Dict = fs.info( - self._strip_storage_protocol(storage_type, fileset.storage_location()) - ) - return result["type"] == "file" - @staticmethod def _pre_process_path(virtual_path): """Pre-process the path. @@ -719,6 +644,28 @@ def _pre_process_path(virtual_path): ) return pre_processed_path + @staticmethod + def _recognize_storage_type(path: str): + """Recognize the storage type by the path. + :param path: The path + :return: The storage type + """ + if path.startswith(f"{StorageType.HDFS.value}://"): + return StorageType.HDFS + if path.startswith(f"{StorageType.LOCAL.value}:/"): + return StorageType.LOCAL + raise GravitinoRuntimeException( + f"Storage type doesn't support now. Path:{path}" + ) + + @staticmethod + def _get_sub_path_from_virtual_path(identifier: NameIdentifier, virtual_path: str): + return virtual_path[ + len( + f"fileset/{identifier.namespace().level(1)}/{identifier.namespace().level(2)}/{identifier.name()}" + ) : + ] + @staticmethod def _strip_storage_protocol(storage_type: StorageType, path: str): """Strip the storage protocol from the path. @@ -739,5 +686,65 @@ def _strip_storage_protocol(storage_type: StorageType, path: str): f"Storage type:{storage_type} doesn't support now." 
) + def _get_fileset_catalog(self, catalog_ident: NameIdentifier): + read_lock = self._catalog_cache_lock.gen_rlock() + try: + read_lock.acquire() + cache_value: Tuple[NameIdentifier, FilesetCatalog] = ( + self._catalog_cache.get(catalog_ident) + ) + if cache_value is not None: + return cache_value + finally: + read_lock.release() + + write_lock = self._catalog_cache_lock.gen_wlock() + try: + write_lock.acquire() + cache_value: Tuple[NameIdentifier, FilesetCatalog] = ( + self._catalog_cache.get(catalog_ident) + ) + if cache_value is not None: + return cache_value + catalog = self._client.load_catalog(catalog_ident.name()) + self._catalog_cache[catalog_ident] = catalog + return catalog + finally: + write_lock.release() + + def _get_filesystem(self, actual_file_location: str): + storage_type = self._recognize_storage_type(actual_file_location) + read_lock = self._cache_lock.gen_rlock() + try: + read_lock.acquire() + cache_value: Tuple[StorageType, AbstractFileSystem] = self._cache.get( + storage_type + ) + if cache_value is not None: + return cache_value + finally: + read_lock.release() + + write_lock = self._cache_lock.gen_wlock() + try: + write_lock.acquire() + cache_value: Tuple[StorageType, AbstractFileSystem] = self._cache.get( + storage_type + ) + if cache_value is not None: + return cache_value + if storage_type == StorageType.HDFS: + fs = ArrowFSWrapper(HadoopFileSystem.from_uri(actual_file_location)) + elif storage_type == StorageType.LOCAL: + fs = LocalFileSystem() + else: + raise GravitinoRuntimeException( + f"Storage type: `{storage_type}` doesn't support now." + ) + self._cache[storage_type] = fs + return fs + finally: + write_lock.release() + fsspec.register_implementation(PROTOCOL_NAME, GravitinoVirtualFileSystem) diff --git a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py index 87b6f102314..9116005b840 100644 --- a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py +++ b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py @@ -364,6 +364,10 @@ def test_mv(self): self.assertTrue(fs.exists(mv_new_file)) self.assertTrue(self.hdfs.exists(mv_new_actual_file)) + # test rename without sub path, which should throw an exception + with self.assertRaises(GravitinoRuntimeException): + fs.mv(self.fileset_gvfs_location, self.fileset_gvfs_location + "/test_mv") + def test_rm(self): rm_dir = self.fileset_gvfs_location + "/test_rm" rm_actual_dir = self.fileset_storage_location + "/test_rm" diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py b/clients/client-python/tests/unittests/test_gvfs_with_local.py index 3b28941dffd..22bdccd8c57 100644 --- a/clients/client-python/tests/unittests/test_gvfs_with_local.py +++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py @@ -14,16 +14,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+import base64 +import os # pylint: disable=protected-access,too-many-lines,too-many-locals -import base64 -import os import random import string import time import unittest -from unittest import mock +from datetime import datetime from unittest.mock import patch import pandas @@ -31,16 +31,11 @@ import pyarrow.dataset as dt import pyarrow.parquet as pq from fsspec.implementations.local import LocalFileSystem -from llama_index.core import SimpleDirectoryReader -from gravitino import gvfs, Fileset -from gravitino import NameIdentifier +from gravitino import gvfs, NameIdentifier from gravitino.auth.auth_constants import AuthConstants -from gravitino.dto.audit_dto import AuditDTO -from gravitino.dto.fileset_dto import FilesetDTO -from gravitino.filesystem.gvfs import FilesetContext, StorageType from gravitino.exceptions.base import GravitinoRuntimeException - +from gravitino.filesystem.gvfs_config import GVFSConfig from tests.unittests import mock_base @@ -67,42 +62,30 @@ def tearDown(self) -> None: if local_fs.exists(self._local_base_dir_path): local_fs.rm(self._local_base_dir_path, recursive=True) - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_cache", f"{_fileset_dir}/test_cache" - ), - ) def test_cache(self, *mock_methods): - local_fs = LocalFileSystem() fileset_storage_location = f"{self._fileset_dir}/test_cache" fileset_virtual_location = "fileset/fileset_catalog/tmp/test_cache" - local_fs.mkdir(fileset_storage_location) - self.assertTrue(local_fs.exists(fileset_storage_location)) - options = {"cache_size": 1, "cache_expired_time": 2} - fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", - metalake_name="metalake_demo", - options=options, - ) - self.assertTrue(fs.exists(fileset_virtual_location)) - # wait 2 seconds - time.sleep(2) - self.assertIsNone( - fs.cache.get( - NameIdentifier.of( - "metalake_demo", "fileset_catalog", "tmp", "test_cache" - ) + actual_path = fileset_storage_location + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + local_fs = LocalFileSystem() + local_fs.mkdir(fileset_storage_location) + self.assertTrue(local_fs.exists(fileset_storage_location)) + options = {GVFSConfig.CACHE_SIZE: 1, GVFSConfig.CACHE_EXPIRED_TIME: 1} + fs = gvfs.GravitinoVirtualFileSystem( + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + options=options, + skip_instance_cache=True, ) - ) + self.assertTrue(fs.exists(fileset_virtual_location)) + # wait 2 seconds + time.sleep(2) + self.assertIsNone(fs._cache.get("file:/")) - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_simple_auth", f"{_fileset_dir}/test_simple_auth" - ), - ) - def test_simple_auth(self, mock_method1, mock_method2, mock_method3, mock_method4): + def test_simple_auth(self, *mock_methods): options = {"auth_type": "simple"} current_user = ( None if os.environ.get("user.name") is None else os.environ["user.name"] @@ -113,6 +96,7 @@ def test_simple_auth(self, mock_method1, mock_method2, mock_method3, mock_method server_uri="http://localhost:9090", metalake_name="metalake_demo", options=options, + skip_instance_cache=True, ) token = fs._client._rest_client.auth_data_provider.get_token_data() token_string = base64.b64decode( @@ -122,60 +106,59 @@ def test_simple_auth(self, mock_method1, mock_method2, mock_method3, mock_method if current_user is not None: 
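+            # restore the caller's original user.name so later tests are unaffected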
os.environ["user.name"] = current_user - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset("test_ls", f"{_fileset_dir}/test_ls"), - ) def test_ls(self, *mock_methods): - local_fs = LocalFileSystem() fileset_storage_location = f"{self._fileset_dir}/test_ls" fileset_virtual_location = "fileset/fileset_catalog/tmp/test_ls" - local_fs.mkdir(fileset_storage_location) - sub_dir_path = f"{fileset_storage_location}/test_1" - local_fs.mkdir(sub_dir_path) - self.assertTrue(local_fs.exists(sub_dir_path)) - sub_file_path = f"{fileset_storage_location}/test_file_1.par" - local_fs.touch(sub_file_path) - self.assertTrue(local_fs.exists(sub_file_path)) - - fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" - ) - self.assertTrue(fs.exists(fileset_virtual_location)) - - # test detail = false - file_list_without_detail = fs.ls(fileset_virtual_location, detail=False) - file_list_without_detail.sort() - self.assertEqual(2, len(file_list_without_detail)) - self.assertEqual( - file_list_without_detail[0], f"{fileset_virtual_location}/test_1" - ) - self.assertEqual( - file_list_without_detail[1], f"{fileset_virtual_location}/test_file_1.par" - ) + actual_path = fileset_storage_location + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + local_fs = LocalFileSystem() + local_fs.mkdir(fileset_storage_location) + sub_dir_path = f"{fileset_storage_location}/test_1" + local_fs.mkdir(sub_dir_path) + self.assertTrue(local_fs.exists(sub_dir_path)) + sub_file_path = f"{fileset_storage_location}/test_file_1.par" + local_fs.touch(sub_file_path) + self.assertTrue(local_fs.exists(sub_file_path)) + + fs = gvfs.GravitinoVirtualFileSystem( + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + skip_instance_cache=True, + ) + self.assertTrue(fs.exists(fileset_virtual_location)) + + # test detail = false + file_list_without_detail = fs.ls(fileset_virtual_location, detail=False) + file_list_without_detail.sort() + self.assertEqual(2, len(file_list_without_detail)) + self.assertEqual( + file_list_without_detail[0], f"{fileset_virtual_location}/test_1" + ) + self.assertEqual( + file_list_without_detail[1], + f"{fileset_virtual_location}/test_file_1.par", + ) - # test detail = true - file_list_with_detail = fs.ls(fileset_virtual_location, detail=True) - file_list_with_detail.sort(key=lambda x: x["name"]) - self.assertEqual(2, len(file_list_with_detail)) - self.assertEqual( - file_list_with_detail[0]["name"], f"{fileset_virtual_location}/test_1" - ) - self.assertEqual( - file_list_with_detail[1]["name"], - f"{fileset_virtual_location}/test_file_1.par", - ) + # test detail = true + file_list_with_detail = fs.ls(fileset_virtual_location, detail=True) + file_list_with_detail.sort(key=lambda x: x["name"]) + self.assertEqual(2, len(file_list_with_detail)) + self.assertEqual( + file_list_with_detail[0]["name"], f"{fileset_virtual_location}/test_1" + ) + self.assertEqual( + file_list_with_detail[1]["name"], + f"{fileset_virtual_location}/test_file_1.par", + ) - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_info", f"{_fileset_dir}/test_info" - ), - ) def test_info(self, *mock_methods): - local_fs = LocalFileSystem() fileset_storage_location = f"{self._fileset_dir}/test_info" fileset_virtual_location = "fileset/fileset_catalog/tmp/test_info" + actual_path = 
fileset_storage_location + local_fs = LocalFileSystem() local_fs.mkdir(fileset_storage_location) sub_dir_path = f"{fileset_storage_location}/test_1" local_fs.mkdir(sub_dir_path) @@ -185,28 +168,39 @@ def test_info(self, *mock_methods): self.assertTrue(local_fs.exists(sub_file_path)) fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + skip_instance_cache=True, ) - self.assertTrue(fs.exists(fileset_virtual_location)) + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + self.assertTrue(fs.exists(fileset_virtual_location)) dir_virtual_path = fileset_virtual_location + "/test_1" - dir_info = fs.info(dir_virtual_path) - self.assertEqual(dir_info["name"], dir_virtual_path) + actual_path = fileset_storage_location + "/test_1" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + dir_info = fs.info(dir_virtual_path) + self.assertEqual(dir_info["name"], dir_virtual_path) file_virtual_path = fileset_virtual_location + "/test_file_1.par" - file_info = fs.info(file_virtual_path) - self.assertEqual(file_info["name"], file_virtual_path) - - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_exist", f"{_fileset_dir}/test_exist" - ), - ) + actual_path = fileset_storage_location + "/test_file_1.par" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + file_info = fs.info(file_virtual_path) + self.assertEqual(file_info["name"], file_virtual_path) + def test_exist(self, *mock_methods): - local_fs = LocalFileSystem() fileset_storage_location = f"{self._fileset_dir}/test_exist" fileset_virtual_location = "fileset/fileset_catalog/tmp/test_exist" + actual_path = fileset_storage_location + local_fs = LocalFileSystem() local_fs.mkdir(fileset_storage_location) sub_dir_path = f"{fileset_storage_location}/test_1" local_fs.mkdir(sub_dir_path) @@ -216,28 +210,38 @@ def test_exist(self, *mock_methods): self.assertTrue(local_fs.exists(sub_file_path)) fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + skip_instance_cache=True, ) - self.assertTrue(fs.exists(fileset_virtual_location)) + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + self.assertTrue(fs.exists(fileset_virtual_location)) dir_virtual_path = fileset_virtual_location + "/test_1" - self.assertTrue(fs.exists(dir_virtual_path)) + actual_path = fileset_storage_location + "/test_1" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + self.assertTrue(fs.exists(dir_virtual_path)) file_virtual_path = fileset_virtual_location + "/test_file_1.par" - self.assertTrue(fs.exists(file_virtual_path)) + actual_path = fileset_storage_location + "/test_file_1.par" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + self.assertTrue(fs.exists(file_virtual_path)) - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_cp_file", f"{_fileset_dir}/test_cp_file" - ), - ) def 
test_cp_file(self, *mock_methods): - local_fs = LocalFileSystem() fileset_storage_location = f"{self._fileset_dir}/test_cp_file" - local_fs.mkdir(fileset_storage_location) - fileset_virtual_location = "fileset/fileset_catalog/tmp/test_cp_file" + actual_path = fileset_storage_location + local_fs = LocalFileSystem() + local_fs.mkdir(fileset_storage_location) sub_file_path = f"{fileset_storage_location}/test_file_1.par" local_fs.touch(sub_file_path) self.assertTrue(local_fs.exists(sub_file_path)) @@ -246,19 +250,35 @@ def test_cp_file(self, *mock_methods): f.write(b"test_file_1") fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + skip_instance_cache=True, ) - self.assertTrue(fs.exists(fileset_virtual_location)) + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + self.assertTrue(fs.exists(fileset_virtual_location)) file_virtual_path = fileset_virtual_location + "/test_file_1.par" - self.assertTrue(fs.exists(file_virtual_path)) - - cp_file_virtual_path = fileset_virtual_location + "/test_cp_file_1.par" - fs.cp_file(file_virtual_path, cp_file_virtual_path) - self.assertTrue(fs.exists(cp_file_virtual_path)) - with local_fs.open(sub_file_path, "rb") as f: - result = f.read() - self.assertEqual(b"test_file_1", result) + src_actual_path = fileset_storage_location + "/test_file_1.par" + dst_actual_path = fileset_storage_location + "/test_cp_file_1.par" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + side_effect=[ + src_actual_path, + src_actual_path, + dst_actual_path, + dst_actual_path, + ], + ): + self.assertTrue(fs.exists(file_virtual_path)) + cp_file_virtual_path = fileset_virtual_location + "/test_cp_file_1.par" + fs.cp_file(file_virtual_path, cp_file_virtual_path) + self.assertTrue(fs.exists(cp_file_virtual_path)) + with local_fs.open(sub_file_path, "rb") as f: + result = f.read() + self.assertEqual(b"test_file_1", result) # test invalid dst path cp_file_invalid_virtual_path = ( @@ -267,25 +287,12 @@ def test_cp_file(self, *mock_methods): with self.assertRaises(GravitinoRuntimeException): fs.cp_file(file_virtual_path, cp_file_invalid_virtual_path) - # test mount a single file - local_fs.rm(path=fileset_storage_location, recursive=True) - self.assertFalse(local_fs.exists(fileset_storage_location)) - local_fs.touch(fileset_storage_location) - self.assertTrue(local_fs.exists(fileset_storage_location)) - with self.assertRaises(GravitinoRuntimeException): - fs.cp_file(file_virtual_path, cp_file_virtual_path) - - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset("test_mv", f"{_fileset_dir}/test_mv"), - ) def test_mv(self, *mock_methods): - local_fs = LocalFileSystem() fileset_storage_location = f"{self._fileset_dir}/test_mv" - local_fs.mkdir(fileset_storage_location) - fileset_virtual_location = "fileset/fileset_catalog/tmp/test_mv" - + actual_path = fileset_storage_location + local_fs = LocalFileSystem() + local_fs.mkdir(fileset_storage_location) sub_file_path = f"{fileset_storage_location}/test_file_1.par" local_fs.touch(sub_file_path) self.assertTrue(local_fs.exists(sub_file_path)) @@ -295,27 +302,53 @@ def test_mv(self, *mock_methods): self.assertTrue(local_fs.exists(another_dir_path)) fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + 
@@ -295,27 +302,53 @@ def test_mv(self, *mock_methods):
         self.assertTrue(local_fs.exists(another_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         file_virtual_path = fileset_virtual_location + "/test_file_1.par"
-        self.assertTrue(fs.exists(file_virtual_path))
+        src_actual_path = fileset_storage_location + "/test_file_1.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=src_actual_path,
+        ):
+            self.assertTrue(fs.exists(file_virtual_path))

         mv_file_virtual_path = fileset_virtual_location + "/test_cp_file_1.par"
-        fs.mv(file_virtual_path, mv_file_virtual_path)
-        self.assertTrue(fs.exists(mv_file_virtual_path))
+        dst_actual_path = fileset_storage_location + "/test_cp_file_1.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            side_effect=[src_actual_path, dst_actual_path, dst_actual_path],
+        ):
+            fs.mv(file_virtual_path, mv_file_virtual_path)
+            self.assertTrue(fs.exists(mv_file_virtual_path))

         mv_another_dir_virtual_path = (
             fileset_virtual_location + "/another_dir/test_file_2.par"
         )
-        fs.mv(mv_file_virtual_path, mv_another_dir_virtual_path)
-        self.assertTrue(fs.exists(mv_another_dir_virtual_path))
+        dst_actual_path1 = fileset_storage_location + "/another_dir/test_file_2.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            side_effect=[dst_actual_path, dst_actual_path1, dst_actual_path1],
+        ):
+            fs.mv(mv_file_virtual_path, mv_another_dir_virtual_path)
+            self.assertTrue(fs.exists(mv_another_dir_virtual_path))

         # test not exist dir
         not_exist_dst_dir_path = fileset_virtual_location + "/not_exist/test_file_2.par"
-        with self.assertRaises(FileNotFoundError):
-            fs.mv(path1=mv_another_dir_virtual_path, path2=not_exist_dst_dir_path)
+        dst_actual_path2 = fileset_storage_location + "/not_exist/test_file_2.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            side_effect=[dst_actual_path1, dst_actual_path2],
+        ):
+            with self.assertRaises(FileNotFoundError):
+                fs.mv(path1=mv_another_dir_virtual_path, path2=not_exist_dst_dir_path)

         # test invalid dst path
         mv_file_invalid_virtual_path = (
@@ -324,25 +357,12 @@ def test_mv(self, *mock_methods):
         with self.assertRaises(GravitinoRuntimeException):
             fs.mv(path1=file_virtual_path, path2=mv_file_invalid_virtual_path)

-        # test mount a single file
-        local_fs.rm(path=fileset_storage_location, recursive=True)
-        self.assertFalse(local_fs.exists(fileset_storage_location))
-        local_fs.touch(fileset_storage_location)
-        self.assertTrue(local_fs.exists(fileset_storage_location))
-        with self.assertRaises(GravitinoRuntimeException):
-            fs.mv(file_virtual_path, mv_file_virtual_path)
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset("test_rm", f"{_fileset_dir}/test_rm"),
-    )
     def test_rm(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_rm"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_rm"
-
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_file_path = f"{fileset_storage_location}/test_file_1.par"
         local_fs.touch(sub_file_path)
         self.assertTrue(local_fs.exists(sub_file_path))
@@ -352,38 +372,48 @@ def test_rm(self, *mock_methods):
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         # test delete file
         file_virtual_path = fileset_virtual_location + "/test_file_1.par"
-        self.assertTrue(fs.exists(file_virtual_path))
-        fs.rm(file_virtual_path)
-        self.assertFalse(fs.exists(file_virtual_path))
+        actual_path1 = fileset_storage_location + "/test_file_1.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertTrue(fs.exists(file_virtual_path))
+            fs.rm(file_virtual_path)
+            self.assertFalse(fs.exists(file_virtual_path))

         # test delete dir with recursive = false
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
-        self.assertTrue(fs.exists(dir_virtual_path))
-        with self.assertRaises(ValueError):
-            fs.rm(dir_virtual_path, recursive=False)
-
-        # test delete dir with recursive = true
-        fs.rm(dir_virtual_path, recursive=True)
-        self.assertFalse(fs.exists(dir_virtual_path))
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_rm_file", f"{_fileset_dir}/test_rm_file"
-        ),
-    )
+        actual_path2 = fileset_storage_location + "/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path2,
+        ):
+            self.assertTrue(fs.exists(dir_virtual_path))
+            with self.assertRaises(ValueError):
+                fs.rm(dir_virtual_path, recursive=False)
+
+            # test delete dir with recursive = true
+            fs.rm(dir_virtual_path, recursive=True)
+            self.assertFalse(fs.exists(dir_virtual_path))
+
     def test_rm_file(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_rm_file"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_rm_file"
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)

         sub_file_path = f"{fileset_storage_location}/test_file_1.par"
         local_fs.touch(sub_file_path)
@@ -394,35 +424,44 @@ def test_rm_file(self, *mock_methods):
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         # test delete file
         file_virtual_path = fileset_virtual_location + "/test_file_1.par"
-        self.assertTrue(fs.exists(file_virtual_path))
-        fs.rm_file(file_virtual_path)
-        self.assertFalse(fs.exists(file_virtual_path))
+        actual_path1 = fileset_storage_location + "/test_file_1.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertTrue(fs.exists(file_virtual_path))
+            fs.rm_file(file_virtual_path)
+            self.assertFalse(fs.exists(file_virtual_path))

         # test delete dir
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
-        self.assertTrue(fs.exists(dir_virtual_path))
-        with self.assertRaises((IsADirectoryError, PermissionError)):
-            fs.rm_file(dir_virtual_path)
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_rmdir", f"{_fileset_dir}/test_rmdir"
-        ),
-    )
+        actual_path2 = fileset_storage_location + "/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path2,
+        ):
+            self.assertTrue(fs.exists(dir_virtual_path))
+            with self.assertRaises((IsADirectoryError, PermissionError)):
+                fs.rm_file(dir_virtual_path)
+
     def test_rmdir(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_rmdir"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_rmdir"
-
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_file_path = f"{fileset_storage_location}/test_file_1.par"
         local_fs.touch(sub_file_path)
         self.assertTrue(local_fs.exists(sub_file_path))
@@ -432,35 +471,44 @@ def test_rmdir(self, *mock_methods):
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         # test delete file
         file_virtual_path = fileset_virtual_location + "/test_file_1.par"
-        self.assertTrue(fs.exists(file_virtual_path))
-        with self.assertRaises(NotADirectoryError):
-            fs.rmdir(file_virtual_path)
+        actual_path1 = fileset_storage_location + "/test_file_1.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertTrue(fs.exists(file_virtual_path))
+            with self.assertRaises(NotADirectoryError):
+                fs.rmdir(file_virtual_path)

         # test delete dir
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
-        self.assertTrue(fs.exists(dir_virtual_path))
-        fs.rmdir(dir_virtual_path)
-        self.assertFalse(fs.exists(dir_virtual_path))
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_open", f"{_fileset_dir}/test_open"
-        ),
-    )
+        actual_path2 = fileset_storage_location + "/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path2,
+        ):
+            self.assertTrue(fs.exists(dir_virtual_path))
+            fs.rmdir(dir_virtual_path)
+            self.assertFalse(fs.exists(dir_virtual_path))
+
     def test_open(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_open"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_open"
-
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_file_path = f"{fileset_storage_location}/test_file_1.par"
         local_fs.touch(sub_file_path)
         self.assertTrue(local_fs.exists(sub_file_path))
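Note: the side_effect lists above encode one return value per expected call to FilesetCatalog.get_file_location, served strictly in call order; that is why the fs.mv tests list the source path once and the destination path twice (resolve source and destination for the move, then the destination again for the follow-up exists check). A stdlib-only sketch of the ordering contract (paths below are hypothetical):

    from unittest.mock import MagicMock

    # Each call consumes the next side_effect value, in order.
    resolve = MagicMock(side_effect=["/tmp/f/src.par", "/tmp/f/dst.par", "/tmp/f/dst.par"])
    assert resolve("src") == "/tmp/f/src.par"  # e.g. resolving the source
    assert resolve("dst") == "/tmp/f/dst.par"  # e.g. resolving the destination
    assert resolve("dst") == "/tmp/f/dst.par"  # e.g. exists() on the destination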
@@ -470,168 +518,198 @@ def test_open(self, *mock_methods):
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         # test open and write file
         file_virtual_path = fileset_virtual_location + "/test_file_1.par"
-        self.assertTrue(fs.exists(file_virtual_path))
-        with fs.open(file_virtual_path, mode="wb") as f:
-            f.write(b"test_open_write")
-        self.assertTrue(fs.info(file_virtual_path)["size"] > 0)
-
-        # test open and read file
-        with fs.open(file_virtual_path, mode="rb") as f:
-            self.assertEqual(b"test_open_write", f.read())
+        actual_path1 = fileset_storage_location + "/test_file_1.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertTrue(fs.exists(file_virtual_path))
+            with fs.open(file_virtual_path, mode="wb") as f:
+                f.write(b"test_open_write")
+            self.assertTrue(fs.info(file_virtual_path)["size"] > 0)
+
+            # test open and read file
+            with fs.open(file_virtual_path, mode="rb") as f:
+                self.assertEqual(b"test_open_write", f.read())

         # test open dir
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
-        self.assertTrue(fs.exists(dir_virtual_path))
-        with self.assertRaises(IsADirectoryError):
-            fs.open(dir_virtual_path)
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_mkdir", f"{_fileset_dir}/test_mkdir"
-        ),
-    )
+        actual_path2 = fileset_storage_location + "/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path2,
+        ):
+            self.assertTrue(fs.exists(dir_virtual_path))
+            with self.assertRaises(IsADirectoryError):
+                fs.open(dir_virtual_path)
+
     def test_mkdir(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_mkdir"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_mkdir"
-
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_dir_path = f"{fileset_storage_location}/sub_dir"
         local_fs.mkdirs(sub_dir_path)
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

-        # test mkdir dir which exists
-        existed_dir_virtual_path = fileset_virtual_location
-        self.assertTrue(fs.exists(existed_dir_virtual_path))
-        with self.assertRaises(FileExistsError):
-            fs.mkdir(existed_dir_virtual_path)
+            # test mkdir dir which exists
+            existed_dir_virtual_path = fileset_virtual_location
+            self.assertTrue(fs.exists(existed_dir_virtual_path))
+            with self.assertRaises(FileExistsError):
+                fs.mkdir(existed_dir_virtual_path)

         # test mkdir dir with create_parents = false
         parent_not_exist_virtual_path = fileset_virtual_location + "/not_exist/sub_dir"
-        self.assertFalse(fs.exists(parent_not_exist_virtual_path))
-        with self.assertRaises(FileNotFoundError):
-            fs.mkdir(parent_not_exist_virtual_path, create_parents=False)
+        actual_path1 = fileset_storage_location + "/not_exist/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertFalse(fs.exists(parent_not_exist_virtual_path))
+            with self.assertRaises(FileNotFoundError):
+                fs.mkdir(parent_not_exist_virtual_path, create_parents=False)

         # test mkdir dir with create_parents = true
         parent_not_exist_virtual_path2 = fileset_virtual_location + "/not_exist/sub_dir"
-        self.assertFalse(fs.exists(parent_not_exist_virtual_path2))
-        fs.mkdir(parent_not_exist_virtual_path2, create_parents=True)
-        self.assertTrue(fs.exists(parent_not_exist_virtual_path2))
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_makedirs", f"{_fileset_dir}/test_makedirs"
-        ),
-    )
+        actual_path2 = fileset_storage_location + "/not_exist/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path2,
+        ):
+            self.assertFalse(fs.exists(parent_not_exist_virtual_path2))
+            fs.mkdir(parent_not_exist_virtual_path2, create_parents=True)
+            self.assertTrue(fs.exists(parent_not_exist_virtual_path2))
+
     def test_makedirs(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_makedirs"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_makedirs"
-
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_dir_path = f"{fileset_storage_location}/sub_dir"
         local_fs.mkdirs(sub_dir_path)
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

-        # test mkdir dir which exists
-        existed_dir_virtual_path = fileset_virtual_location
-        self.assertTrue(fs.exists(existed_dir_virtual_path))
-        with self.assertRaises(FileExistsError):
-            fs.mkdirs(existed_dir_virtual_path)
+            # test mkdir dir which exists
+            existed_dir_virtual_path = fileset_virtual_location
+            self.assertTrue(fs.exists(existed_dir_virtual_path))
+            with self.assertRaises(FileExistsError):
+                fs.mkdirs(existed_dir_virtual_path)

         # test mkdir dir not exist
         parent_not_exist_virtual_path = fileset_virtual_location + "/not_exist/sub_dir"
-        self.assertFalse(fs.exists(parent_not_exist_virtual_path))
-        fs.makedirs(parent_not_exist_virtual_path)
-        self.assertTrue(fs.exists(parent_not_exist_virtual_path))
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_created", f"{_fileset_dir}/test_created"
-        ),
-    )
+        actual_path1 = fileset_storage_location + "/not_exist/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertFalse(fs.exists(parent_not_exist_virtual_path))
+            fs.makedirs(parent_not_exist_virtual_path)
+            self.assertTrue(fs.exists(parent_not_exist_virtual_path))
+
     def test_created(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_created"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_created"
-
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_dir_path = f"{fileset_storage_location}/sub_dir"
         local_fs.mkdirs(sub_dir_path)
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         # test mkdir dir which exists
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
-        self.assertTrue(fs.exists(dir_virtual_path))
-        self.assertIsNotNone(fs.created(dir_virtual_path))
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_modified", f"{_fileset_dir}/test_modified"
-        ),
-    )
+        actual_path1 = fileset_storage_location + "/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertTrue(fs.exists(dir_virtual_path))
+            self.assertIsNotNone(fs.created(dir_virtual_path))
+
     def test_modified(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_modified"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_modified"
-
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_dir_path = f"{fileset_storage_location}/sub_dir"
         local_fs.mkdirs(sub_dir_path)
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         # test mkdir dir which exists
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
-        self.assertTrue(fs.exists(dir_virtual_path))
-        self.assertIsNotNone(fs.modified(dir_virtual_path))
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_cat_file", f"{_fileset_dir}/test_cat_file"
-        ),
-    )
+        actual_path1 = fileset_storage_location + "/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertTrue(fs.exists(dir_virtual_path))
+            self.assertIsNotNone(fs.modified(dir_virtual_path))
+
     def test_cat_file(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_cat_file"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_cat_file"
-
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_file_path = f"{fileset_storage_location}/test_file_1.par"
         local_fs.touch(sub_file_path)
         self.assertTrue(local_fs.exists(sub_file_path))
@@ -641,40 +719,49 @@ def test_cat_file(self, *mock_methods):
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         # test open and write file
         file_virtual_path = fileset_virtual_location + "/test_file_1.par"
-        self.assertTrue(fs.exists(file_virtual_path))
-        with fs.open(file_virtual_path, mode="wb") as f:
-            f.write(b"test_cat_file")
-        self.assertTrue(fs.info(file_virtual_path)["size"] > 0)
-
-        # test cat file
-        content = fs.cat_file(file_virtual_path)
-        self.assertEqual(b"test_cat_file", content)
+        actual_path1 = fileset_storage_location + "/test_file_1.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertTrue(fs.exists(file_virtual_path))
+            with fs.open(file_virtual_path, mode="wb") as f:
+                f.write(b"test_cat_file")
+            self.assertTrue(fs.info(file_virtual_path)["size"] > 0)
+
+            # test cat file
+            content = fs.cat_file(file_virtual_path)
+            self.assertEqual(b"test_cat_file", content)

         # test cat dir
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
-        self.assertTrue(fs.exists(dir_virtual_path))
-        with self.assertRaises(IsADirectoryError):
-            fs.cat_file(dir_virtual_path)
-
-    @patch(
-        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
-        return_value=mock_base.mock_load_fileset(
-            "test_get_file", f"{_fileset_dir}/test_get_file"
-        ),
-    )
+        actual_path2 = fileset_storage_location + "/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path2,
+        ):
+            self.assertTrue(fs.exists(dir_virtual_path))
+            with self.assertRaises(IsADirectoryError):
+                fs.cat_file(dir_virtual_path)
+
     def test_get_file(self, *mock_methods):
-        local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_get_file"
-        local_fs.mkdir(fileset_storage_location)
-        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_get_file"
+        actual_path = fileset_storage_location
+        local_fs = LocalFileSystem()
+        local_fs.mkdir(fileset_storage_location)
         sub_file_path = f"{fileset_storage_location}/test_file_1.par"
         local_fs.touch(sub_file_path)
         self.assertTrue(local_fs.exists(sub_file_path))
@@ -684,30 +771,46 @@ def test_get_file(self, *mock_methods):
         self.assertTrue(local_fs.exists(sub_dir_path))

         fs = gvfs.GravitinoVirtualFileSystem(
-            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
         )
-        self.assertTrue(fs.exists(fileset_virtual_location))
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path,
+        ):
+            self.assertTrue(fs.exists(fileset_virtual_location))

         # test open and write file
         file_virtual_path = fileset_virtual_location + "/test_file_1.par"
-        self.assertTrue(fs.exists(file_virtual_path))
-        with fs.open(file_virtual_path, mode="wb") as f:
-            f.write(b"test_get_file")
-        self.assertTrue(fs.info(file_virtual_path)["size"] > 0)
-
-        # test get file
-        local_path = self._fileset_dir + "/local_file.par"
-        local_fs.touch(local_path)
-        self.assertTrue(local_fs.exists(local_path))
-        fs.get_file(file_virtual_path, local_path)
-        self.assertEqual(b"test_get_file", local_fs.cat_file(local_path))
+        actual_path1 = fileset_storage_location + "/test_file_1.par"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path1,
+        ):
+            self.assertTrue(fs.exists(file_virtual_path))
+            with fs.open(file_virtual_path, mode="wb") as f:
+                f.write(b"test_get_file")
+            self.assertTrue(fs.info(file_virtual_path)["size"] > 0)
+
+            # test get file
+            local_path = self._fileset_dir + "/local_file.par"
+            local_fs.touch(local_path)
+            self.assertTrue(local_fs.exists(local_path))
+            fs.get_file(file_virtual_path, local_path)
+            self.assertEqual(b"test_get_file", local_fs.cat_file(local_path))

         # test get a dir
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
-        local_path = self._fileset_dir + "/local_dir"
-        self.assertTrue(fs.exists(dir_virtual_path))
-        fs.get_file(dir_virtual_path, local_path)
-        self.assertTrue(local_fs.exists(local_path))
+        actual_path2 = fileset_storage_location + "/sub_dir"
+        with patch(
+            "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+            return_value=actual_path2,
+        ):
+            local_path = self._fileset_dir + "/local_dir"
+            self.assertTrue(fs.exists(dir_virtual_path))
+            fs.get_file(dir_virtual_path, local_path)
+            self.assertTrue(local_fs.exists(local_path))

         # test get a file to a remote file
         remote_path = "gvfs://" + fileset_virtual_location + "/test_file_2.par"
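Note: _convert_actual_path now receives the storage location and the virtual location directly instead of a FilesetContext, and the assertions below reduce to a prefix swap from the storage namespace into the virtual one. A hedged sketch of that mapping (illustrative only; the real implementation in gvfs.py also strips scheme prefixes such as file: or hdfs://host:port before comparing):

    def convert_actual_path(actual_path, storage_location, virtual_location):
        # Assumes the scheme prefix has already been removed from storage_location.
        prefix = storage_location.rstrip("/")
        if not actual_path.startswith(prefix):
            raise ValueError(f"{actual_path} does not start with {prefix}")
        return virtual_location.rstrip("/") + actual_path[len(prefix):]

    # "/tmp/fileset/test_f1/actual_path" -> "fileset/test_catalog/test_schema/test_f1/actual_path"
    print(convert_actual_path(
        "/tmp/fileset/test_f1/actual_path",
        "/tmp/fileset/test_f1",
        "fileset/test_catalog/test_schema/test_f1",
    ))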
"fileset/test_catalog/test_schema/test_f1/actual_path", virtual_path ) # test convert actual local path - audit_dto = AuditDTO( - _creator="test", - _create_time="2022-01-01T00:00:00Z", - _last_modifier="test", - _last_modified_time="2024-04-05T10:10:35.218Z", - ) - local_fileset: FilesetDTO = FilesetDTO( - _name="test_f1", - _comment="", - _type=FilesetDTO.Type.MANAGED, - _storage_location="file:/tmp/fileset/test_f1", - _audit=audit_dto, - _properties={}, - ) - mock_local_context: FilesetContext = FilesetContext( - name_identifier=NameIdentifier.of( - "test_metalake", "test_catalog", "test_schema", "test_f1" - ), - storage_type=StorageType.LOCAL, - fileset=local_fileset, - actual_path=local_fileset.storage_location() + "/actual_path", - fs=LocalFileSystem(), - ) - fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + skip_instance_cache=True, + ) + storage_location = "file:/tmp/fileset/test_f1" + virtual_location = fs._get_virtual_location( + NameIdentifier.of("test_metalake", "test_catalog", "test_schema", "test_f1") ) # test actual path not start with storage location actual_path = "/not_start_with_storage/ttt" with self.assertRaises(GravitinoRuntimeException): - fs._convert_actual_path(actual_path, mock_local_context) + fs._convert_actual_path(actual_path, storage_location, virtual_location) # test actual path start with storage location actual_path = "/tmp/fileset/test_f1/actual_path" - virtual_path = fs._convert_actual_path(actual_path, mock_local_context) + virtual_path = fs._convert_actual_path( + actual_path, storage_location, virtual_location + ) self.assertEqual( "fileset/test_catalog/test_schema/test_f1/actual_path", virtual_path ) # test storage location without "/" actual_path = "/tmp/test_convert_actual_path/sub_dir/1.parquet" - storage_location1 = "file:/tmp/test_convert_actual_path" - mock_fileset1: Fileset = mock.Mock(spec=Fileset) - mock_fileset1.storage_location.return_value = storage_location1 - - mock_fileset_context1: FilesetContext = mock.Mock(spec=FilesetContext) - mock_fileset_context1.get_storage_type.return_value = StorageType.LOCAL - mock_fileset_context1.get_name_identifier.return_value = NameIdentifier.of( - "test_metalake", "catalog", "schema", "test_convert_actual_path" + storage_location = "file:/tmp/test_convert_actual_path" + virtual_location = fs._get_virtual_location( + NameIdentifier.of( + "test_metalake", "catalog", "schema", "test_convert_actual_path" + ) ) - mock_fileset_context1.get_fileset.return_value = mock_fileset1 - virtual_path = fs._convert_actual_path(actual_path, mock_fileset_context1) + virtual_path = fs._convert_actual_path( + actual_path, storage_location, virtual_location + ) self.assertEqual( "fileset/catalog/schema/test_convert_actual_path/sub_dir/1.parquet", virtual_path, @@ -816,107 +884,90 @@ def test_convert_actual_path(self, *mock_methods): # test storage location with "/" actual_path = "/tmp/test_convert_actual_path/sub_dir/1.parquet" - storage_location2 = "file:/tmp/test_convert_actual_path/" - mock_fileset2: Fileset = mock.Mock(spec=Fileset) - mock_fileset2.storage_location.return_value = storage_location2 - - mock_fileset_context2: FilesetContext = mock.Mock(spec=FilesetContext) - mock_fileset_context2.get_storage_type.return_value = StorageType.LOCAL - mock_fileset_context2.get_name_identifier.return_value = NameIdentifier.of( - "test_metalake", "catalog", "schema", "test_convert_actual_path" + 
storage_location = "file:/tmp/test_convert_actual_path/" + virtual_location = fs._get_virtual_location( + NameIdentifier.of( + "test_metalake", "catalog", "schema", "test_convert_actual_path" + ) ) - mock_fileset_context2.get_fileset.return_value = mock_fileset2 - virtual_path = fs._convert_actual_path(actual_path, mock_fileset_context2) + virtual_path = fs._convert_actual_path( + actual_path, storage_location, virtual_location + ) self.assertEqual( "fileset/catalog/schema/test_convert_actual_path/sub_dir/1.parquet", virtual_path, ) - def test_convert_info(self, *mock_methods3): - # test convert actual hdfs path - audit_dto = AuditDTO( - _creator="test", - _create_time="2022-01-01T00:00:00Z", - _last_modifier="test", - _last_modified_time="2024-04-05T10:10:35.218Z", - ) - hdfs_fileset: FilesetDTO = FilesetDTO( - _name="test_f1", - _comment="", - _type=FilesetDTO.Type.MANAGED, - _storage_location="hdfs://localhost:8090/fileset/test_f1", - _audit=audit_dto, - _properties={}, - ) - mock_hdfs_context: FilesetContext = FilesetContext( - name_identifier=NameIdentifier.of( - "test_metalake", "test_catalog", "test_schema", "test_f1" - ), - storage_type=StorageType.HDFS, - fileset=hdfs_fileset, - actual_path=hdfs_fileset.storage_location() + "/actual_path", - fs=LocalFileSystem(), - ) - + def test_convert_info(self, *mock_methods): fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + skip_instance_cache=True, ) # test actual path not start with storage location - actual_path = "/not_start_with_storage/ttt" + entry = { + "name": "/not_start_with_storage/ttt", + "size": 1, + "type": "file", + "mtime": datetime.now(), + } + storage_location = "hdfs://localhost:8090/fileset/test_f1" + virtual_location = fs._get_virtual_location( + NameIdentifier.of("test_metalake", "test_catalog", "test_schema", "test_f1") + ) with self.assertRaises(GravitinoRuntimeException): - fs._convert_actual_path(actual_path, mock_hdfs_context) + fs._convert_actual_info(entry, storage_location, virtual_location) # test actual path start with storage location - actual_path = "/fileset/test_f1/actual_path" - virtual_path = fs._convert_actual_path(actual_path, mock_hdfs_context) + entry = { + "name": "/fileset/test_f1/actual_path", + "size": 1, + "type": "file", + "mtime": datetime.now(), + } + info = fs._convert_actual_info(entry, storage_location, virtual_location) self.assertEqual( - "fileset/test_catalog/test_schema/test_f1/actual_path", virtual_path + "fileset/test_catalog/test_schema/test_f1/actual_path", info["name"] ) # test convert actual local path - audit_dto = AuditDTO( - _creator="test", - _create_time="2022-01-01T00:00:00Z", - _last_modifier="test", - _last_modified_time="2024-04-05T10:10:35.218Z", - ) - local_fileset: FilesetDTO = FilesetDTO( - _name="test_f1", - _comment="", - _type=FilesetDTO.Type.MANAGED, - _storage_location="file:/tmp/fileset/test_f1", - _audit=audit_dto, - _properties={}, - ) - mock_local_context: FilesetContext = FilesetContext( - name_identifier=NameIdentifier.of( - "test_metalake", "test_catalog", "test_schema", "test_f1" - ), - storage_type=StorageType.LOCAL, - fileset=local_fileset, - actual_path=local_fileset.storage_location() + "/actual_path", - fs=LocalFileSystem(), - ) - fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + 
skip_instance_cache=True, ) # test actual path not start with storage location - actual_path = "/not_start_with_storage/ttt" + entry = { + "name": "/not_start_with_storage/ttt", + "size": 1, + "type": "file", + "mtime": datetime.now(), + } + storage_location = "file:/tmp/fileset/test_f1" + virtual_location = fs._get_virtual_location( + NameIdentifier.of("test_metalake", "test_catalog", "test_schema", "test_f1") + ) with self.assertRaises(GravitinoRuntimeException): - fs._convert_actual_path(actual_path, mock_local_context) + fs._convert_actual_info(entry, storage_location, virtual_location) # test actual path start with storage location - actual_path = "/tmp/fileset/test_f1/actual_path" - virtual_path = fs._convert_actual_path(actual_path, mock_local_context) + entry = { + "name": "/tmp/fileset/test_f1/actual_path", + "size": 1, + "type": "file", + "mtime": datetime.now(), + } + info = fs._convert_actual_info(entry, storage_location, virtual_location) self.assertEqual( - "fileset/test_catalog/test_schema/test_f1/actual_path", virtual_path + "fileset/test_catalog/test_schema/test_f1/actual_path", info["name"] ) def test_extract_identifier(self, *mock_methods): fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + skip_instance_cache=True, ) with self.assertRaises(GravitinoRuntimeException): fs._extract_identifier(path=None) @@ -932,152 +983,93 @@ def test_extract_identifier(self, *mock_methods): self.assertEqual("schema", identifier.namespace().level(2)) self.assertEqual("fileset", identifier.name()) - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_pandas", f"{_fileset_dir}/test_pandas" - ), - ) def test_pandas(self, *mock_methods): - local_fs = LocalFileSystem() fileset_storage_location = f"{self._fileset_dir}/test_pandas" + fileset_virtual_location = "gvfs://fileset/fileset_catalog/tmp/test_pandas" + local_fs = LocalFileSystem() local_fs.mkdir(fileset_storage_location) - fileset_virtual_location = "gvfs://fileset/fileset_catalog/tmp/test_pandas" data = pandas.DataFrame({"Name": ["A", "B", "C", "D"], "ID": [20, 21, 19, 18]}) fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:8090", metalake_name="test_metalake" - ) - # to parquet - data.to_parquet(fileset_virtual_location + "/test.parquet", filesystem=fs) - self.assertTrue(local_fs.exists(fileset_storage_location + "/test.parquet")) - - # read parquet - ds1 = pandas.read_parquet( - path=fileset_virtual_location + "/test.parquet", filesystem=fs - ) - self.assertTrue(data.equals(ds1)) + server_uri="http://localhost:8090", + metalake_name="test_metalake", + skip_instance_cache=True, + ) + actual_path = fileset_storage_location + "/test.parquet" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + # to parquet + data.to_parquet(fileset_virtual_location + "/test.parquet", filesystem=fs) + self.assertTrue(local_fs.exists(fileset_storage_location + "/test.parquet")) + + # read parquet + ds1 = pandas.read_parquet( + path=fileset_virtual_location + "/test.parquet", filesystem=fs + ) + self.assertTrue(data.equals(ds1)) storage_options = { "server_uri": "http://localhost:8090", "metalake_name": "test_metalake", } - # to csv - data.to_csv( - fileset_virtual_location + "/test.csv", - index=False, - storage_options=storage_options, - ) - 
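Note: _convert_actual_info applies the same translation to fsspec info dicts: only the path-valued "name" entry moves into the virtual namespace, while size/type/mtime pass through. A short sketch reusing the convert_actual_path helper sketched earlier (helper names are illustrative, not the patch's API):

    from datetime import datetime

    def convert_actual_info(entry, storage_location, virtual_location):
        converted = dict(entry)  # size/type/mtime stay untouched
        converted["name"] = convert_actual_path(
            entry["name"], storage_location, virtual_location
        )
        return converted

    info = convert_actual_info(
        {"name": "/fileset/test_f1/a", "size": 1, "type": "file", "mtime": datetime.now()},
        "/fileset/test_f1",
        "fileset/test_catalog/test_schema/test_f1",
    )
    assert info["name"] == "fileset/test_catalog/test_schema/test_f1/a"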
self.assertTrue(local_fs.exists(fileset_storage_location + "/test.csv")) - # read csv - ds2 = pandas.read_csv( - fileset_virtual_location + "/test.csv", storage_options=storage_options - ) - self.assertTrue(data.equals(ds2)) + actual_path1 = fileset_storage_location + actual_path2 = fileset_storage_location + "/test.csv" + + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + side_effect=[actual_path1, actual_path2, actual_path2], + ): + # to csv + data.to_csv( + fileset_virtual_location + "/test.csv", + index=False, + storage_options=storage_options, + ) + self.assertTrue(local_fs.exists(fileset_storage_location + "/test.csv")) + + # read csv + ds2 = pandas.read_csv( + fileset_virtual_location + "/test.csv", storage_options=storage_options + ) + self.assertTrue(data.equals(ds2)) - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_pyarrow", f"{_fileset_dir}/test_pyarrow" - ), - ) def test_pyarrow(self, *mock_methods): - local_fs = LocalFileSystem() fileset_storage_location = f"{self._fileset_dir}/test_pyarrow" - local_fs.mkdir(fileset_storage_location) - fileset_virtual_location = "gvfs://fileset/fileset_catalog/tmp/test_pyarrow" - data = pandas.DataFrame({"Name": ["A", "B", "C", "D"], "ID": [20, 21, 19, 18]}) - fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:8090", metalake_name="test_metalake" - ) - - # to parquet - data.to_parquet(fileset_virtual_location + "/test.parquet", filesystem=fs) - self.assertTrue(local_fs.exists(fileset_storage_location + "/test.parquet")) - - # read as arrow dataset - arrow_dataset = dt.dataset( - fileset_virtual_location + "/test.parquet", filesystem=fs - ) - arrow_tb_1 = arrow_dataset.to_table() - - arrow_tb_2 = pa.Table.from_pandas(data) - self.assertTrue(arrow_tb_1.equals(arrow_tb_2)) - - # read as arrow parquet dataset - arrow_tb_3 = pq.read_table( - fileset_virtual_location + "/test.parquet", filesystem=fs - ) - self.assertTrue(arrow_tb_3.equals(arrow_tb_2)) - - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_llama_index", f"{_fileset_dir}/test_llama_index" - ), - ) - def test_llama_index(self, *mock_methods): local_fs = LocalFileSystem() - fileset_storage_location = f"{self._fileset_dir}/test_llama_index" local_fs.mkdir(fileset_storage_location) - - fileset_virtual_location = "gvfs://fileset/fileset_catalog/tmp/test_llama_index" data = pandas.DataFrame({"Name": ["A", "B", "C", "D"], "ID": [20, 21, 19, 18]}) fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:8090", metalake_name="test_metalake" - ) - - storage_options = { - "server_uri": "http://localhost:8090", - "metalake_name": "test_metalake", - } - # to csv - data.to_csv( - fileset_virtual_location + "/test.csv", - index=False, - storage_options=storage_options, - ) - self.assertTrue(local_fs.exists(fileset_storage_location + "/test.csv")) + server_uri="http://localhost:8090", + metalake_name="test_metalake", + skip_instance_cache=True, + ) + actual_path = fileset_storage_location + "/test.parquet" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + # to parquet + data.to_parquet(fileset_virtual_location + "/test.parquet", filesystem=fs) + self.assertTrue(local_fs.exists(fileset_storage_location + "/test.parquet")) + + # read as arrow dataset + arrow_dataset = dt.dataset( + fileset_virtual_location + 
"/test.parquet", filesystem=fs + ) + arrow_tb_1 = arrow_dataset.to_table() + arrow_tb_2 = pa.Table.from_pandas(data) + self.assertTrue(arrow_tb_1.equals(arrow_tb_2)) - data.to_csv( - fileset_virtual_location + "/sub_dir/test1.csv", - index=False, - storage_options=storage_options, - ) - self.assertTrue( - local_fs.exists(fileset_storage_location + "/sub_dir/test1.csv") - ) + # read as arrow parquet dataset + arrow_tb_3 = pq.read_table( + fileset_virtual_location + "/test.parquet", filesystem=fs + ) + self.assertTrue(arrow_tb_3.equals(arrow_tb_2)) - reader = SimpleDirectoryReader( - input_dir="fileset/fileset_catalog/tmp/test_llama_index", - fs=fs, - recursive=True, # recursively searches all subdirectories - ) - documents = reader.load_data() - self.assertEqual(len(documents), 2) - doc_1 = documents[0] - result_1 = [line.strip().split(", ") for line in doc_1.text.split("\n")] - self.assertEqual(4, len(result_1)) - for row in result_1: - if row[0] == "A": - self.assertEqual(row[1], "20") - elif row[0] == "B": - self.assertEqual(row[1], "21") - elif row[0] == "C": - self.assertEqual(row[1], "19") - elif row[0] == "D": - self.assertEqual(row[1], "18") - - @patch( - "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset", - return_value=mock_base.mock_load_fileset( - "test_location_with_tailing_slash", - f"{_fileset_dir}/test_location_with_tailing_slash/", - ), - ) def test_location_with_tailing_slash(self, *mock_methods): - local_fs = LocalFileSystem() # storage location is ending with a "/" fileset_storage_location = ( f"{self._fileset_dir}/test_location_with_tailing_slash/" @@ -1085,6 +1077,7 @@ def test_location_with_tailing_slash(self, *mock_methods): fileset_virtual_location = ( "fileset/fileset_catalog/tmp/test_location_with_tailing_slash" ) + local_fs = LocalFileSystem() local_fs.mkdir(fileset_storage_location) sub_dir_path = f"{fileset_storage_location}test_1" local_fs.mkdir(sub_dir_path) @@ -1093,82 +1086,45 @@ def test_location_with_tailing_slash(self, *mock_methods): local_fs.touch(sub_file_path) self.assertTrue(local_fs.exists(sub_file_path)) + actual_path = fileset_storage_location fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + skip_instance_cache=True, ) - self.assertTrue(fs.exists(fileset_virtual_location)) + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + self.assertTrue(fs.exists(fileset_virtual_location)) dir_virtual_path = fileset_virtual_location + "/test_1" - dir_info = fs.info(dir_virtual_path) - self.assertEqual(dir_info["name"], dir_virtual_path) + actual_path1 = fileset_storage_location + "test_1" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path1, + ): + dir_info = fs.info(dir_virtual_path) + self.assertEqual(dir_info["name"], dir_virtual_path) file_virtual_path = fileset_virtual_location + "/test_1/test_file_1.par" - file_info = fs.info(file_virtual_path) - self.assertEqual(file_info["name"], file_virtual_path) - - file_status = fs.ls(fileset_virtual_location, detail=True) - for status in file_status: - if status["name"].endswith("test_1"): - self.assertEqual(status["name"], dir_virtual_path) - elif status["name"].endswith("test_file_1.par"): - self.assertEqual(status["name"], file_virtual_path) - else: - raise GravitinoRuntimeException("Unexpected file found") - - def 
test_get_actual_path_by_ident(self, *mock_methods): - ident1 = NameIdentifier.of( - "test_metalake", "catalog", "schema", "test_get_actual_path_by_ident" - ) - storage_type = gvfs.StorageType.LOCAL - local_fs = LocalFileSystem() - - fs = gvfs.GravitinoVirtualFileSystem( - server_uri="http://localhost:9090", metalake_name="metalake_demo" - ) - - # test storage location end with "/" - storage_location_1 = f"{self._fileset_dir}/test_get_actual_path_by_ident/" - # virtual path end with "/" - virtual_path1 = "fileset/catalog/schema/test_get_actual_path_by_ident/" - local_fs.mkdir(storage_location_1) - self.assertTrue(local_fs.exists(storage_location_1)) - - mock_fileset1: Fileset = mock.Mock(spec=Fileset) - mock_fileset1.storage_location.return_value = storage_location_1 - - actual_path1 = fs._get_actual_path_by_ident( - ident1, mock_fileset1, local_fs, storage_type, virtual_path1 - ) - self.assertEqual(actual_path1, storage_location_1) - - # virtual path end without "/" - virtual_path2 = "fileset/catalog/schema/test_get_actual_path_by_ident" - actual_path2 = fs._get_actual_path_by_ident( - ident1, mock_fileset1, local_fs, storage_type, virtual_path2 - ) - self.assertEqual(actual_path2, storage_location_1) - - # test storage location end without "/" - ident2 = NameIdentifier.of( - "test_metalake", "catalog", "schema", "test_without_slash" - ) - storage_location_2 = f"{self._fileset_dir}/test_without_slash" - # virtual path end with "/" - virtual_path3 = "fileset/catalog/schema/test_without_slash/" - local_fs.mkdir(storage_location_2) - self.assertTrue(local_fs.exists(storage_location_2)) - - mock_fileset2: Fileset = mock.Mock(spec=Fileset) - mock_fileset2.storage_location.return_value = storage_location_2 - - actual_path3 = fs._get_actual_path_by_ident( - ident2, mock_fileset2, local_fs, storage_type, virtual_path3 - ) - self.assertEqual(actual_path3, f"{storage_location_2}/") - - # virtual path end without "/" - virtual_path4 = "fileset/catalog/schema/test_without_slash" - actual_path4 = fs._get_actual_path_by_ident( - ident2, mock_fileset2, local_fs, storage_type, virtual_path4 - ) - self.assertEqual(actual_path4, storage_location_2) + actual_path2 = fileset_storage_location + "test_1/test_file_1.par" + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path2, + ): + file_info = fs.info(file_virtual_path) + self.assertEqual(file_info["name"], file_virtual_path) + + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + file_status = fs.ls(fileset_virtual_location, detail=True) + for status in file_status: + if status["name"].endswith("test_1"): + self.assertEqual(status["name"], dir_virtual_path) + elif status["name"].endswith("test_file_1.par"): + self.assertEqual(status["name"], file_virtual_path) + else: + raise GravitinoRuntimeException("Unexpected file found") diff --git a/common/src/main/java/org/apache/gravitino/audit/FilesetDataOperation.java b/common/src/main/java/org/apache/gravitino/audit/FilesetDataOperation.java index b76d1f91b3b..88ac4d11b06 100644 --- a/common/src/main/java/org/apache/gravitino/audit/FilesetDataOperation.java +++ b/common/src/main/java/org/apache/gravitino/audit/FilesetDataOperation.java @@ -25,6 +25,10 @@ public enum FilesetDataOperation { CREATE, /** Opens a file. */ OPEN, + /** Opens a file and writes to it. */ + OPEN_AND_WRITE, + /** Opens a file and appends to it. */ + OPEN_AND_APPEND, /** Appends some content into a file. 
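Note: the trailing-slash test above exists because storage locations may legitimately end with "/" while virtual paths never carry one, so actual-path arithmetic cannot blindly insert or assume a separator (the removed test_get_actual_path_by_ident assertions guarded the same invariant). A tiny normalization sketch of that invariant (illustrative only, not the patch's code):

    def join_actual_path(storage_location, sub_path):
        # ".../dir/" + "file.par" and ".../dir" + "/file.par" must both
        # resolve to ".../dir/file.par".
        if not sub_path:
            return storage_location
        return storage_location.rstrip("/") + "/" + sub_path.lstrip("/")

    assert join_actual_path("/tmp/fileset/", "test_1") == "/tmp/fileset/test_1"
    assert join_actual_path("/tmp/fileset", "/test_1") == "/tmp/fileset/test_1"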
diff --git a/common/src/main/java/org/apache/gravitino/audit/FilesetDataOperation.java b/common/src/main/java/org/apache/gravitino/audit/FilesetDataOperation.java
index b76d1f91b3b..88ac4d11b06 100644
--- a/common/src/main/java/org/apache/gravitino/audit/FilesetDataOperation.java
+++ b/common/src/main/java/org/apache/gravitino/audit/FilesetDataOperation.java
@@ -25,6 +25,10 @@ public enum FilesetDataOperation {
   CREATE,
   /** Opens a file. */
   OPEN,
+  /** Opens a file and writes to it. */
+  OPEN_AND_WRITE,
+  /** Opens a file and appends to it. */
+  OPEN_AND_APPEND,
   /** Appends some content into a file. */
   APPEND,
   /** Renames a file or a directory. */
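Note: the two new enum values let fileset audit records distinguish opens that mutate data from plain reads. How a caller maps an fsspec open mode onto these operations is not shown in this hunk; a hedged Python sketch of one plausible classification (the mapping below is an assumption, only the enum names come from the patch):

    def classify_open_mode(mode: str) -> str:
        # fsspec-style modes: "rb" reads, "wb"/"xb" truncate or create, "ab" appends.
        if "w" in mode or "x" in mode:
            return "OPEN_AND_WRITE"
        if "a" in mode:
            return "OPEN_AND_APPEND"
        return "OPEN"

    assert classify_open_mode("wb") == "OPEN_AND_WRITE"
    assert classify_open_mode("ab") == "OPEN_AND_APPEND"
    assert classify_open_mode("rb") == "OPEN"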