diff --git a/internal/util/gcs_util.go b/internal/util/gcs_util.go
new file mode 100644
index 0000000000..8b94cd6433
--- /dev/null
+++ b/internal/util/gcs_util.go
@@ -0,0 +1,92 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import (
+	"strings"
+
+	"github.com/googlecloudplatform/gcsfuse/v2/internal/storage/gcs"
+)
+
+// Patterns that make a GCS object name unsupported in GCSFuse.
+var (
+	unsupportedObjectNameSubstrings = []string{"//"}
+	unsupportedObjectNamePrefixes   = []string{"/"}
+	unsupportedObjectNames          = []string{""}
+)
+
+// isUnsupportedObjectName reports whether name is a GCS object name or
+// prefix that GCSFuse does not support, even though GCS itself may allow it.
+// A name is unsupported when it contains any of
+// unsupportedObjectNameSubstrings, starts with any of
+// unsupportedObjectNamePrefixes, or equals any of unsupportedObjectNames.
+func isUnsupportedObjectName(name string) bool {
+	for _, substring := range unsupportedObjectNameSubstrings {
+		if strings.Contains(name, substring) {
+			return true
+		}
+	}
+	for _, prefix := range unsupportedObjectNamePrefixes {
+		if strings.HasPrefix(name, prefix) {
+			return true
+		}
+	}
+	for _, unsupportedObjectName := range unsupportedObjectNames {
+		if name == unsupportedObjectName {
+			return true
+		}
+	}
+	return false
+}
+
+// RemoveUnsupportedObjectsFromListing splits listing into the entries that
+// GCSFuse supports and the entries that it has to ignore.
+//
+// GCS allows object names that do not map to distinct paths in a Linux file
+// system. As an example, GCS can have two different objects a//b and a/b at
+// the same time in the same bucket, whereas in a Linux FS both paths are the
+// same as a/b. So, GCSFuse ignores objects and collapsed runs whose names
+// contain `//`, start with `/` or are empty, to avoid causing
+// `input/output error` in the Linux FS.
+//
+// newListing holds the supported entries and the continuation token of
+// listing. removedListing holds the unsupported entries and no continuation
+// token. Both keep the entries in their original order and share the object
+// pointers with listing, which is itself left unmodified. A nil listing
+// yields two empty listings.
+func RemoveUnsupportedObjectsFromListing(listing *gcs.Listing) (newListing *gcs.Listing, removedListing *gcs.Listing) {
+	newListing = &gcs.Listing{}
+	removedListing = &gcs.Listing{}
+	if listing == nil {
+		return newListing, removedListing
+	}
+
+	for _, collapsedRun := range listing.CollapsedRuns {
+		if isUnsupportedObjectName(collapsedRun) {
+			removedListing.CollapsedRuns = append(removedListing.CollapsedRuns, collapsedRun)
+			continue
+		}
+		newListing.CollapsedRuns = append(newListing.CollapsedRuns, collapsedRun)
+	}
+	for _, object := range listing.Objects {
+		if isUnsupportedObjectName(object.Name) {
+			removedListing.Objects = append(removedListing.Objects, object)
+			continue
+		}
+		newListing.Objects = append(newListing.Objects, object)
+	}
+	newListing.ContinuationToken = listing.ContinuationToken
+	return newListing, removedListing
+}
diff --git a/internal/util/gcs_util_test.go b/internal/util/gcs_util_test.go
new file mode 100644
index 0000000000..9c4281c97e
--- /dev/null
+++ b/internal/util/gcs_util_test.go
@@ -0,0 +1,134 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/googlecloudplatform/gcsfuse/v2/internal/storage/gcs"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/suite"
+)
+
+////////////////////////////////////////////////////////////////////////
+// Boilerplate
+////////////////////////////////////////////////////////////////////////
+
+// GcsUtilTest is the test suite for the GCS utilities of this package.
+type GcsUtilTest struct {
+	suite.Suite
+}
+
+// TestGcsUtil runs the GcsUtilTest suite.
+func TestGcsUtil(t *testing.T) {
+	suite.Run(t, new(GcsUtilTest))
+}
+
+////////////////////////////////////////////////////////////////////////
+// Tests
+////////////////////////////////////////////////////////////////////////
+
+// TestIsUnsupportedObjectName checks how individual names are classified.
+func (ts *GcsUtilTest) TestIsUnsupportedObjectName() {
+	cases := []struct {
+		name          string
+		isUnsupported bool
+	}{
+		{
+			name:          "foo",
+			isUnsupported: false,
+		},
+		{
+			name:          "foo/bar",
+			isUnsupported: false,
+		},
+		{
+			name:          "foo//bar",
+			isUnsupported: true,
+		},
+		{
+			name:          "abc/",
+			isUnsupported: false,
+		},
+		{
+			name:          "abc//",
+			isUnsupported: true,
+		},
+		{
+			name:          "/foo",
+			isUnsupported: true,
+		},
+		{
+			name:          "/",
+			isUnsupported: true,
+		},
+		{
+			name:          "",
+			isUnsupported: true,
+		},
+	}
+
+	for _, tc := range cases {
+		// %q keeps the sub-test name meaningful for the empty object name.
+		ts.Run(fmt.Sprintf("name=%q", tc.name), func() {
+			assert.Equal(ts.T(), tc.isUnsupported, isUnsupportedObjectName(tc.name))
+		})
+	}
+}
+
+// Test_RemoveUnsupportedObjectsFromListing checks that a listing is split
+// into its supported and unsupported entries, with the continuation token
+// carried over to the supported part only.
+func (ts *GcsUtilTest) Test_RemoveUnsupportedObjectsFromListing() {
+	createObject := func(name string) *gcs.Object {
+		return &gcs.Object{Name: name}
+	}
+	createObjects := func(names []string) []*gcs.Object {
+		objects := make([]*gcs.Object, 0, len(names))
+		for _, name := range names {
+			objects = append(objects, createObject(name))
+		}
+		return objects
+	}
+	origGcsListing := &gcs.Listing{
+		CollapsedRuns:     []string{"/", "a/", "b//", "c/d/", "e//f/", "g/h//"},
+		Objects:           createObjects([]string{"a", "/b", "c/d", "e//f", "g/h//i"}),
+		ContinuationToken: "hfdwefo",
+	}
+	expectedNewGcsListing := &gcs.Listing{
+		CollapsedRuns:     []string{"a/", "c/d/"},
+		Objects:           createObjects([]string{"a", "c/d"}),
+		ContinuationToken: "hfdwefo",
+	}
+	expectedRemovedGcsListing := &gcs.Listing{
+		CollapsedRuns: []string{"/", "b//", "e//f/", "g/h//"},
+		Objects:       createObjects([]string{"/b", "e//f", "g/h//i"}),
+	}
+
+	newGcsListing, removedGcsListing := RemoveUnsupportedObjectsFromListing(origGcsListing)
+
+	assert.Equal(ts.T(), *expectedNewGcsListing, *newGcsListing)
+	assert.Equal(ts.T(), *expectedRemovedGcsListing, *removedGcsListing)
+}
+
+// Test_RemoveUnsupportedObjectsFromListing_NilListing checks that a nil
+// listing yields two empty listings instead of a panic.
+func (ts *GcsUtilTest) Test_RemoveUnsupportedObjectsFromListing_NilListing() {
+	newGcsListing, removedGcsListing := RemoveUnsupportedObjectsFromListing(nil)
+
+	assert.Equal(ts.T(), &gcs.Listing{}, newGcsListing)
+	assert.Equal(ts.T(), &gcs.Listing{}, removedGcsListing)
+}