Skip to content

Commit

Permalink
Add utilities to filter unsupported objects
Browse files Browse the repository at this point in the history
  • Loading branch information
gargnitingoogle committed Oct 11, 2024
1 parent 3207fba commit 6c980c4
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 0 deletions.
76 changes: 76 additions & 0 deletions internal/util/gcs_util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package util

import (
"strings"

"github.com/googlecloudplatform/gcsfuse/v2/internal/storage/gcs"
)

// Patterns describing GCS object names (or prefixes) that GCSFuse treats as
// unsupported. A name is rejected as soon as it matches any entry of any list.
var (
	// A name is unsupported if any of these occurs anywhere inside it.
	unsupportedObjectNameSubstrings = []string{"//"}
	// A name is unsupported if it starts with any of these.
	unsupportedObjectNamePrefixes = []string{"/"}
	// A name is unsupported if it is exactly equal to any of these.
	unsupportedObjectNames = []string{""}
)

// isUnsupportedObjectName reports whether name, which may be a perfectly
// valid GCS object name or prefix, is one that GCSFuse does not support.
//
// It returns true when name contains an entry of
// unsupportedObjectNameSubstrings, starts with an entry of
// unsupportedObjectNamePrefixes, or equals an entry of
// unsupportedObjectNames; otherwise it returns false.
func isUnsupportedObjectName(name string) bool {
	// matchesAny reports whether name satisfies the given predicate for at
	// least one of the patterns.
	matchesAny := func(patterns []string, matches func(s, pattern string) bool) bool {
		for _, pattern := range patterns {
			if matches(name, pattern) {
				return true
			}
		}
		return false
	}
	isExactly := func(s, pattern string) bool { return s == pattern }

	return matchesAny(unsupportedObjectNameSubstrings, strings.Contains) ||
		matchesAny(unsupportedObjectNamePrefixes, strings.HasPrefix) ||
		matchesAny(unsupportedObjectNames, isExactly)
}

// RemoveUnsupportedObjectsFromListing splits listing into the entries GCSFuse
// can serve and the entries it must ignore because their names are
// unsupported (for example, names containing `//`).
//
// Background: a GCS bucket may hold both a//b and a/b as distinct objects,
// whereas a linux file system resolves both paths to a/b. Exposing a//b
// would therefore lead to `input/output error`s, so such entries are
// filtered out of the listing.
//
// newListing receives every supported collapsed run and object, in their
// original order, together with the continuation token of listing.
// removedListing receives every unsupported collapsed run and object, in
// their original order; its continuation token is left empty.
// listing itself is not modified. It is dereferenced without a nil check.
func RemoveUnsupportedObjectsFromListing(listing *gcs.Listing) (newListing *gcs.Listing, removedListing *gcs.Listing) {
	newListing = &gcs.Listing{ContinuationToken: listing.ContinuationToken}
	removedListing = &gcs.Listing{}

	for _, run := range listing.CollapsedRuns {
		dst := newListing
		if isUnsupportedObjectName(run) {
			dst = removedListing
		}
		dst.CollapsedRuns = append(dst.CollapsedRuns, run)
	}

	for _, obj := range listing.Objects {
		dst := newListing
		if isUnsupportedObjectName(obj.Name) {
			dst = removedListing
		}
		dst.Objects = append(dst.Objects, obj)
	}

	return newListing, removedListing
}
113 changes: 113 additions & 0 deletions internal/util/gcs_util_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package util

import (
"fmt"
"testing"

"github.com/googlecloudplatform/gcsfuse/v2/internal/storage/gcs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
)

////////////////////////////////////////////////////////////////////////
// Boilerplate
////////////////////////////////////////////////////////////////////////

// GcsUtilTest is the testify suite that groups the tests in this file.
// It embeds suite.Suite and declares no fields of its own.
type GcsUtilTest struct {
	suite.Suite
}

// TestGcsUtil is the `go test` entry point; it hands a fresh GcsUtilTest
// to suite.Run.
func TestGcsUtil(t *testing.T) {
	suite.Run(t, &GcsUtilTest{})
}

////////////////////////////////////////////////////////////////////////
// Tests
////////////////////////////////////////////////////////////////////////

// TestIsUnsupportedObjectName checks isUnsupportedObjectName against a table
// of names covering each kind of rule: substring (`//`), prefix (`/`) and
// exact match (the empty name), as well as names that must be accepted.
func (ts *GcsUtilTest) TestIsUnsupportedObjectName() {
	cases := []struct {
		name          string
		isUnsupported bool
	}{
		{
			name:          "foo",
			isUnsupported: false,
		},
		{
			name:          "foo/bar",
			isUnsupported: false,
		},
		{
			name:          "foo//bar",
			isUnsupported: true,
		},
		{
			name:          "abc/",
			isUnsupported: false,
		},
		{
			name:          "abc//",
			isUnsupported: true,
		},
		{
			name:          "/foo",
			isUnsupported: true,
		},
		{
			name:          "/",
			isUnsupported: true,
		},
		{
			// The empty name is the only entry of the exact-match list and
			// is not caught by the substring or prefix rules.
			name:          "",
			isUnsupported: true,
		},
	}

	for _, tc := range cases {
		ts.Run(fmt.Sprintf("name=%s", tc.name), func() {
			assert.Equal(ts.T(), tc.isUnsupported, isUnsupportedObjectName(tc.name))
		})
	}
}

// Test_RemoveUnsupportedObjectsFromListing feeds a listing that mixes
// supported and unsupported collapsed runs and objects through
// RemoveUnsupportedObjectsFromListing and verifies that:
//   - the kept listing holds only the supported entries, in order, and
//     carries over the continuation token;
//   - the removed listing holds only the unsupported entries, in order, and
//     has no continuation token.
func (ts *GcsUtilTest) Test_RemoveUnsupportedObjectsFromListing() {
	// createObjects builds one bare *gcs.Object per name; only Name is set.
	createObjects := func(names []string) []*gcs.Object {
		objects := make([]*gcs.Object, 0, len(names))
		for _, name := range names {
			objects = append(objects, &gcs.Object{Name: name})
		}
		return objects
	}
	origGcsListing := &gcs.Listing{
		CollapsedRuns:     []string{"/", "a/", "b//", "c/d/", "e//f/", "g/h//"},
		Objects:           createObjects([]string{"a", "/b", "c/d", "e//f", "g/h//i"}),
		ContinuationToken: "hfdwefo",
	}
	expectedNewGcsListing := &gcs.Listing{
		CollapsedRuns:     []string{"a/", "c/d/"},
		Objects:           createObjects([]string{"a", "c/d"}),
		ContinuationToken: "hfdwefo",
	}
	expectedRemovedGcsListing := &gcs.Listing{
		CollapsedRuns: []string{"/", "b//", "e//f/", "g/h//"},
		Objects:       createObjects([]string{"/b", "e//f", "g/h//i"}),
	}

	newGcsListing, removedGcsListing := RemoveUnsupportedObjectsFromListing(origGcsListing)

	assert.Equal(ts.T(), *expectedNewGcsListing, *newGcsListing)
	assert.Equal(ts.T(), *expectedRemovedGcsListing, *removedGcsListing)
}

0 comments on commit 6c980c4

Please sign in to comment.