Merge branch 'main' into adreed/logClosePanic
gapra-msft authored Oct 21, 2024
2 parents 057479c + a16fe8a commit 5d80a2a
Showing 27 changed files with 593 additions and 134 deletions.
4 changes: 4 additions & 0 deletions cmd/copyEnumeratorInit.go
@@ -585,6 +585,10 @@ func (cca *CookedCopyCmdArgs) MakeEscapedRelativePath(source bool, dstIsDir bool
return "" // ignore path encode rules
}

if object.relativePath == "\x00" { // Short circuit, our relative path is requesting root/
return "\x00"
}

// source is a EXACT path to the file
if object.isSingleSourceFile() {
// If we're finding an object from the source, it returns "" if it's already got it.
2 changes: 1 addition & 1 deletion cmd/list.go
@@ -481,7 +481,7 @@ func getPath(containerName, relativePath string, level LocationLevel, entityType
builder.WriteString(containerName + "/")
}
builder.WriteString(relativePath)
if entityType == common.EEntityType.Folder() {
if entityType == common.EEntityType.Folder() && !strings.HasSuffix(relativePath, "/") {
builder.WriteString("/")
}
return builder.String()
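
The guard added here only matters when the relative path already carries a trailing slash (as directory stubs now can). A minimal, self-contained sketch of the idea, using a plain bool in place of common.EEntityType:

package main

import (
	"fmt"
	"strings"
)

// appendFolderSlash mirrors the tail of getPath: append "/" for folders,
// but only when the relative path does not already end with one.
func appendFolderSlash(relativePath string, isFolder bool) string {
	var builder strings.Builder
	builder.WriteString(relativePath)
	if isFolder && !strings.HasSuffix(relativePath, "/") {
		builder.WriteString("/")
	}
	return builder.String()
}

func main() {
	fmt.Println(appendFolderSlash("photos", true))  // "photos/"
	fmt.Println(appendFolderSlash("photos/", true)) // "photos/" (no doubled slash)
}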
5 changes: 5 additions & 0 deletions cmd/sync.go
@@ -60,6 +60,7 @@ type rawSyncCmdArgs struct {
compareHash string
localHashStorageMode string

includeDirectoryStubs bool // Includes hdi_isfolder objects in the sync even w/o preservePermissions.
preservePermissions bool
preserveSMBPermissions bool // deprecated and synonymous with preservePermissions
preserveOwner bool
@@ -368,6 +369,8 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) {

cooked.deleteDestinationFileIfNecessary = raw.deleteDestinationFileIfNecessary

cooked.includeDirectoryStubs = raw.includeDirectoryStubs

return cooked, nil
}

@@ -417,6 +420,7 @@ type cookedSyncCmdArgs struct {
putBlobSize int64
forceIfReadOnly bool
backupMode bool
includeDirectoryStubs bool

// commandString hold the user given command which is logged to the Job log file
commandString string
@@ -789,6 +793,7 @@ func init() {
rootCmd.AddCommand(syncCmd)
syncCmd.PersistentFlags().BoolVar(&raw.recursive, "recursive", true, "True by default, look into sub-directories recursively when syncing between directories. (default true).")
syncCmd.PersistentFlags().StringVar(&raw.fromTo, "from-to", "", "Optionally specifies the source destination combination. For Example: LocalBlob, BlobLocal, LocalFile, FileLocal, BlobFile, FileBlob, etc.")
syncCmd.PersistentFlags().BoolVar(&raw.includeDirectoryStubs, "include-directory-stub", false, "False by default, includes blobs with the hdi_isfolder metadata in the transfer.")

// TODO: enable for copy with IfSourceNewer
// smb info/permissions can be persisted in the scenario of File -> File
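
For context, a hedged sketch of the raw-to-cooked flag pattern this file follows (names such as rawArgs and newSyncCmd are illustrative, not AzCopy's): the cobra BoolVar fills the raw struct, cook() copies the value onto the cooked struct, and later code reads only the cooked value.

package main

import (
	"fmt"

	"github.com/spf13/cobra"
)

type rawArgs struct{ includeDirectoryStubs bool }

type cookedArgs struct{ includeDirectoryStubs bool }

func (r rawArgs) cook() cookedArgs {
	return cookedArgs{includeDirectoryStubs: r.includeDirectoryStubs}
}

func newSyncCmd() *cobra.Command {
	var raw rawArgs
	cmd := &cobra.Command{
		Use: "sync",
		Run: func(cmd *cobra.Command, args []string) {
			cooked := raw.cook() // the rest of the command only ever sees the cooked value
			fmt.Println("include directory stubs:", cooked.includeDirectoryStubs)
		},
	}
	cmd.PersistentFlags().BoolVar(&raw.includeDirectoryStubs, "include-directory-stub", false,
		"False by default, includes blobs with the hdi_isfolder metadata in the transfer.")
	return cmd
}

func main() { _ = newSyncCmd().Execute() }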
4 changes: 2 additions & 2 deletions cmd/syncEnumerator.go
@@ -55,7 +55,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s
}
}

includeDirStubs := cca.fromTo.From().SupportsHnsACLs() && cca.fromTo.To().SupportsHnsACLs() && cca.preservePermissions.IsTruthy()
includeDirStubs := (cca.fromTo.From().SupportsHnsACLs() && cca.fromTo.To().SupportsHnsACLs() && cca.preservePermissions.IsTruthy()) || cca.includeDirectoryStubs

// TODO: enable symlink support in a future release after evaluating the implications
// TODO: Consider passing an errorChannel so that enumeration errors during sync can be conveyed to the caller.
@@ -129,7 +129,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s
}

// decide our folder transfer strategy
fpo, folderMessage := NewFolderPropertyOption(cca.fromTo, cca.recursive, true, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), false, strings.EqualFold(cca.destination.Value, common.Dev_Null), false) // sync always acts like stripTopDir=true
fpo, folderMessage := NewFolderPropertyOption(cca.fromTo, cca.recursive, true, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), false, strings.EqualFold(cca.destination.Value, common.Dev_Null), cca.includeDirectoryStubs) // sync always acts like stripTopDir=true
if !cca.dryrunMode {
glcm.Info(folderMessage)
}
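
A distilled version of the new decision (an assumed simplification of the expression above): directory stubs are enumerated either when both endpoints support HNS ACLs and permissions are being preserved, or when the user opted in with --include-directory-stub.

// shouldIncludeDirStubs is an illustrative stand-in for the includeDirStubs expression above.
func shouldIncludeDirStubs(srcHNS, dstHNS, preservePerms, userOptIn bool) bool {
	return (srcHNS && dstHNS && preservePerms) || userOptIn
}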
20 changes: 13 additions & 7 deletions cmd/zc_processor.go
@@ -68,14 +68,20 @@ func (s *copyTransferProcessor) scheduleCopyTransfer(storedObject StoredObject)

// Escape paths on destinations where the characters are invalid
// And re-encode them where the characters are valid.
srcRelativePath := pathEncodeRules(storedObject.relativePath, s.copyJobTemplate.FromTo, false, true)
dstRelativePath := pathEncodeRules(storedObject.relativePath, s.copyJobTemplate.FromTo, false, false)
if srcRelativePath != "" {
srcRelativePath = "/" + srcRelativePath
}
if dstRelativePath != "" {
dstRelativePath = "/" + dstRelativePath
var srcRelativePath, dstRelativePath string
if storedObject.relativePath == "\x00" { // Short circuit when we're talking about root/, because the STE is funky about this.
srcRelativePath, dstRelativePath = storedObject.relativePath, storedObject.relativePath
} else {
srcRelativePath = pathEncodeRules(storedObject.relativePath, s.copyJobTemplate.FromTo, false, true)
dstRelativePath = pathEncodeRules(storedObject.relativePath, s.copyJobTemplate.FromTo, false, false)
if srcRelativePath != "" {
srcRelativePath = "/" + srcRelativePath
}
if dstRelativePath != "" {
dstRelativePath = "/" + dstRelativePath
}
}

copyTransfer, shouldSendToSte := storedObject.ToNewCopyTransfer(false, srcRelativePath, dstRelativePath, s.preserveAccessTier, s.folderPropertiesOption, s.symlinkHandlingType)

if s.copyJobTemplate.FromTo.To() == common.ELocation.None() {
82 changes: 58 additions & 24 deletions cmd/zc_traverser_blob.go
@@ -86,8 +86,8 @@ func (t *blobTraverser) IsDirectory(isSource bool) (bool, error) {
}

// All sources and DFS-destinations we'll look further

_, _, isDirStub, blobErr := t.getPropertiesIfSingleBlob()
// This call is fine, because there is no trailing / here-- If there's a trailing /, this is surely referring to a directory.
_, _, isDirStub, _, blobErr := t.getPropertiesIfSingleBlob()

// We know for sure this is a single blob still, let it walk on through to the traverser.
if bloberror.HasCode(blobErr, bloberror.BlobUsesCustomerSpecifiedEncryption) {
@@ -127,36 +127,45 @@ func (t *blobTraverser) IsDirectory(isSource bool) (bool, error) {
return true, nil
}

func (t *blobTraverser) getPropertiesIfSingleBlob() (response *blob.GetPropertiesResponse, isBlob bool, isDirStub bool, err error) {
func (t *blobTraverser) getPropertiesIfSingleBlob() (response *blob.GetPropertiesResponse, isBlob bool, isDirStub bool, blobName string, err error) {
// trim away the trailing slash before we check whether it's a single blob
// so that we can detect the directory stub in case there is one
blobURLParts, err := blob.ParseURL(t.rawURL)
if err != nil {
return nil, false, false, err
return nil, false, false, "", err
}
blobURLParts.BlobName = strings.TrimSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING)

if blobURLParts.BlobName == "" {
// This is a container, which needs to be given a proper listing.
return nil, false, false, nil
return nil, false, false, "", nil
}

/*
If the user specified a trailing /, they may mean:
A) `folder/` with `hdi_isfolder`, this is intentional.
B) `folder` with `hdi_isfolder`
C) a virtual directory with children, but no stub
*/

retry:
blobClient, err := createBlobClientFromServiceClient(blobURLParts, t.serviceClient)
if err != nil {
return nil, false, false, err
return nil, false, false, blobURLParts.BlobName, err
}
props, err := blobClient.GetProperties(t.ctx, &blob.GetPropertiesOptions{CPKInfo: t.cpkOptions.GetCPKInfo()})

// if there was no problem getting the properties, it means that we are looking at a single blob
if err == nil {
if gCopyUtil.doesBlobRepresentAFolder(props.Metadata) {
return &props, false, true, nil
}

return &props, true, false, err
if err != nil && strings.HasSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING) {
// Trim & retry, maybe the directory stub is DFS style.
blobURLParts.BlobName = strings.TrimSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING)
goto retry
} else if err == nil {
// We found the target blob, great! Let's return the details.
isDir := gCopyUtil.doesBlobRepresentAFolder(props.Metadata)
return &props, !isDir, isDir, blobURLParts.BlobName, nil
}

return nil, false, false, err
// We found nothing.
return nil, false, false, "", err
}
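
A hedged, SDK-free sketch of the lookup order the retry label implements (lookup stands in for blobClient.GetProperties): try the name exactly as given, and on a miss trim one trailing slash and try again, so both "folder/" stubs and DFS-style "folder" stubs are found before falling back to a virtual-directory listing.

package main

import (
	"fmt"
	"strings"
)

func resolveStubName(name string, lookup func(string) bool) (string, bool) {
	for {
		if lookup(name) {
			return name, true // found a blob at this exact name
		}
		if !strings.HasSuffix(name, "/") {
			return "", false // neither spelling exists as a blob
		}
		name = strings.TrimSuffix(name, "/") // maybe the stub is DFS style
	}
}

func main() {
	existing := map[string]bool{"folder": true} // stub stored without the slash
	fmt.Println(resolveStubName("folder/", func(n string) bool { return existing[n] }))
	// Output: folder true
}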

func (t *blobTraverser) getBlobTags() (common.BlobTags, error) {
@@ -190,7 +199,7 @@ func (t *blobTraverser) Traverse(preprocessor objectMorpher, processor objectPro
}

// check if the url points to a single blob
blobProperties, isBlob, isDirStub, err := t.getPropertiesIfSingleBlob()
blobProperties, isBlob, isDirStub, blobName, err := t.getPropertiesIfSingleBlob()

var respErr *azcore.ResponseError
if errors.As(err, &respErr) {
@@ -223,11 +232,16 @@ func (t *blobTraverser) Traverse(preprocessor objectMorpher, processor objectPro
azcopyScanningLogger.Log(common.LogDebug, fmt.Sprintf("Root entity type: %s", getEntityType(blobProperties.Metadata)))
}

relPath := ""
if strings.HasSuffix(blobName, "/") {
relPath = "\x00" // Because the ste will trim the / suffix from our source, or we may not already have it.
}

blobPropsAdapter := blobPropertiesResponseAdapter{blobProperties}
storedObject := newStoredObject(
preprocessor,
getObjectNameOnly(strings.TrimSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING)),
"",
getObjectNameOnly(blobName),
relPath,
getEntityType(blobPropsAdapter.Metadata),
blobPropsAdapter.LastModified(),
blobPropsAdapter.ContentLength(),
@@ -339,15 +353,27 @@ func (t *blobTraverser) parallelList(containerClient *container.Client, containe

if t.includeDirectoryStubs {
// try to get properties on the directory itself, since it's not listed in BlobItems
blobClient := containerClient.NewBlobClient(strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING))
dName := strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING)
blobClient := containerClient.NewBlobClient(dName)
altNameCheck:
pResp, err := blobClient.GetProperties(t.ctx, nil)
pbPropAdapter := blobPropertiesResponseAdapter{&pResp}
folderRelativePath := strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING)
folderRelativePath = strings.TrimPrefix(folderRelativePath, searchPrefix)
if err == nil {
if !t.doesBlobRepresentAFolder(pResp.Metadata) { // We've picked up on a file *named* the folder, not the folder itself. Does folder/ exist?
if !strings.HasSuffix(dName, "/") {
blobClient = containerClient.NewBlobClient(dName + common.AZCOPY_PATH_SEPARATOR_STRING) // Tack on the path separator, check.
dName += common.AZCOPY_PATH_SEPARATOR_STRING
goto altNameCheck // "foo" is a file, what about "foo/"?
}

goto skipDirAdd // We shouldn't add a blob that isn't a folder as a folder. You either have the folder metadata, or you don't.
}

pbPropAdapter := blobPropertiesResponseAdapter{&pResp}
folderRelativePath := strings.TrimPrefix(dName, searchPrefix)

storedObject := newStoredObject(
preprocessor,
getObjectNameOnly(strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING)),
getObjectNameOnly(dName),
folderRelativePath,
common.EEntityType.Folder(),
pbPropAdapter.LastModified(),
@@ -371,7 +397,15 @@ }
}

enqueueOutput(storedObject, err)
} else {
// There was nothing there, but is there folder/?
if !strings.HasSuffix(dName, "/") {
blobClient = containerClient.NewBlobClient(dName + common.AZCOPY_PATH_SEPARATOR_STRING) // Tack on the path separator, check.
dName += common.AZCOPY_PATH_SEPARATOR_STRING
goto altNameCheck // "foo" is a file, what about "foo/"?
}
}
skipDirAdd:
}
}
}
@@ -487,7 +521,7 @@ func (t *blobTraverser) createStoredObjectForBlob(preprocessor objectMorpher, bl

func (t *blobTraverser) doesBlobRepresentAFolder(metadata map[string]*string) bool {
util := copyHandlerUtil{}
return util.doesBlobRepresentAFolder(metadata) && !(t.includeDirectoryStubs && t.recursive)
return util.doesBlobRepresentAFolder(metadata) // We should ignore these, because we pick them up in other ways.
}

func (t *blobTraverser) serialList(containerClient *container.Client, containerName string, searchPrefix string,
3 changes: 3 additions & 0 deletions common/extensions.go
@@ -144,6 +144,8 @@ func GenerateFullPath(rootPath, childPath string) string {
// if the childPath is empty, it means the rootPath already points to the desired entity
if childPath == "" {
return rootPath
} else if childPath == "\x00" { // The enumerator has asked us to target with a / at the end of our root path. This is a massive hack. When the footgun happens later, ping Adele!
return rootPath + rootSeparator
}

// otherwise, make sure a path separator is inserted between the rootPath if necessary
@@ -167,6 +169,7 @@ func GenerateFullPathWithQuery(rootPath, childPath, extraQuery string) string {
// Block Names of blobs are of format noted below.
// <5B empty placeholder> <16B GUID of AzCopy re-interpreted as string><5B PartNum><5B Index in the jobPart><5B blockNum>
const AZCOPY_BLOCKNAME_LENGTH = 48

func GenerateBlockBlobBlockID(blockNamePrefix string, index int32) string {
blockID := []byte(fmt.Sprintf("%s%05d", blockNamePrefix, index))
return base64.StdEncoding.EncodeToString(blockID)
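
To make the round trip concrete: the enumerator marks the root-with-trailing-slash case with the "\x00" sentinel, and GenerateFullPath (patched above) turns it back into rootPath plus a separator rather than appending a literal NUL. generateFullPath below is a simplified stand-in for the real common.GenerateFullPath, shown only to illustrate the sentinel handling.

package main

import (
	"fmt"
	"strings"
)

func generateFullPath(rootPath, childPath string) string {
	rootPath = strings.TrimSuffix(rootPath, "/")
	if childPath == "" {
		return rootPath // root already points at the target
	} else if childPath == "\x00" {
		return rootPath + "/" // enumerator asked for root/ itself
	}
	return rootPath + "/" + strings.TrimPrefix(childPath, "/")
}

func main() {
	root := "https://account.blob.core.windows.net/container/dir"
	fmt.Println(generateFullPath(root, "\x00"))     // .../container/dir/
	fmt.Println(generateFullPath(root, "file.txt")) // .../container/dir/file.txt
}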
43 changes: 43 additions & 0 deletions e2etest/newe2e_generic_wrangling.go
@@ -88,6 +88,22 @@ func ListOfAny[T any](in []T) []any {
return out
}

func Keys[K comparable, V any](in map[K]V) []K {
out := make([]K, 0, len(in))
for k, _ := range in {
out = append(out, k)
}
return out
}

func AnyKeys[K comparable, V any](in map[K]V) []any {
out := make([]any, 0, len(in))
for k, _ := range in {
out = append(out, k)
}
return out
}

func CloneMap[K comparable, V any](in map[K]V) map[K]V {
out := make(map[K]V)

@@ -98,6 +114,23 @@ func CloneMap[K comparable, V any](in map[K]V) map[K]V {
return out
}

func CloneMapWithRule[K comparable, V any](in map[K]V, rule func(K, V) (key K, value V, include bool)) map[K]V {
out := make(map[K]V)

for k, v := range in {
var include bool
k, v, include = rule(k, v)

if !include {
continue
}

out[k] = v
}

return out
}

func ListContains[I comparable](item I, in []I) bool {
for _, v := range in {
if item == v {
@@ -108,6 +141,16 @@ func ListContains[I comparable](item I, in []I) bool {
return false
}

func Any[I any](items []I, f func(I) bool) bool {
for _, v := range items {
if f(v) {
return true
}
}

return false
}
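
A quick usage sketch for the helpers added above, assuming it sits in the same e2etest package (so Keys, AnyKeys, CloneMapWithRule, and Any are in scope) with fmt and strings imported; the metadata map is made up for illustration.

func demoMapHelpers() {
	meta := map[string]string{"hdi_isfolder": "true", "author": "azcopy"}

	fmt.Println(len(Keys(meta)), len(AnyKeys(meta))) // 2 2

	// Keep only the hdi_* entries, upper-casing their values.
	stubsOnly := CloneMapWithRule(meta, func(k, v string) (string, string, bool) {
		return k, strings.ToUpper(v), strings.HasPrefix(k, "hdi_")
	})
	fmt.Println(stubsOnly) // map[hdi_isfolder:TRUE]

	// Is any key an hdi_* key?
	fmt.Println(Any(Keys(meta), func(k string) bool { return strings.HasPrefix(k, "hdi_") })) // true
}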

func ClonePointer[T any](in *T) *T {
if in == nil {
return nil
45 changes: 42 additions & 3 deletions e2etest/newe2e_object_content.go
@@ -7,6 +7,8 @@ import (
"github.com/Azure/azure-storage-azcopy/v10/common"
"io"
"math/rand"
"strconv"
"time"
)

type ObjectContentContainer interface {
@@ -25,10 +27,47 @@ func SizeFromString(objectSize string) int64 {
return longSize
}

func NewRandomObjectContentContainer(a Asserter, size int64) ObjectContentContainer {
var megaSize = []string{
"B",
"KB",
"MB",
"GB",
"TB",
"PB",
"EB",
}

func SizeToString(size int64, megaUnits bool) string {
units := []string{
"B",
"KiB",
"MiB",
"GiB",
"TiB",
"PiB",
"EiB", // Let's face it, a file, account, or container probably won't be more than 1000 exabytes in YEARS.
// (and int64 literally isn't large enough to handle too many exbibytes. 128 bit processors when)
}
unit := 0
floatSize := float64(size)
gigSize := 1024

if megaUnits {
gigSize = 1000
units = megaSize
}

for floatSize/float64(gigSize) >= 1 {
unit++
floatSize /= float64(gigSize)
}

return strconv.FormatFloat(floatSize, 'f', 2, 64) + " " + units[unit]
}

func NewRandomObjectContentContainer(size int64) ObjectContentContainer {
buf := make([]byte, size)
_, err := rand.Read(buf)
a.NoError("Generate random data", err)
_, _ = rand.New(rand.NewSource(time.Now().Unix())).Read(buf)
return &ObjectContentContainerBuffer{buf}
}
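
A couple of spot checks for SizeToString, assuming it is called from the same package with fmt imported: binary mode divides by 1024 (B, KiB, MiB, ...), mega mode divides by 1000 (B, KB, MB, ...). Expected values follow from the loop above.

func demoSizeToString() {
	fmt.Println(SizeToString(512, false))         // 512.00 B
	fmt.Println(SizeToString(1536, false))        // 1.50 KiB
	fmt.Println(SizeToString(1536, true))         // 1.54 KB
	fmt.Println(SizeToString(5*1024*1024, false)) // 5.00 MiB
}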

