From d4c20946f26fe1e7508d1b7b25914f662ca093fb Mon Sep 17 00:00:00 2001 From: Lucas McDonald Date: Thu, 5 Sep 2024 13:42:05 -0700 Subject: [PATCH] fix: Python converts native strings to Dafny UTF-16 strings (#561) --- .../AwsSdkToDafnyShapeVisitor.java | 5 ++++- .../DafnyToAwsSdkShapeVisitor.java | 5 ++++- .../DafnyToLocalServiceShapeVisitor.java | 21 ++++++------------- .../LocalServiceToDafnyShapeVisitor.java | 19 ++++++----------- 4 files changed, 20 insertions(+), 30 deletions(-) diff --git a/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/awssdk/shapevisitor/AwsSdkToDafnyShapeVisitor.java b/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/awssdk/shapevisitor/AwsSdkToDafnyShapeVisitor.java index b83f32e88..d2f77c24a 100644 --- a/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/awssdk/shapevisitor/AwsSdkToDafnyShapeVisitor.java +++ b/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/awssdk/shapevisitor/AwsSdkToDafnyShapeVisitor.java @@ -206,7 +206,10 @@ public String stringShape(StringShape shape) { return enumShape(EnumShape.fromStringShape(shape).get()); } - return "Seq(" + dataSource + ")"; + // Convert native Python string to Dafny Seq of UTF-16 characters + return "Seq(''.join([chr(int.from_bytes(pair, 'big')) for pair in zip(*[iter(%1$s.encode('utf-16-be'))]*2)]))".formatted( + dataSource + ); } @Override diff --git a/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/awssdk/shapevisitor/DafnyToAwsSdkShapeVisitor.java b/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/awssdk/shapevisitor/DafnyToAwsSdkShapeVisitor.java index ec7bbab1a..4cbc0abcb 100644 --- a/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/awssdk/shapevisitor/DafnyToAwsSdkShapeVisitor.java +++ b/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/awssdk/shapevisitor/DafnyToAwsSdkShapeVisitor.java @@ -193,7 +193,10 @@ public String stringShape(StringShape shape) { if (shape.hasTrait(EnumTrait.class)) { return enumShape(EnumShape.fromStringShape(shape).get()); } - return dataSource + ".VerbatimString(False)"; + // Convert Dafny Seq of UTF-16 characters to native Python string + return "b''.join(ord(c).to_bytes(2, 'big') for c in %1$s).decode('utf-16-be')".formatted( + dataSource + ); } @Override diff --git a/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/localservice/shapevisitor/DafnyToLocalServiceShapeVisitor.java b/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/localservice/shapevisitor/DafnyToLocalServiceShapeVisitor.java index b667d61b7..0f5658948 100644 --- a/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/localservice/shapevisitor/DafnyToLocalServiceShapeVisitor.java +++ b/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/localservice/shapevisitor/DafnyToLocalServiceShapeVisitor.java @@ -239,23 +239,14 @@ public String booleanShape(BooleanShape shape) { @Override public String stringShape(StringShape shape) { - // If shape has @DafnyUtf8BytesTrait, use bytes converter + // Convert Dafny Seq of UTF-8 characters to native Python string if (shape.hasTrait(DafnyUtf8BytesTrait.class)) { - writer.addStdlibImport( - "smithy_dafny_standard_library.internaldafny.generated", - "UTF8" - ); - // Decode, then convert to native type - return "''.join(UTF8.default__.Decode(%1$s).value.Elements)".formatted( - dataSource - ); + return "bytes(%1$s.Elements).decode('utf-8')".formatted(dataSource); } - // Note: Other Smithy-Dafny code generators would treat enums here, - // as Polymorph-compatible Smithy models often define an enum - // as a StringShape with an EnumTrait. - // Smithy-Dafny-Python converts these StringShapes to EnumShapes - // at model load time, so those shapes are not handled here. - return dataSource + ".VerbatimString(False)"; + // Convert Dafny Seq of UTF-16 characters to native Python string + return "b''.join(ord(c).to_bytes(2, 'big') for c in %1$s).decode('utf-16-be')".formatted( + dataSource + ); } @Override diff --git a/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/localservice/shapevisitor/LocalServiceToDafnyShapeVisitor.java b/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/localservice/shapevisitor/LocalServiceToDafnyShapeVisitor.java index ce27505f4..b9aa10511 100644 --- a/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/localservice/shapevisitor/LocalServiceToDafnyShapeVisitor.java +++ b/codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithypython/localservice/shapevisitor/LocalServiceToDafnyShapeVisitor.java @@ -242,20 +242,13 @@ public String booleanShape(BooleanShape shape) { public String stringShape(StringShape shape) { writer.addStdlibImport("_dafny", "Seq"); if (shape.hasTrait(DafnyUtf8BytesTrait.class)) { - writer.addStdlibImport( - "smithy_dafny_standard_library.internaldafny.generated", - "UTF8" - ); - // Convert to Dafny type, then UTF8 encode - return "UTF8.default__.Encode(Seq(%1$s)).value".formatted(dataSource); + // Convert native Python string to Dafny Seq of UTF-8 bytes + return "Seq(%1$s.encode('utf-8'))".formatted(dataSource); } - // Note: Other Smithy-Dafny code generators would treat enums here, - // as Polymorph-compatible Smithy models often define an enum - // as a StringShape with an EnumTrait. - // Smithy-Dafny-Python converts these StringShapes to EnumShapes - // at model load time, so those shapes are not handled here. - writer.addStdlibImport("_dafny", "Seq"); - return "Seq(" + dataSource + ")"; + // Convert native Python string to Dafny Seq of UTF-16 characters + return "Seq(''.join([chr(int.from_bytes(pair, 'big')) for pair in zip(*[iter(%1$s.encode('utf-16-be'))]*2)]))".formatted( + dataSource + ); } @Override