Skip to content

Commit

Permalink
fix: Python converts native strings to Dafny UTF-16 strings (#561)
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasmcdonald3 authored Sep 5, 2024
1 parent a9afaac commit d4c2094
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,10 @@ public String stringShape(StringShape shape) {
return enumShape(EnumShape.fromStringShape(shape).get());
}

return "Seq(" + dataSource + ")";
// Convert native Python string to Dafny Seq of UTF-16 characters
return "Seq(''.join([chr(int.from_bytes(pair, 'big')) for pair in zip(*[iter(%1$s.encode('utf-16-be'))]*2)]))".formatted(
dataSource
);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,10 @@ public String stringShape(StringShape shape) {
if (shape.hasTrait(EnumTrait.class)) {
return enumShape(EnumShape.fromStringShape(shape).get());
}
return dataSource + ".VerbatimString(False)";
// Convert Dafny Seq of UTF-16 characters to native Python string
return "b''.join(ord(c).to_bytes(2, 'big') for c in %1$s).decode('utf-16-be')".formatted(
dataSource
);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,23 +239,14 @@ public String booleanShape(BooleanShape shape) {

@Override
public String stringShape(StringShape shape) {
// If shape has @DafnyUtf8BytesTrait, use bytes converter
// Convert Dafny Seq of UTF-8 characters to native Python string
if (shape.hasTrait(DafnyUtf8BytesTrait.class)) {
writer.addStdlibImport(
"smithy_dafny_standard_library.internaldafny.generated",
"UTF8"
);
// Decode, then convert to native type
return "''.join(UTF8.default__.Decode(%1$s).value.Elements)".formatted(
dataSource
);
return "bytes(%1$s.Elements).decode('utf-8')".formatted(dataSource);
}
// Note: Other Smithy-Dafny code generators would treat enums here,
// as Polymorph-compatible Smithy models often define an enum
// as a StringShape with an EnumTrait.
// Smithy-Dafny-Python converts these StringShapes to EnumShapes
// at model load time, so those shapes are not handled here.
return dataSource + ".VerbatimString(False)";
// Convert Dafny Seq of UTF-16 characters to native Python string
return "b''.join(ord(c).to_bytes(2, 'big') for c in %1$s).decode('utf-16-be')".formatted(
dataSource
);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,20 +242,13 @@ public String booleanShape(BooleanShape shape) {
public String stringShape(StringShape shape) {
writer.addStdlibImport("_dafny", "Seq");
if (shape.hasTrait(DafnyUtf8BytesTrait.class)) {
writer.addStdlibImport(
"smithy_dafny_standard_library.internaldafny.generated",
"UTF8"
);
// Convert to Dafny type, then UTF8 encode
return "UTF8.default__.Encode(Seq(%1$s)).value".formatted(dataSource);
// Convert native Python string to Dafny Seq of UTF-8 bytes
return "Seq(%1$s.encode('utf-8'))".formatted(dataSource);
}
// Note: Other Smithy-Dafny code generators would treat enums here,
// as Polymorph-compatible Smithy models often define an enum
// as a StringShape with an EnumTrait.
// Smithy-Dafny-Python converts these StringShapes to EnumShapes
// at model load time, so those shapes are not handled here.
writer.addStdlibImport("_dafny", "Seq");
return "Seq(" + dataSource + ")";
// Convert native Python string to Dafny Seq of UTF-16 characters
return "Seq(''.join([chr(int.from_bytes(pair, 'big')) for pair in zip(*[iter(%1$s.encode('utf-16-be'))]*2)]))".formatted(
dataSource
);
}

@Override
Expand Down

0 comments on commit d4c2094

Please sign in to comment.