Skip to content

Commit

Permalink
StringFormat supports {...}
Browse files Browse the repository at this point in the history
  • Loading branch information
fluentfuture committed Jun 5, 2023
1 parent 196882c commit 2bc3322
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 27 deletions.
70 changes: 43 additions & 27 deletions mug/src/main/java/com/google/mu/util/StringFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static com.google.mu.util.Substring.before;
import static com.google.mu.util.Substring.first;
import static com.google.mu.util.Substring.suffix;
import static com.google.mu.util.Substring.BoundStyle.INCLUSIVE;
import static com.google.mu.util.stream.MoreCollectors.combining;
import static com.google.mu.util.stream.MoreCollectors.onlyElement;
import static java.util.Collections.unmodifiableList;
Expand All @@ -27,12 +28,21 @@
import com.google.mu.util.stream.MoreStreams;

/**
* A (lossy) reverse operation of {@link String#format} to extract placeholder values from input
* strings according to a format string. For example:
* A string parser to extract placeholder values from input strings according to a format string.
* For example:
*
* <pre>{@code
* return new StringFormat("{address}+{subaddress}@{domain}")
* .parse("user+test@example.com", (address, subaddress, domain) -> ...);
* .parse("user+test@example.com", (address, subaddress, domain) -> ...);
* }</pre>
*
* <p>Starting from 6.7, if a certain placeholder is uninteresting and you'd rather not name it,
* you can use the special {@code {...}} placeholder and then you won't need to assign a lambda
* variable to capture it:
*
* <pre>{@code
* return new StringFormat("{...}+{subaddress}@{domain}")
* .parse("user+test@example.com", (subaddress, domain) -> ...);
* }</pre>
*
* <p>Note that, except for the placeholders, characters in the format string are treated as literals.
Expand Down Expand Up @@ -65,8 +75,14 @@
* @since 6.6
*/
public final class StringFormat {
private static final Substring.RepeatingPattern PLACEHOLDERS =
Substring.consecutive(c -> c != '{' && c != '}') // Find the inner-most pairs of curly braces.
.immediatelyBetween("{", INCLUSIVE, "}", INCLUSIVE)
.repeatedly();
private final String format;
private final List<String> delimiters; // The string literals between placeholders
private final List<Boolean> toCapture;
private final int numCapturingPlaceholders;
private final CharPredicate requiredChars; // null for unconstrained matches

/**
Expand Down Expand Up @@ -117,29 +133,25 @@ public StringFormat(String format) {
}

private StringFormat(String format, CharPredicate requiredChars) {
Stream.Builder<String> delimiters = Stream.builder();
Stream.Builder<Boolean> toCapture = Stream.builder();
PLACEHOLDERS.split(format)
.forEachOrdered(
literal -> {
delimiters.add(literal.toString());
toCapture.add(!format.startsWith("...}", literal.index() + literal.length() + 1));
});
this.format = format;
this.delimiters =
Substring.consecutive(c -> c != '{' && c != '}') // Find the inner-most pairs of curly braces.
.immediatelyBetween("{", "}")
.repeatedly()
.split(format)
.map(
literal ->
// Format "{key:{k}, value:{v}}" will split into ["{key:{", "}, value:{", "}}"].
// Remove the leading "}" for all except the first split results, then remove
// the trailing '{' for all except the last split results. The result is the
// exact literals around {k} and {v}: ["{key:", ", value:", "}"].
literal.skip(
literal.index() == 0 ? 0 : 1,
literal.index() + literal.length() == format.length() ? 0 : 1))
.map(Substring.Match::toString)
.collect(toImmutableList());
this.delimiters = delimiters.build().collect(toImmutableList());
this.toCapture = toCapture.build().collect(toImmutableList());
this.numCapturingPlaceholders =
this.delimiters.size() - 1 - (int) this.toCapture.stream().filter(c -> !c).count();
this.requiredChars = requiredChars;
for (int i = 1; i < numPlaceholders(); i++) {
if (delimiters.get(i).isEmpty()) {
if (this.delimiters.get(i).isEmpty()) {
throw new IllegalArgumentException("Placeholders cannot be next to each other: " + format);
}
}
this.requiredChars = requiredChars;
}

/**
Expand Down Expand Up @@ -260,8 +272,8 @@ public Optional<List<Substring.Match>> parse(String input) {
if (!input.startsWith(delimiters.get(0))) { // first literal is the prefix
return Optional.empty();
}
List<Substring.Match> builder = new ArrayList<>(numCapturingPlaceholders);
final int numPlaceholders = numPlaceholders();
List<Substring.Match> builder = new ArrayList<>(numPlaceholders);
int inputIndex = delimiters.get(0).length();
for (int i = 1; i <= numPlaceholders; i++) {
// subsequent literals are searched left-to-right; last literal is the suffix.
Expand All @@ -271,7 +283,9 @@ public Optional<List<Substring.Match>> parse(String input) {
if (placeholder == null || !isValidPlaceholderValue(placeholder)) {
return Optional.empty();
}
builder.add(placeholder);
if (toCapture.get(i - 1)) {
builder.add(placeholder);
}
inputIndex = placeholder.index() + placeholder.length() + delimiters.get(i).length();
}
return optional(inputIndex == input.length(), unmodifiableList(builder));
Expand Down Expand Up @@ -307,7 +321,7 @@ public Stream<List<Substring.Match>> scan(String input) {
return null;
}
inputIndex += delimiters.get(0).length();
List<Substring.Match> builder = new ArrayList<>(numPlaceholders);
List<Substring.Match> builder = new ArrayList<>(numCapturingPlaceholders);
for (int i = 1; i <= numPlaceholders; i++) {
String literal = delimiters.get(i);
// Always search left-to-right. The last placeholder at the end of format is suffix.
Expand All @@ -319,7 +333,9 @@ public Stream<List<Substring.Match>> scan(String input) {
if (placeholder == null) {
return null;
}
builder.add(placeholder);
if (toCapture.get(i - 1)) {
builder.add(placeholder);
}
inputIndex = placeholder.index() + placeholder.length() + literal.length();
}
if (inputIndex == input.length()) {
Expand Down Expand Up @@ -525,11 +541,11 @@ private int numPlaceholders() {
}

private void checkPlaceholderCount(int expected) {
if (numPlaceholders() != expected) {
if (numCapturingPlaceholders != expected) {
throw new IllegalArgumentException(
String.format(
"format string has %s placeholders; %s expected.",
numPlaceholders(),
numCapturingPlaceholders,
expected));
}
}
Expand Down
74 changes: 74 additions & 0 deletions mug/src/test/java/com/google/mu/util/StringFormatTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.google.mu.util;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;
Expand Down Expand Up @@ -35,12 +36,24 @@ public void parse_onlyPlaceholder(@TestParameter Mode mode) {
assertThat(format.parse("Hello Tom!", v -> v)).hasValue("Hello Tom!");
}

@Test
public void parse_onlyEllipsis(@TestParameter Mode mode) {
  // A format made of nothing but "{...}" parses successfully yet yields no captured matches.
  assertThat(mode.formatOf("{...}").parse("Hello Tom!")).hasValue(ImmutableList.of());
}

@Test
public void parse_singlePlaceholder(@TestParameter Mode mode) {
  // The text between the "Hello " prefix and the "!" suffix is what the lambda receives.
  assertThat(mode.formatOf("Hello {v}!").parse("Hello Tom!", v -> v)).hasValue("Tom");
}

@Test
public void parse_singlePlaceholder_withEllipsis(@TestParameter Mode mode) {
  // "Tom" is consumed by the anonymous placeholder but is left out of the returned matches.
  assertThat(mode.formatOf("Hello {...}!").parse("Hello Tom!")).hasValue(ImmutableList.of());
}

@Test
public void parse_multiplePlaceholders(@TestParameter Mode mode) {
StringFormat format = mode.formatOf("Hello {person}, welcome to {place}!");
Expand All @@ -51,6 +64,21 @@ public void parse_multiplePlaceholders(@TestParameter Mode mode) {
.inOrder();
}

@Test
public void parse_multiplePlaceholders_withEllipsis(@TestParameter Mode mode) {
  // Only the named {place} placeholder contributes a match; the "{...}" one is dropped.
  assertThat(
          mode.formatOf("Hello {...}, welcome to {place}!")
              .parse("Hello Gandolf, welcome to Isengard!")
              .get()
              .stream()
              .map(Substring.Match::toString))
      .containsExactly("Isengard");
}

@Test
public void parse_multiplePlaceholders_withEllipsis_usingLambda(@TestParameter Mode mode) {
  // With "{...}" not captured, a one-argument lambda is enough for this two-placeholder format.
  assertThat(
          mode.formatOf("Hello {...}, welcome to {place}!")
              .parse("Hello Gandolf, welcome to Isengard!", p -> p))
      .hasValue("Isengard");
}

@Test
public void parse_multiplePlaceholdersWithSameName(@TestParameter Mode mode) {
StringFormat format = mode.formatOf("Hello {name} and {name}!");
Expand Down Expand Up @@ -350,6 +378,15 @@ public void scan_singlePlaceholder(@TestParameter Mode mode) {
.inOrder();
}

@Test
public void scan_singlePlaceholder_withEllipsis(@TestParameter Mode mode) {
  StringFormat idFormat = mode.formatOf("[id={...}]");
  // Input without the surrounding brackets produces no results at all.
  assertThat(idFormat.scan("id=1")).isEmpty();
  // Every occurrence produces one result, which is an empty list since "{...}" is not captured.
  assertThat(idFormat.scan("[id=foo]")).containsExactly(ImmutableList.of());
  assertThat(idFormat.scan("[id=foo][id=bar]"))
      .containsExactly(ImmutableList.of(), ImmutableList.of())
      .inOrder();
}

@Test
public void scan_singlePlaceholder_emptyInput(@TestParameter Mode mode) {
assertThat(mode.formatOf("[id={id}]").scan("", id -> id)).isEmpty();
Expand Down Expand Up @@ -390,6 +427,32 @@ public void scan_twoPlaceholders(@TestParameter Mode mode) {
.inOrder();
}

@Test
public void scan_twoPlaceholders_withEllipsis(@TestParameter Mode mode) {
  StringFormat format = mode.formatOf("[id={...}, name={name}]");
  // Single occurrence: only the value of {name} shows up in the result.
  assertThat(
          format
              .scan("[id=foo, name=bar]")
              .map(
                  matches ->
                      matches.stream().map(Substring.Match::toString).collect(toImmutableList())))
      .containsExactly(ImmutableList.of("bar"));
  // Two occurrences, the first with an empty id: one single-element list per occurrence.
  assertThat(
          format
              .scan("[id=, name=bar][id=zoo, name=boo]")
              .map(
                  matches ->
                      matches.stream().map(Substring.Match::toString).collect(toImmutableList())))
      .containsExactly(ImmutableList.of("bar"), ImmutableList.of("boo"))
      .inOrder();
}

@Test
public void scan_twoPlaceholders_withEllipsis_usingLambda(@TestParameter Mode mode) {
  // "{...}" in the trailing position: the lambda only sees the id.
  StringFormat idOnly = mode.formatOf("[id={id}, name={...}]");
  assertThat(idOnly.scan("[id=foo, name=bar]", id -> id)).containsExactly("foo");

  // "{...}" in the leading position: the lambda only sees the name, once per occurrence.
  StringFormat nameOnly = mode.formatOf("[id={...}, name={name}]");
  assertThat(nameOnly.scan("[id=, name=bar][id=zoo, name=boo]", name -> name))
      .containsExactly("bar", "boo")
      .inOrder();
}

@Test
public void scan_twoPlaceholders_nullFilteredOut(@TestParameter Mode mode) {
assertThat(
Expand Down Expand Up @@ -620,6 +683,12 @@ public void scan_singlePlaceholderOnly(@TestParameter Mode mode) {
assertThat(mode.formatOf("{s}").scan("whatever", s -> s)).containsExactly("whatever");
}

@Test
public void scan_singleEllipsisOnly(@TestParameter Mode mode) {
  StringFormat modeFormat = mode.formatOf("{...}");
  assertThat(modeFormat.scan("whatever")).containsExactly(ImmutableList.of());

  // The empty-input case is exercised through the public constructor rather than via mode.
  StringFormat plainFormat = new StringFormat("{...}");
  assertThat(plainFormat.scan("")).containsExactly(ImmutableList.of());
}

@Test
public void scan_singlePlaceholderOnly_emptyInput() {
assertThat(new StringFormat("{s}").scan("", s -> s)).containsExactly("");
Expand Down Expand Up @@ -672,6 +741,11 @@ public void format_placeholdersFilled(@TestParameter Mode mode) {
assertThat(mode.formatOf("{a} + {b} = {c}").format(1, 2, 3)).isEqualTo("1 + 2 = 3");
}

@Test
public void format_ellipsisFilled(@TestParameter Mode mode) {
  // format() fills the "{...}" placeholder with the third argument, like a named placeholder.
  StringFormat equation = mode.formatOf("{a} + {b} = {...}");
  assertThat(equation.format(1, 2, 3)).isEqualTo("1 + 2 = 3");
}

@Test
public void format_nullValueAllowed(@TestParameter Mode mode) {
assertThat(mode.formatOf("{key} == {value}").format("x", null)).isEqualTo("x == null");
Expand Down

0 comments on commit 2bc3322

Please sign in to comment.