Skip to content

Commit

Permalink
Add Substring.RepeatingPattern.cut()
Browse files Browse the repository at this point in the history
  • Loading branch information
fluentfuture committed Dec 10, 2023
1 parent d545762 commit fecebc8
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public static ParameterizedQuery of(@CompileTimeConstant String query, Object...
}

/**
* Returns a template of {@iink QueryJobConfiguration} based on the {@code template} string.
* Returns a template of {@link QueryJobConfiguration} based on the {@code template} string.
*
* <p>For example:
*
Expand Down
54 changes: 54 additions & 0 deletions mug/src/main/java/com/google/mu/util/Substring.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package com.google.mu.util;

import static com.google.mu.util.InternalCollectors.toImmutableList;
import static com.google.mu.util.stream.MoreStreams.whileNotNull;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.util.Comparator.comparingInt;
Expand Down Expand Up @@ -1888,6 +1889,59 @@ public Stream<Match> splitThenTrim(String string) {
return split(string).map(Match::trim);
}

/**
 * Cuts the input {@code string} at every match of this delimiter pattern and returns all of the
 * resulting pieces as a stream of {@code Match} objects. Unlike {@link #split}, the delimiter
 * matches themselves are kept: the stream alternates between the substring preceding a
 * delimiter and the delimiter itself, in the order they occur in the input, and ends with the
 * substring following the last delimiter.
 *
 * <p>For example,
 *
 * <pre>{@code
 * spanningInOrder("{", "}").repeatedly().cut("Dear {user}: please {act}.")
 * }</pre>
 *
 * will result in the stream of {@code ["Dear ", "{user}", ": please ", "{act}", "."]}.
 *
 * <p>A substring is emitted before every delimiter and after the last one even when it is
 * empty, so an input with {@code n} delimiter matches yields {@code 2n + 1} elements.
 *
 * <p>The returned {@code Match} objects are cheap "views" of the matched substring sequences.
 * Because {@code Match} implements {@code CharSequence}, the returned {@code Match} objects can
 * be directly passed to {@code CharSequence}-accepting APIs such as {@link
 * CharMatcher#trimFrom}, {@link Pattern#splitThenTrim}, etc.
 *
 * @since 7.1
 */
public Stream<Match> cut(String string) {
  Iterator<Match> separators = match(string).iterator();
  return whileNotNull(
      new Supplier<Match>() {
        // The delimiter found by the previous call that still has to be emitted, if any.
        Match pending = null;
        // Index where the next in-between substring starts; -1 once the tail has been emitted.
        int cursor = 0;

        @Override
        public Match get() {
          if (cursor == -1) {
            // The trailing substring was already returned; null ends the stream.
            return null;
          }
          if (pending != null) {
            // The substring before this delimiter went out last time; now emit the delimiter.
            Match separator = pending;
            pending = null;
            return separator;
          }
          if (!separators.hasNext()) {
            // No more delimiters: emit everything that remains, then mark completion.
            Match tail = Match.nonBacktrackable(string, cursor, string.length() - cursor);
            cursor = -1;
            return tail;
          }
          // Emit the substring leading up to the next delimiter; remember the delimiter itself
          // so that the following call returns it.
          pending = separators.next();
          Match gap = Match.nonBacktrackable(string, cursor, pending.index() - cursor);
          cursor = pending.endIndex;
          return gap;
        }
      });
}

/**
* Returns a {@link BiStream} of key value pairs from {@code input}.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ void splitsThenTrimsDistinctTo(String... parts) {
.inOrder();
}

/**
 * Asserts that repeatedly cutting {@code input} by {@code pattern} yields exactly {@code parts},
 * in order, with each resulting match compared by its string form.
 */
void cutsTo(String... parts) {
  // Cap the stream a little beyond the expected size. Presumably this keeps the assertion
  // finite if cut() misbehaves, while still letting unexpected extra elements show up.
  int cap = parts.length + 10;
  assertThat(pattern.repeatedly().cut(input).map(Substring.Match::toString).limit(cap))
      .containsExactlyElementsIn(asList(parts))
      .inOrder();
}

void twoWaySplitsTo(String left, String right) {
assertThat(pattern.split(input).map((a, b) -> a)).hasValue(left);
assertThat(pattern.split(input).map((a, b) -> b)).hasValue(right);
Expand Down
32 changes: 32 additions & 0 deletions mug/src/test/java/com/google/mu/util/SubstringPatternTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,38 @@ public void repeatedly_splitThenTrim_match() {
assertPattern(first("/"), " foo/bar/").splitsThenTrimsTo("foo", "bar", "");
}

@Test
public void repeatedly_cut_noMatch() {
  // The delimiter never occurs, so the whole input is expected back as the single piece.
  String text = "abc";
  assertPattern(first("://"), text).cutsTo(text);
}

@Test
public void repeatedly_cut_match() {
  // Delimiter at the very start: the piece in front of it is the empty string.
  assertPattern(first("//"), "//foo").cutsTo("", "//", "foo");

  Substring.Pattern slash = first("/");
  // Delimiter in the middle of the input.
  assertPattern(slash, "foo/bar").cutsTo("foo", "/", "bar");
  // Delimiter at the very end: the piece after it is the empty string.
  assertPattern(slash, "foo/bar/").cutsTo("foo", "/", "bar", "/", "");
}

@Test
public void repeatedly_cut_byBetweenPattern() {
  // Per the expectations below, the delimiter spans from an opening "/*" through its "*/".
  Substring.Pattern blockComment = Substring.between(before(first("/*")), after(first("*/")));

  // No comment in the input.
  assertPattern(blockComment, "a").cutsTo("a");

  // A single comment, with and without text after it.
  assertPattern(blockComment, "a/*comment*/").cutsTo("a", "/*comment*/", "");
  assertPattern(blockComment, "a/*comment*/b").cutsTo("a", "/*comment*/", "b");

  // Two comments, with and without text after the last one.
  assertPattern(blockComment, "a/*c1*/b/*c2*/").cutsTo("a", "/*c1*/", "b", "/*c2*/", "");
  assertPattern(blockComment, "a/*c1*/b/*c2*/c").cutsTo("a", "/*c1*/", "b", "/*c2*/", "c");
}

@Test
public void repeatedly_cut_beginning() {
  // Expected: an empty delimiter at each of the positions 0..3 of "foo", interleaved with the
  // pieces between them ("", "f", "o", "o") and followed by an empty trailing piece.
  String[] expected = {"", "", "f", "", "o", "", "o", "", ""};
  assertPattern(BEGINNING, "foo").cutsTo(expected);
}

@Test
public void repeatedly_cut_end() {
  // Expected: the whole input, then one empty delimiter, then an empty trailing piece.
  String[] expected = {"foo", "", ""};
  assertPattern(END, "foo").cutsTo(expected);
}

@Test
public void after_noMatch() {
assertPattern(Substring.after(first("//")), "abc").findsNothing();
Expand Down

0 comments on commit fecebc8

Please sign in to comment.