Skip to content

Commit

Permalink
Eliminate duplicate station names
Browse files Browse the repository at this point in the history
  • Loading branch information
mtopolnik committed Jan 5, 2024
1 parent 712dccd commit 0057011
Showing 1 changed file with 21 additions and 8 deletions.
29 changes: 21 additions & 8 deletions src/main/java/dev/morling/onebrc/CreateMeasurements3.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.concurrent.ThreadLocalRandom;

public class CreateMeasurements3 {
Expand Down Expand Up @@ -78,6 +80,7 @@ private static ArrayList<WeatherStation> generateWeatherStations() throws Except
}
}
final var weatherStations = new ArrayList<WeatherStation>();
final var names = new HashSet<String>();
var minLen = Integer.MAX_VALUE;
var maxLen = Integer.MIN_VALUE;
try (var rows = new BufferedReader(new FileReader("data/weather_stations.csv"))) {
Expand All @@ -104,18 +107,15 @@ private static ArrayList<WeatherStation> generateWeatherStations() throws Except
}
var name = new String(buf, 0, nameLen).trim();
while (name.length() < nameLen) {
var n = nameSource.read();
if (n == -1) {
throw new Exception("Name source exhausted");
}
var ch = (char) n;
if (ch != ' ') {
name += ch;
}
name += readNonSpace(nameSource);
}
while (names.contains(name)) {
name = name.substring(1) + readNonSpace(nameSource);
}
if (name.indexOf(';') != -1) {
throw new Exception("Station name contains a semicolon!");
}
names.add(name);
var lat = Float.parseFloat(row.substring(row.indexOf(';') + 1));
// Guesstimate mean temperature using cosine of latitude
var avgTemp = (float) (30 * Math.cos(Math.toRadians(lat))) - 10;
Expand All @@ -125,4 +125,17 @@ private static ArrayList<WeatherStation> generateWeatherStations() throws Except
System.out.format("Generated %,d station names with length from %,d to %,d%n", KEYSET_SIZE, minLen, maxLen);
return weatherStations;
}

/**
 * Reads characters from {@code nameSource} until one that is not a space
 * ({@code ' '}) is found, and returns it. Spaces encountered on the way are
 * consumed and discarded; no other character is skipped.
 *
 * @param nameSource reader to pull characters from; it is advanced past the returned character
 * @return the first non-space character read
 * @throws IOException if the reader reports end of input before a non-space
 *                     character is found, or if the underlying read fails
 */
private static char readNonSpace(StringReader nameSource) throws IOException {
    int next;
    do {
        next = nameSource.read();
        // read() signals end of input with -1; there is nothing left to build a name from.
        if (next == -1) {
            throw new IOException("Name source exhausted");
        }
    } while ((char) next == ' ');
    return (char) next;
}
}

0 comments on commit 0057011

Please sign in to comment.