From f92f5128409b047acd456b7f3bea6f02d2c2cc77 Mon Sep 17 00:00:00 2001 From: rakow Date: Thu, 11 Jul 2024 22:17:27 +0200 Subject: [PATCH] improve diverse plan generator and models --- input/v6.3/params/bg_diverse_9_fixed.yaml | 42 ++++++ input/v6.3/params/mxl_diverse_9.yaml | 34 +++++ .../prepare/choices/ComputePlanChoices.java | 2 +- .../prepare/choices/DiversePlanGenerator.java | 35 ++++- .../prepare/choices/RandomPlanGenerator.java | 2 + .../matsim/prepare/network/ModifyNetwork.java | 134 ++++++++++++++++++ .../estimate_biogeme_plan_choice.py | 59 +++++--- .../estimate_mixed_plan_choice.py | 26 +++- src/main/sh/runPlanChoices.sh | 19 +-- 9 files changed, 317 insertions(+), 36 deletions(-) create mode 100644 input/v6.3/params/bg_diverse_9_fixed.yaml create mode 100644 input/v6.3/params/mxl_diverse_9.yaml create mode 100644 src/main/java/org/matsim/prepare/network/ModifyNetwork.java diff --git a/input/v6.3/params/bg_diverse_9_fixed.yaml b/input/v6.3/params/bg_diverse_9_fixed.yaml new file mode 100644 index 00000000..582625c8 --- /dev/null +++ b/input/v6.3/params/bg_diverse_9_fixed.yaml @@ -0,0 +1,42 @@ +# Results for model plan-choices-diverse_9_fixed_ascs +# Nbr of parameters: 4 +# Sample size: 8259 +# Excluded data: 0 +# Null log likelihood: -16584.31 +# Final log likelihood: -18747.14 +# Likelihood ratio test (null): -4325.659 +# Rho square (null): -0.13 +# Rho bar square (null): -0.131 +# Akaike Information Criterion: 37502.28 +# Bayesian Information Criterion: 37530.35 +scoring: + scoringParameters: + - modeParams: + - mode: walk + constant: 0 + - mode: car + constant: -0.5341414592094356 + dailyMonetaryConstant: -14.30 + dailyUtilityConstant: 6.078136 + - mode: pt + constant: 0.3971116 + - mode: bike + constant: -1.3538876325 + - mode: ride + constant: -1.23976957093642 +advancedScoring: + scoringParameters: + - subpopulation: person + modeParams: + - mode: car + deltaDailyConstant: 13.368402 + varDailyConstant: truncatedNormal + - mode: bike + deltaConstant: 2.040308 + varConstant: normal + - mode: pt + deltaConstant: 3.609201 + varConstant: normal + - mode: ride + deltaConstant: 0.968879 + varConstant: normal \ No newline at end of file diff --git a/input/v6.3/params/mxl_diverse_9.yaml b/input/v6.3/params/mxl_diverse_9.yaml new file mode 100644 index 00000000..16ddd53e --- /dev/null +++ b/input/v6.3/params/mxl_diverse_9.yaml @@ -0,0 +1,34 @@ +# Log-Likelihood= -14744.630 +# AIC= 29505.259 +# BIC= 29561.412 +scoring: + scoringParameters: + - modeParams: + - mode: walk + constant: 0 + - mode: car + constant: 0 + dailyMonetaryConstant: -14.30 + dailyUtilityConstant: 8.9634862 + - mode: pt + constant: 0.2118402 + - mode: bike + constant: -1.4056730 + - mode: ride + constant: -2.0662459 +advancedScoring: + scoringParameters: + - subpopulation: person + modeParams: + - mode: car + deltaDailyConstant: 5.2291379 + varDailyConstant: truncatedNormal + - mode: bike + deltaConstant: 0.6887108 + varConstant: normal + - mode: pt + deltaConstant: 1.7932632 + varConstant: normal + - mode: ride + deltaConstant: 1.2994295 + varConstant: normal \ No newline at end of file diff --git a/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java b/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java index 330246dd..758827ae 100644 --- a/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java +++ b/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java @@ -181,7 +181,7 @@ public Integer call() throws Exception { String out = output.toString().replace(".csv", "-%s_%d.csv".formatted(planCandidates, topK)); - if (timeUtil && planCandidates == PlanCandidates.bestK) { + if (timeUtil && (planCandidates == PlanCandidates.bestK || planCandidates == PlanCandidates.diverse)) { out = out.replace(".csv", "-tt-only.csv"); } diff --git a/src/main/java/org/matsim/prepare/choices/DiversePlanGenerator.java b/src/main/java/org/matsim/prepare/choices/DiversePlanGenerator.java index b4a51116..5352a5ef 100644 --- a/src/main/java/org/matsim/prepare/choices/DiversePlanGenerator.java +++ b/src/main/java/org/matsim/prepare/choices/DiversePlanGenerator.java @@ -16,6 +16,7 @@ public class DiversePlanGenerator implements CandidateGenerator { private final int topK; private final TopKChoicesGenerator gen; + private final SplittableRandom rnd = new SplittableRandom(0); DiversePlanGenerator(int topK, TopKChoicesGenerator generator) { this.topK = topK; @@ -33,20 +34,42 @@ public List generate(PlanModel planModel, @Nullable Set c List candidates = new ArrayList<>(); boolean carUser = PersonUtils.canUseCar(planModel.getPerson()); - Set modes = new HashSet<>(consideredModes); - modes.remove(carUser ? "ride": "car"); - for (String mode : modes) { + if (!carUser && mode.equals("car")) + continue; + List tmp = gen.generate(planModel, Set.of(mode), mask); if (!tmp.isEmpty()) candidates.add(tmp.get(0)); } - Collections.sort(candidates); - candidates.add(0, existing); + candidates.addFirst(existing); + + // Add combination of modes as well + addToCandidates(candidates, gen.generate(planModel, modes, mask), carUser ? "car" : "ride", 1); + addToCandidates(candidates, gen.generate(planModel, consideredModes, mask), null, 1); - return candidates.stream().distinct().limit(topK).toList(); + // Remove the primary mode to generate remaining alternatives + modes.remove(carUser ? "car" : "ride"); + addToCandidates(candidates, gen.generate(planModel, modes, mask), null, 2); + + return candidates.stream().distinct().limit(this.topK).toList(); } + + private void addToCandidates(List candidates, List topK, String requireMode, int n) { + + topK.removeIf(candidates::contains); + + if (requireMode != null) { + topK.removeIf(c -> !c.containsMode(requireMode)); + } + + for (int i = 0; i < n; i++) { + if (topK.size() > 1) + candidates.add(topK.remove(rnd.nextInt(topK.size()))); + } + } + } diff --git a/src/main/java/org/matsim/prepare/choices/RandomPlanGenerator.java b/src/main/java/org/matsim/prepare/choices/RandomPlanGenerator.java index 4c5a8453..51d70ab8 100644 --- a/src/main/java/org/matsim/prepare/choices/RandomPlanGenerator.java +++ b/src/main/java/org/matsim/prepare/choices/RandomPlanGenerator.java @@ -33,6 +33,8 @@ public List generate(PlanModel planModel, @Nullable Set c PlanCandidate existing = gen.generatePredefined(planModel, chosen).get(0); // This changes the internal state to randomize the estimates + // TODO random selection is biased because of mass conservation + // due to that, this class should not be used for (Map.Entry> entry : planModel.getEstimates().entrySet()) { for (ModeEstimate est : entry.getValue()) { double[] utils = est.getEstimates(); diff --git a/src/main/java/org/matsim/prepare/network/ModifyNetwork.java b/src/main/java/org/matsim/prepare/network/ModifyNetwork.java new file mode 100644 index 00000000..5f240609 --- /dev/null +++ b/src/main/java/org/matsim/prepare/network/ModifyNetwork.java @@ -0,0 +1,134 @@ +package org.matsim.prepare.network; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.geotools.api.feature.simple.SimpleFeature; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.LineString; +import org.locationtech.jts.geom.Point; +import org.locationtech.jts.index.strtree.STRtree; +import org.matsim.api.core.v01.Id; +import org.matsim.api.core.v01.network.Link; +import org.matsim.api.core.v01.network.Network; +import org.matsim.api.core.v01.network.Node; +import org.matsim.application.MATSimAppCommand; +import org.matsim.application.options.ShpOptions; +import org.matsim.core.network.NetworkUtils; +import org.matsim.core.utils.geometry.geotools.MGC; +import picocli.CommandLine; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.List; +import java.util.function.ToDoubleFunction; + +@CommandLine.Command(name = "modify-network", description = "Remove or add network elements") +public class ModifyNetwork implements MATSimAppCommand { + + private static final Logger log = LogManager.getLogger(ModifyNetwork.class); + + @CommandLine.Option(names = {"--network"}, description = "Path to the network file", required = true) + private String networkPath; + + @CommandLine.Option(names = {"--remove-links"}, description = "Path to the CSV file with link ids to remove") + private Path removeCSV; + + @CommandLine.Option(names = {"--matching-distance"}, description = "Distance in meters to match links from shapefile to network nodes", defaultValue = "10.0") + private double matchingDistance; + + @CommandLine.Option(names = "--output", description = "Path to the output network file", required = true) + private Path output; + + @CommandLine.Mixin + private ShpOptions shp = new ShpOptions(); + + public static void main(String[] args) { + new ModifyNetwork().execute(args); + } + + @Override + public Integer call() throws Exception { + + Network network = NetworkUtils.readNetwork(networkPath); + + if (removeCSV != null) { + removeLinks(network); + } + + if (shp.isDefined()) { + addLinksFromShape(network); + } + + return 0; + } + + private void removeLinks(Network network) throws IOException { + for (String line : Files.readAllLines(removeCSV)) { + Id linkId = Id.createLinkId(line); + network.removeLink(linkId); + } + + // Remove links that are not connected + List> toRemove = network.getNodes().values().stream().filter(n -> n.getInLinks().isEmpty() && n.getOutLinks().isEmpty()) + .map(Node::getId) + .toList(); + + toRemove.forEach(network::removeNode); + } + + /** + * Read line geometry from a shapefile and add links to the network. + */ + private void addLinksFromShape(Network network) { + + STRtree nodeIndex = new STRtree(); + for (Node node : network.getNodes().values()) { + nodeIndex.insert(MGC.coord2Point(node.getCoord()).getEnvelopeInternal(), node); + } + nodeIndex.build(); + + List features = shp.readFeatures(); + for (SimpleFeature feature : features) { + // Find the nearest node + LineString geom = (LineString) feature.getDefaultGeometry(); + + Id linkId = Id.createLinkId(feature.getID()); + Node fromNode = matchNode(nodeIndex, geom.getCoordinateN(0)); + Node toNode = matchNode(nodeIndex, geom.getCoordinateN(geom.getNumPoints() - 1)); + + if (fromNode == toNode) { + throw new IllegalArgumentException("Link %s would have the same start and end node. Check the shapefile and improve matching to zones.".formatted(linkId)); + } + + Link link = network.getFactory().createLink(linkId, fromNode, toNode); + + // TODO: Set link attributes + + network.addLink(link); + } + } + + @SuppressWarnings("unchecked") + private Node matchNode(STRtree index, Coordinate coord) { + + Point point = MGC.coordinate2Point(coord); + + ToDoubleFunction distance = n -> NetworkUtils.getEuclideanDistance(((Node) n).getCoord(), MGC.coordinate2Coord(coord)); + + List result = index.query(point.buffer(matchingDistance).getEnvelopeInternal()) + .stream() + .map(Node.class::cast) + .filter(n -> distance.applyAsDouble(n) < matchingDistance) + .sorted(Comparator.comparingDouble(distance)) + .toList(); + + if (result.isEmpty()) { + throw new IllegalArgumentException("No node found for coordinate %s".formatted(coord)); + } + + return result.getFirst(); + } + +} diff --git a/src/main/python/choicemodels/estimate_biogeme_plan_choice.py b/src/main/python/choicemodels/estimate_biogeme_plan_choice.py index 65ddf51a..695ed668 100644 --- a/src/main/python/choicemodels/estimate_biogeme_plan_choice.py +++ b/src/main/python/choicemodels/estimate_biogeme_plan_choice.py @@ -1,8 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import os import argparse +import os import biogeme.biogeme as bio import biogeme.database as db @@ -17,11 +17,15 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Estimate choice model for daily trip usage") parser.add_argument("--input", help="Path to the input file", type=str, - default="../../../../plan-choices-bestK_9-tt-only.csv") - parser.add_argument("--mxl-modes", help="Modes to use mixed logit for", nargs="+", type=set, default=["pt", "bike", "ride"]) + default="../../../../plan-choices-diverse_9-tt-only.csv") + parser.add_argument("--mxl-modes", help="Modes to use mixed logit for", nargs="+", type=set, + default=["pt", "bike", "ride"]) parser.add_argument("--est-performing", help="Estimate the beta for performing", action="store_true") parser.add_argument("--est-exp-income", help="Estimate exponent for income", action="store_true") parser.add_argument("--est-util-money", help="Estimate utility of money", action="store_true") + parser.add_argument("--ascs", help="Predefined ASCs", nargs="+", action='append', default=[]) + parser.add_argument("--car-util", help="Fixed utility for car", type=float, default=None) + parser.add_argument("--no-mxl", help="Disable mixed logit", action="store_true") args = parser.parse_args() @@ -40,7 +44,9 @@ "TN": (tn_generator, "truncated normal generator for mixed logit") }) - mixed_logit = True + fixed_ascs = {x: float(y) for x, y in args.ascs} + if fixed_ascs: + print("Using fixed ascs", fixed_ascs) # Variables for constants ASC = {} @@ -53,29 +59,41 @@ UTIL_MONEY = Beta('UTIL_MONEY', 1, 0, 2, ESTIMATE if args.est_util_money else FIXED) - BETA_PERFORMING = Beta('BETA_PERFORMING', 6.88, 0, 10, ESTIMATE if args.est_performing else FIXED) + BETA_PERFORMING = Beta('BETA_PERFORMING', 6.88, 1, 15, ESTIMATE if args.est_performing else FIXED) for mode in ds.modes: + + val = fixed_ascs.get(mode, 0) + status = FIXED if mode in ("walk", "car") or mode in fixed_ascs else ESTIMATE + # Base asc - asc = Beta(f"ASC_{mode}", 0, None, None, FIXED if mode in ("walk", "car") else ESTIMATE) + asc = Beta(f"ASC_{mode}", val, None, None, status) # Pt does not have its own random parameter - if mode not in args.mxl_modes: + if mode not in args.mxl_modes or args.no_mxl: ASC[mode] = asc else: # The random parameter SD[mode] = Beta(f"ASC_{mode}_s", 1, None, None, ESTIMATE) ASC[mode] = asc + SD[mode] * bioDraws(f"{mode}_RND", "NORMAL_ANTI") - B_UTIL = Beta('B_CAR_UTIL', 10, 0, 15, ESTIMATE) - B_UTIL_S = Beta('B_CAR_UTIL_SD', 1, 0, 15, ESTIMATE) + if args.car_util: + print("Using fixed utility for car", args.car_util) + + B_UTIL = Beta('B_CAR_UTIL', 10 if not args.car_util else args.car_util, + 0, 15, FIXED if args.car_util else ESTIMATE) - B_CAR_RND = B_UTIL + B_UTIL_S * bioDraws('B_CAR_UTIL_RND', 'TN') + if args.no_mxl: + B_CAR = B_UTIL + else: + B_UTIL_S = Beta('B_CAR_UTIL_SD', 1, 0, 15, ESTIMATE) + B_CAR = B_UTIL + B_UTIL_S * bioDraws('B_CAR_UTIL_RND', 'TN') U = {} AV = {} for i in range(1, ds.k + 1): + # Price is already negative u = v[f"plan_{i}_price"] * UTIL_MONEY * (ds.global_income / v["income"]) ** EXP_INCOME u -= v[f"plan_{i}_pt_n_switches"] @@ -84,21 +102,30 @@ u += -BETA_PERFORMING * v[f"plan_{i}_{mode}_hours"] * (2 if mode == "ride" else 1) - if mixed_logit: - u += v[f"plan_{i}_car_used"] * B_CAR_RND + u += v[f"plan_{i}_car_used"] * B_CAR U[i] = u AV[i] = v[f"plan_{i}_valid"] - if mixed_logit: + if args.no_mxl: + logprob = models.loglogit(U, AV, v["choice"]) + else: prob = models.logit(U, AV, v["choice"]) logprob = log(MonteCarlo(prob)) - else: - logprob = models.loglogit(U, AV, v["choice"]) biogeme = bio.BIOGEME(database, logprob) - biogeme.modelName = os.path.basename(args.input).replace(".csv", "") + modelName = os.path.basename(args.input).replace(".csv", "") + if args.est_performing: + modelName += "_performing" + if args.est_exp_income: + modelName += "_exp_income" + if args.est_util_money: + modelName += "_util_money" + if args.ascs: + modelName += "_fixed_ascs" + + biogeme.modelName = modelName biogeme.weight = v["weight"] biogeme.calculateNullLoglikelihood(AV) diff --git a/src/main/python/choicemodels/estimate_mixed_plan_choice.py b/src/main/python/choicemodels/estimate_mixed_plan_choice.py index 970eaba8..7a29c8c8 100644 --- a/src/main/python/choicemodels/estimate_mixed_plan_choice.py +++ b/src/main/python/choicemodels/estimate_mixed_plan_choice.py @@ -8,10 +8,26 @@ from prepare import read_plan_choices + +def analyse_mode_share(df, modes): + """ Analyse mode share of all alternatives """ + + # Only valid choices + tf = df[df.valid == 1] + + counts = {} + for mode in modes: + counts[mode] = tf[f"{mode}_usage"].sum() + + total = sum(counts.values()) + counts = {k: v / total for k, v in counts.items()} + print("Trip share of %.2f%% valid choices" % (100 * len(tf) / len(df)), counts) + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Estimate the plan choice mixed logit model") parser.add_argument("--input", help="Path to the input file", type=str, - default="../../../plan-choices-bestK_9-tt-only.csv") + default="../../../plan-choices-diverse_9.csv") parser.add_argument("--n-draws", help="Number of draws for the estimation", type=int, default=1500) parser.add_argument("--batch-size", help="Batch size for the estimation", type=int, default=None) parser.add_argument("--sample", help="Use sample of choice data", type=float, default=1) @@ -26,10 +42,12 @@ alt_list=[f"plan_{i}" for i in range(1, ds.k + 1)], empty_val=0, varying=ds.varying, alt_is_prefix=True) + analyse_mode_share(df, ds.modes) + MixedLogit.check_if_gpu_available() # ASC is present as mode_usage - varnames = [f"{mode}_usage" for mode in ds.modes if mode != "walk" and mode != "car"] + ["car_used"] + ["tt_hours"] + varnames = [f"{mode}_usage" for mode in ds.modes if mode != "walk" and mode != "car"] + ["car_used"] # varnames += ["pt_ride_hours", "car_ride_hours", "bike_ride_hours"] # varnames = ["car_used", "car_usage"] @@ -43,8 +61,8 @@ alts=df['alt'], ids=df['custom_id'], avail=df['valid'], random_state=args.seed, addit=addit, # randvars={"car_used": "tn"}, - randvars={"car_used": "tn", "tt_hours": "tn", "bike_usage": "n", "pt_usage": "n", "ride_usage": "n"}, - fixedvars={"car_used": None, "tt_hours": None}, + randvars={"car_used": "tn", "bike_usage": "n", "pt_usage": "n", "ride_usage": "n"}, + fixedvars={"car_used": None}, n_draws=args.n_draws, batch_size=args.batch_size, halton=True, skip_std_errs=True, optim_method='L-BFGS-B') diff --git a/src/main/sh/runPlanChoices.sh b/src/main/sh/runPlanChoices.sh index 7799f979..b575c2d1 100755 --- a/src/main/sh/runPlanChoices.sh +++ b/src/main/sh/runPlanChoices.sh @@ -13,13 +13,14 @@ run_eval() { } -run_eval "--plan-candidates bestK --top-k 3" -run_eval "--plan-candidates bestK --top-k 5" -run_eval "--plan-candidates bestK --top-k 9" -run_eval "--plan-candidates bestK --top-k 3 --time-util-only" -run_eval "--plan-candidates bestK --top-k 5 --time-util-only" -run_eval "--plan-candidates bestK --top-k 9 --time-util-only" -run_eval "--plan-candidates random --top-k 3" -run_eval "--plan-candidates random --top-k 5" -run_eval "--plan-candidates random --top-k 9" +#run_eval "--plan-candidates bestK --top-k 3" +#run_eval "--plan-candidates bestK --top-k 5" +#run_eval "--plan-candidates bestK --top-k 9" +#run_eval "--plan-candidates bestK --top-k 3 --time-util-only" +#run_eval "--plan-candidates bestK --top-k 5 --time-util-only" +#run_eval "--plan-candidates bestK --top-k 9 --time-util-only" +#run_eval "--plan-candidates random --top-k 3" +#run_eval "--plan-candidates random --top-k 5" +#run_eval "--plan-candidates random --top-k 9" run_eval "--plan-candidates diverse --top-k 9" +run_eval "--plan-candidates diverse --top-k 9 --time-util-only" \ No newline at end of file