diff --git a/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java b/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java index 9a790299..880a1bcc 100644 --- a/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java +++ b/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java @@ -18,6 +18,7 @@ import org.matsim.core.config.groups.PlansConfigGroup; import org.matsim.core.controler.Controler; import org.matsim.core.controler.OutputDirectoryHierarchy; +import org.matsim.core.population.PersonUtils; import org.matsim.core.population.algorithms.ParallelPersonAlgorithmUtils; import org.matsim.core.population.algorithms.PersonAlgorithm; import org.matsim.core.router.*; @@ -60,14 +61,15 @@ public class ComputePlanChoices implements MATSimAppCommand, PersonAlgorithm { private boolean timeUtil; @CommandLine.Option(names = "--calc-scores", description = "Perform pseudo scoring for each plan", defaultValue = "false") private boolean calcScores; - @CommandLine.Option(names = "--plan-candidates", description = "Method to generate plan candidates", defaultValue = "bestK") + @CommandLine.Option(names = "--plan-candidates", description = "Method to generate plan candidates", defaultValue = "diverse") private PlanCandidates planCandidates = PlanCandidates.bestK; - - @CommandLine.Option(names = "--output", description = "Path to output csv.", required = true) + @CommandLine.Option(names = "--output", description = "Path to output csv.", defaultValue = "plan-choices.csv") private Path output; + private ThreadLocal thread; private ProgressBar pb; - private MainModeIdentifier mmi = new DefaultAnalysisMainModeIdentifier(); + private double globalAvgIncome; + private final MainModeIdentifier mmi = new DefaultAnalysisMainModeIdentifier(); public static void main(String[] args) { new ComputePlanChoices().execute(args); @@ -76,6 +78,11 @@ public static void main(String[] args) { @Override public Integer call() throws Exception { + if (!output.getFileName().toString().contains(".csv")) { + log.error("Output file must be a csv file"); + return 2; + } + Config config = this.scenario.getConfig(); config.controller().setOutputDirectory("choice-output"); config.controller().setLastIteration(0); @@ -140,19 +147,32 @@ public Integer call() throws Exception { ) ); + globalAvgIncome = population.getPersons().values().stream() + .map(PersonUtils::getIncome) + .filter(Objects::nonNull) + .mapToDouble(d -> d) + .filter(dd -> dd > 0) + .average() + .orElse(Double.NaN); + pb = new ProgressBar("Computing plan choices", population.getPersons().size()); ParallelPersonAlgorithmUtils.run(population, Runtime.getRuntime().availableProcessors(), this); pb.close(); - log.info("Writing {} choices to {}", rows.size(), output); + String out = output.toString().replace(".csv", "-%s_%d.csv".formatted(planCandidates, topK)); + + log.info("Writing {} choices to {}", rows.size(), out); - try (CSVPrinter csv = new CSVPrinter(Files.newBufferedWriter(output), CSVFormat.DEFAULT)) { + try (CSVPrinter csv = new CSVPrinter(Files.newBufferedWriter(Path.of(out)), CSVFormat.DEFAULT.builder().setCommentMarker('#').build())) { // header List header = new ArrayList<>(); header.add("person"); + header.add("weight"); + header.add("income"); + header.add("util_money"); header.add("choice"); for (int i = 1; i <= topK; i++) { @@ -168,6 +188,8 @@ public Integer call() throws Exception { header.add(String.format("plan_%d_valid", i)); } + csv.printComment("Average global income: " + globalAvgIncome); + csv.printRecord(header); for (List row : rows) { @@ -213,6 +235,10 @@ public void run(Person person) { List row = new ArrayList<>(); row.add(person.getAttributes().getAttribute(TripAnalysis.ATTR_REF_ID)); + row.add(person.getAttributes().getAttribute(TripAnalysis.ATTR_REF_WEIGHT)); + row.add(PersonUtils.getIncome(person)); + row.add(globalAvgIncome / PersonUtils.getIncome(person)); + // choice, always the first one row.add(1); diff --git a/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java b/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java index 21dcbd5d..b8e46c3a 100644 --- a/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java +++ b/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java @@ -99,24 +99,6 @@ public ScoringParameters() { super(GROUP_NAME, true); } - /** - * Checks if the given attributes match the config. If true these parameters are applicable to tbe object. - */ - public boolean matchObject(Attributes attr, Map categories) { - - for (Map.Entry e : this.getParams().entrySet()) { - // might be null if not defined - Object objValue = attr.getAttribute(e.getKey()); - String category = categories.get(e.getKey()).categorize(objValue); - - // compare as string - if (!Objects.toString(category).equals(e.getValue())) - return false; - } - - return true; - } - public Map getModeParams() { return modeParams; } diff --git a/src/main/python/estimate_mixed_plan_choice.py b/src/main/python/estimate_mixed_plan_choice.py index dbd643bd..1e784d93 100644 --- a/src/main/python/estimate_mixed_plan_choice.py +++ b/src/main/python/estimate_mixed_plan_choice.py @@ -11,7 +11,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Estimate the plan choice mixed logit model") - parser.add_argument("--input", help="Path to the input file", type=str, default="../../../plan-choices-random.csv") + parser.add_argument("--input", help="Path to the input file", type=str, default="../../../plan-choices.csv") parser.add_argument("--n-draws", help="Number of draws for the estimation", type=int, default=1500) parser.add_argument("--batch-size", help="Batch size for the estimation", type=int, default=None) parser.add_argument("--sample", help="Use sample of choice data", type=float, default=0.2) @@ -20,14 +20,14 @@ args = parser.parse_args() - df_wide = pd.read_csv(args.input) + df_wide = pd.read_csv(args.input, comment="#") modes = list(df_wide.columns.str.extract(r"_([a-zA-z]+)_usage", expand=False).dropna().unique()) print("Modes: ", modes) k = df_wide.columns.str.extract(r"plan_(\d+)", expand=False).dropna().to_numpy(int).max() - print("Number of choices: ", len(df_wide)) - print("Number of plans per choice: ", k) + print("Number of plans: ", len(df_wide)) + print("Number of choices for plan: ", k) # df_wide["p_id"] = df_wide["p_id"].str.replace(r"_\d+$", "", regex=True) # df_wide["person"] = df_wide["person"].astype('category').cat.codes @@ -62,11 +62,12 @@ # varnames = ["car_used", "car_usage"] # Additive costs - addit = df["costs"] + df["car_fixed_cost"] - df["pt_n_switches"] + # utils contains the price and opportunist costs + addit = df["utils"] - df["pt_n_switches"] if not args.mnl: model = MixedLogit() - model.fit(X=df[varnames], y=df['choice'], varnames=varnames, + model.fit(X=df[varnames], y=df['choice'], weights=df['weight'], varnames=varnames, alts=df['alt'], ids=df['custom_id'], avail=df['valid'], random_state=args.seed, addit=addit, # randvars={"car_used": "tn"}, @@ -79,7 +80,7 @@ #varnames += ["car_usage"] model = MultinomialLogit() - model.fit(X=df[varnames], y=df['choice'], varnames=varnames, + model.fit(X=df[varnames], y=df['choice'], weights=df['weight'], varnames=varnames, alts=df['alt'], ids=df['custom_id'], avail=df['valid'], random_state=args.seed, addit=addit) diff --git a/src/main/python/estimate_plan_choice.py b/src/main/python/estimate_plan_choice.py index 05b52c24..49ae67f5 100644 --- a/src/main/python/estimate_plan_choice.py +++ b/src/main/python/estimate_plan_choice.py @@ -25,9 +25,9 @@ def tn_generator(sample_size: int, number_of_draws: int) -> np.ndarray: def calc_costs(df, k, modes): # Cost parameter from current berlin model - fixed_costs = defaultdict(lambda: 0.0, car=-15, pt=-3) + daily_costs = defaultdict(lambda: 0.0, car=-14.30, pt=-3) km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149) - time_cost = -6.88 + util_performing = -6.88 # Normalize activity utilities to be near zero # columns = [f"plan_{i}_act_util" for i in range(1, k + 1)] @@ -35,35 +35,39 @@ def calc_costs(df, k, modes): # utils = df.loc[t.Index, columns] # df.loc[t.Index, columns] -= utils.max() + # Marginal utility of money as factor + util_money = df.util_money for i in range(1, k + 1): - # Costs will also include time costs - df[f"plan_{i}_costs"] = 0 # Price is only monetary costs df[f"plan_{i}_price"] = 0 + # Costs will also include time costs + df[f"plan_{i}_utils"] = 0 + df[f"plan_{i}_tt_hours"] = 0 for mode in modes: - df[f"plan_{i}_{mode}_fixed_cost"] = (df[f"plan_{i}_{mode}_usage"] > 0) * fixed_costs[mode] - df[f"plan_{i}_{mode}_used"] = (df[f"plan_{i}_{mode}_usage"] > 0) * 1 + fixed_costs = (df[f"plan_{i}_{mode}_usage"] > 0) * daily_costs[mode] + distance_costs = df[f"plan_{i}_{mode}_km"] * km_costs[mode] - df[f"plan_{i}_price"] += df[f"plan_{i}_{mode}_fixed_cost"] + df[f"plan_{i}_{mode}_km"] * km_costs[mode] + df[f"plan_{i}_{mode}_fixed_cost"] = fixed_costs + df[f"plan_{i}_price"] += fixed_costs + distance_costs + + df[f"plan_{i}_{mode}_used"] = (df[f"plan_{i}_{mode}_usage"] > 0) * 1 df[f"plan_{i}_tt_hours"] += df[f"plan_{i}_{mode}_hours"] # Add configured time costs - df[f"plan_{i}_costs"] += df[f"plan_{i}_{mode}_km"] * km_costs[mode] + df[f"plan_{i}_utils"] += (fixed_costs + distance_costs) * util_money + # Add time costs the overall costs - df[f"plan_{i}_costs"] += time_cost * df[f"plan_{i}_{mode}_hours"] + df[f"plan_{i}_utils"] += util_performing * df[f"plan_{i}_{mode}_hours"] - # Add additional ride time costs + # Add additional ride time utils for the driver if mode == "ride": - df[f"plan_{i}_costs"] += time_cost * df[f"plan_{i}_{mode}_hours"] - - if mode == "pt": - df[f"plan_{i}_costs"] += (df[f"plan_{i}_pt_usage"] > 0) * fixed_costs[mode] + df[f"plan_{i}_utils"] += util_performing * df[f"plan_{i}_{mode}_hours"] # Defragment df df = df.copy()