improvement of the choice model

matsim-scenarios · Jul 7, 2024 · 87f43d2 · 87f43d2
1 parent 51f4bb3
commit 87f43d2
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 45 deletions.
diff --git a/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java b/src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java
@@ -18,6 +18,7 @@
 import org.matsim.core.config.groups.PlansConfigGroup;
 import org.matsim.core.controler.Controler;
 import org.matsim.core.controler.OutputDirectoryHierarchy;
+import org.matsim.core.population.PersonUtils;
 import org.matsim.core.population.algorithms.ParallelPersonAlgorithmUtils;
 import org.matsim.core.population.algorithms.PersonAlgorithm;
 import org.matsim.core.router.*;
@@ -60,14 +61,15 @@ public class ComputePlanChoices implements MATSimAppCommand, PersonAlgorithm {
 	private boolean timeUtil;
 	@CommandLine.Option(names = "--calc-scores", description = "Perform pseudo scoring for each plan", defaultValue = "false")
 	private boolean calcScores;
-	@CommandLine.Option(names = "--plan-candidates", description = "Method to generate plan candidates", defaultValue = "bestK")
+	@CommandLine.Option(names = "--plan-candidates", description = "Method to generate plan candidates", defaultValue = "diverse")
 	private PlanCandidates planCandidates = PlanCandidates.bestK;
-
-	@CommandLine.Option(names = "--output", description = "Path to output csv.", required = true)
+	@CommandLine.Option(names = "--output", description = "Path to output csv.", defaultValue = "plan-choices.csv")
 	private Path output;
+
 	private ThreadLocal<Ctx> thread;
 	private ProgressBar pb;
-	private MainModeIdentifier mmi = new DefaultAnalysisMainModeIdentifier();
+	private double globalAvgIncome;
+	private final MainModeIdentifier mmi = new DefaultAnalysisMainModeIdentifier();
 
 	public static void main(String[] args) {
 		new ComputePlanChoices().execute(args);
@@ -76,6 +78,11 @@ public static void main(String[] args) {
 	@Override
 	public Integer call() throws Exception {
 
+		if (!output.getFileName().toString().contains(".csv")) {
+			log.error("Output file must be a csv file");
+			return 2;
+		}
+
 		Config config = this.scenario.getConfig();
 		config.controller().setOutputDirectory("choice-output");
 		config.controller().setLastIteration(0);
@@ -140,19 +147,32 @@ public Integer call() throws Exception {
 			)
 		);
 
+		globalAvgIncome = population.getPersons().values().stream()
+			.map(PersonUtils::getIncome)
+			.filter(Objects::nonNull)
+			.mapToDouble(d -> d)
+			.filter(dd -> dd > 0)
+			.average()
+			.orElse(Double.NaN);
+
 		pb = new ProgressBar("Computing plan choices", population.getPersons().size());
 
 		ParallelPersonAlgorithmUtils.run(population, Runtime.getRuntime().availableProcessors(), this);
 
 		pb.close();
 
-		log.info("Writing {} choices to {}", rows.size(), output);
+		String out = output.toString().replace(".csv", "-%s_%d.csv".formatted(planCandidates, topK));
+
+		log.info("Writing {} choices to {}", rows.size(), out);
 
-		try (CSVPrinter csv = new CSVPrinter(Files.newBufferedWriter(output), CSVFormat.DEFAULT)) {
+		try (CSVPrinter csv = new CSVPrinter(Files.newBufferedWriter(Path.of(out)), CSVFormat.DEFAULT.builder().setCommentMarker('#').build())) {
 
 			// header
 			List<Object> header = new ArrayList<>();
 			header.add("person");
+			header.add("weight");
+			header.add("income");
+			header.add("util_money");
 			header.add("choice");
 
 			for (int i = 1; i <= topK; i++) {
@@ -168,6 +188,8 @@ public Integer call() throws Exception {
 				header.add(String.format("plan_%d_valid", i));
 			}
 
+			csv.printComment("Average global income: " + globalAvgIncome);
+
 			csv.printRecord(header);
 
 			for (List<Object> row : rows) {
@@ -213,6 +235,10 @@ public void run(Person person) {
 		List<Object> row = new ArrayList<>();
 
 		row.add(person.getAttributes().getAttribute(TripAnalysis.ATTR_REF_ID));
+		row.add(person.getAttributes().getAttribute(TripAnalysis.ATTR_REF_WEIGHT));
+		row.add(PersonUtils.getIncome(person));
+		row.add(globalAvgIncome / PersonUtils.getIncome(person));
+
 		// choice, always the first one
 		row.add(1);
 

diff --git a/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java b/src/main/java/org/matsim/run/scoring/AdvancedScoringConfigGroup.java
@@ -99,24 +99,6 @@ public ScoringParameters() {
 			super(GROUP_NAME, true);
 		}
 
-		/**
-		 * Checks if the given attributes match the config. If true these parameters are applicable to tbe object.
-		 */
-		public boolean matchObject(Attributes attr, Map<String, Category> categories) {
-
-			for (Map.Entry<String, String> e : this.getParams().entrySet()) {
-				// might be null if not defined
-				Object objValue = attr.getAttribute(e.getKey());
-				String category = categories.get(e.getKey()).categorize(objValue);
-
-				// compare as string
-				if (!Objects.toString(category).equals(e.getValue()))
-					return false;
-			}
-
-			return true;
-		}
-
 		public Map<String, ModeParams> getModeParams() {
 			return modeParams;
 		}

diff --git a/src/main/python/estimate_mixed_plan_choice.py b/src/main/python/estimate_mixed_plan_choice.py
@@ -11,7 +11,7 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Estimate the plan choice mixed logit model")
-    parser.add_argument("--input", help="Path to the input file", type=str, default="../../../plan-choices-random.csv")
+    parser.add_argument("--input", help="Path to the input file", type=str, default="../../../plan-choices.csv")
     parser.add_argument("--n-draws", help="Number of draws for the estimation", type=int, default=1500)
     parser.add_argument("--batch-size", help="Batch size for the estimation", type=int, default=None)
     parser.add_argument("--sample", help="Use sample of choice data", type=float, default=0.2)
@@ -20,14 +20,14 @@
 
     args = parser.parse_args()
 
-    df_wide = pd.read_csv(args.input)
+    df_wide = pd.read_csv(args.input, comment="#")
 
     modes = list(df_wide.columns.str.extract(r"_([a-zA-z]+)_usage", expand=False).dropna().unique())
     print("Modes: ", modes)
 
     k = df_wide.columns.str.extract(r"plan_(\d+)", expand=False).dropna().to_numpy(int).max()
-    print("Number of choices: ", len(df_wide))
-    print("Number of plans per choice: ", k)
+    print("Number of plans: ", len(df_wide))
+    print("Number of choices for plan: ", k)
 
     # df_wide["p_id"] = df_wide["p_id"].str.replace(r"_\d+$", "", regex=True)
     # df_wide["person"] = df_wide["person"].astype('category').cat.codes
@@ -62,11 +62,12 @@
     # varnames = ["car_used", "car_usage"]
 
     # Additive costs
-    addit = df["costs"] + df["car_fixed_cost"] - df["pt_n_switches"]
+    # utils contains the price and opportunist costs
+    addit = df["utils"] - df["pt_n_switches"]
 
     if not args.mnl:
         model = MixedLogit()
-        model.fit(X=df[varnames], y=df['choice'], varnames=varnames,
+        model.fit(X=df[varnames], y=df['choice'], weights=df['weight'], varnames=varnames,
                   alts=df['alt'], ids=df['custom_id'], avail=df['valid'], random_state=args.seed,
                   addit=addit,
                   # randvars={"car_used": "tn"},
@@ -79,7 +80,7 @@
         #varnames += ["car_usage"]
 
         model = MultinomialLogit()
-        model.fit(X=df[varnames], y=df['choice'], varnames=varnames,
+        model.fit(X=df[varnames], y=df['choice'], weights=df['weight'], varnames=varnames,
                   alts=df['alt'], ids=df['custom_id'], avail=df['valid'], random_state=args.seed,
                   addit=addit)
 

diff --git a/src/main/python/estimate_plan_choice.py b/src/main/python/estimate_plan_choice.py
@@ -25,45 +25,49 @@ def tn_generator(sample_size: int, number_of_draws: int) -> np.ndarray:
 def calc_costs(df, k, modes):
 
     # Cost parameter from current berlin model
-    fixed_costs = defaultdict(lambda: 0.0, car=-15, pt=-3)
+    daily_costs = defaultdict(lambda: 0.0, car=-14.30, pt=-3)
     km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149)
-    time_cost = -6.88
+    util_performing = -6.88
 
     # Normalize activity utilities to be near zero
     # columns = [f"plan_{i}_act_util" for i in range(1, k + 1)]
     # for t in df.itertuples():
     #     utils = df.loc[t.Index, columns]
     #     df.loc[t.Index, columns] -= utils.max()
 
+    # Marginal utility of money as factor
+    util_money = df.util_money
 
     for i in range(1, k + 1):
 
-        # Costs will also include time costs
-        df[f"plan_{i}_costs"] = 0
         # Price is only monetary costs
         df[f"plan_{i}_price"] = 0
 
+        # Costs will also include time costs
+        df[f"plan_{i}_utils"] = 0
+
         df[f"plan_{i}_tt_hours"] = 0
 
         for mode in modes:
 
-            df[f"plan_{i}_{mode}_fixed_cost"] = (df[f"plan_{i}_{mode}_usage"] > 0) * fixed_costs[mode]
-            df[f"plan_{i}_{mode}_used"] = (df[f"plan_{i}_{mode}_usage"] > 0) * 1
+            fixed_costs = (df[f"plan_{i}_{mode}_usage"] > 0) * daily_costs[mode]
+            distance_costs = df[f"plan_{i}_{mode}_km"] * km_costs[mode]
 
-            df[f"plan_{i}_price"] += df[f"plan_{i}_{mode}_fixed_cost"] + df[f"plan_{i}_{mode}_km"] * km_costs[mode]
+            df[f"plan_{i}_{mode}_fixed_cost"] = fixed_costs
+            df[f"plan_{i}_price"] += fixed_costs + distance_costs
+
+            df[f"plan_{i}_{mode}_used"] = (df[f"plan_{i}_{mode}_usage"] > 0) * 1
             df[f"plan_{i}_tt_hours"] += df[f"plan_{i}_{mode}_hours"]
 
             # Add configured time costs
-            df[f"plan_{i}_costs"] += df[f"plan_{i}_{mode}_km"] * km_costs[mode]
+            df[f"plan_{i}_utils"] += (fixed_costs + distance_costs) * util_money
+
             # Add time costs the overall costs
-            df[f"plan_{i}_costs"] += time_cost * df[f"plan_{i}_{mode}_hours"]
+            df[f"plan_{i}_utils"] += util_performing * df[f"plan_{i}_{mode}_hours"]
 
-            # Add additional ride time costs
+            # Add additional ride time utils for the driver
             if mode == "ride":
-                df[f"plan_{i}_costs"] += time_cost * df[f"plan_{i}_{mode}_hours"]
-
-            if mode == "pt":
-                df[f"plan_{i}_costs"] += (df[f"plan_{i}_pt_usage"] > 0) * fixed_costs[mode]
+                df[f"plan_{i}_utils"] += util_performing * df[f"plan_{i}_{mode}_hours"]
 
         # Defragment df
         df = df.copy()