Skip to content

Commit

Permalink
improvement of the choice model
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow committed Jul 7, 2024
1 parent 51f4bb3 commit 87f43d2
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 45 deletions.
38 changes: 32 additions & 6 deletions src/main/java/org/matsim/prepare/choices/ComputePlanChoices.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.matsim.core.config.groups.PlansConfigGroup;
import org.matsim.core.controler.Controler;
import org.matsim.core.controler.OutputDirectoryHierarchy;
import org.matsim.core.population.PersonUtils;
import org.matsim.core.population.algorithms.ParallelPersonAlgorithmUtils;
import org.matsim.core.population.algorithms.PersonAlgorithm;
import org.matsim.core.router.*;
Expand Down Expand Up @@ -60,14 +61,15 @@ public class ComputePlanChoices implements MATSimAppCommand, PersonAlgorithm {
private boolean timeUtil;
@CommandLine.Option(names = "--calc-scores", description = "Perform pseudo scoring for each plan", defaultValue = "false")
private boolean calcScores;
@CommandLine.Option(names = "--plan-candidates", description = "Method to generate plan candidates", defaultValue = "bestK")
@CommandLine.Option(names = "--plan-candidates", description = "Method to generate plan candidates", defaultValue = "diverse")
private PlanCandidates planCandidates = PlanCandidates.bestK;

@CommandLine.Option(names = "--output", description = "Path to output csv.", required = true)
@CommandLine.Option(names = "--output", description = "Path to output csv.", defaultValue = "plan-choices.csv")
private Path output;

private ThreadLocal<Ctx> thread;
private ProgressBar pb;
private MainModeIdentifier mmi = new DefaultAnalysisMainModeIdentifier();
private double globalAvgIncome;
private final MainModeIdentifier mmi = new DefaultAnalysisMainModeIdentifier();

public static void main(String[] args) {
new ComputePlanChoices().execute(args);
Expand All @@ -76,6 +78,11 @@ public static void main(String[] args) {
@Override
public Integer call() throws Exception {

if (!output.getFileName().toString().contains(".csv")) {
log.error("Output file must be a csv file");
return 2;
}

Config config = this.scenario.getConfig();
config.controller().setOutputDirectory("choice-output");
config.controller().setLastIteration(0);
Expand Down Expand Up @@ -140,19 +147,32 @@ public Integer call() throws Exception {
)
);

globalAvgIncome = population.getPersons().values().stream()
.map(PersonUtils::getIncome)
.filter(Objects::nonNull)
.mapToDouble(d -> d)
.filter(dd -> dd > 0)
.average()
.orElse(Double.NaN);

pb = new ProgressBar("Computing plan choices", population.getPersons().size());

ParallelPersonAlgorithmUtils.run(population, Runtime.getRuntime().availableProcessors(), this);

pb.close();

log.info("Writing {} choices to {}", rows.size(), output);
String out = output.toString().replace(".csv", "-%s_%d.csv".formatted(planCandidates, topK));

log.info("Writing {} choices to {}", rows.size(), out);

try (CSVPrinter csv = new CSVPrinter(Files.newBufferedWriter(output), CSVFormat.DEFAULT)) {
try (CSVPrinter csv = new CSVPrinter(Files.newBufferedWriter(Path.of(out)), CSVFormat.DEFAULT.builder().setCommentMarker('#').build())) {

// header
List<Object> header = new ArrayList<>();
header.add("person");
header.add("weight");
header.add("income");
header.add("util_money");
header.add("choice");

for (int i = 1; i <= topK; i++) {
Expand All @@ -168,6 +188,8 @@ public Integer call() throws Exception {
header.add(String.format("plan_%d_valid", i));
}

csv.printComment("Average global income: " + globalAvgIncome);

csv.printRecord(header);

for (List<Object> row : rows) {
Expand Down Expand Up @@ -213,6 +235,10 @@ public void run(Person person) {
List<Object> row = new ArrayList<>();

row.add(person.getAttributes().getAttribute(TripAnalysis.ATTR_REF_ID));
row.add(person.getAttributes().getAttribute(TripAnalysis.ATTR_REF_WEIGHT));
row.add(PersonUtils.getIncome(person));
row.add(globalAvgIncome / PersonUtils.getIncome(person));

// choice, always the first one
row.add(1);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,24 +99,6 @@ public ScoringParameters() {
super(GROUP_NAME, true);
}

/**
* Checks if the given attributes match the config. If true these parameters are applicable to tbe object.
*/
public boolean matchObject(Attributes attr, Map<String, Category> categories) {

for (Map.Entry<String, String> e : this.getParams().entrySet()) {
// might be null if not defined
Object objValue = attr.getAttribute(e.getKey());
String category = categories.get(e.getKey()).categorize(objValue);

// compare as string
if (!Objects.toString(category).equals(e.getValue()))
return false;
}

return true;
}

public Map<String, ModeParams> getModeParams() {
return modeParams;
}
Expand Down
15 changes: 8 additions & 7 deletions src/main/python/estimate_mixed_plan_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Estimate the plan choice mixed logit model")
parser.add_argument("--input", help="Path to the input file", type=str, default="../../../plan-choices-random.csv")
parser.add_argument("--input", help="Path to the input file", type=str, default="../../../plan-choices.csv")
parser.add_argument("--n-draws", help="Number of draws for the estimation", type=int, default=1500)
parser.add_argument("--batch-size", help="Batch size for the estimation", type=int, default=None)
parser.add_argument("--sample", help="Use sample of choice data", type=float, default=0.2)
Expand All @@ -20,14 +20,14 @@

args = parser.parse_args()

df_wide = pd.read_csv(args.input)
df_wide = pd.read_csv(args.input, comment="#")

modes = list(df_wide.columns.str.extract(r"_([a-zA-z]+)_usage", expand=False).dropna().unique())
print("Modes: ", modes)

k = df_wide.columns.str.extract(r"plan_(\d+)", expand=False).dropna().to_numpy(int).max()
print("Number of choices: ", len(df_wide))
print("Number of plans per choice: ", k)
print("Number of plans: ", len(df_wide))
print("Number of choices for plan: ", k)

# df_wide["p_id"] = df_wide["p_id"].str.replace(r"_\d+$", "", regex=True)
# df_wide["person"] = df_wide["person"].astype('category').cat.codes
Expand Down Expand Up @@ -62,11 +62,12 @@
# varnames = ["car_used", "car_usage"]

# Additive costs
addit = df["costs"] + df["car_fixed_cost"] - df["pt_n_switches"]
# utils contains the price and opportunist costs
addit = df["utils"] - df["pt_n_switches"]

if not args.mnl:
model = MixedLogit()
model.fit(X=df[varnames], y=df['choice'], varnames=varnames,
model.fit(X=df[varnames], y=df['choice'], weights=df['weight'], varnames=varnames,
alts=df['alt'], ids=df['custom_id'], avail=df['valid'], random_state=args.seed,
addit=addit,
# randvars={"car_used": "tn"},
Expand All @@ -79,7 +80,7 @@
#varnames += ["car_usage"]

model = MultinomialLogit()
model.fit(X=df[varnames], y=df['choice'], varnames=varnames,
model.fit(X=df[varnames], y=df['choice'], weights=df['weight'], varnames=varnames,
alts=df['alt'], ids=df['custom_id'], avail=df['valid'], random_state=args.seed,
addit=addit)

Expand Down
32 changes: 18 additions & 14 deletions src/main/python/estimate_plan_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,45 +25,49 @@ def tn_generator(sample_size: int, number_of_draws: int) -> np.ndarray:
def calc_costs(df, k, modes):

# Cost parameter from current berlin model
fixed_costs = defaultdict(lambda: 0.0, car=-15, pt=-3)
daily_costs = defaultdict(lambda: 0.0, car=-14.30, pt=-3)
km_costs = defaultdict(lambda: 0.0, car=-0.149, ride=-0.149)
time_cost = -6.88
util_performing = -6.88

# Normalize activity utilities to be near zero
# columns = [f"plan_{i}_act_util" for i in range(1, k + 1)]
# for t in df.itertuples():
# utils = df.loc[t.Index, columns]
# df.loc[t.Index, columns] -= utils.max()

# Marginal utility of money as factor
util_money = df.util_money

for i in range(1, k + 1):

# Costs will also include time costs
df[f"plan_{i}_costs"] = 0
# Price is only monetary costs
df[f"plan_{i}_price"] = 0

# Costs will also include time costs
df[f"plan_{i}_utils"] = 0

df[f"plan_{i}_tt_hours"] = 0

for mode in modes:

df[f"plan_{i}_{mode}_fixed_cost"] = (df[f"plan_{i}_{mode}_usage"] > 0) * fixed_costs[mode]
df[f"plan_{i}_{mode}_used"] = (df[f"plan_{i}_{mode}_usage"] > 0) * 1
fixed_costs = (df[f"plan_{i}_{mode}_usage"] > 0) * daily_costs[mode]
distance_costs = df[f"plan_{i}_{mode}_km"] * km_costs[mode]

df[f"plan_{i}_price"] += df[f"plan_{i}_{mode}_fixed_cost"] + df[f"plan_{i}_{mode}_km"] * km_costs[mode]
df[f"plan_{i}_{mode}_fixed_cost"] = fixed_costs
df[f"plan_{i}_price"] += fixed_costs + distance_costs

df[f"plan_{i}_{mode}_used"] = (df[f"plan_{i}_{mode}_usage"] > 0) * 1
df[f"plan_{i}_tt_hours"] += df[f"plan_{i}_{mode}_hours"]

# Add configured time costs
df[f"plan_{i}_costs"] += df[f"plan_{i}_{mode}_km"] * km_costs[mode]
df[f"plan_{i}_utils"] += (fixed_costs + distance_costs) * util_money

# Add time costs the overall costs
df[f"plan_{i}_costs"] += time_cost * df[f"plan_{i}_{mode}_hours"]
df[f"plan_{i}_utils"] += util_performing * df[f"plan_{i}_{mode}_hours"]

# Add additional ride time costs
# Add additional ride time utils for the driver
if mode == "ride":
df[f"plan_{i}_costs"] += time_cost * df[f"plan_{i}_{mode}_hours"]

if mode == "pt":
df[f"plan_{i}_costs"] += (df[f"plan_{i}_pt_usage"] > 0) * fixed_costs[mode]
df[f"plan_{i}_utils"] += util_performing * df[f"plan_{i}_{mode}_hours"]

# Defragment df
df = df.copy()
Expand Down

0 comments on commit 87f43d2

Please sign in to comment.