Skip to content

Commit

Permalink
updated biogeme script
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow committed Jul 9, 2024
1 parent c36120d commit b8f2221
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 33 deletions.
69 changes: 39 additions & 30 deletions src/main/python/choicemodels/estimate_biogeme_plan_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,31 @@
# -*- coding: utf-8 -*-

import argparse

import biogeme.biogeme as bio
import biogeme.database as db
import biogeme.models as models
import pandas as pd
from biogeme.expressions import Beta, bioDraws, log, MonteCarlo

from prepare import calc_plan_variables, tn_generator
from prepare import read_plan_choices, tn_generator

ESTIMATE = 0
FIXED = 1

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Estimate choice model for daily trip usage")
parser.add_argument("--input", help="Path to the input file", type=str, default="../../../plan-choices.csv")
parser.add_argument("--input", help="Path to the input file", type=str,
default="../../../../plan-choices-bestK_9-tt-only.csv")

args = parser.parse_args()

df = pd.read_csv(args.input)
ds = read_plan_choices(args.input)

df.drop(columns=["person"], inplace=True)

# TODO: for testing only
df = df.sample(frac=0.2)
# Needs to be numeric
ds.df["choice"] = 1

# Convert all the columns to numeric
df = df * 1

k = df.columns.str.extract(r"plan_(\d+)", expand=False).dropna().to_numpy(int).max()

print("Number of choices: ", len(df))
print("Number of plans per choice: ", k)

modes = list(df.columns.str.extract(r"_([a-zA-z]+)_usage", expand=False).dropna().unique())
print("Modes: ", modes)

df = calc_plan_variables(df, k, modes)
df = ds.df * 1

database = db.Database("data/plan-choices", df)
v = database.variables
Expand All @@ -45,37 +37,48 @@

mixed_logit = True

# Variables for constants
ASC = {}
for mode in modes:

# Variables for variation
SD = {}

# Factor on marginal utility of money
F_UTIl_MONEY = Beta('UTIL_MONEY', 1, 0, 1.5, ESTIMATE)

BETA_PERFORMING = Beta('BETA_PERFORMING', 6, 0, 10, ESTIMATE)

for mode in ds.modes:
# Base asc
asc = Beta(f"ASC_{mode}", 0, None, None, 1 if mode in ("walk", "car") else 0)
asc = Beta(f"ASC_{mode}", 0, None, None, FIXED if mode in ("walk", "car") else ESTIMATE)

# Pt does not have its own random parameter
if True or mode == "walk" or mode == "pt":
ASC[mode] = asc
else:
# The random parameter
asc_s = Beta(f"ASC_{mode}_s", 1, None, None, 0)
ASC[mode] = asc + asc_s * bioDraws(f"ASC_{mode}_RND", "NORMAL_ANTI")
SD[mode] = Beta(f"ASC_{mode}_s", 1, None, None, 0)
ASC[mode] = asc + SD[mode] * bioDraws(f"{mode}_RND", "NORMAL_ANTI")

B_UTIL = Beta('B_CAR_UTIL', 10, 0, 20, 0)
B_UTIL_S = Beta('B_CAR_UTIL_SD', 1, 0, 10, 0)

B_TIME_RND = B_UTIL + B_UTIL_S * bioDraws('B_CAR_UTIL_RND', 'TN')
B_CAR_RND = B_UTIL + B_UTIL_S * bioDraws('B_CAR_UTIL_RND', 'TN')

U = {}
AV = {}

for i in range(1, k + 1):
u = v[f"plan_{i}_car_fixed_cost"]
u += v[f"plan_{i}_costs"]
for i in range(1, ds.k + 1):
u = v[f"plan_{i}_price"] * (ds.global_income / v["income"]) ** F_UTIl_MONEY
u -= v[f"plan_{i}_pt_n_switches"]

for mode in modes:
for mode in ds.modes:
u += ASC[mode] * v[f"plan_{i}_{mode}_usage"]

u += -BETA_PERFORMING * v[f"plan_{i}_{mode}_hours"] * (2 if mode == "ride" else 1)

if mixed_logit:
u += v[f"plan_{i}_car_used"] * B_TIME_RND
u += v[f"plan_{i}_car_used"] * B_CAR_RND

U[i] = u
AV[i] = v[f"plan_{i}_valid"]
Expand All @@ -88,9 +91,15 @@

biogeme = bio.BIOGEME(database, logprob)

biogeme.calculateNullLoglikelihood(AV)
biogeme.modelName = "plan_choice"
biogeme.weight = v["weight"]
biogeme.generate_pickle = False
biogeme.save_iterations = False
biogeme.number_of_draws = 10000
biogeme.second_derivatives = 0.1
biogeme.seed_param = 42

biogeme.calculateNullLoglikelihood(AV)

results = biogeme.estimate()
print(results.short_summary())
Expand Down
10 changes: 7 additions & 3 deletions src/main/python/choicemodels/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

TN = truncnorm(0, np.inf)

PlanChoice = namedtuple("PlanChoice", ["df", "modes", "varying", "k"])
PlanChoice = namedtuple("PlanChoice", ["df", "modes", "varying", "k", "global_income"])


def read_plan_choices(input_file: str, sample: float = 1, seed: int = 42) -> PlanChoice:
Expand Down Expand Up @@ -42,11 +42,15 @@ def read_plan_choices(input_file: str, sample: float = 1, seed: int = 42) -> Pla
df_wide['custom_id'] = np.arange(len(df_wide)) # Add unique identifier
df_wide['choice'] = df_wide['choice'].map({1: "plan_1"})

df_wide = calc_plan_variables(df_wide, k, modes)
df_wide = calc_plan_variables(df_wide, k, modes, True)

varying = list(df_wide.columns.str.extract(r"plan_1_([a-zA-z_]+)", expand=False).dropna().unique())

return PlanChoice(df_wide, modes, varying, k)
with open(input_file) as f:
_, _, income = f.readline().rpartition(":")
global_income = float(income.strip())

return PlanChoice(df_wide, modes, varying, k, global_income)


def tn_generator(sample_size: int, number_of_draws: int) -> np.ndarray:
Expand Down

0 comments on commit b8f2221

Please sign in to comment.