diff --git a/src/main/java/org/matsim/prepare/population/CalcIncome.java b/src/main/java/org/matsim/prepare/population/CalcIncome.java new file mode 100644 index 00000000..8ef3dcb9 --- /dev/null +++ b/src/main/java/org/matsim/prepare/population/CalcIncome.java @@ -0,0 +1,79 @@ +package org.matsim.prepare.population; + +import org.apache.commons.math3.distribution.EnumeratedIntegerDistribution; +import org.apache.commons.math3.random.Well19937c; +import org.matsim.api.core.v01.population.Person; +import org.matsim.core.population.PersonUtils; +import org.matsim.core.population.PopulationUtils; +import org.matsim.core.population.algorithms.PersonAlgorithm; + +import java.util.HashMap; +import java.util.Map; +import java.util.SplittableRandom; +import java.util.stream.IntStream; + +/** + * Draw income from distribution, according to household size and income group. Based on SrV income groups. + */ +public class CalcIncome implements PersonAlgorithm { + + /** + * Income groups in Euro. The last element is the maximum income in the model, which is not known but defined. + */ + private static final int[] INCOME_GROUPS = new int[]{0, 500, 900, 1500, 2000, 2600, 3000, 3600, 4600, 5600, 8000}; + + /** + * Distribution per economic status. See python file extract income. + */ + private static final Map INCOME_DIST = Map.of( + "very_low", new double[]{0.086, 0.342, 0.343, 0.165, 0.058, 0.004, 0.002, 0.000, 0.000, 0.000}, + "low", new double[]{0.000, 0.000, 0.443, 0.343, 0.123, 0.031, 0.056, 0.005, 0.000, 0.000}, + "medium", new double[]{0.000, 0.000, 0.000, 0.154, 0.324, 0.196, 0.237, 0.084, 0.005, 0.000}, + "high", new double[]{0.000, 0.000, 0.000, 0.000, 0.000, 0.066, 0.069, 0.433, 0.377, 0.055}, + "very_high", new double[]{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.025, 0.975} + ); + + private final SplittableRandom rnd = new SplittableRandom(1234); + + private final Map dists = new HashMap<>(); + + + public CalcIncome() { + + for (Map.Entry e : INCOME_DIST.entrySet()) { + EnumeratedIntegerDistribution d = new EnumeratedIntegerDistribution( + new Well19937c(0), + IntStream.range(0, e.getValue().length).toArray(), e.getValue()); + dists.put(e.getKey(), d); + } + + } + + @Override + public void run(Person person) { + + + // Only handle persons + if (!PopulationUtils.getSubpopulation(person).equals("person")) + return; + + int hh = (int) person.getAttributes().getAttribute(Attributes.HOUSEHOLD_SIZE); + String economicStatus = (String) person.getAttributes().getAttribute(Attributes.ECONOMIC_STATUS); + + // This is only approximate correct at best, finer grained income data is not available + // Economic status is normally per household and defined here: + // https://tu-dresden.de/bu/verkehr/ivs/srv/ressourcen/dateien/SrV2018_Tabellenbericht_Oberzentren_500TEW-_flach.pdf?lang=de + // page 17 + + EnumeratedIntegerDistribution dist = dists.get(economicStatus); + + int idx = dist.sample(); + + // income between lower and upper bound is uniformly sampled + int income = rnd.nextInt(INCOME_GROUPS[idx], INCOME_GROUPS[idx + 1]); + + // Income is divided equally to household + PersonUtils.setIncome(person, (double) income / hh); + } + +} diff --git a/src/main/java/org/matsim/run/RunOpenBerlinScenario.java b/src/main/java/org/matsim/run/RunOpenBerlinScenario.java index 2f6260c3..864bb135 100644 --- a/src/main/java/org/matsim/run/RunOpenBerlinScenario.java +++ b/src/main/java/org/matsim/run/RunOpenBerlinScenario.java @@ -2,6 +2,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.matsim.api.core.v01.Scenario; import org.matsim.api.core.v01.TransportMode; import org.matsim.application.MATSimApplication; import org.matsim.application.options.SampleOptions; @@ -11,10 +12,13 @@ import org.matsim.core.controler.AbstractModule; import org.matsim.core.controler.Controler; import org.matsim.core.replanning.strategies.DefaultPlanStrategiesModule; +import org.matsim.core.scoring.functions.ScoringParametersForPerson; import org.matsim.prepare.RunOpenBerlinCalibration; +import org.matsim.prepare.population.CalcIncome; import org.matsim.simwrapper.SimWrapperConfigGroup; import org.matsim.simwrapper.SimWrapperModule; import picocli.CommandLine; +import playground.vsp.scoring.IncomeDependentUtilityOfMoneyPersonScoringParameters; import java.util.List; @@ -92,6 +96,16 @@ protected Config prepareConfig(Config config) { return config; } + @Override + protected void prepareScenario(Scenario scenario) { + + CalcIncome income = new CalcIncome(); + + // Calculate the income for each person, in next versions this might also be done during creation of the population + scenario.getPopulation().getPersons().values().forEach(income::run); + + } + @Override protected void prepareControler(Controler controler) { @@ -103,6 +117,8 @@ public void install() { addTravelTimeBinding(TransportMode.ride).to(networkTravelTime()); addTravelDisutilityFactoryBinding(TransportMode.ride).to(carTravelDisutilityFactoryKey()); + bind(ScoringParametersForPerson.class).to(IncomeDependentUtilityOfMoneyPersonScoringParameters.class).asEagerSingleton(); + } }); diff --git a/src/main/python/extract_income.py b/src/main/python/extract_income.py new file mode 100644 index 00000000..2a20ecac --- /dev/null +++ b/src/main/python/extract_income.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import numpy as np +import os +from matsim.scenariogen.data import read_all + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Converter for survey data") + + parser.add_argument("-d", "--directory", default=os.path.expanduser( + "~/Development/matsim-scenarios/shared-svn/projects/matsim-berlin/data/SrV/")) + + args = parser.parse_args() + + hh, persons, trips = read_all([args.directory + "Berlin+Umland", args.directory + "Brandenburg"]) + + hh = hh[hh.income >= 0] + + # Large households are underrepresented and capped (same operation as in input) + hh.n_persons = np.minimum(hh.n_persons, 5) + + groups = list(sorted(set(hh.income))) + + def calc(x): + counts = x.groupby("income").size() + prob = counts / sum(counts) + return prob.to_frame().transpose() + + + dist = hh.groupby(["economic_status"]).apply(calc).fillna(0).reset_index().drop(columns=["level_1"]) + + print("Income groups:", groups) + + for t in dist[["economic_status"] + groups].itertuples(): + print('"%s", new double[]{%s},' % (t.economic_status, ", ".join("%.3f" % x for x in t[2:])))