Skip to content

Commit

Permalink
Merge branch 'master' into gis-based-mode-detection
Browse files Browse the repository at this point in the history
  • Loading branch information
shankari committed Sep 14, 2023
2 parents 8b7645c + 55704fc commit f6bf89a
Show file tree
Hide file tree
Showing 9 changed files with 318 additions and 120 deletions.
29 changes: 25 additions & 4 deletions emission/analysis/modelling/similarity/od_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,28 @@ class OriginDestinationSimilarity(eamss.SimilarityMetric):
def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
return ctfe.od_features(trip)

def similarity(self, a: List[float], b: List[float]) -> List[float]:
o_dist = ecc.calDistance([a[0], a[1]], [b[0], b[1]])
d_dist = ecc.calDistance([a[2], a[3]], [b[2], b[3]])
return [o_dist, d_dist]
def similarity(self, a: List[float], b: List[float], clustering_way='origin-destination') -> List[float]:
    """Return the origin and/or destination distance between two trips.

    :param a: point features of one trip. The first two entries are the
        origin coordinates and the next two are the destination
        coordinates.
        NOTE(review): the previous docstring listed each point as
        [longitude, latitude]; confirm the order against
        ``ctfe.od_features`` and ``ecc.calDistance``.
    :param b: point features of the other trip, same layout as ``a``
    :param clustering_way: the part of the trip used for binning trips
        together. Expected to be one of 'origin', 'destination' or
        'origin-destination'. Any other value is handled like
        'destination'.
    :return: ``[origin_dist, destination_dist]`` for
        'origin-destination', ``[origin_dist]`` for 'origin', and
        ``[destination_dist]`` otherwise. Each distance is the value
        returned by ``ecc.calDistance`` for the corresponding pair of
        points.
    """
    origin_dist = ecc.calDistance(a[0:2], b[0:2])
    destination_dist = ecc.calDistance(a[2:4], b[2:4])

    if clustering_way == 'origin-destination':
        return [origin_dist, destination_dist]
    if clustering_way == 'origin':
        return [origin_dist]
    # Deliberate fall-through: unrecognised values keep the historical
    # behaviour of comparing destinations only.
    return [destination_dist]
17 changes: 12 additions & 5 deletions emission/analysis/modelling/similarity/similarity_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,32 @@ def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
pass

@abstractmethod
def similarity(self, a: List[float], b: List[float]) -> List[float]:
def similarity(self, a: List[float], b: List[float], clustering_way='origin-destination') -> List[float]:
    """Compare two feature rows and return their per-feature similarity.

    Concrete metrics override this method; this base version does
    nothing and returns None.

    :param a: features for a trip
    :param b: features for another trip
    :param clustering_way: takes one among 'origin', 'destination',
        'origin-destination' as value. Tells the part of the trip to be
        used for binning trips together if that part lies within a
        threshold. Defaults to 'origin-destination'.
    :return: for each feature, the similarity of these features
    """
    pass

def similar(self, a: List[float], b: List[float], thresh: float) -> bool:
def similar(self, a: List[float], b: List[float], thresh: float, clustering_way='origin-destination') -> bool:
    """Tell whether two feature rows are similar within a threshold.

    :param a: features for a trip
    :param b: features for another trip
    :param thresh: threshold for similarity; every value returned by
        ``self.similarity`` must be less than or equal to it
    :param clustering_way: takes one among 'origin', 'destination',
        'origin-destination' as value. Tells the part of the trip to be
        used for binning trips together if that part lies within a
        threshold. It is forwarded unchanged to ``self.similarity``.
    :return: True if every similarity value is within the threshold.
        An empty list of similarity values also yields True.
    """
    # The metric is evaluated exactly once, always with the requested
    # clustering mode, so the selected trip part is what gets compared.
    similarity_values = self.similarity(a, b, clustering_way)
    return all(sim <= thresh for sim in similarity_values)
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ class label to apply:
self.sim_thresh = config['similarity_threshold_meters']
self.apply_cutoff = config['apply_cutoff']
self.is_incremental = config['incremental_evaluation']
if config.get('clustering_way') is None:
self.clusteringWay='origin-destination' # previous default
else:
self.clusteringWay= config['clustering_way']
self.tripLabels=[]

self.bins: Dict[str, Dict] = {}

Expand Down Expand Up @@ -184,9 +189,11 @@ def _assign_bins(self, trips: List[ecwc.Confirmedtrip]):
logging.debug(f"adding trip to bin {bin_id} with features {trip_features}")
self.bins[bin_id]['feature_rows'].append(trip_features)
self.bins[bin_id]['labels'].append(trip_labels)
self.tripLabels.append(bin_id)
else:
# create new bin
new_bin_id = str(len(self.bins))
self.tripLabels.append(new_bin_id)
new_bin_record = {
'feature_rows': [trip_features],
'labels': [trip_labels],
Expand All @@ -200,14 +207,15 @@ def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
finds an existing bin where all bin features are "similar" to the incoming
trip features.
:param trip_features: feature row for the incoming trip
:param trip_features: feature row for the incoming trip.
takes the form [orig_lat, orig_lon, dest_lat, dest_lon]
:return: the id of a bin if a match was found, otherwise None
"""
for bin_id, bin_record in self.bins.items():
matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh)
for bin_sample in bin_record['feature_rows']])
if matches_bin:
return bin_id
matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh,self.clusteringWay)
for bin_sample in bin_record['feature_rows']])
if matches_bin:
return bin_id
return None

def _nearest_bin(self, trip: ecwc.Confirmedtrip) -> Tuple[Optional[int], Optional[Dict]]:
Expand Down
5 changes: 5 additions & 0 deletions emission/tests/modellingTests/TestBackwardsCompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def testAnyVsAllWhilePredicting(self):
"metric": "od_similarity",
"similarity_threshold_meters": 16000, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_builder = eamtg.GreedySimilarityBinning(model_config)
Expand Down Expand Up @@ -96,6 +97,7 @@ def testRandomTripsWithinTheSameThreshold(self):
trips=n,
origin=(0, 0),
destination=(1, 1),
trip_part='od',
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84
)
Expand All @@ -113,6 +115,7 @@ def testRandomTripsWithinTheSameThreshold(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_model = eamtg.GreedySimilarityBinning(model_config)
Expand Down Expand Up @@ -156,6 +159,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
trips=n,
origin=(0, 0),
destination=(1, 1),
trip_part='od',
label_data=label_data,
threshold=0.1, # Much bigger than the 500m threshold, so we will get multiple bins
)
Expand All @@ -173,6 +177,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_model = eamtg.GreedySimilarityBinning(model_config)
Expand Down
176 changes: 119 additions & 57 deletions emission/tests/modellingTests/TestGreedySimilarityBinning.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
import emission.tests.modellingTests.modellingTestAssets as etmm

import logging


Expand All @@ -10,44 +11,111 @@ def setUp(self) -> None:
logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s',
level=logging.DEBUG)

def testBinning(self):
def testNoBinning(self):
"""
when $should_be_grouped trips are the same, they should appear in a bin
Tests the three (origin, destination and origin-destination based)
binning configuration for trips.
When the origin and destination points of trips are outside a threshold
limit, none of the trips should be binned with the other in any of the three
configs (origin, destination or origin-and-destination based).
"""

# generate $n trips.
n = 20
binning_threshold=500
#this generates 20 trips one-by-one, where each trip's respective origin and destination
# points are more than 500m away.


label_data = {
"mode_confirm": ['walk', 'bike', 'transit'],
"purpose_confirm": ['work', 'home', 'school'],
"replaced_mode": ['drive']
}
}


trips =etmm.generate_mock_trips(
user_id="joe",
trips=n,
trip_part='__',
label_data=label_data,
within_threshold=1,
threshold=binning_threshold,
origin=(0,0),
destination=(1,1)
)

# parameters passed for testing. A list, where each element is one way of clustering
clustering_ways_paramters= ["origin","destination","origin-destination"]

#Testing each of the three clustering_ways by passing them as parameters
for cw in clustering_ways_paramters:
with self.subTest(clustering_way=cw):
#initialise the binning model and fit with previously generated trips
model_config = {
"metric": "od_similarity",
"similarity_threshold_meters": binning_threshold, # meters,
"apply_cutoff": False,
"clustering_way": cw,
"incremental_evaluation": False
}
model= eamtg.GreedySimilarityBinning(model_config)
model.fit(trips)
# check the number of trips in each bin
no_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model.bins.values()))
# Since all trips were sampled outside the threshold, there should be no bin
# with more than 1 trip
self.assertTrue(no_large_bin,"no bin should have more than 1 features in it")

# generate $n trips. $m of them should have origin and destinations sampled
def testBinning(self):
"""
Tests the three (origin, destination and origin-destination based)
binning configuration for trips.
When the points lie within the threshold, the trips are binned together.
"""
# generate $n trips. $m of them should have origin sampled
# within a radius that should have them binned.
n = 20
m = 5
trips = etmm.generate_mock_trips(
user_id="joe",
trips=n,
origin=(0, 0),
destination=(1, 1),
label_data=label_data,
within_threshold=m,
threshold=0.001, # ~ 111 meters in degrees WGS84
)

# pass in a test configuration to the binning algorithm
model_config = {
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"incremental_evaluation": False
binning_threshold=500
label_data = {
"mode_confirm": ['walk', 'bike', 'transit'],
"purpose_confirm": ['work', 'home', 'school'],
"replaced_mode": ['drive']
}
model = eamtg.GreedySimilarityBinning(model_config)

model.fit(trips)

# $m trip features should appear together in one bin
at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
# parameters passed for testing. A list, where each element of this list takes the form
# [trip part to be sampled within mentioned threshold , clustering way used to check similarity]
parameters= [["o_",'origin'],["_d",'destination'],["od",'origin-destination']]
for tp,cw in parameters:
with self.subTest(trip_part=tp,clustering_way=cw):
#generate random trips using utilities
trips =etmm.generate_mock_trips(
user_id="joe",
trips=n,
trip_part=tp,
label_data=label_data,
within_threshold=m,
threshold=binning_threshold,
origin=(0,0),
destination=(1,1)
)
#initialise the binning model and fit with previously generated trips
model_config = {
"metric": "od_similarity" ,
"similarity_threshold_meters": binning_threshold, # meters,
"apply_cutoff": False,
"clustering_way": cw,
"incremental_evaluation": False
}
model = eamtg.GreedySimilarityBinning(model_config)
model.fit(trips)
# check the number of trips in each bin
one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
#Since 5 trips were sampled within the threshold, there should be one bin with 5 trips
self.assertTrue(one_large_bin, "one bin should have 5 features in it")

def testPrediction(self):
"""
Expand All @@ -60,23 +128,24 @@ def testPrediction(self):
}

n = 6
trips = etmm.generate_mock_trips(
user_id="joe",
trips=n,
origin=(0, 0),
destination=(1, 1),
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84
)

trips =etmm.generate_mock_trips(
user_id="joe",
trips=n,
trip_part='od',
label_data=label_data,
within_threshold=n,
threshold=500,
origin=(0,0),
destination=(1,1)
)
model_config = {
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"clustering_way": 'origin_destination',
"incremental_evaluation": False
}
model = eamtg.GreedySimilarityBinning(model_config)

model= eamtg.GreedySimilarityBinning(model_config)
train = trips[0:5]
test = trips[5]

Expand All @@ -95,33 +164,26 @@ def testNoPrediction(self):
"purpose_confirm": ['pizza_party'],
"replaced_mode": ['crabwalking']
}

n = 5
train = etmm.generate_mock_trips(
user_id="joe",
trips=n,
origin=(39.7645187, -104.9951944), # Denver, CO
destination=(39.7435206, -105.2369292), # Golden, CO
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84
binning_threshold = 500
train = etmm.generate_mock_trips( user_id="joe",trips=n, origin=(39.7645187, -104.9951944), # Denver, CO
destination=(39.7435206, -105.2369292), # Golden, CO
trip_part='od', label_data=label_data,
threshold=binning_threshold, within_threshold=n
)
test = etmm.generate_mock_trips(
user_id="joe",
trips=1,
origin=(61.1042262, -150.5611644), # Anchorage, AK
destination=(62.2721466, -150.3233046), # Talkeetna, AK
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84
test = etmm.generate_mock_trips( user_id="amanda",trips=n, origin=(61.1042262, -150.5611644), # Anchorage, AK
destination=(62.2721466, -150.3233046), # Talkeetna, AK
trip_part='od', label_data=label_data,
threshold=binning_threshold, within_threshold=n
)

model_config = {
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"clustering_way": 'origin_destination',
"incremental_evaluation": False
}
model = eamtg.GreedySimilarityBinning(model_config)

model= eamtg.GreedySimilarityBinning(model_config)
model.fit(train)
results, n = model.predict(test[0])

Expand Down
Loading

0 comments on commit f6bf89a

Please sign in to comment.