def similarity(self, a: List[float], b: List[float], clustering_way='origin-destination') -> List[float]:
    """Measure how far apart two trips start and/or end.

    :param a: features of one trip: the origin coordinate pair followed by
        the destination coordinate pair (four values in total)
    :param b: features of another trip, laid out like ``a``
    :param clustering_way: which part of the trip is compared; one of
        'origin', 'destination' or 'origin-destination'
    :return: ``[origin_distance, destination_distance]`` for
        'origin-destination', ``[origin_distance]`` for 'origin', and
        ``[destination_distance]`` for every other value
    """
    # NOTE(review): the distances come from ecc.calDistance; callers compare
    # them against a threshold in meters, so presumably the unit is meters and
    # the coordinate order is the one calDistance expects - confirm.
    if clustering_way == 'origin-destination':
        return [ecc.calDistance(a[0:2], b[0:2]), ecc.calDistance(a[2:4], b[2:4])]

    # Only the distance that is actually returned is computed: this method is
    # called once per (trip, bin sample) pair while binning.
    if clustering_way == 'origin':
        return [ecc.calDistance(a[0:2], b[0:2])]

    # Any value other than the two handled above (including misspellings such
    # as 'origin_destination') is treated as 'destination'.
    return [ecc.calDistance(a[2:4], b[2:4])]
def similar(self, a: List[float], b: List[float], thresh: float, clustering_way= 'origin-destination') -> bool:
    """Decide whether two feature rows are similar within a threshold.

    :param a: features for a trip
    :param b: features for another trip
    :param thresh: largest similarity value that still counts as similar
    :param clustering_way: one of 'origin', 'destination' or
        'origin-destination'; forwarded unchanged to ``self.similarity``
    :return: True when every value produced by ``self.similarity`` is at
        most ``thresh`` (also True when no values are produced)
    """
    for value in self.similarity(a, b, clustering_way):
        # written as "not <=" so that incomparable values (e.g. NaN) fail
        if not value <= thresh:
            return False
    return True
def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
    """
    finds an existing bin where all bin features are "similar" to the incoming
    trip features.

    :param trip_features: feature row for the incoming trip: the origin
        coordinate pair followed by the destination coordinate pair.
        NOTE(review): whether each pair is (lat, lon) or (lon, lat) is
        documented inconsistently in this change - confirm against the
        feature extractor.
    :return: the id of the first bin (in insertion order) whose every stored
        feature row is similar to ``trip_features``, otherwise None
    """
    for bin_id, bin_record in self.bins.items():
        # A generator lets all() stop at the first dissimilar sample instead
        # of comparing the trip against every sample of a bin that has
        # already been ruled out.
        matches_bin = all(
            self.metric.similar(trip_features, bin_sample, self.sim_thresh, self.clusteringWay)
            for bin_sample in bin_record['feature_rows']
        )
        if matches_bin:
            return bin_id
    return None
"metric": "od_similarity", "similarity_threshold_meters": 500, # meters, "apply_cutoff": False, + "clustering_way": 'origin-destination', "incremental_evaluation": False } new_model = eamtg.GreedySimilarityBinning(model_config) diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py index 32bed47aa..31b3261ae 100644 --- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py +++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py @@ -1,6 +1,7 @@ import unittest import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg import emission.tests.modellingTests.modellingTestAssets as etmm + import logging @@ -10,44 +11,111 @@ def setUp(self) -> None: logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', level=logging.DEBUG) - def testBinning(self): + def testNoBinning(self): """ - when $should_be_grouped trips are the same, they should appear in a bin + Tests the three (origin, destination and origin-destination based) + binning configuration for trips. + + When the origin and destination points of trips are outside a threshold + limit, none of the trips should be binned with the other in any of the three + configs (origin, destination or origin-and-destination based). """ + + # generate $n trips. + n = 20 + binning_threshold=500 + #this generates 20 trips one-by-one, where each trip's respective origin and destination + # points are more than 500m away. + + label_data = { "mode_confirm": ['walk', 'bike', 'transit'], "purpose_confirm": ['work', 'home', 'school'], "replaced_mode": ['drive'] - } + } + + + trips =etmm.generate_mock_trips( + user_id="joe", + trips=n, + trip_part='__', + label_data=label_data, + within_threshold=1, + threshold=binning_threshold, + origin=(0,0), + destination=(1,1) + ) + + # parameters passed for testing. 
def testNoBinning(self):
    """
    Checks all three binning configurations (origin, destination and
    origin-destination) against trips that must never share a bin.

    Every generated trip has its origin and its destination outside the
    threshold distance of the other trips, so each bin is expected to hold
    exactly one trip whichever configuration is used.
    """
    trip_count = 20
    binning_threshold = 500

    label_data = {
        "mode_confirm": ['walk', 'bike', 'transit'],
        "purpose_confirm": ['work', 'home', 'school'],
        "replaced_mode": ['drive']
    }

    # trip_part='__' asks the generator to keep neither origins nor
    # destinations within the threshold of each other
    trips = etmm.generate_mock_trips(
        user_id="joe",
        trips=trip_count,
        trip_part='__',
        label_data=label_data,
        within_threshold=1,
        threshold=binning_threshold,
        origin=(0, 0),
        destination=(1, 1),
    )

    # one sub-test per supported way of clustering
    for clustering_way in ("origin", "destination", "origin-destination"):
        with self.subTest(clustering_way=clustering_way):
            model_config = {
                "metric": "od_similarity",
                "similarity_threshold_meters": binning_threshold,  # meters
                "apply_cutoff": False,
                "clustering_way": clustering_way,
                "incremental_evaluation": False
            }
            model = eamtg.GreedySimilarityBinning(model_config)
            model.fit(trips)

            # every bin must contain a single feature row
            bin_sizes = [len(bin_record['feature_rows']) for bin_record in model.bins.values()]
            only_singletons = all(size == 1 for size in bin_sizes)
            self.assertTrue(only_singletons, "no bin should have more than 1 features in it")
def testBinning(self):
    """
    Checks all three binning configurations (origin, destination and
    origin-destination) against trips that are expected to share a bin.

    For each configuration, m of the n generated trips have the relevant
    trip part sampled within the threshold, so one bin with m trips is
    expected.
    """
    total_trips = 20
    clustered_trips = 5
    binning_threshold = 500
    label_data = {
        "mode_confirm": ['walk', 'bike', 'transit'],
        "purpose_confirm": ['work', 'home', 'school'],
        "replaced_mode": ['drive']
    }

    # (trip part sampled within the threshold, clustering way under test)
    scenarios = (
        ("o_", 'origin'),
        ("_d", 'destination'),
        ("od", 'origin-destination'),
    )
    for trip_part, clustering_way in scenarios:
        with self.subTest(trip_part=trip_part, clustering_way=clustering_way):
            trips = etmm.generate_mock_trips(
                user_id="joe",
                trips=total_trips,
                trip_part=trip_part,
                label_data=label_data,
                within_threshold=clustered_trips,
                threshold=binning_threshold,
                origin=(0, 0),
                destination=(1, 1),
            )
            model_config = {
                "metric": "od_similarity",
                "similarity_threshold_meters": binning_threshold,  # meters
                "apply_cutoff": False,
                "clustering_way": clustering_way,
                "incremental_evaluation": False
            }
            model = eamtg.GreedySimilarityBinning(model_config)
            model.fit(trips)

            # look for a bin holding exactly the clustered trips
            bin_sizes = [len(bin_record['feature_rows']) for bin_record in model.bins.values()]
            found_cluster = any(size == clustered_trips for size in bin_sizes)
            self.assertTrue(found_cluster, "one bin should have 5 features in it")
destination=(62.2721466, -150.3233046), # Golden, CO + trip_part='od', label_data=label_data, + threshold=binning_threshold, within_threshold=n ) - model_config = { "metric": "od_similarity", - "similarity_threshold_meters": 500, # meters, + "similarity_threshold_meters": 500, # meters, "apply_cutoff": False, + "clustering_way": 'origin_destination', "incremental_evaluation": False } - model = eamtg.GreedySimilarityBinning(model_config) - + model= eamtg.GreedySimilarityBinning(model_config) model.fit(train) results, n = model.predict(test[0]) diff --git a/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py b/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py index aee6a6f09..1529f8df5 100644 --- a/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py +++ b/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py @@ -44,6 +44,7 @@ def setUp(self): "metric": "od_similarity", "similarity_threshold_meters": sim_threshold, "apply_cutoff": False, + "clustering_way": 'origin-destination', "incremental_evaluation": True } @@ -162,6 +163,7 @@ def testIncrementalRun(self): trips=self.new_trips_per_invocation, origin=self.origin, destination=self.destination, + trip_part='od', label_data=label_data, threshold=0.0001, # ~10m, start_ts=time.time() - 20, diff --git a/emission/tests/modellingTests/TestRunGreedyModel.py b/emission/tests/modellingTests/TestRunGreedyModel.py index 10f221909..9e4431fa3 100644 --- a/emission/tests/modellingTests/TestRunGreedyModel.py +++ b/emission/tests/modellingTests/TestRunGreedyModel.py @@ -62,6 +62,7 @@ def setUp(self): trips=self.total_trips, origin=self.origin, destination=self.destination, + trip_part='od', label_data=label_data, within_threshold=self.clustered_trips, threshold=0.004, # ~400m @@ -106,6 +107,7 @@ def testTrainGreedyModelWithZeroTrips(self): "metric": "od_similarity", "similarity_threshold_meters": 500, "apply_cutoff": False, + "clustering_way": 'origin-destination', 
"incremental_evaluation": False } @@ -142,6 +144,7 @@ def test1RoundTripGreedySimilarityBinning(self): "metric": "od_similarity", "similarity_threshold_meters": 500, "apply_cutoff": False, + "clustering_way": 'origin-destination', "incremental_evaluation": False } diff --git a/emission/tests/modellingTests/TestSimilarityMetric.py b/emission/tests/modellingTests/TestSimilarityMetric.py index ae37fc39a..fe038be4e 100644 --- a/emission/tests/modellingTests/TestSimilarityMetric.py +++ b/emission/tests/modellingTests/TestSimilarityMetric.py @@ -1,31 +1,59 @@ import unittest -import emission.tests.modellingTests.modellingTestAssets as etmm import emission.analysis.modelling.similarity.od_similarity as eamso - +import emission.tests.modellingTests.modellingTestAssets as etmm class TestSimilarityMetric(unittest.TestCase): def testODsAreSimilar(self): - generate_points_thresh = 0.001 # approx. 111 meters - similarity_threshold = 500 # - # random, but, points are sampled within a circle and should always be < sim threshold - trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], threshold=generate_points_thresh) + similarity_threshold = 500 # in meters metric = eamso.OriginDestinationSimilarity() - coords0 = metric.extract_features(trips[0]) - coords1 = metric.extract_features(trips[1]) - similar = metric.similar(coords0, coords1, similarity_threshold) - self.assertTrue(similar) + + # parameters passed for testing is set here. A list, where each element of this list takes the form + # [trip part to be sampled within mentioned threshold, (start_coord,end_coord)] + # Since the extracted_features function returns in the form [origin_lat,origin_long,destination_lat,destination_long], + # if clustering is to be done by : + # a.origin, we pass first two values of this list,i.e. from 0 till before 2 index + # b.destination, we pas last two values of this list,i.e. from 2 till before 4 index + # c.origin-destination, we pass the entire list , i.e. 
def testODsAreSimilar(self):
    """
    Two trips whose sampled part (origin, destination or both) lies within
    the threshold must be reported as similar by the matching clustering way.
    """
    similarity_threshold = 500  # in meters
    metric = eamso.OriginDestinationSimilarity()

    # (trip part sampled within the threshold, clustering way used to compare)
    scenarios = [["o_", 'origin'], ["_d", 'destination'], ["od", 'origin-destination']]

    for trip_part, clustering_way in scenarios:
        with self.subTest(trip_part=trip_part):
            first_trip, second_trip = etmm.generate_mock_trips(
                'joe',
                2,
                threshold=similarity_threshold,
                origin=[0, 0],
                destination=[1, 1],
                within_threshold=2,
                trip_part=trip_part,
            )
            first_features = metric.extract_features(first_trip)
            second_features = metric.extract_features(second_trip)
            # the part under test was sampled within the threshold, so the
            # metric has to consider the two trips similar
            self.assertTrue(
                metric.similar(first_features, second_features, similarity_threshold, clustering_way)
            )
def generate_random_point():
    """Return a random point with a valid WGS84 latitude and longitude.

    CAUTION: the point is returned as [lon, lat], the order GeoJSON needs
    when trips are saved.
    """
    # latitude is drawn first, then longitude
    latitude, longitude = (random.uniform(-limit, limit) for limit in (90, 180))
    return [longitude, latitude]
def generate_nearby_random_points(ref_coords, threshold):
    """
    Return a random point close to ``ref_coords``.

    ``threshold`` is a distance in meters. It is converted to an approximate
    distance in WGS84 degrees, and the point is sampled inside a circle of
    half that size around the reference. Two points sampled around the same
    reference are therefore at most one (converted) threshold apart.
    """
    # approximate meters -> degrees conversion, halved to get the circle radius
    max_offset = threshold * (0.000001 / 0.11) / 2

    # polar sampling: a distance from the reference, then a direction in radians
    distance = random.uniform(0, max_offset)
    bearing = random.uniform(0, 2 * math.pi)

    first, second = ref_coords[0], ref_coords[1]
    return [first + distance * math.sin(bearing), second + distance * math.cos(bearing)]
def generate_trip_coordinates(
    points_list: List[List[float]],
    ref_coords: Optional[List[float]],
    insideThreshold: bool,
    threshold_meters: float,
    ) -> List[float]:
    """generates trip coordinate data to use when mocking a set of trip data.

    If the new point is meant to be binned with earlier ones, it is sampled
    close to a reference point. Otherwise random points are drawn until one
    is found that lies more than ``threshold_meters`` away from every
    previously accepted point.

    :param points_list: all previously accepted points
    :param ref_coords: optional reference point; when None, an earlier point
        or a random point is used instead (see below)
    :param insideThreshold: whether this point is supposed to fall within the
        distance threshold of the earlier points
    :param threshold_meters: the distance threshold, in meters
    :return: the generated coordinate pair
    """
    if insideThreshold and points_list:
        # Every nearby point is sampled within half a threshold of its anchor.
        # Without an explicit reference the anchor is the first accepted
        # point: anchoring on a different earlier point each time would let
        # the points drift more than one threshold apart from each other.
        anchor = points_list[0] if ref_coords is None else ref_coords
        return generate_nearby_random_points(anchor, threshold_meters)

    # Either the point need not be inside the threshold or it is the very
    # first point: start from the reference when there is one.
    new_point = generate_random_point() if ref_coords is None else ref_coords

    # Re-draw until the candidate is more than threshold_meters away from all
    # previously accepted points (trivially true when there are none).
    while not all(ecc.calDistance(new_point, pt) > threshold_meters for pt in points_list):
        new_point = generate_random_point()
    return new_point
'od' ->(origin and destination,meaning BOTH origin and destination of m trips + will lie within the mentioned threshold when trips are generated) + :param origin : reference point for trip origin generally + :param destination : reference point for trip origin generally :param label_data: dictionary of label data, see above, defaults to None :param within_threshold: number of trips that should fall within the provided - distance threshold in degrees WGS84, defaults to None - :param threshold: distance threshold in WGS84 for sampling, defaults to 0.01 - :param max: maximum distance beyond the threshold for trips sampled that - are not within the threshold, defaults to 0.1 degrees WGS84 + distance threshold in m + :param threshold: distance threshold in WGS84 for sampling :param has_label_p: probability a trip has labels, defaults to 1.0 :param seed: random seed, defaults to 0 :return: randomly sampled trips @@ -174,9 +229,16 @@ def generate_mock_trips( within = within_threshold if within_threshold is not None else trips trips_within_threshold = [i < within for i in range(trips)] result = [] + origin_points=[] + destination_points=[] + + # generate 'trip' number of points based on which among 'o' (Origin) ,'d' (Destination) or + # 'od' (Origin-Destination) or '__' (None) should be in threshold proximity to each other. 
for within in trips_within_threshold: - o = generate_trip_coordinates(origin, within, threshold, max) - d = generate_trip_coordinates(destination, within, threshold, max) + origin_points.append(generate_trip_coordinates(origin_points, origin, insideThreshold= (trip_part[0] == 'o' and within), threshold_meters= threshold)) + destination_points.append(generate_trip_coordinates(destination_points, destination, insideThreshold=(trip_part[1] == 'd' and within), threshold_meters=threshold)) + + for o,d in zip(origin_points,destination_points): labels = {} if label_data is None or random.random() > has_label_p \ else sample_trip_labels( mode_labels=label_data.get('mode_confirm'), @@ -199,6 +261,6 @@ def generate_mock_trips( "purpose_confirm": ['work', 'home', 'school'], "replaced_mode": ['walk', 'bike', 'drive'] } - result = generate_mock_trips('joe-bob', 14, [0, 0], [1,1], label_data, 6) + result = generate_mock_trips('joe-bob', 14, [0, 0], [1,1],'od', label_data, 6) for r in result: print(r) \ No newline at end of file