Skip to content

Commit

Permalink
ENH: add centroid method for angles (#453)
Browse files Browse the repository at this point in the history
* ENH: add centroid method for angles

* BUG: add missing index to centroid output

* BUG: Fix angle median bug for sp

* CLN: black

* CLN: give up and adapt test

* TST: Add test for planar crs
  • Loading branch information
bifbof authored Dec 12, 2022
1 parent d3ade20 commit 88ef62e
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 31 deletions.
28 changes: 14 additions & 14 deletions tests/data/geolife_long/trips.csv
Original file line number Diff line number Diff line change
@@ -1,15 1,15 @@
id,user_id,started_at,finished_at,origin_staypoint_id,destination_staypoint_id,geom
0,0,2008-10-23 02:53:04 00:00,2008-10-23 03:05:00 00:00,,0.0,"MULTIPOINT (116.3184169999999966 39.9847019999999986, 116.2987250000000046 39.9840195000000035)"
1,0,2008-10-23 04:08:07 00:00,2008-10-23 04:34:37 00:00,0.0,1.0,"MULTIPOINT (116.2987250000000046 39.9840195000000035, 116.3248009999999937 39.9997295000000021)"
2,0,2008-10-23 09:42:25 00:00,2008-10-23 10:45:41 00:00,1.0,5.0,"MULTIPOINT (116.3248009999999937 39.9997295000000021, 116.3216824999999943 40.0089049999999986)"
3,0,2008-10-23 11:09:22 00:00,2008-10-23 11:10:57 00:00,5.0,6.0,"MULTIPOINT (116.3216824999999943 40.0089049999999986, 116.3209779999999967 40.0092695000000033)"
4,0,2008-10-24 02:09:59 00:00,2008-10-24 02:47:06 00:00,6.0,,"MULTIPOINT (116.3209779999999967 40.0092695000000033, 116.3211620000000011 40.0092089999999985)"
5,1,2008-10-23 05:53:05 00:00,2008-10-23 06:01:42 00:00,,10.0,"MULTIPOINT (116.3192360000000036 39.9840939999999989, 116.3275380000000041 39.9780510000000007)"
6,1,2008-10-23 10:32:53 00:00,2008-10-23 11:10:19 00:00,10.0,11.0,"MULTIPOINT (116.3275380000000041 39.9780510000000007, 116.3064135000000050 40.0138019999999983)"
7,1,2008-10-23 11:49:08 00:00,2008-10-23 11:49:48 00:00,11.0,12.0,"MULTIPOINT (116.3064135000000050 40.0138019999999983, 116.3065320000000042 40.0138200000000026)"
8,1,2008-10-23 23:43:46 00:00,2008-10-24 00:23:03 00:00,12.0,14.0,"MULTIPOINT (116.3065320000000042 40.0138200000000026, 116.3266370000000052 39.9779920000000004)"
9,1,2008-10-24 01:45:41 00:00,2008-10-24 02:02:46 00:00,14.0,15.0,"MULTIPOINT (116.3266370000000052 39.9779920000000004, 116.3084029999999984 39.9810395000000027)"
10,1,2008-10-24 02:28:19 00:00,2008-10-24 02:31:32 00:00,15.0,16.0,"MULTIPOINT (116.3084029999999984 39.9810395000000027, 116.3101889999999941 39.9813740000000024)"
11,1,2008-10-24 03:16:35 00:00,2008-10-24 04:12:50 00:00,16.0,20.0,"MULTIPOINT (116.3101889999999941 39.9813740000000024, 116.3267620000000022 39.9779330000000002)"
12,1,2008-10-24 05:28:05 00:00,2008-10-24 05:39:50 00:00,20.0,21.0,"MULTIPOINT (116.3267620000000022 39.9779330000000002, 116.3112610000000018 39.9827119999999994)"
13,1,2008-10-24 06:08:42 00:00,2008-10-24 06:35:50 00:00,21.0,,"MULTIPOINT (116.3112610000000018 39.9827119999999994, 116.3270629999999954 39.9778995000000007)"
0,0,2008-10-23 02:53:04 00:00,2008-10-23 03:05:00 00:00,,0.0,"MULTIPOINT (116.318417 39.984702, 116.29872033333335 39.98398866666667)"
1,0,2008-10-23 04:08:07 00:00,2008-10-23 04:34:37 00:00,0.0,1.0,"MULTIPOINT (116.29872033333335 39.98398866666667, 116.324803 39.99972133333333)"
2,0,2008-10-23 09:42:25 00:00,2008-10-23 10:45:41 00:00,1.0,5.0,"MULTIPOINT (116.324803 39.99972133333333, 116.321629 40.00890557142857)"
3,0,2008-10-23 11:09:22 00:00,2008-10-23 11:10:57 00:00,5.0,6.0,"MULTIPOINT (116.321629 40.00890557142857, 116.32097166666667 40.00928)"
4,0,2008-10-24 02:09:59 00:00,2008-10-24 02:47:06 00:00,6.0,,"MULTIPOINT (116.32097166666667 40.00928, 116.321162 40.009209)"
5,1,2008-10-23 05:53:05 00:00,2008-10-23 06:01:42 00:00,,10.0,"MULTIPOINT (116.319236 39.984094, 116.32752433333334 39.978049666666664)"
6,1,2008-10-23 10:32:53 00:00,2008-10-23 11:10:19 00:00,10.0,11.0,"MULTIPOINT (116.32752433333334 39.978049666666664, 116.30641350000002 40.013802)"
7,1,2008-10-23 11:49:08 00:00,2008-10-23 11:49:48 00:00,11.0,12.0,"MULTIPOINT (116.30641350000002 40.013802, 116.30653043609027 40.01383060902254)"
8,1,2008-10-23 23:43:46 00:00,2008-10-24 00:23:03 00:00,12.0,14.0,"MULTIPOINT (116.30653043609027 40.01383060902254, 116.32663666666667 39.977995)"
9,1,2008-10-24 01:45:41 00:00,2008-10-24 02:02:46 00:00,14.0,15.0,"MULTIPOINT (116.32663666666667 39.977995, 116.3084012 39.981039200000005)"
10,1,2008-10-24 02:28:19 00:00,2008-10-24 02:31:32 00:00,15.0,16.0,"MULTIPOINT (116.3084012 39.981039200000005, 116.31014576923077 39.98138838461539)"
11,1,2008-10-24 03:16:35 00:00,2008-10-24 04:12:50 00:00,16.0,20.0,"MULTIPOINT (116.31014576923077 39.98138838461539, 116.32675288888888 39.97793)"
12,1,2008-10-24 05:28:05 00:00,2008-10-24 05:39:50 00:00,20.0,21.0,"MULTIPOINT (116.32675288888888 39.97793, 116.31125442857143 39.982705714285714)"
13,1,2008-10-24 06:08:42 00:00,2008-10-24 06:35:50 00:00,21.0,,"MULTIPOINT (116.31125442857143 39.982705714285714, 116.327063 39.977899)"
18 changes: 18 additions & 0 deletions tests/preprocessing/test_positionfixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 305,24 @@ def test_unknown_distance_metric(self, example_positionfixes):
)


class Test__create_new_staypoints:
"""Test __create_new_staypoints."""

def test_planar_crs(self, geolife_pfs_sp_long):
"""Test if planar crs are handled as well"""
pfs, _ = geolife_pfs_sp_long
_, sp_wgs84 = pfs.as_positionfixes.generate_staypoints(
method="sliding", dist_threshold=100, time_threshold=5.0, include_last=True
)
pfs = pfs.set_crs(2056, allow_override=True)
_, sp_lv95 = pfs.as_positionfixes.generate_staypoints(
method="sliding", dist_threshold=100, time_threshold=5.0, include_last=True
)
sp_lv95.set_crs(4326, allow_override=True, inplace=True)
# planar and non-planar differ only if we experience a wrap in coords like [ 180, -180]
assert_geodataframe_equal(sp_wgs84, sp_lv95, check_less_precise=True)


class TestGenerate_triplegs:
"""Tests for generate_triplegs() method."""

Expand Down
5 changes: 2 additions & 3 deletions tests/preprocessing/test_staypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 7,7 @@
import pytest
from shapely.geometry import Point
from sklearn.cluster import DBSCAN
from geopandas.testing import assert_geodataframe_equal
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal

import trackintel as ti
from trackintel.geogr.distances import calculate_distance_matrix
Expand Down Expand Up @@ -219,8 219,7 @@ def test_dbscan_loc(self):
other_locs = gpd.GeoDataFrame(other_locs, columns=["user_id", "id", "center"], geometry="center", crs=sp.crs)
other_locs.set_index("id", inplace=True)

assert all(other_locs["center"] == locs["center"])
assert all(other_locs.index == locs.index)
assert_geoseries_equal(other_locs["center"], locs["center"], check_less_precise=True)

def test_dbscan_user_dataset(self):
"""Test user and dataset location generation."""
Expand Down
2 changes: 1 addition & 1 deletion tests/preprocessing/test_triplegs.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 71,7 @@ def test_generate_trips(self, example_triplegs_higher_gap_threshold):
["user_id", "started_at", "finished_at", "origin_staypoint_id", "destination_staypoint_id", "geom"]
]
# test if generated trips are equal
assert_geodataframe_equal(trips_loaded, trips)
assert_geodataframe_equal(trips_loaded, trips, check_less_precise=True)

def test_trip_wo_geom(self, example_triplegs_higher_gap_threshold):
"""Test if the add_geometry parameter shows correct behavior"""
Expand Down
22 changes: 20 additions & 2 deletions tests/preprocessing/test_util.py
Original file line number Diff line number Diff line change
@@ -1,10 1,13 @@
import datetime

import geopandas as gpd
from geopandas.testing import assert_geoseries_equal
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest
from pandas.testing import assert_frame_equal
from shapely.geometry import MultiPoint, Point

from trackintel.preprocessing.util import calc_temp_overlap, _explode_agg
from trackintel.preprocessing.util import _explode_agg, calc_temp_overlap, angle_centroid_multipoints


@pytest.fixture
Expand Down Expand Up @@ -88,3 91,18 @@ def test_index_dtype_with_None(self):
returned_df = _explode_agg("id", "c", orig_df, agg_df)
solution_df = pd.DataFrame(orig)
assert_frame_equal(returned_df, solution_df)


class TestAngleCentroidMultipoints:
"""Test util method angle_centroid_multipoints"""

# test adapted from https://rosettacode.org/wiki/Averages/Mean_angle
a = Point((130, 45))
b = MultiPoint([(160, 10), (-170, 20)])
c = MultiPoint([(20, 0), (30, 10), (40, 20)])
d = MultiPoint([(350, 0), (10, 0)])
e = MultiPoint([(90, 0), (180, 0), (270, 0), (360, 0)])
g = gpd.GeoSeries([a, b, c, d, e])
g_solution = gpd.GeoSeries([a, Point([175, 15]), Point([30, 10]), Point(0, 0), Point(-90, 0)])
g = gpd.GeoSeries(angle_centroid_multipoints(g))
assert_geoseries_equal(g, g_solution, check_less_precise=True)
14 changes: 9 additions & 5 deletions trackintel/preprocessing/positionfixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 4,10 @@
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import LineString, Point
from shapely.geometry import LineString

from trackintel.geogr.distances import haversine_dist
from trackintel.preprocessing.util import applyParallel, _explode_agg
from trackintel.geogr.distances import check_gdf_planar, haversine_dist
from trackintel.preprocessing.util import _explode_agg, angle_centroid_multipoints, applyParallel


def generate_staypoints(
Expand Down Expand Up @@ -411,7 411,7 @@ def _generate_staypoints_sliding_user(
y = df[geo_col].y.to_numpy()

ret_sp = []
start = 0
curr = start = 0
for curr in range(1, len(df)):

# the gap of two consecutive positionfixes should not be too long
Expand Down Expand Up @@ -451,8 451,12 @@ def __create_new_staypoints(start, end, pfs, elevation_flag, geo_col, last_flag=
# if end is the last pfs, we want to include the info from it as well
if last_flag:
end = len(pfs)
points = pfs[geo_col].iloc[start:end].unary_union
if check_gdf_planar(pfs):
new_sp[geo_col] = points.centroid
else:
new_sp[geo_col] = angle_centroid_multipoints(points)[0]

new_sp[geo_col] = Point(pfs[geo_col].iloc[start:end].x.median(), pfs[geo_col].iloc[start:end].y.median())
if elevation_flag:
new_sp["elevation"] = pfs["elevation"].iloc[start:end].median()
new_sp["pfs_id"] = pfs.index[start:end].to_list()
Expand Down
14 changes: 8 additions & 6 deletions trackintel/preprocessing/staypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 4,8 @@
from sklearn.cluster import DBSCAN
import warnings

from trackintel.geogr.distances import meters_to_decimal_degrees
from trackintel.preprocessing.util import applyParallel
from trackintel.geogr.distances import meters_to_decimal_degrees, check_gdf_planar
from trackintel.preprocessing.util import applyParallel, angle_centroid_multipoints


def generate_locations(
Expand Down Expand Up @@ -136,9 136,11 @@ def generate_locations(
# filter staypoints not belonging to locations
locs = locs.loc[locs["location_id"] != -1]

with warnings.catch_warnings(): # TODO: fix bug for geographic crs #437
warnings.simplefilter("ignore", category=UserWarning)
locs["center"] = locs.geometry.centroid # creates warning for geographic crs
if check_gdf_planar(locs):
locs["center"] = locs.geometry.centroid
else:
# error of wrapping e.g. mean([-180, 180]) -> own function needed
locs["center"] = angle_centroid_multipoints(locs.geometry)

# extent is the convex hull of the geometry
locs["extent"] = locs.geometry.convex_hull
Expand All @@ -154,7 156,7 @@ def generate_locations(
else:
locs.loc[pointLine_idx, "extent"] = locs.loc[pointLine_idx, "extent"].buffer(epsilon)

locs = locs.set_geometry("center")
locs = locs.set_geometry("center", crs=sp.crs)
locs = locs[["user_id", "location_id", "center", "extent"]]

# index management
Expand Down
35 changes: 35 additions & 0 deletions trackintel/preprocessing/util.py
Original file line number Diff line number Diff line change
@@ -1,6 1,11 @@
from datetime import timedelta

import geopandas as gpd
import numpy as np
import pandas as pd
import pygeos
from joblib import Parallel, delayed
from shapely.geometry.base import BaseGeometry
from tqdm import tqdm


Expand Down Expand Up @@ -103,3 108,33 @@ def _explode_agg(column, agg, orig_df, agg_df):
temp = temp[temp[column].notna()]
temp.index = temp[column]
return orig_df.join(temp[agg], how="left")


def angle_centroid_multipoints(geometry):
"""Calculate the mean of angles of MultiPoints
Parameters
----------
geometry : GeoSeries, shapely.geometry.Point, shapely.geometry.MultiPoint
Should contain only Points or MultiPoints any other lead to wrong results.
Returns
-------
geopandas.GeometryArray
Centroid of geometries (shapely.Point)
"""
g = pygeos.from_shapely(geometry)
g, index = pygeos.get_coordinates(g, return_index=True)
# number of coordinate pairs per MultiPoint
count = np.bincount(index)
x, y = g[:, 0], g[:, 1]
# calculate mean of y Coordinates -> no wrapping
y = np.bincount(index, weights=y) / count
# calculate mean of x Coordinates with wrapping
x_rad = np.deg2rad(x)
x_sin = np.bincount(index, weights=np.sin(x_rad)) / count
x_cos = np.bincount(index, weights=np.cos(x_rad)) / count
x = np.rad2deg(np.arctan2(x_sin, x_cos))
# shapely Geometry has no crs information
crs = None if isinstance(geometry, BaseGeometry) else geometry.crs
return gpd.points_from_xy(x, y, crs=crs)

0 comments on commit 88ef62e

Please sign in to comment.