-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathspex.py
147 lines (121 loc) · 5.25 KB
/
spex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from os.path import join, exists, isdir
from os import mkdir
import argparse
from collections import namedtuple
import pandas as pd
from haversine import haversine
from mex import mex
# Immutable record for one extracted stay point: Lat/Lon hold the mean
# coordinates of the fixes that form the stay, Arrival/Departure hold the
# "time" values of the first fix of the stay and of the fix that ended it.
stayPoint = namedtuple("StayPoint", ["Lat", "Lon", "Arrival", "Departure"])
class spex:
    """Stay Point EXtractor (SPEX).

    Parses its configuration from the command line, loads a CSV trace and
    writes one ``<userid>_stay_points.csv`` file per user into the output
    folder.

    Attributes set by ``__init__``:
        fname: path of the trace CSV file.
        act: selected action (``extract_points``, ``extract_metrics`` or
            ``extract_both``).
        out_folder: folder that receives the per-user output files.
        t: time threshold in minutes.
        r: radius threshold in meters.
        d: distance function name (``haversine`` or ``euclidean``).
    """

    def __init__(self, argv=None):
        """Parse the arguments and make sure the output folder exists.

        Args:
            argv: optional list of argument strings. ``None`` (the default)
                makes argparse read ``sys.argv[1:]``.
        """
        parsed_args = self.__parse_args(argv)
        (self.fname, self.act, self.out_folder,
         self.t, self.r, self.d) = parsed_args
        # -1 and "" are treated as "not set" and replaced by the defaults.
        self.t = self.t if self.t != -1 else 30
        self.r = self.r if self.r != -1 else 500
        self.d = self.d if self.d != "" else "haversine"
        # isdir() is False for a missing path as well as for a non-directory.
        if not isdir(self.out_folder):
            mkdir(self.out_folder)

    def __parse_args(self, argv=None):
        """Build the CLI parser and return the parsed values.

        Args:
            argv: optional list of argument strings; ``None`` means
                ``sys.argv[1:]``.

        Returns:
            Tuple ``(filename, action, output_folder, t, r, d)``.
        """
        desc = """
        SPEX: Stay Point EXtractor.
        """
        parser = argparse.ArgumentParser(description=desc)
        parser.add_argument("filename",
                            help="Trace file name")
        parser.add_argument("output_location",
                            help="Output folder location")
        parser.add_argument("--action", required=True,
                            help="Choose what action to do.",
                            choices=["extract_points",
                                     "extract_metrics",
                                     "extract_both"])
        parser.add_argument("-t", nargs="?",
                            help="Time threshold (in minutes)",
                            const=30, default=30, type=int)
        parser.add_argument("-d", nargs="?",
                            help="Distance function (haversine or euclidean)",
                            const="haversine", default="haversine",
                            choices=["haversine", "euclidean"])
        parser.add_argument("-r", nargs="?",
                            help="Radius threshold (in meters)",
                            const=500, default=500, type=int)
        parsed = parser.parse_args(argv)
        return (parsed.filename, parsed.action, parsed.output_location,
                parsed.t, parsed.r, parsed.d)

    def load_trace(self):
        """Read the trace CSV into ``self.trace``, sorted by ``time``.

        Raises:
            ValueError: if one of the columns ``userid``, ``latitude``,
                ``longitude`` or ``time`` is missing.
        """
        df = pd.read_csv(self.fname)
        for required in ["userid", "latitude", "longitude", "time"]:
            if required not in df.columns:
                # The exception has to be raised; merely constructing it
                # would let a malformed trace through.
                raise ValueError(
                    "Column {} missing from trace.".format(required))
        self.trace = df.sort_values(by="time")

    def __get_point(self, i):
        """Return ``(latitude, longitude)`` of row label ``i`` of the trace."""
        return (self.trace.loc[i, "latitude"], self.trace.loc[i, "longitude"])

    def __euclidean(self, a, b):
        """Return the planar distance between two ``(lat, lon)`` pairs.

        NOTE(review): the result is in the units of the coordinates
        (presumably degrees) while the radius is documented in meters;
        confirm this is intended.
        """
        return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) ** 0.5

    def __haversine(self, a, b):
        """Return ``haversine(a, b) * 1000``.

        Presumably converts the library's kilometer result to meters.
        """
        return haversine(a, b) * 1000

    def __dist(self, a, b):
        """Return the distance between ``a`` and ``b`` using ``self.d``.

        Raises:
            ValueError: if ``self.d`` names an unknown distance function.
        """
        if self.d == "haversine":
            return self.__haversine(a, b)
        if self.d == "euclidean":
            return self.__euclidean(a, b)
        raise ValueError(
            "Distance function {} not defined.".format(self.d))

    def get_staypoints(self, data):
        """Extract the stay points of a single time-ordered trace.

        Starting from an anchor fix ``i``, the following fixes ``j`` are
        scanned until one lies farther than ``self.r`` from the anchor.  If
        more than ``self.t`` minutes passed between ``i`` and ``j``, the fixes
        ``i..j`` form a stay point.  Either way ``j`` becomes the new anchor.

        Args:
            data: DataFrame with ``latitude``, ``longitude`` and ``time``
                columns.  Rows are addressed by position, so any index works.
                ``time`` is assumed to be numeric seconds, because it is
                compared against ``self.t * 60`` -- TODO confirm.

        Returns:
            List of ``stayPoint`` tuples; empty when ``data`` has fewer than
            two rows or no stay qualifies.
        """
        time_in_sec = self.t * 60
        latitudes = data["latitude"]
        longitudes = data["longitude"]
        # Positional arrays of the data that was passed in, not of the
        # full trace stored on the instance.
        lats = latitudes.to_numpy()
        lons = longitudes.to_numpy()
        times = data["time"].to_numpy()
        point_num = len(data)

        stay_points = []
        i = 0
        # The last fix cannot anchor a stay: nothing follows it.
        while i < point_num - 1:
            point_i = (lats[i], lons[i])
            j = i + 1
            while j < point_num:
                if self.__dist(point_i, (lats[j], lons[j])) > self.r:
                    if times[j] - times[i] > time_in_sec:
                        # iloc's end is exclusive, so this averages rows i..j.
                        sp = stayPoint(
                            Lat=latitudes.iloc[i:j + 1].mean(),
                            Lon=longitudes.iloc[i:j + 1].mean(),
                            Arrival=times[i],
                            Departure=times[j])
                        stay_points.append(sp)
                    i = j
                    break
                j += 1
            else:
                # No later fix left the radius, so no further stay can be
                # closed.
                break
        return stay_points

    def extract_by_user(self):
        """Write a ``<userid>_stay_points.csv`` file for every user.

        Requires ``load_trace`` to have been called.  Progress is printed on
        a single line that is overwritten for each user.
        """
        number_of_users = len(self.trace.userid.unique())
        progress = "\rProcessing user with id #{} ({} of {})"
        groups = self.trace.groupby("userid")
        for idx, (uname, udata) in enumerate(groups, 1):
            print(progress.format(uname, idx, number_of_users), end="")
            # Renumber the rows of this user's slice from zero.
            ud = udata.reset_index(drop=True)
            stay_points = self.get_staypoints(ud)
            out_filename = "{}_stay_points.csv".format(uname)
            out_location = join(self.out_folder, out_filename)
            with open(out_location, "w") as out:
                out.write("latitude,longitude,arrival,departure\n")
                for sp in stay_points:
                    out.write("{},{},{},{}\n".format(sp.Lat, sp.Lon,
                                                     sp.Arrival, sp.Departure))
if __name__ == "__main__":
    e = spex()
    # The action names must match the --action choices of the argument
    # parser: "extract_points", "extract_metrics" and "extract_both".
    # Stay points are extracted first so that "extract_both" runs the
    # metrics step after the per-user files have been written.
    if e.act in ("extract_points", "extract_both"):
        e.load_trace()
        e.extract_by_user()
    if e.act in ("extract_metrics", "extract_both"):
        mex(e.fname, e.out_folder, e.t, e.d, e.r)