Here is a snippet that removes redundant points from time-series data using NumPy. I recently had to do this and had no luck finding any help via Google. Here is my solution:
def remove_redundant_points(points):
    """
    Drop points whose value matches both of their neighbours.

    The first and last points are always kept. For every change in
    value, the point immediately before the change and the point
    immediately after it are kept, so each constant run is reduced to
    its two end points. The surviving points stay in their original
    order.

    :param points: Array-like of (time, value) rows; the value is read
        from column 1. Anything ``numpy.asarray`` can turn into a 2-D
        array is accepted (e.g. a list of tuples). May be empty.
    :return: New numpy array holding only the retained rows. An empty
        input yields an empty array.
    """
    points = numpy.asarray(points)

    # Nothing to trim; without this guard the "always keep first and
    # last" indexes below (0 and -1) would be out of bounds.
    # len() is used because array truthiness is ambiguous.
    if len(points) == 0:
        return points.copy()

    values = points[:, 1]

    # Index i is a change point when values[i + 1] differs from values[i].
    changepoints = numpy.where(values[1:] != values[:-1])[0]

    # Keep both sides of every change plus the two end points;
    # numpy.unique removes duplicates and returns the indexes sorted.
    keepindexes = numpy.unique(numpy.concatenate(
        ([0, len(points) - 1], changepoints, changepoints + 1)))
    return points[keepindexes]
Examples...
>>> values = [1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 5, 6, 5, 5, 5, 5, 5, 5]
>>> pts = list(zip(range(len(values)), values))
>>> pts
[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 2), (6, 2), (7, 2), (8, 3), (9, 4),
(10, 5), (11, 6), (12, 5), (13, 5), (14, 5), (15, 5), (16, 5), (17, 5)]
>>> trimmed_pts = remove_redundant_points(numpy.array(pts))
>>> trimmed_pts.tolist()
[[0, 1], [4, 1], [5, 2], [7, 2], [8, 3], [9, 4], [10, 5], [11, 6], [12, 5], [17,
5]]
>>> pts = []
>>> t = time.time()
>>> v = 0
>>> for i in range(10000):
...     if random.random() > 0.95: v += 1
...     pts.append((t + i, v))
...
>>> pts = numpy.array(pts)
>>> pts
array([[ 1.39111413e+09, 0.00000000e+00],
[ 1.39111413e+09, 0.00000000e+00],
[ 1.39111414e+09, 0.00000000e+00],
...,
[ 1.39112413e+09, 5.00000000e+02],
[ 1.39112413e+09, 5.00000000e+02],
[ 1.39112413e+09, 5.00000000e+02]])
>>> len(pts)
10000
>>> trimmed_pts = remove_redundant_points(pts)
>>> trimmed_pts
array([[ 1.39111413e+09, 0.00000000e+00],
[ 1.39111414e+09, 0.00000000e+00],
[ 1.39111415e+09, 1.00000000e+00],
...,
[ 1.39112412e+09, 4.99000000e+02],
[ 1.39112412e+09, 5.00000000e+02],
[ 1.39112413e+09, 5.00000000e+02]])
>>> len(trimmed_pts)
968
No comments:
Post a Comment