Here is a snippet that removes redundant points from time-series data using NumPy. I recently had to do this and had no luck finding any help via Google. Here is my solution:
import numpy


def remove_redundant_points(points):
    """
    Return the points with redundant samples removed.

    A point is redundant when its value equals the value of both of its
    neighbours. The first and last points are always kept, and so are the
    points immediately before and after every change in value, so the
    trimmed series still describes the same step-wise signal.

    :param points: Array of points (time, value); anything
        ``numpy.asarray`` can turn into a 2-D array with the value in
        column 1 is accepted (e.g. a list of ``(time, value)`` tuples).
    :return: New array holding only the first point, the last point and
        the points on either side of each value change. Empty input
        yields an empty array.

    >>> remove_redundant_points([(0, 1), (1, 1), (2, 1), (3, 2), (4, 2)]).tolist()
    [[0, 1], [2, 1], [3, 2], [4, 2]]
    """
    points = numpy.asarray(points)

    # With no rows there is no first/last point to keep; indexing with
    # [0, len(points) - 1] would turn into [0, -1] and raise IndexError.
    if points.size == 0:
        return points.copy()

    values = points[:, 1]

    # Index i is listed when values[i + 1] differs from values[i], i.e. it
    # is the last sample *before* a change.
    changepoints = numpy.flatnonzero(values[1:] != values[:-1])

    # Keep both sides of every change plus the two end points; unique()
    # both de-duplicates and sorts so the original ordering is preserved.
    keepindexes = numpy.unique(numpy.concatenate(
        ([0, len(points) - 1], changepoints, changepoints + 1)))
    return points[keepindexes]

# Examples...
>>> values = [1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 5, 6, 5, 5, 5, 5, 5, 5] >>> pts = zip(range(len(values)), values) >>> pts [(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 2), (6, 2), (7, 2), (8, 3), (9, 4), (10, 5), (11, 6), (12, 5), (13, 5), (14, 5), (15, 5), (16, 5), (17, 5)] >>> trimmed_pts = remove_redundant_points(numpy.array(pts)) >>> trimmed_pts.tolist() [[0, 1], [4, 1], [5, 2], [7, 2], [8, 3], [9, 4], [10, 5], [11, 6], [12, 5], [17, 5]] >>> pts = [] >>> v = 0 >>> for i in range(10000): ... if random.random() > 0.95: v += 1 ... pts.append((t + i, v)) ... >>> pts = numpy.array(pts) >>> pts array([[ 1.39111413e+09, 0.00000000e+00], [ 1.39111413e+09, 0.00000000e+00], [ 1.39111414e+09, 0.00000000e+00], ..., [ 1.39112413e+09, 5.00000000e+02], [ 1.39112413e+09, 5.00000000e+02], [ 1.39112413e+09, 5.00000000e+02]]) >>> len(pts) 10000 >>> trimmed_pts = remove_redundant_points(pts) >>> trimmed_pts array([[ 1.39111413e+09, 0.00000000e+00], [ 1.39111414e+09, 0.00000000e+00], [ 1.39111415e+09, 1.00000000e+00], ..., [ 1.39112412e+09, 4.99000000e+02], [ 1.39112412e+09, 5.00000000e+02], [ 1.39112413e+09, 5.00000000e+02]]) >>> len(trimmed_pts) 968
No comments:
Post a Comment