Post

데이터프레임 구간 필터링

특정 컬럼이 연속적으로 0인 구간 추출하기

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def find_zero_intervals_indices(df, col, n=3):
    """Find runs of consecutive zeros in a column.

    Rows are addressed by position (0-based), not by index label, so the
    function works for any DataFrame index (filtered, sliced, string,
    datetime, ...). For a default ``RangeIndex`` positions and labels
    coincide.

    Args:
        df: DataFrame to scan.
        col: Name of the column whose values are compared against 0.
        n: Minimum number of consecutive zero rows for a run to be reported.

    Returns:
        A list of ``(start, end)`` tuples of row positions, both inclusive,
        in ascending order. Suitable for ``df.iloc[start:end + 1]``.
    """
    # Read the column positionally; ``df.loc[i, col]`` would look up the
    # *label* ``i`` and fail or mis-read on a non-default index.
    values = df[col].to_numpy()
    total = len(values)

    zero_intervals_indices = []
    current_start_index = None

    for i, value in enumerate(values):
        if value == 0:
            if current_start_index is None:
                current_start_index = i
        elif current_start_index is not None:
            # The run covers positions current_start_index .. i - 1.
            if i - current_start_index >= n:
                zero_intervals_indices.append((current_start_index, i - 1))
            current_start_index = None

    # A run that reaches the last row is never closed inside the loop.
    if current_start_index is not None and total - current_start_index >= n:
        zero_intervals_indices.append((current_start_index, total - 1))

    return zero_intervals_indices

예시

1
2
3
4
5
6
7
8
# Sample data: 'Speed' is 0 at positions 1-3 and again at positions 5-8.
# NOTE: `pd` is not imported in this snippet; presumably `import pandas as pd`
# is expected to have been run beforehand.
data = {'Time': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'Speed': [10, 0, 0, 0, 20, 0, 0, 0, 0, 30]}
df = pd.DataFrame(data)

# No `n` is passed, so the function's default minimum run length (n=3) applies.
intervals_indices = find_zero_intervals_indices(df, 'Speed')

# Each element is an inclusive (start, end) tuple.
for interval in intervals_indices:
    print(interval)

출력:
(1, 3)
(5, 8)

1
2
# `interval` still holds the last tuple produced by the preceding loop.
start, end = interval
# `end` is inclusive while a slice stop is exclusive, hence `end+1`.
df[start: end+1]

출력:

|   | Time | Speed |
|---|------|-------|
| 5 | 6    | 0     |
| 6 | 7    | 0     |
| 7 | 8    | 0     |
| 8 | 9    | 0     |

특정 컬럼이 연속적으로 증가하는 구간 추출하기

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def find_increasing_intervals_indices(df, col, n):
    """Find runs of strictly increasing values in a column.

    Rows are addressed by position (0-based), not by index label, so the
    function works for any DataFrame index. For a default ``RangeIndex``
    positions and labels coincide.

    Args:
        df: DataFrame to scan.
        col: Name of the column whose consecutive values are compared.
        n: Minimum number of rows a run must contain to be reported.

    Returns:
        A list of ``(start, end)`` tuples of row positions, both inclusive,
        in ascending order. Within each run every value is strictly greater
        than the one before it.
    """
    # Read the column positionally; ``df.loc[i, col]`` would look up the
    # *label* ``i`` and fail or mis-read on a non-default index.
    values = df[col].to_numpy()
    total = len(values)

    increasing_intervals_indices = []
    current_start_index = None

    for i in range(total - 1):
        if values[i + 1] > values[i]:
            if current_start_index is None:
                current_start_index = i
        elif current_start_index is not None:
            # Row i is the last row of the run, so the run spans
            # current_start_index .. i (inclusive).
            if i - current_start_index + 1 >= n:
                increasing_intervals_indices.append((current_start_index, i))
            current_start_index = None

    # A run that is still rising at the last row is never closed in the loop.
    if current_start_index is not None and total - current_start_index >= n:
        increasing_intervals_indices.append((current_start_index, total - 1))

    return increasing_intervals_indices

예시

1
2
3
4
5
6
7
# Sample data: values rise over positions 0-5, drop at position 6, then rise
# again through position 9.
# NOTE: `pd` is not imported in this snippet; presumably `import pandas as pd`
# is expected to have been run beforehand.
data = {'x': [1, 2, 3, 5, 7, 9, 8, 12, 15, 18]}
df = pd.DataFrame(data)

# Only runs containing at least 3 rows are reported.
intervals_indices = find_increasing_intervals_indices(df, 'x', 3)

# Each element is an inclusive (start, end) tuple.
for interval in intervals_indices:
    print(interval)

출력:
(0, 5)
(6, 9)

구간으로 데이터프레임 분할하기

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
def split_intervals(df, intervals_indices):
    """Partition ``df`` into the given intervals and the gaps between them.

    Args:
        df: DataFrame to split; rows are selected by position via ``iloc``.
        intervals_indices: Iterable of ``(start, end)`` position pairs, with
            ``end`` inclusive. The pairs are walked in the order given.

    Returns:
        A ``(interval_dfs, non_interval_dfs)`` pair of lists: the slices
        covered by each interval, and the non-empty slices before, between
        and after the intervals.
    """
    inside, outside = [], []

    # Position of the first row not yet assigned to either list.
    cursor = 0
    for start, end in intervals_indices:
        stop = end + 1  # convert the inclusive end into an exclusive slice stop
        if start > cursor:
            outside.append(df.iloc[cursor:start])
        inside.append(df.iloc[start:stop])
        cursor = stop

    # Whatever follows the last interval is a gap as well.
    if len(df) > cursor:
        outside.append(df.iloc[cursor:])

    return inside, outside

예시

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# NOTE: `pd` is not imported in this snippet; presumably `import pandas as pd`
# is expected to have been run beforehand.
data = {'x': [1, 2, 3, 5, 7, 9, 8, 12, 15, 18]}
df = pd.DataFrame(data)

# Hand-written inclusive (start, end) position pairs; rows 4-6 fall in no interval.
intervals_indices = [(0, 3), (7, 9)]
interval_dfs, non_interval_dfs = split_intervals(df, intervals_indices)

# Print every slice as a Markdown table, followed by a separator line.
print("Intervals:")
for interval_df in interval_dfs:
    print(interval_df.to_markdown())
    print("="*20)

print("Non-Intervals:")
for non_interval_df in non_interval_dfs:
    print(non_interval_df.to_markdown())
    print("="*20)

출력:
Intervals:

|    |   x |
|---:|----:|
|  0 |   1 |
|  1 |   2 |
|  2 |   3 |
|  3 |   5 |

====================

|    |   x |
|---:|----:|
|  7 |  12 |
|  8 |  15 |
|  9 |  18 |

====================
Non-Intervals:

|    |   x |
|---:|----:|
|  4 |   7 |
|  5 |   9 |
|  6 |   8 |

====================

This post is licensed under CC BY 4.0 by the author.