# Source: GripGuard/generate_sample_data.py (ryanrahman27/GripGuard on GitHub, branch main)
"""
Generate sample training data for slip detection
Simulates piezoelectric sensor readings with slip and no-slip patterns
"""

import numpy as np
import pandas as pd
import os

# (mean, std) of the Gaussian each feature is drawn from, in feature-index
# order, for no-slip samples (class 0): lower amplitude, stable signal.
_NO_SLIP_PROFILE = (
    (0.3, 0.1),      # Peak amplitude (low)
    (0.2, 0.05),     # RMS (low)
    (0.15, 0.03),    # Frequency energy (low)
    (0.1, 0.02),     # Variance (low)
    (0.25, 0.05),    # Zero crossing rate (moderate)
    (0.2, 0.05),     # Signal range (low)
    (0.15, 0.03),    # MAV (low)
    (0.5, 0.1),      # Slope/envelope change (stable)
)

# Same layout for slip samples (class 1): higher amplitude, more variance.
_SLIP_PROFILE = (
    (0.8, 0.15),     # Peak amplitude (high)
    (0.6, 0.1),      # RMS (high)
    (0.5, 0.1),      # Frequency energy (high)
    (0.4, 0.1),      # Variance (high)
    (0.6, 0.1),      # Zero crossing rate (high)
    (0.7, 0.15),     # Signal range (high)
    (0.5, 0.1),      # MAV (high)
    (0.7, 0.15),     # Slope/envelope change (rapid)
)


def _draw_class_samples(rng, profile, count, noise_level):
    """Draw `count` clipped feature vectors from one class profile."""
    n_features = len(profile)
    rows = []
    for _ in range(count):
        # Scalar draws first, then the noise vector: this keeps the order of
        # random draws the same as one sample at a time, feature by feature.
        base = np.array([rng.normal(mean, std) for mean, std in profile])
        noisy = base + rng.normal(0, noise_level, n_features)
        rows.append(np.clip(noisy, 0, 1))  # Keep every feature in [0, 1]
    return rows


def generate_slip_data(n_samples=1000, n_features=8, noise_level=0.1, random_state=42):
    """
    Generate synthetic slip detection data

    Features represent:
    - Feature 0: Peak amplitude
    - Feature 1: RMS value
    - Feature 2: Frequency domain energy
    - Feature 3: Signal variance
    - Feature 4: Zero crossing rate
    - Feature 5: Signal range (max - min)
    - Feature 6: Mean Absolute Value (MAV)
    - Feature 7: Short-window slope / envelope change

    Slip events have:
    - Higher peak amplitudes
    - Higher RMS values
    - Different frequency characteristics
    - Higher variance
    - Larger signal range
    - Higher MAV
    - More rapid envelope changes

    Args:
        n_samples: Total number of rows. The first half (rounded down) is
            no-slip (label 0); the remainder is slip (label 1).
        n_features: Number of feature columns, from 1 to 8. Fewer than 8
            keeps the first `n_features` features listed above.
        noise_level: Standard deviation of the extra Gaussian noise added
            to every feature before clipping to [0, 1].
        random_state: Seed for both the sample generation and the final
            shuffle; the same seed reproduces the same DataFrame.

    Returns:
        A shuffled pandas DataFrame with columns `feature_0` ..
        `feature_{n_features - 1}` plus an integer `label` column.

    Raises:
        ValueError: If `n_features` is outside 1..8.
    """
    max_features = len(_NO_SLIP_PROFILE)
    if not 1 <= n_features <= max_features:
        raise ValueError(
            f"n_features must be between 1 and {max_features}, got {n_features}"
        )

    # A private legacy RandomState yields the same stream as
    # np.random.seed(random_state) would, without reseeding the
    # process-wide NumPy generator as a side effect.
    rng = np.random.RandomState(random_state)

    n_no_slip = n_samples // 2
    n_slip = n_samples - n_no_slip

    # No-slip rows are generated before slip rows; the shuffle below mixes them.
    data = _draw_class_samples(rng, _NO_SLIP_PROFILE[:n_features], n_no_slip, noise_level)
    data += _draw_class_samples(rng, _SLIP_PROFILE[:n_features], n_slip, noise_level)
    labels = [0] * n_no_slip + [1] * n_slip

    # Create DataFrame
    feature_names = [f'feature_{i}' for i in range(n_features)]
    df = pd.DataFrame(data, columns=feature_names)
    df['label'] = labels

    # Shuffle
    df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    return df


def main():
    """Generate sample data from command-line options and save it as CSV.

    Parses `--n-samples`, `--n-features`, `--noise`, `--seed` and `--output`,
    creates the output directory when the path has one, writes the generated
    DataFrame to CSV without the index, then prints summary statistics and
    the class distribution.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Generate sample training data')
    parser.add_argument('--n-samples', type=int, default=1000,
                       help='Number of samples to generate')
    parser.add_argument('--n-features', type=int, default=8,
                       help='Number of features')
    parser.add_argument('--noise', type=float, default=0.1,
                       help='Noise level')
    parser.add_argument('--seed', type=int, default=42,
                       help='Random seed for reproducible output')
    parser.add_argument('--output', type=str, default='data/training_data.csv',
                       help='Output CSV path')

    args = parser.parse_args()

    # Create data directory if needed. A bare filename such as "out.csv" has
    # an empty dirname, and os.makedirs('') raises FileNotFoundError, so the
    # call is skipped when there is no directory component.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Generate data
    print(f"Generating {args.n_samples} samples with {args.n_features} features...")
    df = generate_slip_data(
        n_samples=args.n_samples,
        n_features=args.n_features,
        noise_level=args.noise,
        random_state=args.seed,
    )

    # Save
    df.to_csv(args.output, index=False)
    print(f"\n✓ Sample data saved to {args.output}")
    print("\nData statistics:")
    print(df.describe())
    print("\nClass distribution:")
    print(df['label'].value_counts())


# Run the generator only when this file is executed as a script, so that
# importing the module does not parse arguments or write any files.
if __name__ == '__main__':
    main()