-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_sample_data.py
More file actions
124 lines (105 loc) · 4.3 KB
/
generate_sample_data.py
File metadata and controls
124 lines (105 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
Generate sample training data for slip detection
Simulates piezoelectric sensor readings with slip and no-slip patterns
"""
import numpy as np
import pandas as pd
import os
# Per-feature (mean, std) of the base signal for each class. The position in
# the tuple is the feature index documented in ``generate_slip_data``.
_NO_SLIP_PROFILE = (
    (0.3, 0.1),    # Peak amplitude (low)
    (0.2, 0.05),   # RMS (low)
    (0.15, 0.03),  # Frequency energy (low)
    (0.1, 0.02),   # Variance (low)
    (0.25, 0.05),  # Zero crossing rate (moderate)
    (0.2, 0.05),   # Signal range (low)
    (0.15, 0.03),  # MAV (low)
    (0.5, 0.1),    # Slope/envelope change (stable)
)
_SLIP_PROFILE = (
    (0.8, 0.15),   # Peak amplitude (high)
    (0.6, 0.1),    # RMS (high)
    (0.5, 0.1),    # Frequency energy (high)
    (0.4, 0.1),    # Variance (high)
    (0.6, 0.1),    # Zero crossing rate (high)
    (0.7, 0.15),   # Signal range (high)
    (0.5, 0.1),    # MAV (high)
    (0.7, 0.15),   # Slope/envelope change (rapid)
)


def _draw_samples(rng, profile, count, noise_level):
    """Draw ``count`` clipped feature vectors for one class from ``profile``."""
    rows = []
    for _ in range(count):
        # Scalar draws first, then one noise vector: this is the draw order
        # the generator has always used, so a given seed keeps producing the
        # same dataset.
        base = np.array([rng.normal(mean, std) for mean, std in profile])
        noisy = base + rng.normal(0, noise_level, len(profile))
        # Clip (not rescale) so every feature stays inside [0, 1].
        rows.append(np.clip(noisy, 0, 1))
    return rows


def generate_slip_data(n_samples=1000, n_features=8, noise_level=0.1, random_state=42):
    """
    Generate synthetic slip detection data.

    Features represent:
    - Feature 0: Peak amplitude
    - Feature 1: RMS value
    - Feature 2: Frequency domain energy
    - Feature 3: Signal variance
    - Feature 4: Zero crossing rate
    - Feature 5: Signal range (max - min)
    - Feature 6: Mean Absolute Value (MAV)
    - Feature 7: Short-window slope / envelope change

    Slip events (label 1) are drawn with higher means and wider spreads
    than no-slip events (label 0) on every feature.

    Args:
        n_samples: Total number of rows. Half (rounded down) are no-slip,
            the remainder are slip.
        n_features: How many of the features above to emit, taken in
            order starting at feature 0. Must be between 1 and 8.
        noise_level: Standard deviation of the extra zero-mean Gaussian
            noise added to every feature before clipping.
        random_state: Seed for both the value generation and the final
            shuffle; the same seed yields the same DataFrame.

    Returns:
        A shuffled ``pandas.DataFrame`` with columns ``feature_0`` ..
        ``feature_{n_features - 1}`` (values clipped to [0, 1]) and an
        integer ``label`` column.

    Raises:
        ValueError: If ``n_features`` is outside the supported range.
    """
    max_features = len(_NO_SLIP_PROFILE)
    if not 1 <= n_features <= max_features:
        raise ValueError(
            f"n_features must be between 1 and {max_features}, got {n_features}"
        )

    # A private RandomState yields the same stream as np.random.seed() did,
    # without reseeding the process-wide generator behind the caller's back.
    rng = np.random.RandomState(random_state)

    n_no_slip = n_samples // 2
    n_slip = n_samples - n_no_slip

    # No-slip rows (class 0) are drawn before slip rows (class 1).
    data = _draw_samples(rng, _NO_SLIP_PROFILE[:n_features], n_no_slip, noise_level)
    data += _draw_samples(rng, _SLIP_PROFILE[:n_features], n_slip, noise_level)
    labels = [0] * n_no_slip + [1] * n_slip

    feature_names = [f'feature_{i}' for i in range(n_features)]
    df = pd.DataFrame(data, columns=feature_names)
    df['label'] = labels

    # Shuffle so the two classes are interleaved.
    return df.sample(frac=1, random_state=random_state).reset_index(drop=True)
def main(argv=None):
    """Generate sample data from command-line options and save it as CSV.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``; ``None`` (the default) reads the real
            command line.

    Writes the generated DataFrame to the ``--output`` path, creating the
    parent directory if one is given, then prints summary statistics and
    the class distribution.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Generate sample training data')
    parser.add_argument('--n-samples', type=int, default=1000,
                        help='Number of samples to generate')
    parser.add_argument('--n-features', type=int, default=8,
                        help='Number of features (1-8)')
    parser.add_argument('--noise', type=float, default=0.1,
                        help='Noise level')
    parser.add_argument('--output', type=str, default='data/training_data.csv',
                        help='Output CSV path')
    args = parser.parse_args(argv)

    # Create the data directory if needed. A bare filename has an empty
    # dirname, and os.makedirs('') raises FileNotFoundError, so skip it then.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Generate data
    print(f"Generating {args.n_samples} samples with {args.n_features} features...")
    df = generate_slip_data(
        n_samples=args.n_samples,
        n_features=args.n_features,
        noise_level=args.noise,
    )

    # Save
    df.to_csv(args.output, index=False)
    print(f"\n✓ Sample data saved to {args.output}")
    print("\nData statistics:")
    print(df.describe())
    print("\nClass distribution:")
    print(df['label'].value_counts())


if __name__ == '__main__':
    main()