Commit f3444c7: Rebase updates

Signed-off-by: Ryan Nett <[email protected]>
Parent: 4e894cc

28 files changed: +7634 lines, -6284 lines

tensorflow-core-kotlin/pom.xml (2 additions, 2 deletions)

@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>tensorflow-java</artifactId>
-    <version>0.3.0-SNAPSHOT</version>
+    <version>0.4.0-SNAPSHOT</version>
   </parent>
   <artifactId>tensorflow-core-kotlin</artifactId>
   <packaging>pom</packaging>
@@ -44,7 +44,7 @@
   </dependencies>

   <properties>
-    <kotlin.version>1.4.21</kotlin.version>
+    <kotlin.version>1.4.31</kotlin.version>
     <kotlin.jvmTarget>1.8</kotlin.jvmTarget>
   </properties>


tensorflow-core-kotlin/tensorflow-core-kotlin-api/pom.xml (12 additions, 6 deletions)

@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>tensorflow-core-kotlin</artifactId>
-    <version>0.3.0-SNAPSHOT</version>
+    <version>0.4.0-SNAPSHOT</version>
   </parent>
   <artifactId>tensorflow-core-kotlin-api</artifactId>
   <packaging>jar</packaging>
@@ -154,24 +154,30 @@
         <artifactId>maven-antrun-plugin</artifactId>
         <version>1.8</version>
         <executions>
-          <!--
-          Disabled because of https://github.com/pinterest/ktlint/issues/1039
           <execution>
             <id>ktlint-format-generated</id>
             <phase>process-sources</phase>
             <configuration>
               <target name="ktlint">
-                <java taskname="ktlint" dir="${basedir}" fork="true" failonerror="true"
+                <java taskname="ktlint" dir="${basedir}" fork="true" failonerror="false"
                     classpathref="maven.plugin.classpath" classname="com.pinterest.ktlint.Main">
                   <arg value="-F"/>
+                  <arg value="--verbose"/>
+                  <arg value="src/gen/**/*.kt"/>
+                </java>
+                <!--Run twice because of https://github.com/pinterest/ktlint/issues/932-->
+                <java taskname="ktlint2" dir="${basedir}" fork="true" failonerror="true"
+                    classpathref="maven.plugin.classpath" classname="com.pinterest.ktlint.Main">
+                  <arg value="-F"/>
+                  <arg value="--verbose"/>
                   <arg value="src/gen/**/*.kt"/>
                 </java>
               </target>
             </configuration>
             <goals>
               <goal>run</goal>
             </goals>
-          </execution>-->
+          </execution>
           <execution>
             <id>ktlint-format</id>
             <configuration>
@@ -211,7 +217,7 @@
         <dependency>
           <groupId>com.pinterest</groupId>
           <artifactId>ktlint</artifactId>
-          <version>0.39.0</version>
+          <version>0.41.0</version>
         </dependency>
       </dependencies>
     </plugin>

tensorflow-core-kotlin/tensorflow-core-kotlin-api/src/gen/annotations/org/tensorflow/op/kotlin/AudioOps.kt (35 additions, 32 deletions; formatting changes to the generated code plus explicit kotlin.* imports)

@@ -26,6 +26,9 @@ import org.tensorflow.op.audio.Mfcc
 import org.tensorflow.types.TFloat32
 import org.tensorflow.types.TInt32
 import org.tensorflow.types.TString
+import kotlin.Boolean
+import kotlin.Float
+import kotlin.Long

 /**
  * An API for building `audio` operations as [Op][org.tensorflow.op.Op]s
@@ -47,33 +50,33 @@ public class AudioOps(

     /**
      * Produces a visualization of audio data over time.
-     *
+     *
      * Spectrograms are a standard way of representing audio information as a series of
      * slices of frequency information, one slice for each window of time. By joining
      * these together into a sequence, they form a distinctive fingerprint of the sound
      * over time.
-     *
+     *
      * This op expects to receive audio data as an input, stored as floats in the range
      * -1 to 1, together with a window width in samples, and a stride specifying how
      * far to move the window between slices. From this it generates a three
      * dimensional output. The first dimension is for the channels in the input, so a
      * stereo audio input would have two here for example. The second dimension is time,
      * with successive frequency slices. The third dimension has an amplitude value for
      * each frequency during that time slice.
-     *
+     *
      * This means the layout when converted and saved as an image is rotated 90 degrees
      * clockwise from a typical spectrogram. Time is descending down the Y axis, and
      * the frequency decreases from left to right.
-     *
+     *
      * Each value in the result represents the square root of the sum of the real and
      * imaginary parts of an FFT on the current window of samples. In this way, the
      * lowest dimension represents the power of each frequency in the current window,
      * and adjacent windows are concatenated in the next dimension.
-     *
+     *
      * To get a more intuitive and visual look at what this operation does, you can run
      * tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
      * resulting spectrogram as a PNG image.
-     *
+     *
      * @param input Float representation of audio data.
      * @param windowSize How wide the input window is in samples. For the highest efficiency
      * this should be a power of two, but other values are accepted.
@@ -89,33 +92,33 @@ public class AudioOps(
         windowSize: Long,
         stride: Long,
         magnitudeSquared: Boolean? = null
-    ): AudioSpectrogram = java.audioSpectrogram(
+    ): AudioSpectrogram = java.audioSpectrogram(
         input,
         windowSize,
         stride,
         *listOfNotNull(
-            magnitudeSquared?.let{ org.tensorflow.op.audio.AudioSpectrogram.magnitudeSquared(it) }
+            magnitudeSquared?.let { org.tensorflow.op.audio.AudioSpectrogram.magnitudeSquared(it) }
         ).toTypedArray()
-    )
+    )

     /**
      * Decode a 16-bit PCM WAV file to a float tensor.
-     *
+     *
      * The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float.
-     *
+     *
      * When desired_channels is set, if the input contains fewer channels than this
      * then the last channel will be duplicated to give the requested number, else if
      * the input has more channels than requested then the additional channels will be
      * ignored.
-     *
+     *
      * If desired_samples is set, then the audio will be cropped or padded with zeroes
      * to the requested length.
-     *
+     *
      * The first output contains a Tensor with the content of the audio samples. The
      * lowest dimension will be the number of channels, and the second will be the
      * number of samples. For example, a ten-sample-long stereo WAV file should give an
      * output shape of &#91;10, 2].
-     *
+     *
      * @param contents The WAV-encoded audio, usually from a file.
      * @param options carries optional attributes values
      * @return a new instance of DecodeWav
@@ -127,47 +130,47 @@ public class AudioOps(
         contents: Operand<TString>,
         desiredChannels: Long? = null,
         desiredSamples: Long? = null
-    ): DecodeWav = java.decodeWav(
+    ): DecodeWav = java.decodeWav(
         contents,
         *listOfNotNull(
-            desiredChannels?.let{ org.tensorflow.op.audio.DecodeWav.desiredChannels(it) },
-            desiredSamples?.let{ org.tensorflow.op.audio.DecodeWav.desiredSamples(it) }
+            desiredChannels?.let { org.tensorflow.op.audio.DecodeWav.desiredChannels(it) },
+            desiredSamples?.let { org.tensorflow.op.audio.DecodeWav.desiredSamples(it) }
         ).toTypedArray()
-    )
+    )

     /**
      * Encode audio data using the WAV file format.
-     *
+     *
      * This operation will generate a string suitable to be saved out to create a .wav
      * audio file. It will be encoded in the 16-bit PCM format. It takes in float
      * values in the range -1.0f to 1.0f, and any outside that value will be clamped to
      * that range.
-     *
+     *
      * `audio` is a 2-D float Tensor of shape `&#91;length, channels]`.
      * `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
-     *
+     *
      * @param audio 2-D with shape `&#91;length, channels]`.
      * @param sampleRate Scalar containing the sample frequency.
      * @return a new instance of EncodeWav
      * @see org.tensorflow.op.AudioOps.encodeWav
      */
     public fun encodeWav(audio: Operand<TFloat32>, sampleRate: Operand<TInt32>): EncodeWav =
-        java.encodeWav(
-            audio,
-            sampleRate
+        java.encodeWav(
+            audio,
+            sampleRate
         )

     /**
      * Transforms a spectrogram into a form that's useful for speech recognition.
-     *
+     *
      * Mel Frequency Cepstral Coefficients are a way of representing audio data that's
      * been effective as an input feature for machine learning. They are created by
      * taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
      * higher frequencies that are less significant to the human ear. They have a long
      * history in the speech recognition world, and
      * https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
      * is a good resource to learn more.
-     *
+     *
      * @param spectrogram Typically produced by the Spectrogram op, with magnitude_squared
      * set to true.
      * @param sampleRate How many samples per second the source audio used.
@@ -188,14 +191,14 @@ public class AudioOps(
         lowerFrequencyLimit: Float? = null,
         filterbankChannelCount: Long? = null,
         dctCoefficientCount: Long? = null
-    ): Mfcc = java.mfcc(
+    ): Mfcc = java.mfcc(
         spectrogram,
         sampleRate,
         *listOfNotNull(
-            upperFrequencyLimit?.let{ org.tensorflow.op.audio.Mfcc.upperFrequencyLimit(it) },
-            lowerFrequencyLimit?.let{ org.tensorflow.op.audio.Mfcc.lowerFrequencyLimit(it) },
-            filterbankChannelCount?.let{ org.tensorflow.op.audio.Mfcc.filterbankChannelCount(it) },
-            dctCoefficientCount?.let{ org.tensorflow.op.audio.Mfcc.dctCoefficientCount(it) }
+            upperFrequencyLimit?.let { org.tensorflow.op.audio.Mfcc.upperFrequencyLimit(it) },
+            lowerFrequencyLimit?.let { org.tensorflow.op.audio.Mfcc.lowerFrequencyLimit(it) },
+            filterbankChannelCount?.let { org.tensorflow.op.audio.Mfcc.filterbankChannelCount(it) },
+            dctCoefficientCount?.let { org.tensorflow.op.audio.Mfcc.dctCoefficientCount(it) }
         ).toTypedArray()
-    )
+    )
 }
