Skip to content

Commit

Permalink
Fix(Spectrogram): alignment between ticks and spectrogram (#4011)
Browse files Browse the repository at this point in the history
  • Loading branch information
lucastheis authored Jan 26, 2025
1 parent ef522ee commit bb62616
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 35 deletions.
31 changes: 31 additions & 0 deletions cypress/e2e/spectrogram.cy.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const id = '#waveform'
const scales = ['linear', 'mel', 'log', 'bark', 'erb']

xdescribe('WaveSurfer Spectrogram plugin tests', () => {
it('should render a spectrogram', () => {
Expand Down Expand Up @@ -81,4 +82,34 @@ xdescribe('WaveSurfer Spectrogram plugin tests', () => {
})
})
})

scales.forEach(scale => {
it(`should display correct frequency labels with 1kHz tone (${scale})`, () => {
cy.visit('cypress/e2e/index.html')
cy.window().then((win) => {
return new Promise((resolve) => {
win.wavesurfer = win.WaveSurfer.create({
container: id,
height: 200,
url: '../../examples/audio/1khz.mp3',
plugins: [
win.Spectrogram.create({
height: 200,
labels: true,
scale: scale,
frequencyMin: 0,
frequencyMax: 4000,
splitChannels: false
}),
],
})

win.wavesurfer.once('ready', () => {
cy.get(id).matchImageSnapshot(`spectrogram-1khz-${scale}`)
resolve()
})
})
})
})
})
})
Binary file added examples/audio/1khz.mp3
Binary file not shown.
104 changes: 69 additions & 35 deletions src/plugins/spectrogram.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ export type SpectrogramPluginOptions = {
alpha?: number
/** Min frequency to scale spectrogram. */
frequencyMin?: number
/** Max frequency to scale spectrogram. Set this to samplerate/4 to draw whole range of spectrogram. */
/** Max frequency to scale spectrogram. Set this to samplerate/2 to draw whole range of spectrogram. */
frequencyMax?: number
/**
* Based on: https://manual.audacityteam.org/man/spectrogram_settings.html
Expand Down Expand Up @@ -384,10 +384,12 @@ class SpectrogramPlugin extends BasePlugin<SpectrogramPluginEvents, SpectrogramP
this.gainDB = options.gainDB ?? 20
this.rangeDB = options.rangeDB ?? 80
this.scale = options.scale || 'mel'
this.numMelFilters = this.fftSamples / 4
this.numLogFilters = this.fftSamples / 8
this.numBarkFilters = this.fftSamples / 8
this.numErbFilters = this.fftSamples / 8

// Other values will currently cause a misalignment between labels and the spectrogram
this.numMelFilters = this.fftSamples / 2
this.numLogFilters = this.fftSamples / 2
this.numBarkFilters = this.fftSamples / 2
this.numErbFilters = this.fftSamples / 2

this.createWrapper()
this.createCanvas()
Expand Down Expand Up @@ -504,14 +506,25 @@ class SpectrogramPlugin extends BasePlugin<SpectrogramPluginEvents, SpectrogramP
const spectrCc = this.spectrCc
const height = this.height
const width = this.getWidth()

// Maximum frequency represented in `frequenciesData`
const freqFrom = this.buffer.sampleRate / 2

// Minimum and maximum frequency we want to draw
const freqMin = this.frequencyMin
const freqMax = this.frequencyMax

if (!spectrCc) {
return
}

if (freqMax > freqFrom) {
// Draw background since spectrogram will not fill the entire canvas
const bgColor = this.colorMap[this.colorMap.length - 1]
spectrCc.fillStyle = `rgba(${bgColor[0]}, ${bgColor[1]}, ${bgColor[2]}, ${bgColor[3]})`
spectrCc.fillRect(0, 0, width, height * frequenciesData.length)
}

for (let c = 0; c < frequenciesData.length; c++) {
// for each channel
const pixels = this.resample(frequenciesData[c])
Expand All @@ -521,23 +534,36 @@ class SpectrogramPlugin extends BasePlugin<SpectrogramPluginEvents, SpectrogramP
for (let i = 0; i < pixels.length; i++) {
for (let j = 0; j < pixels[i].length; j++) {
const colorMap = this.colorMap[pixels[i][j]]
const redIndex = ((bitmapHeight / 2 - j) * width + i) * 4
const redIndex = ((bitmapHeight - j - 1) * width + i) * 4
imageData.data[redIndex] = colorMap[0] * 255
imageData.data[redIndex + 1] = colorMap[1] * 255
imageData.data[redIndex + 2] = colorMap[2] * 255
imageData.data[redIndex + 3] = colorMap[3] * 255
}
}

// scale and stack spectrograms
// The relative positions of `freqMin` and `freqMax` in `imageData`
const rMin = this.hzToScale(freqMin) / this.hzToScale(freqFrom)
const rMax = this.hzToScale(freqMax) / this.hzToScale(freqFrom)

// Only relevant if `freqMax > freqFrom`
const rMax1 = Math.min(1, rMax)

// Crop, scale and stack spectrograms
createImageBitmap(
imageData,
0,
Math.round(bitmapHeight - bitmapHeight * (freqMax / freqFrom)) / 2,
Math.round(bitmapHeight * (1 - rMax1)),
width,
Math.round(bitmapHeight * ((freqMax - freqMin) / freqFrom)),
Math.round(bitmapHeight * (rMax1 - rMin)),
).then((bitmap) => {
spectrCc.drawImage(bitmap, 0, height * c, width, height * 2)
spectrCc.drawImage(
bitmap,
0,
height * (c + 1 - rMax1 / rMax),
width,
height * rMax1 / rMax,
)
})
}

Expand Down Expand Up @@ -637,6 +663,34 @@ class SpectrogramPlugin extends BasePlugin<SpectrogramPluginEvents, SpectrogramP
return this.createFilterBank(numErbFilters, sampleRate, this.hzToErb, this.erbToHz)
}

private hzToScale(hz: number) {
switch (this.scale) {
case 'mel':
return this.hzToMel(hz)
case 'logarithmic':
return this.hzToLog(hz)
case 'bark':
return this.hzToBark(hz)
case 'erb':
return this.hzToErb(hz)
}
return hz
}

private scaleToHz(scale: number) {
switch (this.scale) {
case 'mel':
return this.melToHz(scale)
case 'logarithmic':
return this.logToHz(scale)
case 'bark':
return this.barkToHz(scale)
case 'erb':
return this.erbToHz(scale)
}
return scale
}

private applyFilterBank(fftPoints: Float32Array, filterBank: number[][]): Float32Array {
const numFilters = filterBank.length
const logSpectrum = Float32Array.from({ length: numFilters }, () => 0)
Expand All @@ -656,7 +710,7 @@ class SpectrogramPlugin extends BasePlugin<SpectrogramPluginEvents, SpectrogramP
const fftSamples = this.fftSamples
const channels = this.options.splitChannels ?? this.wavesurfer?.options.splitChannels ? buffer.numberOfChannels : 1

this.frequencyMax = this.frequencyMax || buffer.sampleRate / 4
this.frequencyMax = this.frequencyMax || buffer.sampleRate / 2

if (!buffer) return

Expand Down Expand Up @@ -738,12 +792,10 @@ class SpectrogramPlugin extends BasePlugin<SpectrogramPluginEvents, SpectrogramP
private getLabelFrequency(
index: number,
labelIndex: number,
hzToScale: (hz: number) => number,
scaleToHz: (scale: number) => number,
) {
const scaleMin = hzToScale(0)
const scaleMax = hzToScale(this.frequencyMax)
return scaleToHz(scaleMin + (index / labelIndex) * (scaleMax - scaleMin))
const scaleMin = this.hzToScale(this.frequencyMin)
const scaleMax = this.hzToScale(this.frequencyMax)
return this.scaleToHz(scaleMin + (index / labelIndex) * (scaleMax - scaleMin))
}

private loadLabels(
Expand Down Expand Up @@ -796,25 +848,7 @@ class SpectrogramPlugin extends BasePlugin<SpectrogramPluginEvents, SpectrogramP
ctx.textAlign = textAlign
ctx.textBaseline = 'middle'

let freq
switch (this.scale) {
case 'mel':
freq = this.getLabelFrequency(i, labelIndex, this.hzToMel, this.melToHz)
break
case 'logarithmic':
freq = this.getLabelFrequency(i, labelIndex, this.hzToLog, this.logToHz)
break
case 'bark':
freq = this.getLabelFrequency(i, labelIndex, this.hzToBark, this.barkToHz)
break
case 'erb':
freq = this.getLabelFrequency(i, labelIndex, this.hzToErb, this.erbToHz)
break
default:
freq = freqStart + step * i
break
}

const freq = this.getLabelFrequency(i, labelIndex)
const label = this.freqType(freq)
const units = this.unitType(freq)
const x = 16
Expand Down

0 comments on commit bb62616

Please sign in to comment.