Modulating an oscillator by gain, pitch, and offset

up vote
1
down vote

favorite

I've an Audio Host application that sends block of samples to a DLL I'm building. It sends 44100 samples per seconds (i.e. Sample Rate 44100hz), per block, processing 16 distinct Voices.

It processes an Oscillator, which plays a basic sine wave, modulated at Audio Rate by Gain, Pitch and Offset.

Here's the C++ code I have (I'm on MSVC, Release/x86 Configuration with /02 (/Ot) optimized settings):

#include <iostream>

#include <cstring>

#include <cmath>

#include <chrono>



const int voiceSize = 16;

const int bufferSize = 256;

const double pi = 3.141592653589793238;

const double twopi = 2 * pi;

double sampleRate = 44100.0;

double noteFrequency = 130.81278;

double hostPitch = 1.0;



#define BOUNDED(x,lo,hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))



class Param

{

public:

    int mControlRate = 1;

    double mValue = 0.5;

    double mProcessedValues[voiceSize][bufferSize];

    double *pModValues;



    Param(double min, double max) {

        mMin = min;

        mMax = max;

        mRange = max - min;

    }



    inline double GetMin() { return mMin; }

    inline double GetRange() { return mRange; }

    inline double GetProcessedVoiceValue(int voiceIndex, int sampleIndex) { return mProcessedVoicesValues[voiceIndex][sampleIndex]; }



    inline void AddModulation(int voiceIndex, int blockSize) {

        double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

        double *pProcessedValues = mProcessedValues[voiceIndex];



        int i = 0;

        for (int sampleIndex = 0; sampleIndex < blockSize; sampleIndex += mControlRate, i++) {

            pProcessedValues[i] = BOUNDED(mValue + pModVoiceValues[sampleIndex], 0.0, 1.0);

        }

    }



private:

    double mMin, mMax, mRange;

    double mProcessedVoicesValues[voiceSize][bufferSize];

};

class Oscillator

{

public:

    double mRadiansPerSample = twopi / sampleRate;

    double ln2per12 = std::log(2.0) / 12.0;

    double mPhase[voiceSize];



    Param *pGain, *pOffset, *pPitch;



    Oscillator() { 

        pGain = new Param(0.0, 1.0);

        pOffset = new Param(-900.0, 900.0);

        pPitch = new Param(-48.0, 48.0);



        // reset osc phase (start at 0.0)

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            Reset(voiceIndex);

        }

    }

    ~Oscillator() {

        delete pGain;

        delete pOffset;

        delete pPitch;

    }



    void Reset(int voiceIndex) {

        mPhase[voiceIndex] = 0.0;

    }

    void ProcessVoiceBlock(int voiceIndex, int blockSize, double noteFrequency, double *left, double *right) {

        // local copy

        double phase = mPhase[voiceIndex];

        double offsetMin = pOffset->GetMin();

        double offsetRange = pOffset->GetRange();

        double pitchMin = pPitch->GetMin();

        double pitchRange = pPitch->GetRange();



        // precomputed data

        double bp0 = noteFrequency * hostPitch;



        // process block values

        for (int sampleIndex = 0; sampleIndex < blockSize; sampleIndex++) {

            double value = (sin(phase)) * pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



            *left++ += value;

            *right++ += value;



            // next phase

            phase += BOUNDED(mRadiansPerSample * (bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex, sampleIndex), pitchMin, pitchRange) + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex, sampleIndex), offsetMin, offsetRange)), 0, pi);

            while (phase >= twopi) { phase -= twopi; }

        }



        // revert local copy

        mPhase[voiceIndex] = phase;

    }

    inline double GetOffsetWarped(double normalizedValue, double min, double range) { return min + normalizedValue * range; }

    inline double GetPitchWarped(double normalizedValue, double min, double range) { return exp((min + normalizedValue * range) * ln2per12); }

};



class MyPlugin

{

public:

    double gainModValues[voiceSize][bufferSize];

    double offsetModValues[voiceSize][bufferSize];

    double pitchModValues[voiceSize][bufferSize];

    Oscillator oscillator;



    MyPlugin() {

        // link mod arrays to params

        oscillator.pGain->pModValues = gainModValues[0];

        oscillator.pOffset->pModValues = offsetModValues[0];

        oscillator.pPitch->pModValues = pitchModValues[0];



        // some fancy data for mod

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            for (int sampleIndex = 0; sampleIndex < bufferSize; sampleIndex++) {

                gainModValues[voiceIndex][sampleIndex] = sampleIndex / (double)bufferSize;

            }

        }

    }



    void ProcessDoubleReplace(int blockSize, double *bufferLeft, double *bufferRight) {

        // init buffer

        memset(bufferLeft, 0, blockSize * sizeof(double));

        memset(bufferRight, 0, blockSize * sizeof(double));



        // voices

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            // envelopes - here's where mod values will change, at audio rate



            // add mod to params

            oscillator.pGain->AddModulation(voiceIndex, blockSize);

            oscillator.pOffset->AddModulation(voiceIndex, blockSize);

            oscillator.pPitch->AddModulation(voiceIndex, blockSize);



            // osc buffer

            oscillator.ProcessVoiceBlock(voiceIndex, blockSize, noteFrequency, bufferLeft, bufferRight);

        }

    }

};



int main(int argc, const char *argv) {

    double bufferLeft[bufferSize];

    double bufferRight[bufferSize];



    MyPlugin myPlugin;



    // audio host call

    int numProcessing = 1024 * 50;

    int counterProcessing = 0;

    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

    while (counterProcessing++ < numProcessing) {

        int blockSize = 256;

        myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);



        // do somethings with buffer



    }

    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

}

Considering I'm running 16 Voices contemporaneously, it takes an huge amount of CPU for a simple Gain/Pitch/Offset modulation.

I hope I can do it better. Any tips/suggestions? Vectorizing?

Note: I could use std::clamp and C++17, but it doesn't matter a lot here: nothing really change with that expedient.

edited 2 days ago

asked 2 days ago

markzzz

1285

1

I changed the title so that it describes what the code does per site goals: "State what your code does in your title, not your main concerns about it.". Please check that I haven't misrepresented your code, and correct it if I have.
– Toby Speight
2 days ago

add a comment |

up vote
1
down vote

favorite

I've an Audio Host application that sends block of samples to a DLL I'm building. It sends 44100 samples per seconds (i.e. Sample Rate 44100hz), per block, processing 16 distinct Voices.

It processes an Oscillator, which plays a basic sine wave, modulated at Audio Rate by Gain, Pitch and Offset.

Here's the C++ code I have (I'm on MSVC, Release/x86 Configuration with /02 (/Ot) optimized settings):

#include <iostream>

#include <cstring>

#include <cmath>

#include <chrono>



const int voiceSize = 16;

const int bufferSize = 256;

const double pi = 3.141592653589793238;

const double twopi = 2 * pi;

double sampleRate = 44100.0;

double noteFrequency = 130.81278;

double hostPitch = 1.0;



#define BOUNDED(x,lo,hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))



class Param

{

public:

    int mControlRate = 1;

    double mValue = 0.5;

    double mProcessedValues[voiceSize][bufferSize];

    double *pModValues;



    Param(double min, double max) {

        mMin = min;

        mMax = max;

        mRange = max - min;

    }



    inline double GetMin() { return mMin; }

    inline double GetRange() { return mRange; }

    inline double GetProcessedVoiceValue(int voiceIndex, int sampleIndex) { return mProcessedVoicesValues[voiceIndex][sampleIndex]; }



    inline void AddModulation(int voiceIndex, int blockSize) {

        double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

        double *pProcessedValues = mProcessedValues[voiceIndex];



        int i = 0;

        for (int sampleIndex = 0; sampleIndex < blockSize; sampleIndex += mControlRate, i++) {

            pProcessedValues[i] = BOUNDED(mValue + pModVoiceValues[sampleIndex], 0.0, 1.0);

        }

    }



private:

    double mMin, mMax, mRange;

    double mProcessedVoicesValues[voiceSize][bufferSize];

};

class Oscillator

{

public:

    double mRadiansPerSample = twopi / sampleRate;

    double ln2per12 = std::log(2.0) / 12.0;

    double mPhase[voiceSize];



    Param *pGain, *pOffset, *pPitch;



    Oscillator() { 

        pGain = new Param(0.0, 1.0);

        pOffset = new Param(-900.0, 900.0);

        pPitch = new Param(-48.0, 48.0);



        // reset osc phase (start at 0.0)

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            Reset(voiceIndex);

        }

    }

    ~Oscillator() {

        delete pGain;

        delete pOffset;

        delete pPitch;

    }



    void Reset(int voiceIndex) {

        mPhase[voiceIndex] = 0.0;

    }

    void ProcessVoiceBlock(int voiceIndex, int blockSize, double noteFrequency, double *left, double *right) {

        // local copy

        double phase = mPhase[voiceIndex];

        double offsetMin = pOffset->GetMin();

        double offsetRange = pOffset->GetRange();

        double pitchMin = pPitch->GetMin();

        double pitchRange = pPitch->GetRange();



        // precomputed data

        double bp0 = noteFrequency * hostPitch;



        // process block values

        for (int sampleIndex = 0; sampleIndex < blockSize; sampleIndex++) {

            double value = (sin(phase)) * pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



            *left++ += value;

            *right++ += value;



            // next phase

            phase += BOUNDED(mRadiansPerSample * (bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex, sampleIndex), pitchMin, pitchRange) + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex, sampleIndex), offsetMin, offsetRange)), 0, pi);

            while (phase >= twopi) { phase -= twopi; }

        }



        // revert local copy

        mPhase[voiceIndex] = phase;

    }

    inline double GetOffsetWarped(double normalizedValue, double min, double range) { return min + normalizedValue * range; }

    inline double GetPitchWarped(double normalizedValue, double min, double range) { return exp((min + normalizedValue * range) * ln2per12); }

};



class MyPlugin

{

public:

    double gainModValues[voiceSize][bufferSize];

    double offsetModValues[voiceSize][bufferSize];

    double pitchModValues[voiceSize][bufferSize];

    Oscillator oscillator;



    MyPlugin() {

        // link mod arrays to params

        oscillator.pGain->pModValues = gainModValues[0];

        oscillator.pOffset->pModValues = offsetModValues[0];

        oscillator.pPitch->pModValues = pitchModValues[0];



        // some fancy data for mod

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            for (int sampleIndex = 0; sampleIndex < bufferSize; sampleIndex++) {

                gainModValues[voiceIndex][sampleIndex] = sampleIndex / (double)bufferSize;

            }

        }

    }



    void ProcessDoubleReplace(int blockSize, double *bufferLeft, double *bufferRight) {

        // init buffer

        memset(bufferLeft, 0, blockSize * sizeof(double));

        memset(bufferRight, 0, blockSize * sizeof(double));



        // voices

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            // envelopes - here's where mod values will change, at audio rate



            // add mod to params

            oscillator.pGain->AddModulation(voiceIndex, blockSize);

            oscillator.pOffset->AddModulation(voiceIndex, blockSize);

            oscillator.pPitch->AddModulation(voiceIndex, blockSize);



            // osc buffer

            oscillator.ProcessVoiceBlock(voiceIndex, blockSize, noteFrequency, bufferLeft, bufferRight);

        }

    }

};



int main(int argc, const char *argv) {

    double bufferLeft[bufferSize];

    double bufferRight[bufferSize];



    MyPlugin myPlugin;



    // audio host call

    int numProcessing = 1024 * 50;

    int counterProcessing = 0;

    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

    while (counterProcessing++ < numProcessing) {

        int blockSize = 256;

        myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);



        // do somethings with buffer



    }

    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

}

Considering I'm running 16 Voices contemporaneously, it takes an huge amount of CPU for a simple Gain/Pitch/Offset modulation.

I hope I can do it better. Any tips/suggestions? Vectorizing?

Note: I could use std::clamp and C++17, but it doesn't matter a lot here: nothing really change with that expedient.

edited 2 days ago

asked 2 days ago

markzzz

1285

1

I changed the title so that it describes what the code does per site goals: "State what your code does in your title, not your main concerns about it.". Please check that I haven't misrepresented your code, and correct it if I have.
– Toby Speight
2 days ago

add a comment |

up vote
1
down vote

favorite

I've an Audio Host application that sends block of samples to a DLL I'm building. It sends 44100 samples per seconds (i.e. Sample Rate 44100hz), per block, processing 16 distinct Voices.

It processes an Oscillator, which plays a basic sine wave, modulated at Audio Rate by Gain, Pitch and Offset.

Here's the C++ code I have (I'm on MSVC, Release/x86 Configuration with /02 (/Ot) optimized settings):

#include <iostream>

#include <cstring>

#include <cmath>

#include <chrono>



const int voiceSize = 16;

const int bufferSize = 256;

const double pi = 3.141592653589793238;

const double twopi = 2 * pi;

double sampleRate = 44100.0;

double noteFrequency = 130.81278;

double hostPitch = 1.0;



#define BOUNDED(x,lo,hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))



class Param

{

public:

    int mControlRate = 1;

    double mValue = 0.5;

    double mProcessedValues[voiceSize][bufferSize];

    double *pModValues;



    Param(double min, double max) {

        mMin = min;

        mMax = max;

        mRange = max - min;

    }



    inline double GetMin() { return mMin; }

    inline double GetRange() { return mRange; }

    inline double GetProcessedVoiceValue(int voiceIndex, int sampleIndex) { return mProcessedVoicesValues[voiceIndex][sampleIndex]; }



    inline void AddModulation(int voiceIndex, int blockSize) {

        double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

        double *pProcessedValues = mProcessedValues[voiceIndex];



        int i = 0;

        for (int sampleIndex = 0; sampleIndex < blockSize; sampleIndex += mControlRate, i++) {

            pProcessedValues[i] = BOUNDED(mValue + pModVoiceValues[sampleIndex], 0.0, 1.0);

        }

    }



private:

    double mMin, mMax, mRange;

    double mProcessedVoicesValues[voiceSize][bufferSize];

};

class Oscillator

{

public:

    double mRadiansPerSample = twopi / sampleRate;

    double ln2per12 = std::log(2.0) / 12.0;

    double mPhase[voiceSize];



    Param *pGain, *pOffset, *pPitch;



    Oscillator() { 

        pGain = new Param(0.0, 1.0);

        pOffset = new Param(-900.0, 900.0);

        pPitch = new Param(-48.0, 48.0);



        // reset osc phase (start at 0.0)

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            Reset(voiceIndex);

        }

    }

    ~Oscillator() {

        delete pGain;

        delete pOffset;

        delete pPitch;

    }



    void Reset(int voiceIndex) {

        mPhase[voiceIndex] = 0.0;

    }

    void ProcessVoiceBlock(int voiceIndex, int blockSize, double noteFrequency, double *left, double *right) {

        // local copy

        double phase = mPhase[voiceIndex];

        double offsetMin = pOffset->GetMin();

        double offsetRange = pOffset->GetRange();

        double pitchMin = pPitch->GetMin();

        double pitchRange = pPitch->GetRange();



        // precomputed data

        double bp0 = noteFrequency * hostPitch;



        // process block values

        for (int sampleIndex = 0; sampleIndex < blockSize; sampleIndex++) {

            double value = (sin(phase)) * pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



            *left++ += value;

            *right++ += value;



            // next phase

            phase += BOUNDED(mRadiansPerSample * (bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex, sampleIndex), pitchMin, pitchRange) + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex, sampleIndex), offsetMin, offsetRange)), 0, pi);

            while (phase >= twopi) { phase -= twopi; }

        }



        // revert local copy

        mPhase[voiceIndex] = phase;

    }

    inline double GetOffsetWarped(double normalizedValue, double min, double range) { return min + normalizedValue * range; }

    inline double GetPitchWarped(double normalizedValue, double min, double range) { return exp((min + normalizedValue * range) * ln2per12); }

};



class MyPlugin

{

public:

    double gainModValues[voiceSize][bufferSize];

    double offsetModValues[voiceSize][bufferSize];

    double pitchModValues[voiceSize][bufferSize];

    Oscillator oscillator;



    MyPlugin() {

        // link mod arrays to params

        oscillator.pGain->pModValues = gainModValues[0];

        oscillator.pOffset->pModValues = offsetModValues[0];

        oscillator.pPitch->pModValues = pitchModValues[0];



        // some fancy data for mod

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            for (int sampleIndex = 0; sampleIndex < bufferSize; sampleIndex++) {

                gainModValues[voiceIndex][sampleIndex] = sampleIndex / (double)bufferSize;

            }

        }

    }



    void ProcessDoubleReplace(int blockSize, double *bufferLeft, double *bufferRight) {

        // init buffer

        memset(bufferLeft, 0, blockSize * sizeof(double));

        memset(bufferRight, 0, blockSize * sizeof(double));



        // voices

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            // envelopes - here's where mod values will change, at audio rate



            // add mod to params

            oscillator.pGain->AddModulation(voiceIndex, blockSize);

            oscillator.pOffset->AddModulation(voiceIndex, blockSize);

            oscillator.pPitch->AddModulation(voiceIndex, blockSize);



            // osc buffer

            oscillator.ProcessVoiceBlock(voiceIndex, blockSize, noteFrequency, bufferLeft, bufferRight);

        }

    }

};



int main(int argc, const char *argv) {

    double bufferLeft[bufferSize];

    double bufferRight[bufferSize];



    MyPlugin myPlugin;



    // audio host call

    int numProcessing = 1024 * 50;

    int counterProcessing = 0;

    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

    while (counterProcessing++ < numProcessing) {

        int blockSize = 256;

        myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);



        // do somethings with buffer



    }

    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

}

Considering I'm running 16 Voices contemporaneously, it takes an huge amount of CPU for a simple Gain/Pitch/Offset modulation.

I hope I can do it better. Any tips/suggestions? Vectorizing?

Note: I could use std::clamp and C++17, but it doesn't matter a lot here: nothing really change with that expedient.

edited 2 days ago

asked 2 days ago

markzzz

1285

I've an Audio Host application that sends block of samples to a DLL I'm building. It sends 44100 samples per seconds (i.e. Sample Rate 44100hz), per block, processing 16 distinct Voices.

It processes an Oscillator, which plays a basic sine wave, modulated at Audio Rate by Gain, Pitch and Offset.

Here's the C++ code I have (I'm on MSVC, Release/x86 Configuration with /02 (/Ot) optimized settings):

#include <iostream>

#include <cstring>

#include <cmath>

#include <chrono>



const int voiceSize = 16;

const int bufferSize = 256;

const double pi = 3.141592653589793238;

const double twopi = 2 * pi;

double sampleRate = 44100.0;

double noteFrequency = 130.81278;

double hostPitch = 1.0;



#define BOUNDED(x,lo,hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))



class Param

{

public:

    int mControlRate = 1;

    double mValue = 0.5;

    double mProcessedValues[voiceSize][bufferSize];

    double *pModValues;



    Param(double min, double max) {

        mMin = min;

        mMax = max;

        mRange = max - min;

    }



    inline double GetMin() { return mMin; }

    inline double GetRange() { return mRange; }

    inline double GetProcessedVoiceValue(int voiceIndex, int sampleIndex) { return mProcessedVoicesValues[voiceIndex][sampleIndex]; }



    inline void AddModulation(int voiceIndex, int blockSize) {

        double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

        double *pProcessedValues = mProcessedValues[voiceIndex];



        int i = 0;

        for (int sampleIndex = 0; sampleIndex < blockSize; sampleIndex += mControlRate, i++) {

            pProcessedValues[i] = BOUNDED(mValue + pModVoiceValues[sampleIndex], 0.0, 1.0);

        }

    }



private:

    double mMin, mMax, mRange;

    double mProcessedVoicesValues[voiceSize][bufferSize];

};

class Oscillator

{

public:

    double mRadiansPerSample = twopi / sampleRate;

    double ln2per12 = std::log(2.0) / 12.0;

    double mPhase[voiceSize];



    Param *pGain, *pOffset, *pPitch;



    Oscillator() { 

        pGain = new Param(0.0, 1.0);

        pOffset = new Param(-900.0, 900.0);

        pPitch = new Param(-48.0, 48.0);



        // reset osc phase (start at 0.0)

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            Reset(voiceIndex);

        }

    }

    ~Oscillator() {

        delete pGain;

        delete pOffset;

        delete pPitch;

    }



    void Reset(int voiceIndex) {

        mPhase[voiceIndex] = 0.0;

    }

    void ProcessVoiceBlock(int voiceIndex, int blockSize, double noteFrequency, double *left, double *right) {

        // local copy

        double phase = mPhase[voiceIndex];

        double offsetMin = pOffset->GetMin();

        double offsetRange = pOffset->GetRange();

        double pitchMin = pPitch->GetMin();

        double pitchRange = pPitch->GetRange();



        // precomputed data

        double bp0 = noteFrequency * hostPitch;



        // process block values

        for (int sampleIndex = 0; sampleIndex < blockSize; sampleIndex++) {

            double value = (sin(phase)) * pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



            *left++ += value;

            *right++ += value;



            // next phase

            phase += BOUNDED(mRadiansPerSample * (bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex, sampleIndex), pitchMin, pitchRange) + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex, sampleIndex), offsetMin, offsetRange)), 0, pi);

            while (phase >= twopi) { phase -= twopi; }

        }



        // revert local copy

        mPhase[voiceIndex] = phase;

    }

    inline double GetOffsetWarped(double normalizedValue, double min, double range) { return min + normalizedValue * range; }

    inline double GetPitchWarped(double normalizedValue, double min, double range) { return exp((min + normalizedValue * range) * ln2per12); }

};



class MyPlugin

{

public:

    double gainModValues[voiceSize][bufferSize];

    double offsetModValues[voiceSize][bufferSize];

    double pitchModValues[voiceSize][bufferSize];

    Oscillator oscillator;



    MyPlugin() {

        // link mod arrays to params

        oscillator.pGain->pModValues = gainModValues[0];

        oscillator.pOffset->pModValues = offsetModValues[0];

        oscillator.pPitch->pModValues = pitchModValues[0];



        // some fancy data for mod

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            for (int sampleIndex = 0; sampleIndex < bufferSize; sampleIndex++) {

                gainModValues[voiceIndex][sampleIndex] = sampleIndex / (double)bufferSize;

            }

        }

    }



    void ProcessDoubleReplace(int blockSize, double *bufferLeft, double *bufferRight) {

        // init buffer

        memset(bufferLeft, 0, blockSize * sizeof(double));

        memset(bufferRight, 0, blockSize * sizeof(double));



        // voices

        for (int voiceIndex = 0; voiceIndex < voiceSize; voiceIndex++) {

            // envelopes - here's where mod values will change, at audio rate



            // add mod to params

            oscillator.pGain->AddModulation(voiceIndex, blockSize);

            oscillator.pOffset->AddModulation(voiceIndex, blockSize);

            oscillator.pPitch->AddModulation(voiceIndex, blockSize);



            // osc buffer

            oscillator.ProcessVoiceBlock(voiceIndex, blockSize, noteFrequency, bufferLeft, bufferRight);

        }

    }

};



int main(int argc, const char *argv) {

    double bufferLeft[bufferSize];

    double bufferRight[bufferSize];



    MyPlugin myPlugin;



    // audio host call

    int numProcessing = 1024 * 50;

    int counterProcessing = 0;

    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

    while (counterProcessing++ < numProcessing) {

        int blockSize = 256;

        myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);



        // do somethings with buffer



    }

    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

}

Considering I'm running 16 Voices contemporaneously, it takes an huge amount of CPU for a simple Gain/Pitch/Offset modulation.

I hope I can do it better. Any tips/suggestions? Vectorizing?

Note: I could use std::clamp and C++17, but it doesn't matter a lot here: nothing really change with that expedient.

c++ performance audio signal-processing

edited 2 days ago

asked 2 days ago

markzzz

1285

edited 2 days ago

asked 2 days ago

markzzz

1285

edited 2 days ago

asked 2 days ago

markzzz

1285

asked 2 days ago

markzzz

1285

asked 2 days ago

markzzz

1285

1

I changed the title so that it describes what the code does per site goals: "State what your code does in your title, not your main concerns about it.". Please check that I haven't misrepresented your code, and correct it if I have.
– Toby Speight
2 days ago

add a comment |

1

I changed the title so that it describes what the code does per site goals: "State what your code does in your title, not your main concerns about it.". Please check that I haven't misrepresented your code, and correct it if I have.
– Toby Speight
2 days ago

I changed the title so that it describes what the code does per site goals: "State what your code does in your title, not your main concerns about it.". Please check that I haven't misrepresented your code, and correct it if I have.
– Toby Speight
2 days ago

add a comment |

4 Answers
4

active

oldest

votes

up vote
5
down vote

Don't define pi and twopi yourself. They should be available from math.h as M_PI and M_2_PI if you're in the GNU stack.

Defining functions as inline is more or less useless. The compiler will do this (or not) as it sees fit, and generally knows better than you on when it's beneficial.

GetMin() should be GetMin() const. Same for GetRange, GetProcessedVoiceValue, and anything that doesn't modify a class member.

while (phase >= twopi) { phase -= twopi; } - This should not be a loop. It can be done in O(1), something like phase = fmod(phase, twopi) - but double-check this. For very small values of phase, fmod may actually be slower. You can also try phase -= twopi*int(phase/twopi) which is also O(1).

On this line:

double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

You're dereferencing and then re-referencing a pointer, which you don't need to do. Simply add:

double *pModVoiceValues = pModValues + voiceIndex*bufferSize;

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

M_PI and M_2_PI are not standard C++ identifiers. That said, your reasoning is correct; we can compute suitable values. const auto pi = 4.0 * std::atan(1), for example.
– Toby Speight
2 days ago

@TobySpeight Thanks; I guess those are Gnu-specific.
– Reinderien
2 days ago

@Reinderien; nice suggestions. But unfortunately they don't improve the speed of the program.
– markzzz
2 days ago

@markzzz The fmod will improve program speed, if only imperceptibly.
– Reinderien
2 days ago

curious: fmod slow down the process here :O
– markzzz
2 days ago

|
show 5 more comments

up vote
5
down vote

counting flops

Let's unpack the inner loop, where all the work is being done. I am repeating the whole thing here, wrapping to 80 columns and adding some indentation to make it easier to read.

double value = sin(phase) *

        pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



*left++ += value;

*right++ += value;



// next phase

phase += BOUNDED(

        mRadiansPerSample * (

            bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), pitchMin, pitchRange)

            + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), offsetMin, offsetRange)

        ),

        0, pi

    );



while (phase >= twopi) { phase -= twopi; }



// . . .



inline double GetOffsetWarped(double normalizedValue, double min, double range)

{

    return min + normalizedValue * range;

}

inline double GetPitchWarped(double normalizedValue, double min, double range)

{

    return exp((min + normalizedValue * range) * ln2per12);

}

Let's see if I've got this straight. This is your program, as I understand it. You have 16 independent oscillators, each with their own time-varying frequency and gain. The frequency is a function of a pitch and an offset frequency. For now, I'll define pitch as separate from frequency (omega) in more or less the same manner you have done, switching to python for brevity: omega = omega_C * 2**(pitch/12), where omega_C = TAU*440*2**(-2 + 3/12) is the frequency of the C below middle C. So pitch is in semitones and frequency is in rad/s. In numpy terms, you are doing the following for each oscillator:

# code sample 0



# Rescale user inputs.

pitches = pitch_min + pitches_raw*pitch_range

omega_offsets = omega_offset_min + omega_offsets_raw*omega_offset_range

gains = gain_min + gains_raw*gain_range



omegas = omega_C * 2**(pitches/12) + omega_offsets

values = gains*np.sin(phase0 + np.cumsum(omegas)*dt)

That is basically the entire program. First, the constants.

dt is the time in seconds between samples. dt = 1/sample_rate, sample_rate = 44100. I have written the cumulative sum using this dt to illustrate the direct correspondence to the integral over a time-varying omega that the sum is approximating.

phase0 is the oscillator's phase at the beginning of the block you are currently processing.

gains, pitches, and omega_offsets are three floating point user inputs, gains and pitches possibly being mapped to an after touch MIDI keyboard with a pitch bend and omega_offsets mapped to a sweet MIDI slider, all being sampled at the audio sample rate. They are being represented as numpy vectors, hence the esses. Gains ranges from 0 to 1, pitches ranges from 4 octaves below omega_C to 4 octaves above omega_C, and omega_offsets ranges from -900Hz to 900Hz. (Incidentally, your gain min and max are not being used!) That offset frequency is super weird and kinda cool. That makes no sense from a conventional music theory standpoint. With any offset at all, pitch will no longer match up with octaves. I'd actually like to hear what that sounds like.

You could have made it easier for the reader to figure all that out. If I didn't already have some idea what to look for, I would have been completely lost. You should find a way to get your code to look as much like code sample 0 as possible.

Anyway, how fast should this loop run? How many flops does this loop translate to? Counting the number of operations per sample and using code sample 0 as a guide, I count 5 +s, 7 *s, an exp and a sin. Adding another + for when the 16 voices are added, that's 6 +s. Using this benchmark as a rough guideline, that translates to 6 + 7 + 10 + 15 = 38 flops per sample per voice. Multiplying by 16 voices and 44100 samples per second, that's 27 megaflops. A 1 GHz processor has a thousand cycles to do just 27 of those calculations. You should be seeing almost zero CPU on your toaster. There's something else going on here.

In fact, there's nothing wrong with your code. It was your benchmark. See the epilogue. But I left all these suggestions here anyway if for some reason you want your code to go even faster.

small loops

Your code might be slower than it could be because your inner loop is too big. Loops run fastest when they are as small as possible. This is because the fastest memory your system has—registers—is in extremely limited supply. If your loop is too big, the system runs out of registers and it has to use slower memory to run the loop. You can break up that inner loop into smaller loops—one per line in code sample 0. In theory, that should make your code run faster. But the compiler is actually able to figure out most of that, since everything is contained in a single file. It is able to inline whatever it wants and reorder execution however it wants, because it knows what every function is actually doing. This idea is closely related to vectorisation.

vectorising

You're doing the same operations blockSize times. You should be able to speed that up with simd. The Intel MKL has simd-accelerated vectorised sin(), etc. that you can use. Code sample 0 gives you a basic idea of what the vectorised code is going to look like. The MKL docs don't give an example, so I'll give one now. As a reminder, you really don't have to do this. See the epilogue.

#include <mkl.h>



// determines the size of a static array.

#define SASIZE(sa) (sizeof(sa)/sizeof(*sa))



// . . .



float phases = {1.f, 2.f, 3.f, 4.f, 5.f};

float sin_phases[SASIZE(phases)];

vsSin(SASIZE(phases), phases, sin_phases);

// Now, sin_phases[i] = sin(phases[i]). All of the sin()s were computed

// all at once, about 4 or 8 at a time using simd instructions.

float

This is all audio data, so you don't need doubles. Floats will work just fine, and they'll be way way faster, especially after you get simd working.

integral phase

You can store and manipulate phase using uint32_ts. It is naturally modular with modulus 2**32, so you never need to do any fmods or subtractions, especially repeated subtractions in a loop.

            while (phase >= twopi) { phase -= twopi; }  // idk about this...

That there is some weirdness. The mod 2**32 happens automatically when converting from float, adding, subtracting, whatever, so it should be a lot faster and a lot more accurate. You should only convert back to float phase before a sin operation or something like that. Here's an example. I also included an example of how to do the same modulo operation in all floating point numbers.

#include <cstdio>  // Hey, I like *printf().

#include <cinttypes>



// These are actually templated C++ math functions. The c doesn't

// stand for C, I guess...

#include <cmath>



using namespace std;



#define TAU 6.283185307179586





// fmodf, but always rounding toward negative infinity.

// The output will always be in [0, y). This is not the case

// with fmod(). See man fmod.

float fmodf_rd(float x, float y)

{

    float r = fmod(x, y);

    return r < 0.f? r + y : r;

}

uint32_t integral_phase(float phi)

{

    return phi / (float)TAU * 0x1p32f;

}

float float_phase(uint32_t phi_i)

{

    return (float)phi_i * (float)TAU * 0x1p-32f;

}



int main(int argc, const char *argv)

{

    // Phases, in radians.

    float alpha = TAU/12., beta = atan(2.f);

    uint32_t alpha_i = integral_phase(alpha), beta_i = integral_phase(beta);



    // Phase calculations in radians and floating point:

    float gamma = fmodf_rd(5.f*alpha - 7.f*beta, (float)TAU);

    // The same phase calculation in integral phase. Note there is no

    // mod operation at all.

    uint32_t gamma_i = 5*alpha_i - 7*beta_i;





    printf("difference = %.9gn", (double)(float_phase(gamma_i) - gamma));

    return 0;

}

Epilogue: An M. Night Shyamalan twist.

This has been bothering me enough that I finally ran your code. You were saying you were getting slow results, but your code, though a little funky, did not look particularly slow actually. So I assumed it was something wrong with your benchmark. I nuked the C++ chrono badness and replaced it with dtime(), a wrapper I wrote around clock_gettime(). dtime() returns the current value of CLOCK_MONOTONIC in seconds as a double. I also redid the benchmark math, outputting the performance in terms of %CPU. This is the diff:

--- mod.cpp-    2018-11-20 03:19:11.091296221 -0500

+++ mod.cpp 2018-11-20 03:39:39.529689861 -0500

@@ -1,7 +1,21 @@

-#include <iostream>

+#include <stdio.h>

+#include <time.h>

+#include <assert.h>

+

 #include <cstring>

 #include <cmath>

-#include <chrono>

+

+double timespec_to_d(struct timespec *ts)

+{

+    return (double)ts->tv_sec + 1e-9*(double)ts->tv_nsec;

+}

+double dtime(void)

+{

+    struct timespec ts;

+    int res = clock_gettime(CLOCK_MONOTONIC, &ts);

+    assert(res == 0);

+    return timespec_to_d(&ts);

+}



 const int voiceSize = 16;

 const int bufferSize = 256;

@@ -154,7 +168,7 @@

     // audio host call

     int numProcessing = 1024 * 50;

     int counterProcessing = 0;

-    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

+    double t0 = dtime();

     while (counterProcessing++ < numProcessing) {

         int blockSize = 256;

         myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);

@@ -162,6 +176,8 @@

         // do somethings with buffer



     }

-    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

-    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

+    double dt_busy = (dtime() - t0)/numProcessing/bufferSize,

+           dt_total = 1./sampleRate;

+    printf("CPU = %.6g %%n", dt_busy/dt_total);

+    return 0;

 }

dt_busy is the total amount of time your code takes to process a single sample. dt_total, the time between samples, is the total amount of time your code has to process a single sample. Divide the two and you predict %CPU usage while your DLL is running.

aaaaaaand this is the output:

% g++ -o mod mod.cpp

% ./mod                 

CPU = 0.105791 %

%

When running as a plugin with realtime user input streams, your code will use .1% CPU. It was your benchmark the entire time.

edited yesterday

answered 2 days ago

enigmaticPhysicist

1513

New contributor

Frequency offset, when set to 0, doesn't add offset, so pitch is perfectly tuned in octaves.
– markzzz
yesterday

Can you give to me an example with simd sin and vectorization? I've no idea how to use it.
– markzzz
yesterday

Yeah, when set to zero. For every other value, though, lol
– enigmaticPhysicist
yesterday

for electronic music and "kickdrum synth" is a good tweak :)
– markzzz
yesterday

also: why on code sample 0 do you use "pow" instead of exp? Its way slower...
– markzzz
yesterday

|
show 3 more comments

up vote
4
down vote

General overview

I found this hard to follow. The logic is scattered across several coupled classes without a clear idea of a specific single responsibility for each class.

The class, function and variable names aren't as helpful as they could be - for example, GetProcessedVoiceValue() doesn't indicate what kind of processing was done; ln2per12 only captures how it was computed, rather than what it's for, and MyPlugin looks like a generic temporary name.

Specific issues

BOUNDED() is a dangerous macro - it expands its arguments more than once, meaning that any side-effects can be multiply executed, unlike the equivalent function call. Prefer to use a function - either std::clamp() or your own version of it (if you need to build with old compilers). Such a functions is generally a zero-cost abstraction in C++.

Variable-length arrays are not standard C++. Fix this by making voiceSize and bufferSize compile-time constants, aka constexpr (rather than plain const).

Prefer initialisation to assignment. In Param, for example, we could make mMin, mMax and mRange const if we initialize:

    Param(double min, double max)

        : mMin{min},

          // mMax was never used, so don't store it

          mRange{max - min}

    {

    }



    // make them public, so we don't need `get_min()` etc.

    const double mMin, mRange;

The uninitialised pModValues pointer is especially dangerous here.

I don't understand why we're updating left and right identically. We can get better special locality by writing just one of them, and (if we really need two identical buffers) using std::copy() to duplicate it afterwards.

In Oscillator, there's no need for pGain, pOffset and pPitch to be pointers. Eliminate the memory allocation (and risk of leaks when one of them throws std::bad_alloc) by making them simple member variables - don't write Java code!

Param pGain, pOffset, pPitch;



Oscillator()

    : pGain{0.0, 1.0},

      pOffset{-900.0, 900.0},

      pPitch{-48.0, 48.0}

{ /* ... */ }



// destructor is now defaulted

Some functions from <cmath> wrongly accessed in the global namespace (namely std::sin() and std::exp()); also std::memset() from <cstring>. These should be correctly qualified for portable code.

Prefer std::fill() or std::fill_n() over std::memset() - the former takes care of conversions and constructors where necessary, and is equivalent to the latter in simple cases, so it's a good habit to acquire:

    std::fill_n(bufferLeft, blockSize, 0);

    std::fill_n(bufferRight, blockSize, 0);

Declare main() with no arguments when not intending to process command-line options.

If you can, make sure your compiler targets the particular hardware you'll be using (for me the speed more than doubles when I add -O3 -march=native to my compilation command).

edited 2 days ago

answered 2 days ago

Toby Speight

22.2k536108

BOUNDED: What do you mean "evalutate its arguments more than once"?
– markzzz
2 days ago

BOUNDED expands x three times. That's misleading if x is an expression with side-effects (e.g. *p++) - it's very different from a function call. Similarly for the other arguments that appear multiple times in the expansion. That's why we use all-caps for macro names, but it's better to avoid the problem by using a function instead. There's usually no cost to that in C++.
– Toby Speight
2 days ago

I see. anyway, I've applied most of your suggestions, nothing change :) Compiler targets its the max I can (O2, or Ox in MSVC). Dunno its that!
– markzzz
2 days ago

The logic of the program is: having a DLL (MyPlugin), call the ProcessDoubleReplace from a Host that load that DLL. The plugin need to take each param value and sum modulations values (which change across time). Once processed, I must apply these params (3 in this case: gain, pitch, offset) and produce audio (sine wave in this case). That's all, considering it process per voices :) Note: left and right is because signal is stereo, and in the future I could process them separately).
– markzzz
2 days ago

add a comment |

up vote
1
down vote

I'm filing this as a separate answer because it's on a very different topic from my previous one. You have a constant for note frequency, but (a) it isn't precise and (b) you should compute it rather than writing the constant.

$$12 log_2 frac {130.81278} {55} approx 15$$

$$f = 55 times 2^{15/12} = 110 times 2^{1/4}$$

$$f approx 130.812,782,650,299...$$

In C,

const double noteFrequency = 110*pow(2, 0.25);

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

add a comment |

Your Answer

StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f207967%2fmodulating-an-oscillator-by-gain-pitch-and-offset%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

4 Answers
4

active

oldest

votes

4 Answers
4

active

oldest

votes

up vote
5
down vote

Don't define pi and twopi yourself. They should be available from math.h as M_PI and M_2_PI if you're in the GNU stack.

Defining functions as inline is more or less useless. The compiler will do this (or not) as it sees fit, and generally knows better than you on when it's beneficial.

GetMin() should be GetMin() const. Same for GetRange, GetProcessedVoiceValue, and anything that doesn't modify a class member.

while (phase >= twopi) { phase -= twopi; } - This should not be a loop. It can be done in O(1), something like phase = fmod(phase, twopi) - but double-check this. For very small values of phase, fmod may actually be slower. You can also try phase -= twopi*int(phase/twopi) which is also O(1).

On this line:

double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

You're dereferencing and then re-referencing a pointer, which you don't need to do. Simply add:

double *pModVoiceValues = pModValues + voiceIndex*bufferSize;

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

M_PI and M_2_PI are not standard C++ identifiers. That said, your reasoning is correct; we can compute suitable values. const auto pi = 4.0 * std::atan(1), for example.
– Toby Speight
2 days ago

@TobySpeight Thanks; I guess those are Gnu-specific.
– Reinderien
2 days ago

@Reinderien; nice suggestions. But unfortunately they don't improve the speed of the program.
– markzzz
2 days ago

@markzzz The fmod will improve program speed, if only imperceptibly.
– Reinderien
2 days ago

curious: fmod slow down the process here :O
– markzzz
2 days ago

|
show 5 more comments

up vote
5
down vote

Don't define pi and twopi yourself. They should be available from math.h as M_PI and M_2_PI if you're in the GNU stack.

Defining functions as inline is more or less useless. The compiler will do this (or not) as it sees fit, and generally knows better than you on when it's beneficial.

GetMin() should be GetMin() const. Same for GetRange, GetProcessedVoiceValue, and anything that doesn't modify a class member.

while (phase >= twopi) { phase -= twopi; } - This should not be a loop. It can be done in O(1), something like phase = fmod(phase, twopi) - but double-check this. For very small values of phase, fmod may actually be slower. You can also try phase -= twopi*int(phase/twopi) which is also O(1).

On this line:

double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

You're dereferencing and then re-referencing a pointer, which you don't need to do. Simply add:

double *pModVoiceValues = pModValues + voiceIndex*bufferSize;

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

M_PI and M_2_PI are not standard C++ identifiers. That said, your reasoning is correct; we can compute suitable values. const auto pi = 4.0 * std::atan(1), for example.
– Toby Speight
2 days ago

@TobySpeight Thanks; I guess those are Gnu-specific.
– Reinderien
2 days ago

@Reinderien; nice suggestions. But unfortunately they don't improve the speed of the program.
– markzzz
2 days ago

@markzzz The fmod will improve program speed, if only imperceptibly.
– Reinderien
2 days ago

curious: fmod slow down the process here :O
– markzzz
2 days ago

|
show 5 more comments

up vote
5
down vote

Don't define pi and twopi yourself. They should be available from math.h as M_PI and M_2_PI if you're in the GNU stack.

Defining functions as inline is more or less useless. The compiler will do this (or not) as it sees fit, and generally knows better than you on when it's beneficial.

GetMin() should be GetMin() const. Same for GetRange, GetProcessedVoiceValue, and anything that doesn't modify a class member.

while (phase >= twopi) { phase -= twopi; } - This should not be a loop. It can be done in O(1), something like phase = fmod(phase, twopi) - but double-check this. For very small values of phase, fmod may actually be slower. You can also try phase -= twopi*int(phase/twopi) which is also O(1).

On this line:

double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

You're dereferencing and then re-referencing a pointer, which you don't need to do. Simply add:

double *pModVoiceValues = pModValues + voiceIndex*bufferSize;

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

Don't define pi and twopi yourself. They should be available from math.h as M_PI and M_2_PI if you're in the GNU stack.

Defining functions as inline is more or less useless. The compiler will do this (or not) as it sees fit, and generally knows better than you on when it's beneficial.

GetMin() should be GetMin() const. Same for GetRange, GetProcessedVoiceValue, and anything that doesn't modify a class member.

while (phase >= twopi) { phase -= twopi; } - This should not be a loop. It can be done in O(1), something like phase = fmod(phase, twopi) - but double-check this. For very small values of phase, fmod may actually be slower. You can also try phase -= twopi*int(phase/twopi) which is also O(1).

On this line:

double *pModVoiceValues = &pModValues[voiceIndex * bufferSize];

You're dereferencing and then re-referencing a pointer, which you don't need to do. Simply add:

double *pModVoiceValues = pModValues + voiceIndex*bufferSize;

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

answered 2 days ago

Reinderien

1,272516

answered 2 days ago

Reinderien

1,272516

M_PI and M_2_PI are not standard C++ identifiers. That said, your reasoning is correct; we can compute suitable values. const auto pi = 4.0 * std::atan(1), for example.
– Toby Speight
2 days ago

@TobySpeight Thanks; I guess those are Gnu-specific.
– Reinderien
2 days ago

@Reinderien; nice suggestions. But unfortunately they don't improve the speed of the program.
– markzzz
2 days ago

@markzzz The fmod will improve program speed, if only imperceptibly.
– Reinderien
2 days ago

curious: fmod slow down the process here :O
– markzzz
2 days ago

|
show 5 more comments

M_PI and M_2_PI are not standard C++ identifiers. That said, your reasoning is correct; we can compute suitable values. const auto pi = 4.0 * std::atan(1), for example.
– Toby Speight
2 days ago

@TobySpeight Thanks; I guess those are Gnu-specific.
– Reinderien
2 days ago

@Reinderien; nice suggestions. But unfortunately they don't improve the speed of the program.
– markzzz
2 days ago

@markzzz The fmod will improve program speed, if only imperceptibly.
– Reinderien
2 days ago

curious: fmod slow down the process here :O
– markzzz
2 days ago

M_PI and M_2_PI are not standard C++ identifiers. That said, your reasoning is correct; we can compute suitable values. const auto pi = 4.0 * std::atan(1), for example.
– Toby Speight
2 days ago

@TobySpeight Thanks; I guess those are Gnu-specific.
– Reinderien
2 days ago

@Reinderien; nice suggestions. But unfortunately they don't improve the speed of the program.
– markzzz
2 days ago

@markzzz The fmod will improve program speed, if only imperceptibly.
– Reinderien
2 days ago

curious: fmod slow down the process here :O
– markzzz
2 days ago

|
show 5 more comments

up vote
5
down vote

counting flops

Let's unpack the inner loop, where all the work is being done. I am repeating the whole thing here, wrapping to 80 columns and adding some indentation to make it easier to read.

double value = sin(phase) *

        pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



*left++ += value;

*right++ += value;



// next phase

phase += BOUNDED(

        mRadiansPerSample * (

            bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), pitchMin, pitchRange)

            + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), offsetMin, offsetRange)

        ),

        0, pi

    );



while (phase >= twopi) { phase -= twopi; }



// . . .



inline double GetOffsetWarped(double normalizedValue, double min, double range)

{

    return min + normalizedValue * range;

}

inline double GetPitchWarped(double normalizedValue, double min, double range)

{

    return exp((min + normalizedValue * range) * ln2per12);

}

# code sample 0



# Rescale user inputs.

pitches = pitch_min + pitches_raw*pitch_range

omega_offsets = omega_offset_min + omega_offsets_raw*omega_offset_range

gains = gain_min + gains_raw*gain_range



omegas = omega_C * 2**(pitches/12) + omega_offsets

values = gains*np.sin(phase0 + np.cumsum(omegas)*dt)

That is basically the entire program. First, the constants.

phase0 is the oscillator's phase at the beginning of the block you are currently processing.

In fact, there's nothing wrong with your code. It was your benchmark. See the epilogue. But I left all these suggestions here anyway if for some reason you want your code to go even faster.

small loops

vectorising

#include <mkl.h>



// determines the size of a static array.

#define SASIZE(sa) (sizeof(sa)/sizeof(*sa))



// . . .



float phases = {1.f, 2.f, 3.f, 4.f, 5.f};

float sin_phases[SASIZE(phases)];

vsSin(SASIZE(phases), phases, sin_phases);

// Now, sin_phases[i] = sin(phases[i]). All of the sin()s were computed

// all at once, about 4 or 8 at a time using simd instructions.

float

This is all audio data, so you don't need doubles. Floats will work just fine, and they'll be way way faster, especially after you get simd working.

integral phase

You can store and manipulate phase using uint32_ts. It is naturally modular with modulus 2**32, so you never need to do any fmods or subtractions, especially repeated subtractions in a loop.

            while (phase >= twopi) { phase -= twopi; }  // idk about this...

#include <cstdio>  // Hey, I like *printf().

#include <cinttypes>



// These are actually templated C++ math functions. The c doesn't

// stand for C, I guess...

#include <cmath>



using namespace std;



#define TAU 6.283185307179586





// fmodf, but always rounding toward negative infinity.

// The output will always be in [0, y). This is not the case

// with fmod(). See man fmod.

float fmodf_rd(float x, float y)

{

    float r = fmod(x, y);

    return r < 0.f? r + y : r;

}

uint32_t integral_phase(float phi)

{

    return phi / (float)TAU * 0x1p32f;

}

float float_phase(uint32_t phi_i)

{

    return (float)phi_i * (float)TAU * 0x1p-32f;

}



int main(int argc, const char *argv)

{

    // Phases, in radians.

    float alpha = TAU/12., beta = atan(2.f);

    uint32_t alpha_i = integral_phase(alpha), beta_i = integral_phase(beta);



    // Phase calculations in radians and floating point:

    float gamma = fmodf_rd(5.f*alpha - 7.f*beta, (float)TAU);

    // The same phase calculation in integral phase. Note there is no

    // mod operation at all.

    uint32_t gamma_i = 5*alpha_i - 7*beta_i;





    printf("difference = %.9gn", (double)(float_phase(gamma_i) - gamma));

    return 0;

}

Epilogue: An M. Night Shyamalan twist.

--- mod.cpp-    2018-11-20 03:19:11.091296221 -0500

+++ mod.cpp 2018-11-20 03:39:39.529689861 -0500

@@ -1,7 +1,21 @@

-#include <iostream>

+#include <stdio.h>

+#include <time.h>

+#include <assert.h>

+

 #include <cstring>

 #include <cmath>

-#include <chrono>

+

+double timespec_to_d(struct timespec *ts)

+{

+    return (double)ts->tv_sec + 1e-9*(double)ts->tv_nsec;

+}

+double dtime(void)

+{

+    struct timespec ts;

+    int res = clock_gettime(CLOCK_MONOTONIC, &ts);

+    assert(res == 0);

+    return timespec_to_d(&ts);

+}



 const int voiceSize = 16;

 const int bufferSize = 256;

@@ -154,7 +168,7 @@

     // audio host call

     int numProcessing = 1024 * 50;

     int counterProcessing = 0;

-    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

+    double t0 = dtime();

     while (counterProcessing++ < numProcessing) {

         int blockSize = 256;

         myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);

@@ -162,6 +176,8 @@

         // do somethings with buffer



     }

-    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

-    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

+    double dt_busy = (dtime() - t0)/numProcessing/bufferSize,

+           dt_total = 1./sampleRate;

+    printf("CPU = %.6g %%n", dt_busy/dt_total);

+    return 0;

 }

aaaaaaand this is the output:

% g++ -o mod mod.cpp

% ./mod                 

CPU = 0.105791 %

%

When running as a plugin with realtime user input streams, your code will use .1% CPU. It was your benchmark the entire time.

edited yesterday

answered 2 days ago

enigmaticPhysicist

1513

New contributor

Frequency offset, when set to 0, doesn't add offset, so pitch is perfectly tuned in octaves.
– markzzz
yesterday

Can you give to me an example with simd sin and vectorization? I've no idea how to use it.
– markzzz
yesterday

Yeah, when set to zero. For every other value, though, lol
– enigmaticPhysicist
yesterday

for electronic music and "kickdrum synth" is a good tweak :)
– markzzz
yesterday

also: why on code sample 0 do you use "pow" instead of exp? Its way slower...
– markzzz
yesterday

|
show 3 more comments

up vote
5
down vote

counting flops

Let's unpack the inner loop, where all the work is being done. I am repeating the whole thing here, wrapping to 80 columns and adding some indentation to make it easier to read.

double value = sin(phase) *

        pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



*left++ += value;

*right++ += value;



// next phase

phase += BOUNDED(

        mRadiansPerSample * (

            bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), pitchMin, pitchRange)

            + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), offsetMin, offsetRange)

        ),

        0, pi

    );



while (phase >= twopi) { phase -= twopi; }



// . . .



inline double GetOffsetWarped(double normalizedValue, double min, double range)

{

    return min + normalizedValue * range;

}

inline double GetPitchWarped(double normalizedValue, double min, double range)

{

    return exp((min + normalizedValue * range) * ln2per12);

}

# code sample 0



# Rescale user inputs.

pitches = pitch_min + pitches_raw*pitch_range

omega_offsets = omega_offset_min + omega_offsets_raw*omega_offset_range

gains = gain_min + gains_raw*gain_range



omegas = omega_C * 2**(pitches/12) + omega_offsets

values = gains*np.sin(phase0 + np.cumsum(omegas)*dt)

That is basically the entire program. First, the constants.

phase0 is the oscillator's phase at the beginning of the block you are currently processing.

In fact, there's nothing wrong with your code. It was your benchmark. See the epilogue. But I left all these suggestions here anyway if for some reason you want your code to go even faster.

small loops

vectorising

#include <mkl.h>



// determines the size of a static array.

#define SASIZE(sa) (sizeof(sa)/sizeof(*sa))



// . . .



float phases = {1.f, 2.f, 3.f, 4.f, 5.f};

float sin_phases[SASIZE(phases)];

vsSin(SASIZE(phases), phases, sin_phases);

// Now, sin_phases[i] = sin(phases[i]). All of the sin()s were computed

// all at once, about 4 or 8 at a time using simd instructions.

float

This is all audio data, so you don't need doubles. Floats will work just fine, and they'll be way way faster, especially after you get simd working.

integral phase

You can store and manipulate phase using uint32_ts. It is naturally modular with modulus 2**32, so you never need to do any fmods or subtractions, especially repeated subtractions in a loop.

            while (phase >= twopi) { phase -= twopi; }  // idk about this...

#include <cstdio>  // Hey, I like *printf().

#include <cinttypes>



// These are actually templated C++ math functions. The c doesn't

// stand for C, I guess...

#include <cmath>



using namespace std;



#define TAU 6.283185307179586





// fmodf, but always rounding toward negative infinity.

// The output will always be in [0, y). This is not the case

// with fmod(). See man fmod.

float fmodf_rd(float x, float y)

{

    float r = fmod(x, y);

    return r < 0.f? r + y : r;

}

uint32_t integral_phase(float phi)

{

    return phi / (float)TAU * 0x1p32f;

}

float float_phase(uint32_t phi_i)

{

    return (float)phi_i * (float)TAU * 0x1p-32f;

}



int main(int argc, const char *argv)

{

    // Phases, in radians.

    float alpha = TAU/12., beta = atan(2.f);

    uint32_t alpha_i = integral_phase(alpha), beta_i = integral_phase(beta);



    // Phase calculations in radians and floating point:

    float gamma = fmodf_rd(5.f*alpha - 7.f*beta, (float)TAU);

    // The same phase calculation in integral phase. Note there is no

    // mod operation at all.

    uint32_t gamma_i = 5*alpha_i - 7*beta_i;





    printf("difference = %.9gn", (double)(float_phase(gamma_i) - gamma));

    return 0;

}

Epilogue: An M. Night Shyamalan twist.

--- mod.cpp-    2018-11-20 03:19:11.091296221 -0500

+++ mod.cpp 2018-11-20 03:39:39.529689861 -0500

@@ -1,7 +1,21 @@

-#include <iostream>

+#include <stdio.h>

+#include <time.h>

+#include <assert.h>

+

 #include <cstring>

 #include <cmath>

-#include <chrono>

+

+double timespec_to_d(struct timespec *ts)

+{

+    return (double)ts->tv_sec + 1e-9*(double)ts->tv_nsec;

+}

+double dtime(void)

+{

+    struct timespec ts;

+    int res = clock_gettime(CLOCK_MONOTONIC, &ts);

+    assert(res == 0);

+    return timespec_to_d(&ts);

+}



 const int voiceSize = 16;

 const int bufferSize = 256;

@@ -154,7 +168,7 @@

     // audio host call

     int numProcessing = 1024 * 50;

     int counterProcessing = 0;

-    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

+    double t0 = dtime();

     while (counterProcessing++ < numProcessing) {

         int blockSize = 256;

         myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);

@@ -162,6 +176,8 @@

         // do somethings with buffer



     }

-    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

-    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

+    double dt_busy = (dtime() - t0)/numProcessing/bufferSize,

+           dt_total = 1./sampleRate;

+    printf("CPU = %.6g %%n", dt_busy/dt_total);

+    return 0;

 }

aaaaaaand this is the output:

% g++ -o mod mod.cpp

% ./mod                 

CPU = 0.105791 %

%

When running as a plugin with realtime user input streams, your code will use .1% CPU. It was your benchmark the entire time.

edited yesterday

answered 2 days ago

enigmaticPhysicist

1513

New contributor

Frequency offset, when set to 0, doesn't add offset, so pitch is perfectly tuned in octaves.
– markzzz
yesterday

Can you give to me an example with simd sin and vectorization? I've no idea how to use it.
– markzzz
yesterday

Yeah, when set to zero. For every other value, though, lol
– enigmaticPhysicist
yesterday

for electronic music and "kickdrum synth" is a good tweak :)
– markzzz
yesterday

also: why on code sample 0 do you use "pow" instead of exp? Its way slower...
– markzzz
yesterday

|
show 3 more comments

up vote
5
down vote

counting flops

Let's unpack the inner loop, where all the work is being done. I am repeating the whole thing here, wrapping to 80 columns and adding some indentation to make it easier to read.

double value = sin(phase) *

        pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



*left++ += value;

*right++ += value;



// next phase

phase += BOUNDED(

        mRadiansPerSample * (

            bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), pitchMin, pitchRange)

            + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), offsetMin, offsetRange)

        ),

        0, pi

    );



while (phase >= twopi) { phase -= twopi; }



// . . .



inline double GetOffsetWarped(double normalizedValue, double min, double range)

{

    return min + normalizedValue * range;

}

inline double GetPitchWarped(double normalizedValue, double min, double range)

{

    return exp((min + normalizedValue * range) * ln2per12);

}

# code sample 0



# Rescale user inputs.

pitches = pitch_min + pitches_raw*pitch_range

omega_offsets = omega_offset_min + omega_offsets_raw*omega_offset_range

gains = gain_min + gains_raw*gain_range



omegas = omega_C * 2**(pitches/12) + omega_offsets

values = gains*np.sin(phase0 + np.cumsum(omegas)*dt)

That is basically the entire program. First, the constants.

phase0 is the oscillator's phase at the beginning of the block you are currently processing.

In fact, there's nothing wrong with your code. It was your benchmark. See the epilogue. But I left all these suggestions here anyway if for some reason you want your code to go even faster.

small loops

vectorising

#include <mkl.h>



// determines the size of a static array.

#define SASIZE(sa) (sizeof(sa)/sizeof(*sa))



// . . .



float phases = {1.f, 2.f, 3.f, 4.f, 5.f};

float sin_phases[SASIZE(phases)];

vsSin(SASIZE(phases), phases, sin_phases);

// Now, sin_phases[i] = sin(phases[i]). All of the sin()s were computed

// all at once, about 4 or 8 at a time using simd instructions.

float

This is all audio data, so you don't need doubles. Floats will work just fine, and they'll be way way faster, especially after you get simd working.

integral phase

You can store and manipulate phase using uint32_ts. It is naturally modular with modulus 2**32, so you never need to do any fmods or subtractions, especially repeated subtractions in a loop.

            while (phase >= twopi) { phase -= twopi; }  // idk about this...

#include <cstdio>  // Hey, I like *printf().

#include <cinttypes>



// These are actually templated C++ math functions. The c doesn't

// stand for C, I guess...

#include <cmath>



using namespace std;



#define TAU 6.283185307179586





// fmodf, but always rounding toward negative infinity.

// The output will always be in [0, y). This is not the case

// with fmod(). See man fmod.

float fmodf_rd(float x, float y)

{

    float r = fmod(x, y);

    return r < 0.f? r + y : r;

}

uint32_t integral_phase(float phi)

{

    return phi / (float)TAU * 0x1p32f;

}

float float_phase(uint32_t phi_i)

{

    return (float)phi_i * (float)TAU * 0x1p-32f;

}



int main(int argc, const char *argv)

{

    // Phases, in radians.

    float alpha = TAU/12., beta = atan(2.f);

    uint32_t alpha_i = integral_phase(alpha), beta_i = integral_phase(beta);



    // Phase calculations in radians and floating point:

    float gamma = fmodf_rd(5.f*alpha - 7.f*beta, (float)TAU);

    // The same phase calculation in integral phase. Note there is no

    // mod operation at all.

    uint32_t gamma_i = 5*alpha_i - 7*beta_i;





    printf("difference = %.9gn", (double)(float_phase(gamma_i) - gamma));

    return 0;

}

Epilogue: An M. Night Shyamalan twist.

--- mod.cpp-    2018-11-20 03:19:11.091296221 -0500

+++ mod.cpp 2018-11-20 03:39:39.529689861 -0500

@@ -1,7 +1,21 @@

-#include <iostream>

+#include <stdio.h>

+#include <time.h>

+#include <assert.h>

+

 #include <cstring>

 #include <cmath>

-#include <chrono>

+

+double timespec_to_d(struct timespec *ts)

+{

+    return (double)ts->tv_sec + 1e-9*(double)ts->tv_nsec;

+}

+double dtime(void)

+{

+    struct timespec ts;

+    int res = clock_gettime(CLOCK_MONOTONIC, &ts);

+    assert(res == 0);

+    return timespec_to_d(&ts);

+}



 const int voiceSize = 16;

 const int bufferSize = 256;

@@ -154,7 +168,7 @@

     // audio host call

     int numProcessing = 1024 * 50;

     int counterProcessing = 0;

-    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

+    double t0 = dtime();

     while (counterProcessing++ < numProcessing) {

         int blockSize = 256;

         myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);

@@ -162,6 +176,8 @@

         // do somethings with buffer



     }

-    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

-    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

+    double dt_busy = (dtime() - t0)/numProcessing/bufferSize,

+           dt_total = 1./sampleRate;

+    printf("CPU = %.6g %%n", dt_busy/dt_total);

+    return 0;

 }

aaaaaaand this is the output:

% g++ -o mod mod.cpp

% ./mod                 

CPU = 0.105791 %

%

When running as a plugin with realtime user input streams, your code will use .1% CPU. It was your benchmark the entire time.

edited yesterday

answered 2 days ago

enigmaticPhysicist

1513

New contributor

counting flops

Let's unpack the inner loop, where all the work is being done. I am repeating the whole thing here, wrapping to 80 columns and adding some indentation to make it easier to read.

double value = sin(phase) *

        pGain->GetProcessedVoiceValue(voiceIndex, sampleIndex);



*left++ += value;

*right++ += value;



// next phase

phase += BOUNDED(

        mRadiansPerSample * (

            bp0 * GetPitchWarped(pPitch->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), pitchMin, pitchRange)

            + GetOffsetWarped(pOffset->GetProcessedVoiceValue(voiceIndex,

                    sampleIndex), offsetMin, offsetRange)

        ),

        0, pi

    );



while (phase >= twopi) { phase -= twopi; }



// . . .



inline double GetOffsetWarped(double normalizedValue, double min, double range)

{

    return min + normalizedValue * range;

}

inline double GetPitchWarped(double normalizedValue, double min, double range)

{

    return exp((min + normalizedValue * range) * ln2per12);

}

# code sample 0



# Rescale user inputs.

pitches = pitch_min + pitches_raw*pitch_range

omega_offsets = omega_offset_min + omega_offsets_raw*omega_offset_range

gains = gain_min + gains_raw*gain_range



omegas = omega_C * 2**(pitches/12) + omega_offsets

values = gains*np.sin(phase0 + np.cumsum(omegas)*dt)

That is basically the entire program. First, the constants.

phase0 is the oscillator's phase at the beginning of the block you are currently processing.

In fact, there's nothing wrong with your code. It was your benchmark. See the epilogue. But I left all these suggestions here anyway if for some reason you want your code to go even faster.

small loops

vectorising

#include <mkl.h>



// determines the size of a static array.

#define SASIZE(sa) (sizeof(sa)/sizeof(*sa))



// . . .



float phases = {1.f, 2.f, 3.f, 4.f, 5.f};

float sin_phases[SASIZE(phases)];

vsSin(SASIZE(phases), phases, sin_phases);

// Now, sin_phases[i] = sin(phases[i]). All of the sin()s were computed

// all at once, about 4 or 8 at a time using simd instructions.

float

This is all audio data, so you don't need doubles. Floats will work just fine, and they'll be way way faster, especially after you get simd working.

integral phase

You can store and manipulate phase using uint32_ts. It is naturally modular with modulus 2**32, so you never need to do any fmods or subtractions, especially repeated subtractions in a loop.

            while (phase >= twopi) { phase -= twopi; }  // idk about this...

#include <cstdio>  // Hey, I like *printf().

#include <cinttypes>



// These are actually templated C++ math functions. The c doesn't

// stand for C, I guess...

#include <cmath>



using namespace std;



#define TAU 6.283185307179586





// fmodf, but always rounding toward negative infinity.

// The output will always be in [0, y). This is not the case

// with fmod(). See man fmod.

float fmodf_rd(float x, float y)

{

    float r = fmod(x, y);

    return r < 0.f? r + y : r;

}

uint32_t integral_phase(float phi)

{

    return phi / (float)TAU * 0x1p32f;

}

float float_phase(uint32_t phi_i)

{

    return (float)phi_i * (float)TAU * 0x1p-32f;

}



int main(int argc, const char *argv)

{

    // Phases, in radians.

    float alpha = TAU/12., beta = atan(2.f);

    uint32_t alpha_i = integral_phase(alpha), beta_i = integral_phase(beta);



    // Phase calculations in radians and floating point:

    float gamma = fmodf_rd(5.f*alpha - 7.f*beta, (float)TAU);

    // The same phase calculation in integral phase. Note there is no

    // mod operation at all.

    uint32_t gamma_i = 5*alpha_i - 7*beta_i;





    printf("difference = %.9gn", (double)(float_phase(gamma_i) - gamma));

    return 0;

}

Epilogue: An M. Night Shyamalan twist.

--- mod.cpp-    2018-11-20 03:19:11.091296221 -0500

+++ mod.cpp 2018-11-20 03:39:39.529689861 -0500

@@ -1,7 +1,21 @@

-#include <iostream>

+#include <stdio.h>

+#include <time.h>

+#include <assert.h>

+

 #include <cstring>

 #include <cmath>

-#include <chrono>

+

+double timespec_to_d(struct timespec *ts)

+{

+    return (double)ts->tv_sec + 1e-9*(double)ts->tv_nsec;

+}

+double dtime(void)

+{

+    struct timespec ts;

+    int res = clock_gettime(CLOCK_MONOTONIC, &ts);

+    assert(res == 0);

+    return timespec_to_d(&ts);

+}



 const int voiceSize = 16;

 const int bufferSize = 256;

@@ -154,7 +168,7 @@

     // audio host call

     int numProcessing = 1024 * 50;

     int counterProcessing = 0;

-    std::chrono::high_resolution_clock::time_point pStart = std::chrono::high_resolution_clock::now();

+    double t0 = dtime();

     while (counterProcessing++ < numProcessing) {

         int blockSize = 256;

         myPlugin.ProcessDoubleReplace(blockSize, bufferLeft, bufferRight);

@@ -162,6 +176,8 @@

         // do somethings with buffer



     }

-    std::chrono::high_resolution_clock::time_point pEnd = std::chrono::high_resolution_clock::now();

-    std::cout << "execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(pEnd - pStart).count() << " ms" << std::endl;

+    double dt_busy = (dtime() - t0)/numProcessing/bufferSize,

+           dt_total = 1./sampleRate;

+    printf("CPU = %.6g %%n", dt_busy/dt_total);

+    return 0;

 }

aaaaaaand this is the output:

% g++ -o mod mod.cpp

% ./mod                 

CPU = 0.105791 %

%

When running as a plugin with realtime user input streams, your code will use .1% CPU. It was your benchmark the entire time.

edited yesterday

answered 2 days ago

enigmaticPhysicist

1513

New contributor

edited yesterday

answered 2 days ago

enigmaticPhysicist

1513

New contributor

answered 2 days ago

enigmaticPhysicist

1513

answered 2 days ago

enigmaticPhysicist

1513

New contributor

enigmaticPhysicist is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.

Frequency offset, when set to 0, doesn't add offset, so pitch is perfectly tuned in octaves.
– markzzz
yesterday

Can you give to me an example with simd sin and vectorization? I've no idea how to use it.
– markzzz
yesterday

Yeah, when set to zero. For every other value, though, lol
– enigmaticPhysicist
yesterday

for electronic music and "kickdrum synth" is a good tweak :)
– markzzz
yesterday

also: why on code sample 0 do you use "pow" instead of exp? Its way slower...
– markzzz
yesterday

|
show 3 more comments

Frequency offset, when set to 0, doesn't add offset, so pitch is perfectly tuned in octaves.
– markzzz
yesterday

Can you give to me an example with simd sin and vectorization? I've no idea how to use it.
– markzzz
yesterday

Yeah, when set to zero. For every other value, though, lol
– enigmaticPhysicist
yesterday

for electronic music and "kickdrum synth" is a good tweak :)
– markzzz
yesterday

also: why on code sample 0 do you use "pow" instead of exp? Its way slower...
– markzzz
yesterday

Frequency offset, when set to 0, doesn't add offset, so pitch is perfectly tuned in octaves.
– markzzz
yesterday

Can you give to me an example with simd sin and vectorization? I've no idea how to use it.
– markzzz
yesterday

Yeah, when set to zero. For every other value, though, lol
– enigmaticPhysicist
yesterday

for electronic music and "kickdrum synth" is a good tweak :)
– markzzz
yesterday

also: why on code sample 0 do you use "pow" instead of exp? Its way slower...
– markzzz
yesterday

|
show 3 more comments

up vote
4
down vote

General overview

I found this hard to follow. The logic is scattered across several coupled classes without a clear idea of a specific single responsibility for each class.

Specific issues

Variable-length arrays are not standard C++. Fix this by making voiceSize and bufferSize compile-time constants, aka constexpr (rather than plain const).

Prefer initialisation to assignment. In Param, for example, we could make mMin, mMax and mRange const if we initialize:

    Param(double min, double max)

        : mMin{min},

          // mMax was never used, so don't store it

          mRange{max - min}

    {

    }



    // make them public, so we don't need `get_min()` etc.

    const double mMin, mRange;

The uninitialised pModValues pointer is especially dangerous here.

Param pGain, pOffset, pPitch;



Oscillator()

    : pGain{0.0, 1.0},

      pOffset{-900.0, 900.0},

      pPitch{-48.0, 48.0}

{ /* ... */ }



// destructor is now defaulted

    std::fill_n(bufferLeft, blockSize, 0);

    std::fill_n(bufferRight, blockSize, 0);

Declare main() with no arguments when not intending to process command-line options.

If you can, make sure your compiler targets the particular hardware you'll be using (for me the speed more than doubles when I add -O3 -march=native to my compilation command).

edited 2 days ago

answered 2 days ago

Toby Speight

22.2k536108

BOUNDED: What do you mean "evalutate its arguments more than once"?
– markzzz
2 days ago

BOUNDED expands x three times. That's misleading if x is an expression with side-effects (e.g. *p++) - it's very different from a function call. Similarly for the other arguments that appear multiple times in the expansion. That's why we use all-caps for macro names, but it's better to avoid the problem by using a function instead. There's usually no cost to that in C++.
– Toby Speight
2 days ago

I see. anyway, I've applied most of your suggestions, nothing change :) Compiler targets its the max I can (O2, or Ox in MSVC). Dunno its that!
– markzzz
2 days ago

The logic of the program is: having a DLL (MyPlugin), call the ProcessDoubleReplace from a Host that load that DLL. The plugin need to take each param value and sum modulations values (which change across time). Once processed, I must apply these params (3 in this case: gain, pitch, offset) and produce audio (sine wave in this case). That's all, considering it process per voices :) Note: left and right is because signal is stereo, and in the future I could process them separately).
– markzzz
2 days ago

add a comment |

up vote
4
down vote

General overview

I found this hard to follow. The logic is scattered across several coupled classes without a clear idea of a specific single responsibility for each class.

Specific issues

Variable-length arrays are not standard C++. Fix this by making voiceSize and bufferSize compile-time constants, aka constexpr (rather than plain const).

Prefer initialisation to assignment. In Param, for example, we could make mMin, mMax and mRange const if we initialize:

    Param(double min, double max)

        : mMin{min},

          // mMax was never used, so don't store it

          mRange{max - min}

    {

    }



    // make them public, so we don't need `get_min()` etc.

    const double mMin, mRange;

The uninitialised pModValues pointer is especially dangerous here.

Param pGain, pOffset, pPitch;



Oscillator()

    : pGain{0.0, 1.0},

      pOffset{-900.0, 900.0},

      pPitch{-48.0, 48.0}

{ /* ... */ }



// destructor is now defaulted

    std::fill_n(bufferLeft, blockSize, 0);

    std::fill_n(bufferRight, blockSize, 0);

Declare main() with no arguments when not intending to process command-line options.

If you can, make sure your compiler targets the particular hardware you'll be using (for me the speed more than doubles when I add -O3 -march=native to my compilation command).

edited 2 days ago

answered 2 days ago

Toby Speight

22.2k536108

BOUNDED: What do you mean "evalutate its arguments more than once"?
– markzzz
2 days ago

BOUNDED expands x three times. That's misleading if x is an expression with side-effects (e.g. *p++) - it's very different from a function call. Similarly for the other arguments that appear multiple times in the expansion. That's why we use all-caps for macro names, but it's better to avoid the problem by using a function instead. There's usually no cost to that in C++.
– Toby Speight
2 days ago

I see. anyway, I've applied most of your suggestions, nothing change :) Compiler targets its the max I can (O2, or Ox in MSVC). Dunno its that!
– markzzz
2 days ago

The logic of the program is: having a DLL (MyPlugin), call the ProcessDoubleReplace from a Host that load that DLL. The plugin need to take each param value and sum modulations values (which change across time). Once processed, I must apply these params (3 in this case: gain, pitch, offset) and produce audio (sine wave in this case). That's all, considering it process per voices :) Note: left and right is because signal is stereo, and in the future I could process them separately).
– markzzz
2 days ago

add a comment |

up vote
4
down vote

General overview

I found this hard to follow. The logic is scattered across several coupled classes without a clear idea of a specific single responsibility for each class.

Specific issues

Variable-length arrays are not standard C++. Fix this by making voiceSize and bufferSize compile-time constants, aka constexpr (rather than plain const).

Prefer initialisation to assignment. In Param, for example, we could make mMin, mMax and mRange const if we initialize:

    Param(double min, double max)

        : mMin{min},

          // mMax was never used, so don't store it

          mRange{max - min}

    {

    }



    // make them public, so we don't need `get_min()` etc.

    const double mMin, mRange;

The uninitialised pModValues pointer is especially dangerous here.

Param pGain, pOffset, pPitch;



Oscillator()

    : pGain{0.0, 1.0},

      pOffset{-900.0, 900.0},

      pPitch{-48.0, 48.0}

{ /* ... */ }



// destructor is now defaulted

    std::fill_n(bufferLeft, blockSize, 0);

    std::fill_n(bufferRight, blockSize, 0);

Declare main() with no arguments when not intending to process command-line options.

If you can, make sure your compiler targets the particular hardware you'll be using (for me the speed more than doubles when I add -O3 -march=native to my compilation command).

edited 2 days ago

answered 2 days ago

Toby Speight

22.2k536108

General overview

I found this hard to follow. The logic is scattered across several coupled classes without a clear idea of a specific single responsibility for each class.

Specific issues

Variable-length arrays are not standard C++. Fix this by making voiceSize and bufferSize compile-time constants, aka constexpr (rather than plain const).

Prefer initialisation to assignment. In Param, for example, we could make mMin, mMax and mRange const if we initialize:

    Param(double min, double max)

        : mMin{min},

          // mMax was never used, so don't store it

          mRange{max - min}

    {

    }



    // make them public, so we don't need `get_min()` etc.

    const double mMin, mRange;

The uninitialised pModValues pointer is especially dangerous here.

Param pGain, pOffset, pPitch;



Oscillator()

    : pGain{0.0, 1.0},

      pOffset{-900.0, 900.0},

      pPitch{-48.0, 48.0}

{ /* ... */ }



// destructor is now defaulted

    std::fill_n(bufferLeft, blockSize, 0);

    std::fill_n(bufferRight, blockSize, 0);

Declare main() with no arguments when not intending to process command-line options.

If you can, make sure your compiler targets the particular hardware you'll be using (for me the speed more than doubles when I add -O3 -march=native to my compilation command).

edited 2 days ago

answered 2 days ago

Toby Speight

22.2k536108

edited 2 days ago

answered 2 days ago

Toby Speight

22.2k536108

answered 2 days ago

Toby Speight

22.2k536108

answered 2 days ago

Toby Speight

22.2k536108

BOUNDED: What do you mean "evalutate its arguments more than once"?
– markzzz
2 days ago

BOUNDED expands x three times. That's misleading if x is an expression with side-effects (e.g. *p++) - it's very different from a function call. Similarly for the other arguments that appear multiple times in the expansion. That's why we use all-caps for macro names, but it's better to avoid the problem by using a function instead. There's usually no cost to that in C++.
– Toby Speight
2 days ago

I see. anyway, I've applied most of your suggestions, nothing change :) Compiler targets its the max I can (O2, or Ox in MSVC). Dunno its that!
– markzzz
2 days ago

The logic of the program is: having a DLL (MyPlugin), call the ProcessDoubleReplace from a Host that load that DLL. The plugin need to take each param value and sum modulations values (which change across time). Once processed, I must apply these params (3 in this case: gain, pitch, offset) and produce audio (sine wave in this case). That's all, considering it process per voices :) Note: left and right is because signal is stereo, and in the future I could process them separately).
– markzzz
2 days ago

add a comment |

BOUNDED: What do you mean "evalutate its arguments more than once"?
– markzzz
2 days ago

BOUNDED expands x three times. That's misleading if x is an expression with side-effects (e.g. *p++) - it's very different from a function call. Similarly for the other arguments that appear multiple times in the expansion. That's why we use all-caps for macro names, but it's better to avoid the problem by using a function instead. There's usually no cost to that in C++.
– Toby Speight
2 days ago

I see. anyway, I've applied most of your suggestions, nothing change :) Compiler targets its the max I can (O2, or Ox in MSVC). Dunno its that!
– markzzz
2 days ago

The logic of the program is: having a DLL (MyPlugin), call the ProcessDoubleReplace from a Host that load that DLL. The plugin need to take each param value and sum modulations values (which change across time). Once processed, I must apply these params (3 in this case: gain, pitch, offset) and produce audio (sine wave in this case). That's all, considering it process per voices :) Note: left and right is because signal is stereo, and in the future I could process them separately).
– markzzz
2 days ago

BOUNDED: What do you mean "evalutate its arguments more than once"?
– markzzz
2 days ago

BOUNDED expands x three times. That's misleading if x is an expression with side-effects (e.g. *p++) - it's very different from a function call. Similarly for the other arguments that appear multiple times in the expansion. That's why we use all-caps for macro names, but it's better to avoid the problem by using a function instead. There's usually no cost to that in C++.
– Toby Speight
2 days ago

I see. anyway, I've applied most of your suggestions, nothing change :) Compiler targets its the max I can (O2, or Ox in MSVC). Dunno its that!
– markzzz
2 days ago

The logic of the program is: having a DLL (MyPlugin), call the ProcessDoubleReplace from a Host that load that DLL. The plugin need to take each param value and sum modulations values (which change across time). Once processed, I must apply these params (3 in this case: gain, pitch, offset) and produce audio (sine wave in this case). That's all, considering it process per voices :) Note: left and right is because signal is stereo, and in the future I could process them separately).
– markzzz
2 days ago

add a comment |

up vote
1
down vote

$$12 log_2 frac {130.81278} {55} approx 15$$

$$f = 55 times 2^{15/12} = 110 times 2^{1/4}$$

$$f approx 130.812,782,650,299...$$

In C,

const double noteFrequency = 110*pow(2, 0.25);

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

add a comment |

up vote
1
down vote

$$12 log_2 frac {130.81278} {55} approx 15$$

$$f = 55 times 2^{15/12} = 110 times 2^{1/4}$$

$$f approx 130.812,782,650,299...$$

In C,

const double noteFrequency = 110*pow(2, 0.25);

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

add a comment |

up vote
1
down vote

$$12 log_2 frac {130.81278} {55} approx 15$$

$$f = 55 times 2^{15/12} = 110 times 2^{1/4}$$

$$f approx 130.812,782,650,299...$$

In C,

const double noteFrequency = 110*pow(2, 0.25);

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

$$12 log_2 frac {130.81278} {55} approx 15$$

$$f = 55 times 2^{15/12} = 110 times 2^{1/4}$$

$$f approx 130.812,782,650,299...$$

In C,

const double noteFrequency = 110*pow(2, 0.25);

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

edited 2 days ago

answered 2 days ago

Reinderien

1,272516

answered 2 days ago

Reinderien

1,272516

answered 2 days ago

Reinderien

1,272516

add a comment |

draft saved

draft discarded

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference, If you need detailed information, please check here

eYS jBXZA3FPAJD3kPo4PYkm7h 7PVvkxP5,NnY3 r4Ci,ZDXoOxMpoVpUIJCS75LeyXodobnZzZZTGuyY

Modulating an oscillator by gain, pitch, and offset

4 Answers 4

counting flops

small loops

vectorising

float

integral phase

Epilogue: An M. Night Shyamalan twist.

General overview

Specific issues

Your Answer

Sign up or log in

Post as a guest

Post as a guest

4 Answers 4

4 Answers 4

counting flops

small loops

vectorising

float

integral phase

Epilogue: An M. Night Shyamalan twist.

counting flops

small loops

vectorising

float

integral phase

Epilogue: An M. Night Shyamalan twist.

counting flops

small loops

vectorising

float

integral phase

Epilogue: An M. Night Shyamalan twist.

counting flops

small loops

vectorising

float

integral phase

Epilogue: An M. Night Shyamalan twist.

General overview

Specific issues

General overview

Specific issues

General overview

Specific issues

General overview

Specific issues

Sign up or log in

Post as a guest

Post as a guest

Sign up or log in

Post as a guest

Sign up or log in

Post as a guest

Sign up or log in

Post as a guest

Popular posts from this blog

Orthoptera

Ellipse (mathématiques)

Quarter-circle Tiles

4 Answers
4

4 Answers
4

4 Answers
4