Commit 7deda0a

added GetSkipFrames() / SetSkipFrames() to actionNet

dusty-nv committed Mar 7, 2023 (1 parent: aa268d6)

Showing 5 changed files with 202 additions and 56 deletions.
52 changes: 39 additions & 13 deletions c/actionNet.cpp
@@ -32,14 +32,19 @@
// constructor
actionNet::actionNet() : tensorNet()
{
- mNumClasses = 0;
- mNumFrames = 0;
+ mThreshold = 0.01f;
+ mNumClasses = 0;
+ mNumFrames = 0;
+ mSkipFrames = 1;
+ mFramesSkipped = 10000; // so the very first frame always gets processed

mInputBuffers[0] = NULL;
mInputBuffers[1] = NULL;

mCurrentInputBuffer = 0;
mCurrentFrameIndex = 0;
mLastClassification = 0;
mLastConfidence = 0.0f;
}


@@ -118,6 +123,10 @@ actionNet* actionNet::Create( const commandLine& cmdLine )
if( cmdLine.GetFlag("profile") )
net->EnableLayerProfiler();

// parse additional arguments
net->SetThreshold(cmdLine.GetFloat("threshold", net->GetThreshold()));
net->SetSkipFrames(cmdLine.GetUnsignedInt("skip_frames", net->GetSkipFrames()));

return net;
}

@@ -285,7 +294,7 @@ int actionNet::Classify( void* image, uint32_t width, uint32_t height, imageForm
if( !image || width == 0 || height == 0 )
{
LogError(LOG_TRT "actionNet::Classify( 0x%p, %u, %u ) -> invalid parameters\n", image, width, height);
- return -1;
+ return -2;
}

if( !imageFormatIsRGB(format) )
@@ -300,18 +309,32 @@
return false;
}

// skip frames as needed
if( mFramesSkipped < mSkipFrames )
{
//LogVerbose(LOG_TRT "actionNet::Classify() -- skipping frame (framesSkipped=%u skipFrames=%u)\n", mFramesSkipped, mSkipFrames);

if( confidence != NULL )
*confidence = mLastConfidence;

mFramesSkipped++;
return mLastClassification;
}

mFramesSkipped = 0;

// apply input pre-processing
if( !preProcess(image, width, height, format) )
{
LogError(LOG_TRT "actionNet::Classify() -- tensor pre-processing failed\n");
- return -1;
+ return -2;
}

// process with TRT
PROFILER_BEGIN(PROFILER_NETWORK);

if( !ProcessNetwork() )
- return -1;
+ return -2;

PROFILER_END(PROFILER_NETWORK);
PROFILER_BEGIN(PROFILER_POSTPROCESS);
@@ -321,26 +344,29 @@ int actionNet::Classify( void* image, uint32_t width, uint32_t height, imageForm

// determine the maximum class
int classIndex = -1;
- float classMax = -1.0f;
+ float classMax = 0.0f;

for( size_t n=0; n < mNumClasses; n++ )
{
- const float value = mOutputs[0].CPU[n];
+ const float conf = mOutputs[0].CPU[n];

- if( value >= 0.01f )
- LogDebug("class %04zu - %f (%s)\n", n, value, mClassDesc[n].c_str());
+ if( conf < mThreshold )
+ continue;

- if( value > classMax )
+ if( conf > classMax )
{
classIndex = n;
- classMax = value;
+ classMax = conf;
}
}

PROFILER_END(PROFILER_POSTPROCESS);

if( confidence != NULL )
*confidence = classMax;

- //printf("\nmaximum class: #%i (%f) (%s)\n", classIndex, classMax, mClassDesc[classIndex].c_str());
- PROFILER_END(PROFILER_POSTPROCESS);
+ mLastConfidence = classMax;
+ mLastClassification = classIndex;

return classIndex;
}
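With this change, Classify() distinguishes its outcomes: an index >= 0 is a real classification, -1 means no class cleared the confidence threshold, and errors now return -2 (the Python binding below tests img_class < -1 accordingly); on skipped frames the previous result and confidence are replayed. A caller-side sketch of the new convention (the capture loop and the net/image variables are assumed):

    float confidence = 0.0f;
    const int cls = net->Classify(image, width, height, &confidence);

    if( cls >= 0 )
        LogVerbose("action class #%i (%s) at %.2f confidence\n", cls, net->GetClassDesc(cls), confidence);
    else if( cls == -1 )
        LogVerbose("no action above the confidence threshold\n");   // valid frame, low confidence
    else
        LogError("actionNet::Classify() encountered an error\n");   // cls <= -2 signals a failure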
61 changes: 55 additions & 6 deletions c/actionNet.h
@@ -57,6 +57,8 @@
" --labels=LABELS path to text file containing the labels for each class\n" \
" --input-blob=INPUT name of the input layer (default is '" ACTIONNET_DEFAULT_INPUT "')\n" \
" --output-blob=OUTPUT name of the output layer (default is '" ACTIONNET_DEFAULT_OUTPUT "')\n" \
" --threshold=CONF minimum confidence threshold for classification (default is 0.01)\n" \
" --skip-frames=SKIP how many frames to skip between classifications (default is 1)\n" \
" --profile enable layer profiling in TensorRT\n\n"


@@ -112,7 +114,13 @@ class actionNet : public tensorNet
virtual ~actionNet();

/**
- * Append an image to the sequence and classify the action.
+ * Append an image to the sequence and classify the action, returning the index of the top class.
+ * Either the class with the maximum confidence will be returned, or -1 if no class meets
+ * the threshold set by SetThreshold() or the `--threshold` command-line argument.
+ *
+ * If this frame was skipped due to SetSkipFrames() being used, then the last frame's results will
+ * be returned. By default, every other frame is skipped in order to lengthen the action's window.
+ *
* @param image input image in CUDA device memory.
* @param width width of the input image in pixels.
* @param height height of the input image in pixels.
@@ -122,7 +130,13 @@
template<typename T> int Classify( T* image, uint32_t width, uint32_t height, float* confidence=NULL ) { return Classify((void*)image, width, height, imageFormatFromType<T>(), confidence); }

/**
- * Append an image to the sequence and classify the action.
+ * Append an image to the sequence and classify the action, returning the index of the top class.
+ * Either the class with the maximum confidence will be returned, or -1 if no class meets
+ * the threshold set by SetThreshold() or the `--threshold` command-line argument.
+ *
+ * If this frame was skipped due to SetSkipFrames() being used, then the last frame's results will
+ * be returned. By default, every other frame is skipped in order to lengthen the action's window.
+ *
* @param image input image in CUDA device memory.
* @param width width of the input image in pixels.
* @param height height of the input image in pixels.
@@ -139,18 +153,47 @@
/**
* Retrieve the description of a particular class.
*/
- inline const char* GetClassLabel( uint32_t index ) const { return mClassDesc[index].c_str(); }
+ inline const char* GetClassLabel( int index ) const { return GetClassDesc(index); }

/**
* Retrieve the description of a particular class.
*/
- inline const char* GetClassDesc( uint32_t index ) const { return mClassDesc[index].c_str(); }
+ inline const char* GetClassDesc( int index ) const { return index >= 0 ? mClassDesc[index].c_str() : "none"; }

/**
* Retrieve the path to the file containing the class descriptions.
*/
inline const char* GetClassPath() const { return mClassPath.c_str(); }

/**
* Return the confidence threshold used for classification.
*/
inline float GetThreshold() const { return mThreshold; }

/**
* Set the confidence threshold used for classification.
* Classes with a confidence below this threshold will be ignored.
* @note this can also be set using the `--threshold=N` command-line argument.
*/
inline void SetThreshold( float threshold ) { mThreshold = threshold; }

/**
* Return the number of frames that are skipped in between classifications.
* @see SetSkipFrames for more info.
*/
inline uint32_t GetSkipFrames() const { return mSkipFrames; }

/**
* Set the number of frames that are skipped in between classifications.
* Since actionNet operates on video sequences, it's often helpful to skip frames
* to lengthen the window of time the model gets to 'see' an action being performed.
*
* The default setting is 1, where every other frame is skipped.
* Setting this to 0 will disable it, and every frame will be processed.
* When a frame is skipped, the classification results from the last frame are returned.
*/
inline void SetSkipFrames( uint32_t frames ) { mSkipFrames = frames; }

protected:
actionNet();

@@ -160,11 +203,17 @@
float* mInputBuffers[2];

uint32_t mNumClasses;
- uint32_t mNumFrames;
+ uint32_t mNumFrames;      // number of frames fed into the model
+ uint32_t mSkipFrames;     // number of frames to skip when processing
+ uint32_t mFramesSkipped;  // frame skip counter

uint32_t mCurrentInputBuffer;
uint32_t mCurrentFrameIndex;


float mThreshold;
float mLastConfidence;
int mLastClassification;

std::vector<std::string> mClassDesc;

std::string mClassPath;
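Taken together, the new accessors let an application trade inference rate against the temporal window the model sees, without the hand-rolled skip logic the example below used to carry. A minimal configuration sketch, assuming a commandLine object as in the example app (the 0.25f threshold and skip count of 2 are illustrative values, not the defaults):

    actionNet* net = actionNet::Create(cmdLine);

    if( !net )
        return 1;

    net->SetThreshold(0.25f);   // ignore classes below 25% confidence (default is 0.01)
    net->SetSkipFrames(2);      // run the network on every 3rd frame only

    // skipped frames transparently return the last classification,
    // so the processing loop can treat every frame uniformly
    LogVerbose("skip-frames: %u  threshold: %g\n", net->GetSkipFrames(), net->GetThreshold());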
27 changes: 8 additions & 19 deletions examples/actionnet/actionnet.cpp
@@ -49,7 +49,6 @@ int usage()
printf("optional arguments:\n");
printf(" --help show this help message and exit\n");
printf(" --network=NETWORK pre-trained model to load (see below for options)\n");
printf(" --skip-frames=N how many frames to skip between classifications (default: 2)\n");
printf("positional arguments:\n");
printf(" input_URI resource URI of input stream (see videoSource below)\n");
printf(" output_URI resource URI of output stream (see videoOutput below)\n\n");
@@ -127,11 +126,6 @@ int main( int argc, char** argv )
return 1;
}

- const uint32_t skip_frames = cmdLine.GetInt("skip-frames", 2);
-
- uint32_t skipped = 0;
- float confidence = 0.0f;
- int class_id = 0;

/*
* processing loop
@@ -151,19 +145,14 @@
continue;
}

- // run inference every N frames
- skipped += 1;
-
- if( skipped % skip_frames == 0 )
- {
- class_id = net->Classify(image, input->GetWidth(), input->GetHeight(), &confidence);
- skipped = 0;
-
- if( class_id >= 0 )
- LogVerbose("actionnet: %2.5f%% class #%i (%s)\n", confidence * 100.0f, class_id, net->GetClassDesc(class_id));
- else
- LogError("actionnet: failed to classify frame\n");
- }
+ // classify the action sequence
+ float confidence = 0.0f;
+ const int class_id = net->Classify(image, input->GetWidth(), input->GetHeight(), &confidence);
+
+ if( class_id >= 0 )
+ LogVerbose("actionnet: %2.5f%% class #%i (%s)\n", confidence * 100.0f, class_id, net->GetClassDesc(class_id));
+ else
+ LogError("actionnet: failed to classify frame\n");

// overlay the results
if( class_id >= 0 )
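Since the frame skipping now lives inside actionNet and is parsed in actionNet::Create(), the example no longer needs its own --skip-frames handling; per the updated usage string, the option can be passed on the command line instead. A hypothetical invocation (the camera URI is a placeholder):

    actionnet --skip-frames=2 --threshold=0.1 /dev/video0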
99 changes: 95 additions & 4 deletions python/bindings/PyActionNet.cpp
@@ -195,7 +195,7 @@ static PyObject* PyActionNet_Classify( PyActionNet_Object* self, PyObject* args,
img_class = self->net->Classify(ptr, width, height, format, &confidence);
Py_END_ALLOW_THREADS

- if( img_class < 0 )
+ if( img_class < -1 )
{
PyErr_SetString(PyExc_Exception, LOG_PY_INFERENCE "actionNet.Classify() encountered an error classifying the image");
return NULL;
@@ -204,9 +204,7 @@
// create output objects
PyObject* pyClass = PYLONG_FROM_LONG(img_class);
PyObject* pyConf = PyFloat_FromDouble(confidence);

- // return tuple
- PyObject* tuple = PyTuple_Pack(2, pyClass, pyConf);
+ PyObject* tuple = PyTuple_Pack(2, pyClass, pyConf);

Py_DECREF(pyClass);
Py_DECREF(pyConf);
@@ -283,6 +281,95 @@
}


#define DOC_GET_THRESHOLD "Return the minimum confidence threshold for classification.\n\n" \
"Parameters: (none)\n\n" \
"Returns:\n" \
" (float) -- the confidence threshold for classification"

// GetThreshold
static PyObject* PyActionNet_GetThreshold( PyActionNet_Object* self )
{
if( !self || !self->net )
{
PyErr_SetString(PyExc_Exception, LOG_PY_INFERENCE "actionNet invalid object instance");
return NULL;
}

return PyFloat_FromDouble(self->net->GetThreshold());
}


#define DOC_SET_THRESHOLD "Set the minimum confidence threshold for classification.\n\n" \
"Parameters:\n" \
" (float) -- confidence threshold\n\n" \
"Returns: (none)"

// SetThreshold
PyObject* PyActionNet_SetThreshold( PyActionNet_Object* self, PyObject* args )
{
if( !self || !self->net )
{
PyErr_SetString(PyExc_Exception, LOG_PY_INFERENCE "actionNet invalid object instance");
return NULL;
}

float threshold = 0.0f;

if( !PyArg_ParseTuple(args, "f", &threshold) )
return NULL;

self->net->SetThreshold(threshold);
Py_RETURN_NONE;
}


#define DOC_GET_SKIP_FRAMES "Return the number of frames that are skipped in between classifications.\n\n" \
"Parameters: (none)\n\n" \
"Returns:\n" \
" (int) -- the number of frames skipped in between classifications"

// GetSkipFrames
static PyObject* PyActionNet_GetSkipFrames( PyActionNet_Object* self )
{
if( !self || !self->net )
{
PyErr_SetString(PyExc_Exception, LOG_PY_INFERENCE "actionNet invalid object instance");
return NULL;
}

return PYLONG_FROM_UNSIGNED_LONG(self->net->GetSkipFrames());
}


#define DOC_SET_SKIP_FRAMES "Set the number of frames that are skipped in between classifications.\n" \
"Since actionNet operates on video sequences, it's often helpful to skip frames\n" \
"to lengthen the window of time the model gets to 'see' an action being performed.\n\n" \
"The default setting is 1, where every other frame is skipped.\n" \
"Setting this to 0 will disable it, and every frame will be processed.\n" \
"When a frame is skipped, the classification results from the last frame are returned.\n\n" \
"Parameters:\n" \
" (int) -- the number of frames skipped in between classifications\n\n" \
"Returns: (none)"

// SetSkipFrames
PyObject* PyActionNet_SetSkipFrames( PyActionNet_Object* self, PyObject* args )
{
if( !self || !self->net )
{
PyErr_SetString(PyExc_Exception, LOG_PY_INFERENCE "actionNet invalid object instance");
return NULL;
}

int skipFrames = 0;

if( !PyArg_ParseTuple(args, "i", &skipFrames) )
return NULL;

self->net->SetSkipFrames(skipFrames);
Py_RETURN_NONE;
}


#define DOC_USAGE_STRING "Return the command line parameters accepted by __init__()\n\n" \
"Parameters: (none)\n\n" \
"Returns:\n" \
@@ -307,6 +394,10 @@ static PyMethodDef pyActionNet_Methods[] =
{ "GetNumClasses", (PyCFunction)PyActionNet_GetNumClasses, METH_NOARGS, DOC_GET_NUM_CLASSES},
{ "GetClassLabel", (PyCFunction)PyActionNet_GetClassDesc, METH_VARARGS, DOC_GET_CLASS_DESC},
{ "GetClassDesc", (PyCFunction)PyActionNet_GetClassDesc, METH_VARARGS, DOC_GET_CLASS_DESC},
{ "GetThreshold", (PyCFunction)PyActionNet_GetThreshold, METH_NOARGS, DOC_GET_THRESHOLD},
{ "SetThreshold", (PyCFunction)PyActionNet_SetThreshold, METH_VARARGS, DOC_SET_THRESHOLD},
{ "GetSkipFrames", (PyCFunction)PyActionNet_GetSkipFrames, METH_NOARGS, DOC_GET_SKIP_FRAMES},
{ "SetSkipFrames", (PyCFunction)PyActionNet_SetSkipFrames, METH_VARARGS, DOC_SET_SKIP_FRAMES},
{ "Usage", (PyCFunction)PyActionNet_Usage, METH_NOARGS|METH_STATIC, DOC_USAGE_STRING},
{NULL} /* Sentinel */
};