diff --git a/NeoML/include/NeoML/Dnn/Layers/FullyConnectedSourceLayer.h b/NeoML/include/NeoML/Dnn/Layers/FullyConnectedSourceLayer.h index 4d3e3e1bf..59750ea42 100644 --- a/NeoML/include/NeoML/Dnn/Layers/FullyConnectedSourceLayer.h +++ b/NeoML/include/NeoML/Dnn/Layers/FullyConnectedSourceLayer.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -78,7 +78,9 @@ class NEOML_API CFullyConnectedSourceLayer : public CFullyConnectedLayer { bool isBatchLoaded( int index ) const; }; -NEOML_API CLayerWrapper FullyConnectedSource( - TBlobType labelType, int batchSize, int maxBatchCount, IProblem* problem ); +// Creates CFullyConnectedSourceLayer with the name +NEOML_API CFullyConnectedSourceLayer* FullyConnectedSource( CDnn& dnn, const char* name, + TBlobType labelType, int batchSize, int maxBatchCount, IProblem* problem, + int numberOfElements = 1, bool isZeroFreeTerm = false ); } // namespace NeoML diff --git a/NeoML/include/NeoML/Dnn/Layers/ModelWrapperLayer.h b/NeoML/include/NeoML/Dnn/Layers/ModelWrapperLayer.h index a8fb814a0..c6f89d015 100644 --- a/NeoML/include/NeoML/Dnn/Layers/ModelWrapperLayer.h +++ b/NeoML/include/NeoML/Dnn/Layers/ModelWrapperLayer.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,31 +27,46 @@ class CSourceLayer; class CSinkLayer; class CDnnTrainingModelWrapper; +struct IShuffledBatchGenerator { + virtual ~IShuffledBatchGenerator() = default; + + virtual const CArray& GenerateBatchIndexes( int batchSize, bool batchShuffled ) = 0; + virtual bool HasUnseenElements() const = 0; + virtual void DeleteUnseenElement( int index ) = 0; +}; + +//--------------------------------------------------------------------------------------------------------------------- + // CProblemSourceLayer is a wrapper over the IProblem interface. // On each iteration, it passes BatchSize vectors into the network for processing. class NEOML_API CProblemSourceLayer : public CBaseLayer { NEOML_DNN_LAYER( CProblemSourceLayer ) public: - explicit CProblemSourceLayer( IMathEngine& mathEngine ); + explicit CProblemSourceLayer( IMathEngine& mathEngine ) : + CBaseLayer( mathEngine, "CCnnProblemSourceLayer", /*isLearnable*/false ) {} void Serialize( CArchive& archive ) override; int GetBatchSize() const { return batchSize; } - void SetBatchSize(int _batchSize); + void SetBatchSize( int batchSize ); // The filler for empty values that are not present in a sparse vector float GetEmptyFill() const { return emptyFill; } - void SetEmptyFill(float _emptyFill) { NeoAssert(GetDnn() == 0); emptyFill = _emptyFill; } + void SetEmptyFill( float _emptyFill ) { NeoAssert( GetDnn() == nullptr ); emptyFill = _emptyFill; } // You may only change the problem for the layer that is connected to a network // if the number of classes and the number of input vectors stay the same CPtr GetProblem() const { return problem; } - void SetProblem(const CPtr& _problem); + void SetProblem( const CPtr& problem, bool shuffle = false, unsigned seed = 42 ); // Retrieves and sets the data type for class labels TBlobType GetLabelType() const { return labelType; } void SetLabelType( TBlobType newLabelType ); + // Still not the end of an epoch + bool HasUnseenElements() const + { return ( shuffled && shuffled->HasUnseenElements() ) || nextProblemIndex < problem->GetVectorCount(); } + protected: ~CProblemSourceLayer() override = default; @@ -60,22 +75,31 @@ class NEOML_API CProblemSourceLayer : public CBaseLayer { void BackwardOnce() override; private: - float emptyFill; // the empty values filler (for values not represented in a sparse vector) - int batchSize; // the size of the batch passed to the network - int nextProblemIndex; // the index of the next element in the problem to be passed - CPtr problem; // the classification problem the network is solving - TBlobType labelType; // the data type for labels - CArray exchangeBufs[3]; + float emptyFill = 0; // the empty values filler (for values not represented in a sparse vector) + int batchSize = 1; // the size of the batch passed to the network + int nextProblemIndex = NotFound; // the index of the next element in the problem to be passed + TBlobType labelType = CT_Float; // the data type for labels + CPtr problem; // the classification problem the network is solving + CPtrOwner shuffled; // if a shuffled batch input + + enum { EB_Data, EB_Label, EB_Weight, EB_Count_ }; + CArray exchangeBufs[EB_Count_]{}; + + void fillExchangeBuffers( int shift, int index ); }; -/////////////////////////////////////////////////////////////////////////////////////////////////////// +// Creates CProblemSourceLayer with the name +NEOML_API CProblemSourceLayer* ProblemSource( CDnn& dnn, const char* name, + TBlobType labelType, int batchSize, const CPtr& problem, bool shuffle = false, unsigned seed = 42 ); + +//--------------------------------------------------------------------------------------------------------------------- // CDnnModelWrapper is the base class wrapping the trained neural network into the IModel interface class NEOML_API CDnnModelWrapper : public IModel { public: explicit CDnnModelWrapper(IMathEngine& mathEngine, unsigned int seed = 0xDEADFACE); - int GetClassCount() const override; + int GetClassCount() const override { return ClassCount; } bool Classify(const CFloatVectorDesc& data, CClassificationResult& result) const override; void Serialize(CArchive& archive) override; @@ -83,11 +107,11 @@ class NEOML_API CDnnModelWrapper : public IModel { int ClassCount; float SourceEmptyFill; mutable CRandom Random; - mutable CDnn Dnn; // the network - CPtr SourceLayer; // the reference to the source layer - CPtr SinkLayer; // the reference to the terminator layer - CPtr SourceBlob; // the source data blob - mutable CArray tempExp; // the temporary array for exponent values to calculate softmax + mutable CDnn Dnn; // the network + CPtr SourceLayer; // the reference to the source layer + CPtr SinkLayer; // the reference to the terminator layer + CPtr SourceBlob; // the source data blob + mutable CArray tempExp; // the temporary array for exponent values to calculate softmax static const char* const SourceLayerName; static const char* const SinkLayerName; @@ -101,7 +125,7 @@ class NEOML_API CDnnModelWrapper : public IModel { bool classify( CClassificationResult& result ) const; }; -/////////////////////////////////////////////////////////////////////////////////////////////////////// +//--------------------------------------------------------------------------------------------------------------------- // CDnnTrainingModelWrapper is the base class wrapping the neural network // into an ITrainingModel interface so the network can be trained using the Train method diff --git a/NeoML/include/NeoML/TraditionalML/Shuffler.h b/NeoML/include/NeoML/TraditionalML/Shuffler.h index 2b8bcf8f6..4b57fa940 100644 --- a/NeoML/include/NeoML/TraditionalML/Shuffler.h +++ b/NeoML/include/NeoML/TraditionalML/Shuffler.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ namespace NeoML { // The shuffler class // Uses the standard shuffling algorithm, not all at once but sequentially; as a result, the first N positions are only shuffled among themselves // For example, you can use it to get the random indices in an array -class NEOML_API CShuffler { +class NEOML_API CShuffler final { public: CShuffler( CRandom& _random, int count ); @@ -34,6 +34,10 @@ class NEOML_API CShuffler { int SetNext( int index ); // Finishes shuffling and returns all indices const CArray& GetAllIndices(); + // Is shuffling finished + bool IsFinished() const { return nextIndex == indices.Size(); } + // Reset state to use shuffler again for the same array + void Reset() { nextIndex = 0; } private: CRandom& random; diff --git a/NeoML/src/Dnn/Layers/FullyConnectedSourceLayer.cpp b/NeoML/src/Dnn/Layers/FullyConnectedSourceLayer.cpp index 2766fb917..b151f70e7 100644 --- a/NeoML/src/Dnn/Layers/FullyConnectedSourceLayer.cpp +++ b/NeoML/src/Dnn/Layers/FullyConnectedSourceLayer.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -310,15 +310,20 @@ bool CFullyConnectedSourceLayer::isBatchLoaded( int index ) const return ( batchFirstLoadedIndex <= index && index <= batchLastLoadedIndex ); } -CLayerWrapper FullyConnectedSource( TBlobType labelType, - int batchSize, int maxBatchCount, IProblem* problem ) +// Creates CFullyConnectedSourceLayer with the name +CFullyConnectedSourceLayer* FullyConnectedSource( CDnn& dnn, const char* name, + TBlobType labelType, int batchSize, int maxBatchCount, IProblem* problem, int numberOfElements, bool isZeroFreeTerm ) { - return CLayerWrapper( "FullyConnectedSource", [=, &problem]( CFullyConnectedSourceLayer* result ) { - result->SetLabelType( labelType ); - result->SetBatchSize( batchSize ); - result->SetMaxBatchCount( maxBatchCount ); - result->SetProblem( problem ); - } ); + CPtr result = new CFullyConnectedSourceLayer( dnn.GetMathEngine() ); + result->SetLabelType( labelType ); + result->SetBatchSize( batchSize ); + result->SetMaxBatchCount( maxBatchCount ); + result->SetProblem( problem ); + result->SetNumberOfElements( numberOfElements ); + result->SetZeroFreeTerm( isZeroFreeTerm ); + result->SetName( name ); + dnn.AddLayer( *result ); + return result; } } // namespace NeoML diff --git a/NeoML/src/Dnn/Layers/ModelWrapperLayer.cpp b/NeoML/src/Dnn/Layers/ModelWrapperLayer.cpp index 4ff19fbe0..2447f2708 100644 --- a/NeoML/src/Dnn/Layers/ModelWrapperLayer.cpp +++ b/NeoML/src/Dnn/Layers/ModelWrapperLayer.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,18 +21,163 @@ limitations under the License. #include #include #include +#include namespace NeoML { -CProblemSourceLayer::CProblemSourceLayer( IMathEngine& mathEngine ) : - CBaseLayer( mathEngine, "CCnnProblemSourceLayer", false ), - emptyFill(0), - batchSize(1), - nextProblemIndex(0), - labelType(CT_Float) +// Shuffles the elements array. +static void shuffle( CArray& elements, CRandom& random ) { + CShuffler indexGenerator( random, elements.Size() ); + CArray oldElements; + elements.CopyTo( oldElements ); + for( int i = 0; i < elements.Size(); ++i ) { + elements[i] = oldElements[indexGenerator.Next()]; + } +} + +//--------------------------------------------------------------------------------------------------------------------- + +// Shuffles the elements of an array and returns them one by one. +// Unlike CShuffler, it returns the elements of the array, not the indices of the elements, +// and also supports cyclicity (when the end of the sequence is reached, it will be shuffled again). +template +class CShuffledElements final { +public: + CShuffledElements( CArray&& _elements, CRandom& _random ) : + random( _random ), elements( std::move( _elements ) ), shuffler( random, elements.Size() ) {} + CShuffledElements( CShuffledElements&& ) = default; + + CShuffledElements& operator=( CShuffledElements&& ) = default; + + // The number of elements in the elements array. + int Size() const { return elements.Size(); } + // Generates the next element of the sequence. + T Next() { if( shuffler.IsFinished() ) { shuffler.Reset(); } return elements[shuffler.Next()]; } + +private: + CRandom& random; // The random generator + CArray elements; // The shuffled elements array + CShuffler shuffler; // The index shuffled generator +}; + +//--------------------------------------------------------------------------------------------------------------------- + +// Forms a batch in such a way that it contains the same number of positive and negative pairs, and +// all positive elements are collected from one class, and all negative examples are from different non-intersecting ones. +// For example, let there be 4 classes and batchSize = 12. +// The number of matchings in the batch is C_12^2 = 66, respectively, the number of positive and negative pairs is 33 each. +// We sample a random "positive" class from which we will collect positive elements, let's say this is class 0. +// Then we randomly sample 9 elements from class 0, because C_9^2 = 36 pairs are closest to 33 (for +// comparison, 8 elements give 28 pairs). The remaining 12 - 9 = 3 elements are taken each from a separate class - +// from the 1st, 2nd and 3rd classes, respectively. +// As a result, we obtained a batch, the matching elements of which give 36 positive and 30 negative pairs. +class CBalancedPairBatchGenerator : public IShuffledBatchGenerator { +public: + CBalancedPairBatchGenerator( const IProblem& problem, unsigned seed ); + + // IShuffledBatchGenerator + const CArray& GenerateBatchIndexes( int batchSize, bool batchShuffled ) override; + bool HasUnseenElements() const override { return !unseenElementsIndices.IsEmpty(); } + void DeleteUnseenElement( int index ) override { unseenElementsIndices.Delete( index ); } + +private: + CRandom random; + // The map of "label -> this label's elements indexes generator". + CMap> labelToIndexGenerator; + // The labels generator + CShuffledElements labelGenerator; + // Indexes of elements, which aren't in any batch, to detect epoch's end + CHashTable unseenElementsIndices; + // Return value of indices set + CArray batchIndexes; + + CArray getLabels( const IProblem& ); +}; + +CBalancedPairBatchGenerator::CBalancedPairBatchGenerator( const IProblem& problem, unsigned seed ) : + random( seed ), + labelGenerator( getLabels( problem ), random ) +{ + unseenElementsIndices.SetBufferSize( problem.GetVectorCount() ); + for( int i = 0; i < problem.GetVectorCount(); ++i ) { + unseenElementsIndices.Add( i ); + } } +CArray CBalancedPairBatchGenerator::getLabels( const IProblem& problem ) +{ + CMap> labelToIndexes; + for( int i = 0; i < problem.GetVectorCount(); ++i ) { + labelToIndexes.GetOrCreateValue( problem.GetClass( i ) ).Add( i ); + } + + CArray labelsUnique; + labelsUnique.SetBufferSize( labelToIndexes.Size() ); + for( auto& item : labelToIndexes ) { + labelToIndexGenerator.Add( item.Key, CShuffledElements( std::move( item.Value ), random ) ); + labelsUnique.Add( item.Key ); + } + return labelsUnique; +} + +const CArray& CBalancedPairBatchGenerator::GenerateBatchIndexes( int batchSize, bool batchShuffled ) +{ + // The number of all matchings in the batch. + const int numOfCombinations = batchSize * ( batchSize - 1 ) / 2; + // The desired number of positive matchings in the batch. + const int targetNumOfPositiveCombinations = numOfCombinations / 2; + // How many elements of one class should be taken to get the desired number of positive matchings. + // The formula below is one of the roots of the square equation C_n^2 = targetNumOfPositiveCombinations with respect to n. + const double idealNumOfSingleClassSamples = 1.0 / 2.0 + sqrt( 1.0 / 4.0 + 2.0 * targetNumOfPositiveCombinations ); + // Round to the nearest integer - this will be the best approximation of the number of elements from one class. + const int numOfSingleClassSamples = static_cast( round( idealNumOfSingleClassSamples ) ); + + // Sample a random class and collect numOfSingleClassSamples elements from it into a batch. + const int majorLabel = labelGenerator.Next(); + NeoAssert( numOfSingleClassSamples <= labelToIndexGenerator.Get( majorLabel ).Size() ); + // Sample a random class and collect numOfSingleClassSamples elements from it into a batch. + batchIndexes.DeleteAll(); + batchIndexes.SetBufferSize( numOfSingleClassSamples ); + for( int i = 0; i < numOfSingleClassSamples; ++i ) { + batchIndexes.Add( labelToIndexGenerator.Get( majorLabel ).Next() ); + } + + // The number of remaining elements, also the number of remaining classes, because for the remaining elements + // the equality "one element = one class" is satisfied. + const int numOfOtherClasses = batchSize - numOfSingleClassSamples; + // Important! It is expected that all remaining elements in the batch will be from DIFFERENT clusters, + // so their number should not exceed the total number of clusters, taking into account one already sampled. + // If this is not the case, then we fail - let the user add more classes or reduce the batch size. + NeoAssert( numOfOtherClasses + 1 <= labelToIndexGenerator.Size() ); + // Table of used classes to skip duplicates. + CHashTable usedClasses; + usedClasses.Add( majorLabel ); + // Will never loop if numOfOtherClasses + 1 <= totalNumOfClasses, because the generator shuffles classes without duplicates. + while( usedClasses.Size() < numOfOtherClasses + 1 ) { + // Sample the class. + const int label = labelGenerator.Next(); + // Skip the class if it has already been sampled. + if( usedClasses.Has( label ) ) { + continue; + } + usedClasses.Add( label ); + // Sample an element from this class. + const int negativeSample = labelToIndexGenerator.Get( label ).Next(); + batchIndexes.Add( negativeSample ); + } + if( batchShuffled ) { + // Just in case, the batch is mixed before giving it to the model + // So that it doesn’t accidentally learn the structure of the batch. + shuffle( batchIndexes, random ); + } + + NeoAssert( batchIndexes.Size() == batchSize ); + return batchIndexes; +} + +//--------------------------------------------------------------------------------------------------------------------- + void CProblemSourceLayer::SetBatchSize(int _batchSize) { if(_batchSize == batchSize) { @@ -42,15 +187,19 @@ void CProblemSourceLayer::SetBatchSize(int _batchSize) ForceReshape(); } -void CProblemSourceLayer::SetProblem(const CPtr& _problem) +void CProblemSourceLayer::SetProblem( const CPtr& _problem, bool shuffle, unsigned seed ) { - NeoAssert( _problem != 0 ); - NeoAssert( GetDnn() == 0 || problem == 0 + NeoAssert( _problem != nullptr ); + NeoAssert( GetDnn() == nullptr || problem == nullptr || ( problem->GetFeatureCount() == _problem->GetFeatureCount() && problem->GetClassCount() == _problem->GetClassCount() ) ); problem = _problem; nextProblemIndex = 0; + + if( shuffle ) { + shuffled = new CBalancedPairBatchGenerator( *problem, seed ); + } } void CProblemSourceLayer::SetLabelType( TBlobType newLabelType ) @@ -68,128 +217,150 @@ void CProblemSourceLayer::SetLabelType( TBlobType newLabelType ) void CProblemSourceLayer::Reshape() { - NeoAssert(!GetDnn()->IsRecurrentMode()); + NeoAssert( !GetDnn()->IsRecurrentMode() ); - CheckLayerArchitecture( problem.Ptr() != 0, "source problem is null" ); + CheckLayerArchitecture( problem.Ptr() != nullptr, "source problem is null" ); CheckOutputs(); CheckLayerArchitecture( GetOutputCount() >= 2, "problem source layer has less than 2 outputs" ); // The data - outputDescs[0] = CBlobDesc( CT_Float ); - outputDescs[0].SetDimSize( BD_BatchWidth, batchSize ); - outputDescs[0].SetDimSize( BD_Channels, problem->GetFeatureCount() ); - exchangeBufs[0].SetSize(outputDescs[0].BlobSize()); + outputDescs[EB_Data] = CBlobDesc( CT_Float ); + outputDescs[EB_Data].SetDimSize( BD_BatchWidth, batchSize ); + outputDescs[EB_Data].SetDimSize( BD_Channels, problem->GetFeatureCount() ); + exchangeBufs[EB_Data].SetSize( outputDescs[EB_Data].BlobSize() ); // The labels int labelSize = problem->GetClassCount(); - if(labelSize == 2) { + if( labelSize == 2 ) { labelSize = 1; } - outputDescs[1] = CBlobDesc( labelType ); - outputDescs[1].SetDimSize( BD_BatchWidth, batchSize ); + outputDescs[EB_Label] = CBlobDesc( labelType ); + outputDescs[EB_Label].SetDimSize( BD_BatchWidth, batchSize ); if( labelType != CT_Int ) { - outputDescs[1].SetDimSize( BD_Channels, labelSize ); + outputDescs[EB_Label].SetDimSize( BD_Channels, labelSize ); } - exchangeBufs[1].SetSize(outputDescs[1].BlobSize()); + exchangeBufs[EB_Label].SetSize( outputDescs[EB_Label].BlobSize() ); // The weights - outputDescs[2] = CBlobDesc( CT_Float ); - outputDescs[2].SetDimSize( BD_BatchWidth, batchSize ); - exchangeBufs[2].SetSize(outputDescs[2].BlobSize()); + outputDescs[EB_Weight] = CBlobDesc( CT_Float ); + outputDescs[EB_Weight].SetDimSize( BD_BatchWidth, batchSize ); + exchangeBufs[EB_Weight].SetSize( outputDescs[EB_Weight].BlobSize() ); } void CProblemSourceLayer::RunOnce() { - NeoAssert(problem.Ptr() != 0); + NeoAssert( problem != nullptr ); - for(int i = 0; i < exchangeBufs[0].Size(); ++i) { - exchangeBufs[0][i] = emptyFill; - } - for(int i = 0; i < exchangeBufs[1].Size(); ++i) { - exchangeBufs[1][i] = 0; + ::memset( exchangeBufs[EB_Label].GetPtr(), 0, exchangeBufs[EB_Label].Size() * sizeof( float ) ); + if( emptyFill == 0.f ) { + ::memset( exchangeBufs[EB_Data].GetPtr(), 0, exchangeBufs[EB_Data].Size() * sizeof( float ) ); + } else { + for( int i = 0; i < exchangeBufs[EB_Data].Size(); ++i ) { + exchangeBufs[EB_Data][i] = emptyFill; + } } - float* data = exchangeBufs[0].GetPtr(); - float* labels = exchangeBufs[1].GetPtr(); - float* weights = exchangeBufs[2].GetPtr(); - - int vectorCount = problem->GetVectorCount(); - CFloatMatrixDesc matrix = problem->GetMatrix(); - CFloatVectorDesc vector; - - for(int i = 0; i < batchSize; ++i) { - // The data - matrix.GetRow( nextProblemIndex, vector ); - for(int j = 0; j < vector.Size; ++j) { - data[vector.Indexes == nullptr ? j : vector.Indexes[j]] = static_cast( vector.Values[j] ); + if( shuffled != nullptr ) { + const CArray& batchIndexes = shuffled->GenerateBatchIndexes( batchSize, /*batchShuffled*/true ); + for( int i = 0; i < batchSize; ++i ) { + shuffled->DeleteUnseenElement( batchIndexes[i] ); + fillExchangeBuffers( i, batchIndexes[i] ); } - - // The labels - // Update labels - if( labelType == CT_Float ) { - if( outputBlobs[1]->GetChannelsCount() == 1 ) { - *labels = static_cast< float >( problem->GetBinaryClass( nextProblemIndex ) ); - } else { - int classLabel = problem->GetClass( nextProblemIndex ); - NeoAssert( 0 <= classLabel && classLabel < outputBlobs[1]->GetChannelsCount() ); - ::memset( labels, 0, outputBlobs[1]->GetChannelsCount() * sizeof( float ) ); - labels[classLabel] = 1; - } - } else { - static_assert( sizeof( float ) == sizeof( int ), "sizeof( float ) != sizeof( int )" ); - NeoAssert( outputBlobs[1]->GetChannelsCount() == 1 ); - *reinterpret_cast( labels ) = problem->GetClass( nextProblemIndex ); + } else { + const int vectorCount = problem->GetVectorCount(); + for( int i = 0; i < batchSize; ++i, ++nextProblemIndex ) { + fillExchangeBuffers( i, nextProblemIndex % vectorCount ); } - - // The weights - *weights = static_cast(problem->GetVectorWeight(nextProblemIndex)); - - ++nextProblemIndex; - nextProblemIndex %= vectorCount; - - data += outputBlobs[0]->GetObjectSize(); - labels += outputBlobs[1]->GetObjectSize(); - weights += outputBlobs[2]->GetObjectSize(); } - outputBlobs[0]->CopyFrom(exchangeBufs[0].GetPtr()); + outputBlobs[EB_Data]->CopyFrom( exchangeBufs[EB_Data].GetPtr() ); if( labelType == CT_Float ) { - outputBlobs[1]->CopyFrom( exchangeBufs[1].GetPtr() ); + outputBlobs[EB_Label]->CopyFrom( exchangeBufs[EB_Label].GetPtr() ); } else { - outputBlobs[1]->CopyFrom( reinterpret_cast( exchangeBufs[1].GetPtr() ) ); + outputBlobs[EB_Label]->CopyFrom( reinterpret_cast( exchangeBufs[EB_Label].GetPtr() ) ); } - outputBlobs[2]->CopyFrom(exchangeBufs[2].GetPtr()); + outputBlobs[EB_Weight]->CopyFrom( exchangeBufs[EB_Weight].GetPtr() ); } void CProblemSourceLayer::BackwardOnce() { - NeoAssert(0); + NeoAssert( false ); } -static const int ProblemSourceLayerVersion = 2000; +constexpr int ProblemSourceLayerVersion = 2001; void CProblemSourceLayer::Serialize( CArchive& archive ) { - archive.SerializeVersion( ProblemSourceLayerVersion, CDnn::ArchiveMinSupportedVersion ); + const int version = archive.SerializeVersion( ProblemSourceLayerVersion, CDnn::ArchiveMinSupportedVersion ); CBaseLayer::Serialize( archive ); - if( archive.IsStoring() ) { - archive << batchSize; - archive << static_cast( labelType ); - } else if( archive.IsLoading() ) { - archive >> batchSize; + if( version >= 2001 ) { + archive.Serialize( emptyFill ); + } else { // loading + emptyFill = 0; + } + archive.Serialize( batchSize ); + int labelTypeInt = static_cast( labelType ); + archive.Serialize( labelTypeInt ); + + if( archive.IsLoading() ) { nextProblemIndex = NotFound; - problem = 0; - int labelTypeInt = 0; - archive >> labelTypeInt; labelType = static_cast( labelTypeInt ); + problem = nullptr; + shuffled = nullptr; + } +} + +void CProblemSourceLayer::fillExchangeBuffers( int shift, int index ) +{ + float* data = exchangeBufs[EB_Data].GetPtr() + shift * outputBlobs[EB_Data]->GetObjectSize(); + float* labels = exchangeBufs[EB_Label].GetPtr() + shift * outputBlobs[EB_Label]->GetObjectSize(); + float* weights = exchangeBufs[EB_Weight].GetPtr() + shift * outputBlobs[EB_Weight]->GetObjectSize(); + + // The data + const CFloatMatrixDesc matrix = problem->GetMatrix(); + CFloatVectorDesc vector; + matrix.GetRow( index, vector ); + for( int j = 0; j < vector.Size; ++j ) { + data[( vector.Indexes == nullptr ) ? j : vector.Indexes[j]] = static_cast( vector.Values[j] ); + } + + // The labels + // Update labels + if( labelType == CT_Float ) { + if( outputBlobs[EB_Label]->GetChannelsCount() == 1 ) { + *labels = static_cast( problem->GetBinaryClass( index ) ); + } else { + const int classLabel = problem->GetClass( index ); + NeoAssert( 0 <= classLabel && classLabel < outputBlobs[EB_Label]->GetChannelsCount() ); + ::memset( labels, 0, outputBlobs[EB_Label]->GetChannelsCount() * sizeof( float ) ); + labels[classLabel] = 1; + } } else { - NeoAssert( false ); + static_assert( sizeof( float ) == sizeof( int ), "sizeof( float ) != sizeof( int )" ); + NeoAssert( outputBlobs[EB_Label]->GetChannelsCount() == 1 ); + *reinterpret_cast( labels ) = problem->GetClass( index ); } + + // The weights + *weights = static_cast( problem->GetVectorWeight( index ) ); +} + +// Creates CProblemSourceLayer with the name +CProblemSourceLayer* ProblemSource( CDnn& dnn, const char* name, + TBlobType labelType, int batchSize, const CPtr& problem, bool shuffle, unsigned seed ) +{ + CPtr result = new CProblemSourceLayer( dnn.GetMathEngine() ); + result->SetProblem( problem, shuffle, seed ); + result->SetLabelType( labelType ); + result->SetBatchSize( batchSize ); + result->SetName( name ); + dnn.AddLayer( *result ); + return result; } -/////////////////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////////////////// +//--------------------------------------------------------------------------------------------------------------------- + const char* const CDnnModelWrapper::SourceLayerName = "CCnnModelWrapper::SourceLayer"; const char* const CDnnModelWrapper::SinkLayerName = "CCnnModelWrapper::SinkLayer"; @@ -207,83 +378,78 @@ CDnnModelWrapper::CDnnModelWrapper(IMathEngine& _mathEngine, unsigned int seed) SinkLayer->SetName(SinkLayerName); } -int CDnnModelWrapper::GetClassCount() const -{ - return ClassCount; -} - bool CDnnModelWrapper::Classify(const CFloatVectorDesc& desc, CClassificationResult& result) const { - NeoAssert(SourceBlob.Ptr() != 0); - NeoPresume(SourceBlob.Ptr() == SourceLayer->GetBlob().Ptr()); + NeoAssert( SourceBlob != nullptr ); + NeoPresume( SourceBlob == SourceLayer->GetBlob() ); - exchangeBuffer.SetSize(SourceBlob->GetDataSize()); - - for(int i = 0; i < exchangeBuffer.Size(); ++i) { - exchangeBuffer[i] = SourceEmptyFill; + exchangeBuffer.SetSize( SourceBlob->GetDataSize() ); + if( SourceEmptyFill == 0.f ) { + ::memset( exchangeBuffer.GetPtr(), 0, exchangeBuffer.Size() * sizeof( float ) ); + } else { + for( int i = 0; i < exchangeBuffer.Size(); ++i ) { + exchangeBuffer[i] = SourceEmptyFill; + } } for(int i = 0; i < desc.Size; ++i) { - exchangeBuffer[desc.Indexes == nullptr ? i : desc.Indexes[i]] = desc.Values[i]; + exchangeBuffer[( desc.Indexes == nullptr ) ? i : desc.Indexes[i]] = desc.Values[i]; } - SourceBlob->CopyFrom(exchangeBuffer.GetPtr()); + SourceBlob->CopyFrom( exchangeBuffer.GetPtr() ); return classify( result ); } -static const int DnnModelWrapperVersion = 2000; +constexpr int DnnModelWrapperVersion = 2001; -void CDnnModelWrapper::Serialize(CArchive& archive) +void CDnnModelWrapper::Serialize( CArchive& archive ) { - archive.SerializeVersion( DnnModelWrapperVersion, CDnn::ArchiveMinSupportedVersion ); - - if( archive.IsStoring() ) { - archive << ClassCount << Random << Dnn; - archive << CString(SourceLayer->GetName()); - archive << CString(SinkLayer->GetName()); - CBlobDesc sourceDesc( CT_Float ); - sourceDesc.SetDimSize(BD_BatchWidth, 0); - if( SourceBlob.Ptr() != 0 ) { - sourceDesc = SourceBlob->GetDesc(); - } - for( int i = 0; i < CBlobDesc::MaxDimensions; i++ ) { - archive << sourceDesc.DimSize(i); - } - } else if( archive.IsLoading() ) { - archive >> ClassCount >> Random >> Dnn; + const int version = archive.SerializeVersion( DnnModelWrapperVersion, CDnn::ArchiveMinSupportedVersion ); - CString name; + archive.Serialize( ClassCount ); + if( version >= 2001 ) { + archive.Serialize( SourceEmptyFill ); + } else { // loading + SourceEmptyFill = 0; + } + archive.Serialize( Random ); + archive.Serialize( Dnn ); + + CString sourceName = SourceLayer->GetName(); + archive.Serialize( sourceName ); + CString sinkName = SinkLayer->GetName(); + archive.Serialize( sinkName ); + + CBlobDesc sourceDesc( CT_Float ); + sourceDesc.SetDimSize( BD_BatchWidth, 0 ); // set zero + if( SourceBlob != nullptr ) { + sourceDesc = SourceBlob->GetDesc(); + } + for( int i = 0; i < CBlobDesc::MaxDimensions; ++i ) { + int size = sourceDesc.DimSize( i ); + archive.Serialize( size ); + sourceDesc.SetDimSize( i, size ); + } - archive >> name; - if( Dnn.HasLayer(name) ) { - SourceLayer = CheckCast(Dnn.GetLayer(name).Ptr()); + if( archive.IsLoading() ) { + if( Dnn.HasLayer( sourceName ) ) { + SourceLayer = CheckCast( Dnn.GetLayer( sourceName ).Ptr() ); } else { - SourceLayer->SetName(name); + SourceLayer->SetName( sourceName ); } - - archive >> name; - if( Dnn.HasLayer(name) ) { - SinkLayer = CheckCast(Dnn.GetLayer(name).Ptr()); + if( Dnn.HasLayer( sinkName ) ) { + SinkLayer = CheckCast( Dnn.GetLayer( sinkName ).Ptr() ); } else { - SinkLayer->SetName(name); + SinkLayer->SetName( sinkName ); } - CBlobDesc sourceDesc( CT_Float ); - for( int i = 0; i < CBlobDesc::MaxDimensions; i++ ) { - int size; - archive >> size; - sourceDesc.SetDimSize(i, size); - } - if( sourceDesc.BlobSize() == 0 ) { - SourceBlob = 0; + if( sourceDesc.BlobSize() == 0 ) { // is zero + SourceBlob = nullptr; } else { SourceBlob = CDnnBlob::CreateBlob(mathEngine, CT_Float, sourceDesc); SourceLayer->SetBlob(SourceBlob); } - exchangeBuffer.SetSize(0); tempExp.SetSize(0); - } else { - NeoAssert( false ); } } @@ -339,8 +505,8 @@ bool CDnnModelWrapper::classify( CClassificationResult& result ) const return true; } -/////////////////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////////////////// +//--------------------------------------------------------------------------------------------------------------------- + CPtr CDnnTrainingModelWrapper::Train(const IProblem& trainingClassificationData) { CPtr model = FINE_DEBUG_NEW CDnnModelWrapper( mathEngine ); diff --git a/NeoML/src/TraditionalML/Shuffler.cpp b/NeoML/src/TraditionalML/Shuffler.cpp index 563ea6c05..75873dd74 100644 --- a/NeoML/src/TraditionalML/Shuffler.cpp +++ b/NeoML/src/TraditionalML/Shuffler.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,9 +20,11 @@ limitations under the License. namespace NeoML { -CShuffler::CShuffler( CRandom& _random, int count ) - : random( _random ), nextIndex( 0 ) +CShuffler::CShuffler( CRandom& _random, int count ) : + random( _random ), + nextIndex( 0 ) { + NeoAssert( count > 1 ); indices.SetSize( count ); for( int i = 0; i < indices.Size(); ++i ) { indices[i] = i; @@ -32,19 +34,15 @@ CShuffler::CShuffler( CRandom& _random, int count ) inline int CShuffler::getSwapIndex( int swapIndex ) { if( swapIndex != nextIndex ) { - int tmp = indices[swapIndex]; - indices[swapIndex] = indices[nextIndex]; - indices[nextIndex] = tmp; + FObj::swap( indices[swapIndex], indices[nextIndex] ); } - return indices[nextIndex++]; } int CShuffler::Next() { NeoPresume( nextIndex < indices.Size() ); - - int swapIndex = random.UniformInt( nextIndex, indices.Size() - 1 ); + const int swapIndex = random.UniformInt( nextIndex, indices.Size() - 1 ); return getSwapIndex( swapIndex ); } @@ -64,7 +62,6 @@ int CShuffler::SetNext( int index ) } NeoAssert( swapIndex != NotFound ); } - return getSwapIndex( swapIndex ); } @@ -73,8 +70,7 @@ const CArray& CShuffler::GetAllIndices() while( nextIndex < indices.Size() ) { Next(); } - return indices; } -} +} // namespace NeoML diff --git a/NeoML/test/src/CMakeLists.txt b/NeoML/test/src/CMakeLists.txt index 7599ee194..6076daa4d 100644 --- a/NeoML/test/src/CMakeLists.txt +++ b/NeoML/test/src/CMakeLists.txt @@ -29,6 +29,7 @@ target_sources(${PROJECT_NAME} INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/OptimizerFunctionsTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ParameterLayerTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/PCATest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ProblemSourceLayerTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/RandomProblem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/RandomProblem.h ${CMAKE_CURRENT_SOURCE_DIR}/ReferenceDnnTest.cpp diff --git a/NeoML/test/src/ProblemSourceLayerTest.cpp b/NeoML/test/src/ProblemSourceLayerTest.cpp new file mode 100644 index 000000000..dcd2a11e1 --- /dev/null +++ b/NeoML/test/src/ProblemSourceLayerTest.cpp @@ -0,0 +1,127 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--------------------------------------------------------------------------------------------------------------*/ + +#include +#pragma hdrstop + +#include + +using namespace NeoML; +using namespace NeoMLTest; + +namespace NeoMLTest { + +template +static void recreateLayer( IMathEngine& mathEngine, CPtr& layer, + const char* name, int batchSize, TBlobType type, CMemoryProblem* problem, CDnn& dnn ) +{ + layer->SetName( name ); + layer->SetBatchSize( batchSize ); + + CMemoryFile file; + CArchive archive( &file, CArchive::SD_Storing ); + layer->Serialize( archive ); + archive.Close(); + layer = new T( mathEngine ); + file.SeekToBegin(); + archive.Open( &file, CArchive::SD_Loading ); + layer->Serialize( archive ); + + EXPECT_EQ( type, layer->GetLabelType() ); + layer->SetProblem( problem ); + dnn.AddLayer( *layer ); +} + +// Check for float labels == enumBinarization(int labels). +// As a type T could be CFullyConnectedSourceLayer or CProblemSourceLayer. +template +static void testLabelTypes( CDnn& dnn, CPtr intLayer, CPtr floatLayer ) +{ + const int featureCount = 5; + const int vectorCount = 10; + const int classCount = 3; + const int runCount = 10; + const int batchSize = ( vectorCount / 2 ) - 1; + + static_assert( vectorCount >= classCount, "" ); + static_assert( classCount > 2, "" ); + + CPtr problem = new CMemoryProblem( featureCount, classCount ); + for( int i = 0; i < vectorCount; ++i ) { + CSparseFloatVector vector; + const int index = dnn.Random().UniformInt( 0, featureCount - 1 ); + vector.SetAt( index, 1.f ); + problem->Add( vector, i % classCount ); + } + + intLayer->SetLabelType( CT_Int ); + recreateLayer( dnn.GetMathEngine(), intLayer, "intLayer", batchSize, CT_Int, problem.Ptr(), dnn ); + recreateLayer( dnn.GetMathEngine(), floatLayer, "floatLayer", batchSize, CT_Float, problem.Ptr(), dnn ); + + CPtr enumBin = EnumBinarization( classCount )( CDnnLayerLink( intLayer.Ptr(), 1 ) ); + + Sink( CDnnLayerLink( floatLayer.Ptr(), 0 ), "floatData" ); + CPtr floatLabel = Sink( CDnnLayerLink( floatLayer.Ptr(), 1 ), "floatLabel" ); + Sink( CDnnLayerLink( floatLayer.Ptr(), 2 ), "floatWeights" ); + + Sink( CDnnLayerLink( intLayer.Ptr(), 0 ), "intData" ); + CPtr intLabel = Sink( enumBin.Ptr(), "intLabel" ); + Sink( CDnnLayerLink( intLayer.Ptr(), 2 ), "intWeights" ); + + for( int run = 0; run < runCount; ++run ) { + dnn.RunOnce(); + + CArray expected; + expected.SetSize( floatLabel->GetBlob()->GetDataSize() ); + floatLabel->GetBlob()->CopyTo( expected.GetPtr() ); + + CArray result; + result.SetSize( intLabel->GetBlob()->GetDataSize() ); + intLabel->GetBlob()->CopyTo( result.GetPtr() ); + + EXPECT_EQ( classCount * batchSize, expected.Size() ); + EXPECT_EQ( expected.Size(), result.Size() ); + for( int i = 0; i < expected.Size(); ++i ) { + EXPECT_FLOAT_EQ( expected[i], result[i] ); + } + } +} + +} // namespace NeoMLTest + +//--------------------------------------------------------------------------------------------------------------------- + +TEST( CDnnProblemTest, LabelTypes ) +{ + CRandom random( 0x0CA ); + { + CDnn dnn( random, MathEngine() ); + + CPtr intSource = new CProblemSourceLayer( MathEngine() ); + CPtr floatSource = new CProblemSourceLayer( MathEngine() ); + + testLabelTypes( dnn, intSource, floatSource ); + } + { + CDnn dnn( random, MathEngine() ); + + CPtr intFc = new CFullyConnectedSourceLayer( MathEngine() ); + intFc->SetNumberOfElements( 3 ); + CPtr floatFc = new CFullyConnectedSourceLayer( MathEngine() ); + floatFc->SetNumberOfElements( 3 ); + + testLabelTypes( dnn, intFc, floatFc ); + } +}