diff --git a/NeoML/include/NeoML/Dnn/Layers/FullyConnectedSourceLayer.h b/NeoML/include/NeoML/Dnn/Layers/FullyConnectedSourceLayer.h
index 4d3e3e1bf..59750ea42 100644
--- a/NeoML/include/NeoML/Dnn/Layers/FullyConnectedSourceLayer.h
+++ b/NeoML/include/NeoML/Dnn/Layers/FullyConnectedSourceLayer.h
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -78,7 +78,9 @@ class NEOML_API CFullyConnectedSourceLayer : public CFullyConnectedLayer {
 	bool isBatchLoaded( int index ) const;
 };
 
-NEOML_API CLayerWrapper<CFullyConnectedSourceLayer> FullyConnectedSource(
-	TBlobType labelType, int batchSize, int maxBatchCount, IProblem* problem );
+// Creates CFullyConnectedSourceLayer with the name
+NEOML_API CFullyConnectedSourceLayer* FullyConnectedSource( CDnn& dnn, const char* name,
+	TBlobType labelType, int batchSize, int maxBatchCount, IProblem* problem,
+	int numberOfElements = 1, bool isZeroFreeTerm = false );
 
 } // namespace NeoML
diff --git a/NeoML/include/NeoML/Dnn/Layers/ModelWrapperLayer.h b/NeoML/include/NeoML/Dnn/Layers/ModelWrapperLayer.h
index a8fb814a0..c6f89d015 100644
--- a/NeoML/include/NeoML/Dnn/Layers/ModelWrapperLayer.h
+++ b/NeoML/include/NeoML/Dnn/Layers/ModelWrapperLayer.h
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -27,31 +27,46 @@ class CSourceLayer;
 class CSinkLayer;
 class CDnnTrainingModelWrapper;
 
+struct IShuffledBatchGenerator { // supplies CProblemSourceLayer with the element indexes of each batch
+	virtual ~IShuffledBatchGenerator() = default;
+	// Returns the problem element indexes of the next batch; batchShuffled asks to permute them
+	virtual const CArray<int>& GenerateBatchIndexes( int batchSize, bool batchShuffled ) = 0;
+	virtual bool HasUnseenElements() const = 0; // are there elements not yet reported as seen
+	virtual void DeleteUnseenElement( int index ) = 0; // marks the element with this index as seen
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+
 // CProblemSourceLayer is a wrapper over the IProblem interface. 
 // On each iteration, it passes BatchSize vectors into the network for processing.
 class NEOML_API CProblemSourceLayer : public CBaseLayer {
 	NEOML_DNN_LAYER( CProblemSourceLayer )
 public:
-	explicit CProblemSourceLayer( IMathEngine& mathEngine );
+	explicit CProblemSourceLayer( IMathEngine& mathEngine ) :
+		CBaseLayer( mathEngine, "CCnnProblemSourceLayer", /*isLearnable*/false ) {}
 
 	void Serialize( CArchive& archive ) override;
 
 	int GetBatchSize() const { return batchSize; }
-	void SetBatchSize(int _batchSize);
+	void SetBatchSize( int batchSize );
 
 	// The filler for empty values that are not present in a sparse vector
 	float GetEmptyFill() const { return emptyFill; }
-	void SetEmptyFill(float _emptyFill) { NeoAssert(GetDnn() == 0); emptyFill = _emptyFill; }
+	void SetEmptyFill( float _emptyFill ) { NeoAssert( GetDnn() == nullptr ); emptyFill = _emptyFill; }
 
 	// You may only change the problem for the layer that is connected to a network
 	// if the number of classes and the number of input vectors stay the same
 	CPtr<const IProblem> GetProblem() const { return problem; }
-	void SetProblem(const CPtr<const IProblem>& _problem);
+	void SetProblem( const CPtr<const IProblem>& problem, bool shuffle = false, unsigned seed = 42 );
 
 	// Retrieves and sets the data type for class labels
 	TBlobType GetLabelType() const { return labelType; }
 	void SetLabelType( TBlobType newLabelType );
 
+	// Still not the end of an epoch: asks the generator when shuffling, else checks the sequential position
+	bool HasUnseenElements() const { return ( shuffled != nullptr ) ? shuffled->HasUnseenElements()
+		: ( problem != nullptr && nextProblemIndex < problem->GetVectorCount() ); }
+
 protected:
 	~CProblemSourceLayer() override = default;
 
@@ -60,22 +75,31 @@ class NEOML_API CProblemSourceLayer : public CBaseLayer {
 	void BackwardOnce() override;
 
 private:
-	float emptyFill;		// the empty values filler (for values not represented in a sparse vector)
-	int batchSize;			// the size of the batch passed to the network
-	int nextProblemIndex;	// the index of the next element in the problem to be passed
-	CPtr<const IProblem> problem;	// the classification problem the network is solving
-	TBlobType labelType;		// the data type for labels
-	CArray<float> exchangeBufs[3];
+	float emptyFill = 0; // the empty values filler (for values not represented in a sparse vector)
+	int batchSize = 1; // the size of the batch passed to the network
+	int nextProblemIndex = NotFound; // the index of the next element in the problem to be passed
+	TBlobType labelType = CT_Float; // the data type for labels
+	CPtr<const IProblem> problem; // the classification problem the network is solving
+	CPtrOwner<IShuffledBatchGenerator> shuffled; // if a shuffled batch input
+
+	enum { EB_Data, EB_Label, EB_Weight, EB_Count_ };
+	CArray<float> exchangeBufs[EB_Count_]{};
+
+	void fillExchangeBuffers( int shift, int index );
 };
 
-///////////////////////////////////////////////////////////////////////////////////////////////////////
+// Creates CProblemSourceLayer with the name
+NEOML_API CProblemSourceLayer* ProblemSource( CDnn& dnn, const char* name,
+	TBlobType labelType, int batchSize, const CPtr<const IProblem>& problem, bool shuffle = false, unsigned seed = 42 );
+
+//---------------------------------------------------------------------------------------------------------------------
 
 // CDnnModelWrapper is the base class wrapping the trained neural network into the IModel interface
 class NEOML_API CDnnModelWrapper : public IModel {
 public:
 	explicit CDnnModelWrapper(IMathEngine& mathEngine, unsigned int seed = 0xDEADFACE);
 
-	int GetClassCount() const override;
+	int GetClassCount() const override { return ClassCount; }
 	bool Classify(const CFloatVectorDesc& data, CClassificationResult& result) const override;
 	void Serialize(CArchive& archive) override;
 
@@ -83,11 +107,11 @@ class NEOML_API CDnnModelWrapper : public IModel {
 	int ClassCount;
 	float SourceEmptyFill;
 	mutable CRandom Random;
-	mutable CDnn Dnn;	// the network
-	CPtr<CSourceLayer> SourceLayer;	// the reference to the source layer
-	CPtr<CSinkLayer> SinkLayer;		// the reference to the terminator layer
-	CPtr<CDnnBlob> SourceBlob;			// the source data blob
-	mutable CArray<float> tempExp;		// the temporary array for exponent values to calculate softmax
+	mutable CDnn Dnn; // the network
+	CPtr<CSourceLayer> SourceLayer; // the reference to the source layer
+	CPtr<CSinkLayer> SinkLayer; // the reference to the terminator layer
+	CPtr<CDnnBlob> SourceBlob; // the source data blob
+	mutable CArray<float> tempExp; // the temporary array for exponent values to calculate softmax
 
 	static const char* const SourceLayerName;
 	static const char* const SinkLayerName;
@@ -101,7 +125,7 @@ class NEOML_API CDnnModelWrapper : public IModel {
 	bool classify( CClassificationResult& result ) const;
 };
 
-///////////////////////////////////////////////////////////////////////////////////////////////////////
+//---------------------------------------------------------------------------------------------------------------------
 
 // CDnnTrainingModelWrapper is the base class wrapping the neural network 
 // into an ITrainingModel interface so the network can be trained using the Train method
diff --git a/NeoML/include/NeoML/TraditionalML/Shuffler.h b/NeoML/include/NeoML/TraditionalML/Shuffler.h
index 2b8bcf8f6..4b57fa940 100644
--- a/NeoML/include/NeoML/TraditionalML/Shuffler.h
+++ b/NeoML/include/NeoML/TraditionalML/Shuffler.h
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,7 +23,7 @@ namespace NeoML {
 // The shuffler class
 // Uses the standard shuffling algorithm, not all at once but sequentially; as a result, the first N positions are only shuffled among themselves
 // For example, you can use it to get the random indices in an array
-class NEOML_API CShuffler {
+class NEOML_API CShuffler final {
 public:
 	CShuffler( CRandom& _random, int count );
 
@@ -34,6 +34,10 @@ class NEOML_API CShuffler {
 	int SetNext( int index );
 	// Finishes shuffling and returns all indices
 	const CArray<int>& GetAllIndices();
+	// Is shuffling finished
+	bool IsFinished() const { return nextIndex == indices.Size(); }
+	// Reset state to use shuffler again for the same array
+	void Reset() { nextIndex = 0; }
 
 private:
 	CRandom& random;
diff --git a/NeoML/src/Dnn/Layers/FullyConnectedSourceLayer.cpp b/NeoML/src/Dnn/Layers/FullyConnectedSourceLayer.cpp
index 2766fb917..b151f70e7 100644
--- a/NeoML/src/Dnn/Layers/FullyConnectedSourceLayer.cpp
+++ b/NeoML/src/Dnn/Layers/FullyConnectedSourceLayer.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -310,15 +310,20 @@ bool CFullyConnectedSourceLayer::isBatchLoaded( int index ) const
 	return ( batchFirstLoadedIndex <= index && index <= batchLastLoadedIndex );
 }
 
-CLayerWrapper<CFullyConnectedSourceLayer> FullyConnectedSource( TBlobType labelType,
-	int batchSize, int maxBatchCount, IProblem* problem )
+// Creates CFullyConnectedSourceLayer with the name
+CFullyConnectedSourceLayer* FullyConnectedSource( CDnn& dnn, const char* name,
+	TBlobType labelType, int batchSize, int maxBatchCount, IProblem* problem, int numberOfElements, bool isZeroFreeTerm )
 {
-	return CLayerWrapper<CFullyConnectedSourceLayer>( "FullyConnectedSource", [=, &problem]( CFullyConnectedSourceLayer* result ) {
-		result->SetLabelType( labelType );
-		result->SetBatchSize( batchSize );
-		result->SetMaxBatchCount( maxBatchCount );
-		result->SetProblem( problem );
-	} );
+	CPtr<CFullyConnectedSourceLayer> result = new CFullyConnectedSourceLayer( dnn.GetMathEngine() );
+	result->SetLabelType( labelType );
+	result->SetBatchSize( batchSize );
+	result->SetMaxBatchCount( maxBatchCount );
+	result->SetProblem( problem );
+	result->SetNumberOfElements( numberOfElements );
+	result->SetZeroFreeTerm( isZeroFreeTerm );
+	result->SetName( name );
+	dnn.AddLayer( *result );
+	return result;
 }
 
 } // namespace NeoML
diff --git a/NeoML/src/Dnn/Layers/ModelWrapperLayer.cpp b/NeoML/src/Dnn/Layers/ModelWrapperLayer.cpp
index 4ff19fbe0..2447f2708 100644
--- a/NeoML/src/Dnn/Layers/ModelWrapperLayer.cpp
+++ b/NeoML/src/Dnn/Layers/ModelWrapperLayer.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,18 +21,163 @@ limitations under the License.
 #include <NeoMathEngine/NeoMathEngine.h>
 #include <NeoML/Dnn/Layers/SourceLayer.h>
 #include <NeoML/Dnn/Layers/SinkLayer.h>
+#include <NeoML/TraditionalML/Shuffler.h>
 
 namespace NeoML {
 
-CProblemSourceLayer::CProblemSourceLayer( IMathEngine& mathEngine ) :
-	CBaseLayer( mathEngine, "CCnnProblemSourceLayer", false ),
-	emptyFill(0),
-	batchSize(1),
-	nextProblemIndex(0),
-	labelType(CT_Float)
+// Shuffles the elements array in place (arrays with fewer than 2 elements are left unchanged).
+static void shuffle( CArray<int>& elements, CRandom& random )
 {
+	// Nothing to permute; besides, CShuffler asserts count > 1.
+	if( elements.Size() < 2 ) { return; }
+	CShuffler indexGenerator( random, elements.Size() );
+	CArray<int> oldElements;
+	elements.CopyTo( oldElements );
+	for( int i = 0; i < elements.Size(); ++i ) { elements[i] = oldElements[indexGenerator.Next()]; }
+}
+
+//---------------------------------------------------------------------------------------------------------------------
+
+// Shuffles the elements of an array and returns them one by one.
+// Unlike CShuffler, it returns the elements of the array, not the indices of the elements, 
+// and also supports cyclicity (when the end of the sequence is reached, it will be shuffled again).
+template <typename T>
+class CShuffledElements final {
+public:
+	CShuffledElements( CArray<T>&& _elements, CRandom& _random ) :
+		random( _random ), elements( std::move( _elements ) ), shuffler( random, elements.Size() ) {}
+	CShuffledElements( CShuffledElements&& ) = default;
+
+	CShuffledElements& operator=( CShuffledElements&& ) = default;
+
+	// The number of elements in the elements array.
+	int Size() const { return elements.Size(); }
+	// Generates the next element of the sequence.
+	T Next() { if( shuffler.IsFinished() ) { shuffler.Reset(); } return elements[shuffler.Next()]; }
+
+private:
+	CRandom& random; // The random generator
+	CArray<T> elements; // The shuffled elements array
+	CShuffler shuffler; // The index shuffled generator
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+
+// Forms a batch in such a way that it contains the same number of positive and negative pairs, and
+// all positive elements are collected from one class, and all negative examples are from different non-intersecting ones.
+// For example, let there be 4 classes and batchSize = 12.
+// The number of matchings in the batch is C_12^2 = 66, respectively, the number of positive and negative pairs is 33 each.
+// We sample a random "positive" class from which we will collect positive elements, let's say this is class 0.
+// Then we randomly sample 9 elements from class 0, because C_9^2 = 36 pairs are closest to 33 (for
+// comparison, 8 elements give 28 pairs). The remaining 12 - 9 = 3 elements are taken each from a separate class - 
+// from the 1st, 2nd and 3rd classes, respectively.
+// As a result, we obtained a batch, the matching elements of which give 36 positive and 30 negative pairs.
+class CBalancedPairBatchGenerator : public IShuffledBatchGenerator {
+public:
+	CBalancedPairBatchGenerator( const IProblem& problem, unsigned seed );
+
+	// IShuffledBatchGenerator
+	const CArray<int>& GenerateBatchIndexes( int batchSize, bool batchShuffled ) override;
+	bool HasUnseenElements() const override { return !unseenElementsIndices.IsEmpty(); }
+	void DeleteUnseenElement( int index ) override { unseenElementsIndices.Delete( index ); }
+
+private:
+	CRandom random;
+	// The map of "label -> this label's elements indexes generator".
+	CMap<int, CShuffledElements<int>> labelToIndexGenerator;
+	// The labels generator
+	CShuffledElements<int> labelGenerator;
+	// Indexes of elements, which aren't in any batch, to detect epoch's end
+	CHashTable<int> unseenElementsIndices;
+	// Return value of indices set
+	CArray<int> batchIndexes;
+
+	CArray<int> getLabels( const IProblem& );
+};
+
+CBalancedPairBatchGenerator::CBalancedPairBatchGenerator( const IProblem& problem, unsigned seed ) :
+	random( seed ),
+	labelGenerator( getLabels( problem ), random )
+{
+	unseenElementsIndices.SetBufferSize( problem.GetVectorCount() );
+	for( int i = 0; i < problem.GetVectorCount(); ++i ) {
+		unseenElementsIndices.Add( i );
+	}
 }
 
+CArray<int> CBalancedPairBatchGenerator::getLabels( const IProblem& problem )
+{
+	CMap<int, CArray<int>> labelToIndexes;
+	for( int i = 0; i < problem.GetVectorCount(); ++i ) {
+		labelToIndexes.GetOrCreateValue( problem.GetClass( i ) ).Add( i );
+	}
+
+	CArray<int> labelsUnique;
+	labelsUnique.SetBufferSize( labelToIndexes.Size() );
+	for( auto& item : labelToIndexes ) {
+		labelToIndexGenerator.Add( item.Key, CShuffledElements<int>( std::move( item.Value ), random ) );
+		labelsUnique.Add( item.Key );
+	}
+	return labelsUnique;
+}
+
+const CArray<int>& CBalancedPairBatchGenerator::GenerateBatchIndexes( int batchSize, bool batchShuffled )
+{
+	// The number of all matchings (unordered pairs) in the batch.
+	const int numOfCombinations = batchSize * ( batchSize - 1 ) / 2;
+	// The desired number of positive matchings in the batch.
+	const int targetNumOfPositiveCombinations = numOfCombinations / 2;
+	// How many elements of one class should be taken to get the desired number of positive matchings.
+	// The formula below is the positive root of the quadratic equation C_n^2 = targetNumOfPositiveCombinations with respect to n.
+	const double idealNumOfSingleClassSamples = 1.0 / 2.0 + sqrt( 1.0 / 4.0 + 2.0 * targetNumOfPositiveCombinations );
+	// Round to the nearest integer - this will be the best approximation of the number of elements from one class.
+	const int numOfSingleClassSamples = static_cast<int>( round( idealNumOfSingleClassSamples ) );
+
+	// Sample a random "major" class; it must contain at least numOfSingleClassSamples elements.
+	const int majorLabel = labelGenerator.Next();
+	NeoAssert( numOfSingleClassSamples <= labelToIndexGenerator.Get( majorLabel ).Size() );
+	// Start a new batch with numOfSingleClassSamples elements of the major class.
+	batchIndexes.DeleteAll();
+	batchIndexes.SetBufferSize( numOfSingleClassSamples );
+	for( int i = 0; i < numOfSingleClassSamples; ++i ) {
+		batchIndexes.Add( labelToIndexGenerator.Get( majorLabel ).Next() );
+	}
+
+	// The number of remaining elements, also the number of remaining classes, because for the remaining elements
+	// the equality "one element = one class" is satisfied.
+	const int numOfOtherClasses = batchSize - numOfSingleClassSamples;
+	// Important! It is expected that all remaining elements in the batch will be from DIFFERENT classes,
+	// so their number should not exceed the total number of classes, taking into account one already sampled.
+	// If this is not the case, then we fail - let the user add more classes or reduce the batch size.
+	NeoAssert( numOfOtherClasses + 1 <= labelToIndexGenerator.Size() );
+	// Table of used classes to skip duplicates.
+	CHashTable<int> usedClasses;
+	usedClasses.Add( majorLabel );
+	// Will never loop forever if numOfOtherClasses + 1 <= totalNumOfClasses, because the generator shuffles classes without duplicates.
+	while( usedClasses.Size() < numOfOtherClasses + 1 ) {
+		// Sample the class.
+		const int label = labelGenerator.Next();
+		// Skip the class if it has already been sampled.
+		if( usedClasses.Has( label ) ) {
+			continue;
+		}
+		usedClasses.Add( label );
+		// Sample an element from this class.
+		const int negativeSample = labelToIndexGenerator.Get( label ).Next();
+		batchIndexes.Add( negativeSample );
+	}
+	if( batchShuffled ) {
+		// Just in case, the batch is shuffled before giving it to the model,
+		// so that it doesn't accidentally learn the structure of the batch.
+		shuffle( batchIndexes, random );
+	}
+
+	NeoAssert( batchIndexes.Size() == batchSize );
+	return batchIndexes;
+}
+
+//---------------------------------------------------------------------------------------------------------------------
+
 void CProblemSourceLayer::SetBatchSize(int _batchSize)
 {
 	if(_batchSize == batchSize) {
@@ -42,15 +187,19 @@ void CProblemSourceLayer::SetBatchSize(int _batchSize)
 	ForceReshape();
 }
 
-void CProblemSourceLayer::SetProblem(const CPtr<const IProblem>& _problem)
+void CProblemSourceLayer::SetProblem( const CPtr<const IProblem>& _problem, bool shuffle, unsigned seed )
 {
-	NeoAssert( _problem != 0 );
-	NeoAssert( GetDnn() == 0 || problem == 0
+	NeoAssert( _problem != nullptr );
+	NeoAssert( GetDnn() == nullptr || problem == nullptr
 		|| ( problem->GetFeatureCount() == _problem->GetFeatureCount()
 			&& problem->GetClassCount() == _problem->GetClassCount() ) );
 
 	problem = _problem;
 	nextProblemIndex = 0;
+	// Always replace the generator: one built for the previous problem holds stale element indexes,
+	// and a non-shuffled problem must switch the layer back to sequential reading (see RunOnce).
+	// With shuffle == true a balanced-pair generator seeded with `seed` is bound to the new problem.
+	shuffled = shuffle ? new CBalancedPairBatchGenerator( *problem, seed ) : nullptr;
 }
 
 void CProblemSourceLayer::SetLabelType( TBlobType newLabelType )
@@ -68,128 +217,150 @@ void CProblemSourceLayer::SetLabelType( TBlobType newLabelType )
 
 void CProblemSourceLayer::Reshape()
 {
-	NeoAssert(!GetDnn()->IsRecurrentMode());
+	NeoAssert( !GetDnn()->IsRecurrentMode() );
 
-	CheckLayerArchitecture( problem.Ptr() != 0, "source problem is null" );
+	CheckLayerArchitecture( problem.Ptr() != nullptr, "source problem is null" );
 	CheckOutputs();
 	CheckLayerArchitecture( GetOutputCount() >= 2, "problem source layer has less than 2 outputs" );
 
 	// The data
-	outputDescs[0] = CBlobDesc( CT_Float );
-	outputDescs[0].SetDimSize( BD_BatchWidth, batchSize );
-	outputDescs[0].SetDimSize( BD_Channels, problem->GetFeatureCount() );
-	exchangeBufs[0].SetSize(outputDescs[0].BlobSize());
+	outputDescs[EB_Data] = CBlobDesc( CT_Float );
+	outputDescs[EB_Data].SetDimSize( BD_BatchWidth, batchSize );
+	outputDescs[EB_Data].SetDimSize( BD_Channels, problem->GetFeatureCount() );
+	exchangeBufs[EB_Data].SetSize( outputDescs[EB_Data].BlobSize() );
 
 	// The labels
 	int labelSize = problem->GetClassCount();
-	if(labelSize == 2) {
+	if( labelSize == 2 ) {
 		labelSize = 1;
 	}
-	outputDescs[1] = CBlobDesc( labelType );
-	outputDescs[1].SetDimSize( BD_BatchWidth, batchSize );
+	outputDescs[EB_Label] = CBlobDesc( labelType );
+	outputDescs[EB_Label].SetDimSize( BD_BatchWidth, batchSize );
 	if( labelType != CT_Int ) {
-		outputDescs[1].SetDimSize( BD_Channels, labelSize );
+		outputDescs[EB_Label].SetDimSize( BD_Channels, labelSize );
 	}
-	exchangeBufs[1].SetSize(outputDescs[1].BlobSize());
+	exchangeBufs[EB_Label].SetSize( outputDescs[EB_Label].BlobSize() );
 
 	// The weights
-	outputDescs[2] = CBlobDesc( CT_Float );
-	outputDescs[2].SetDimSize( BD_BatchWidth, batchSize );
-	exchangeBufs[2].SetSize(outputDescs[2].BlobSize());
+	outputDescs[EB_Weight] = CBlobDesc( CT_Float );
+	outputDescs[EB_Weight].SetDimSize( BD_BatchWidth, batchSize );
+	exchangeBufs[EB_Weight].SetSize( outputDescs[EB_Weight].BlobSize() );
 }
 
 void CProblemSourceLayer::RunOnce()
 {
-	NeoAssert(problem.Ptr() != 0);
+	NeoAssert( problem != nullptr );
 
-	for(int i = 0; i < exchangeBufs[0].Size(); ++i) {
-		exchangeBufs[0][i] = emptyFill;
-	}
-	for(int i = 0; i < exchangeBufs[1].Size(); ++i) {
-		exchangeBufs[1][i] = 0;
+	::memset( exchangeBufs[EB_Label].GetPtr(), 0, exchangeBufs[EB_Label].Size() * sizeof( float ) );
+	if( emptyFill == 0.f ) {
+		::memset( exchangeBufs[EB_Data].GetPtr(), 0, exchangeBufs[EB_Data].Size() * sizeof( float ) );
+	} else {
+		for( int i = 0; i < exchangeBufs[EB_Data].Size(); ++i ) {
+			exchangeBufs[EB_Data][i] = emptyFill;
+		}
 	}
 
-	float* data = exchangeBufs[0].GetPtr();
-	float* labels = exchangeBufs[1].GetPtr();
-	float* weights = exchangeBufs[2].GetPtr();
-
-	int vectorCount = problem->GetVectorCount();
-	CFloatMatrixDesc matrix = problem->GetMatrix();
-	CFloatVectorDesc vector;
-
-	for(int i = 0; i < batchSize; ++i) {
-		// The data
-		matrix.GetRow( nextProblemIndex, vector );
-		for(int j = 0; j < vector.Size; ++j) {
-			data[vector.Indexes == nullptr ? j : vector.Indexes[j]] = static_cast<float>( vector.Values[j] );
+	if( shuffled != nullptr ) {
+		const CArray<int>& batchIndexes = shuffled->GenerateBatchIndexes( batchSize, /*batchShuffled*/true );
+		for( int i = 0; i < batchSize; ++i ) {
+			shuffled->DeleteUnseenElement( batchIndexes[i] );
+			fillExchangeBuffers( i, batchIndexes[i] );
 		}
-
-		// The labels
-		// Update labels
-		if( labelType == CT_Float ) {
-			if( outputBlobs[1]->GetChannelsCount() == 1 ) {
-				*labels = static_cast< float >( problem->GetBinaryClass( nextProblemIndex ) );
-			} else {
-				int classLabel = problem->GetClass( nextProblemIndex );
-				NeoAssert( 0 <= classLabel && classLabel < outputBlobs[1]->GetChannelsCount() );
-				::memset( labels, 0, outputBlobs[1]->GetChannelsCount() * sizeof( float ) );
-				labels[classLabel] = 1;
-			}
-		} else {
-			static_assert( sizeof( float ) == sizeof( int ), "sizeof( float ) != sizeof( int )" );
-			NeoAssert( outputBlobs[1]->GetChannelsCount() == 1 );
-			*reinterpret_cast<int*>( labels ) = problem->GetClass( nextProblemIndex );
+	} else {
+		const int vectorCount = problem->GetVectorCount();
+		for( int i = 0; i < batchSize; ++i, ++nextProblemIndex ) {
+			fillExchangeBuffers( i, nextProblemIndex % vectorCount );
 		}
-
-		// The weights
-		*weights = static_cast<float>(problem->GetVectorWeight(nextProblemIndex));
-
-		++nextProblemIndex;
-		nextProblemIndex %= vectorCount;
-
-		data += outputBlobs[0]->GetObjectSize();
-		labels += outputBlobs[1]->GetObjectSize();
-		weights += outputBlobs[2]->GetObjectSize();
 	}
 
-	outputBlobs[0]->CopyFrom(exchangeBufs[0].GetPtr());
+	outputBlobs[EB_Data]->CopyFrom( exchangeBufs[EB_Data].GetPtr() );
 	if( labelType == CT_Float ) {
-		outputBlobs[1]->CopyFrom( exchangeBufs[1].GetPtr() );
+		outputBlobs[EB_Label]->CopyFrom( exchangeBufs[EB_Label].GetPtr() );
 	} else {
-		outputBlobs[1]->CopyFrom( reinterpret_cast<int*>( exchangeBufs[1].GetPtr() ) );
+		outputBlobs[EB_Label]->CopyFrom( reinterpret_cast<int*>( exchangeBufs[EB_Label].GetPtr() ) );
 	}
-	outputBlobs[2]->CopyFrom(exchangeBufs[2].GetPtr());
+	outputBlobs[EB_Weight]->CopyFrom( exchangeBufs[EB_Weight].GetPtr() );
 }
 
 void CProblemSourceLayer::BackwardOnce()
 {
-	NeoAssert(0);
+	NeoAssert( false );
 }
 
-static const int ProblemSourceLayerVersion = 2000;
+constexpr int ProblemSourceLayerVersion = 2001;
 
 void CProblemSourceLayer::Serialize( CArchive& archive )
 {
-	archive.SerializeVersion( ProblemSourceLayerVersion, CDnn::ArchiveMinSupportedVersion );
+	const int version = archive.SerializeVersion( ProblemSourceLayerVersion, CDnn::ArchiveMinSupportedVersion );
 	CBaseLayer::Serialize( archive );
 
-	if( archive.IsStoring() ) {
-		archive << batchSize;
-		archive << static_cast<int>( labelType );
-	} else if( archive.IsLoading() ) {
-		archive >> batchSize;
+	if( version >= 2001 ) {
+		archive.Serialize( emptyFill );
+	} else { // loading
+		emptyFill = 0;
+	}
+	archive.Serialize( batchSize );
+	int labelTypeInt = static_cast<int>( labelType );
+	archive.Serialize( labelTypeInt );
+
+	if( archive.IsLoading() ) {
 		nextProblemIndex = NotFound;
-		problem = 0;
-		int labelTypeInt = 0;
-		archive >> labelTypeInt;
 		labelType = static_cast<TBlobType>( labelTypeInt );
+		problem = nullptr;
+		shuffled = nullptr;
+	}
+}
+
+void CProblemSourceLayer::fillExchangeBuffers( int shift, int index )
+{
+	float* data = exchangeBufs[EB_Data].GetPtr() + shift * outputBlobs[EB_Data]->GetObjectSize();
+	float* labels = exchangeBufs[EB_Label].GetPtr() + shift * outputBlobs[EB_Label]->GetObjectSize();
+	float* weights = exchangeBufs[EB_Weight].GetPtr() + shift * outputBlobs[EB_Weight]->GetObjectSize();
+
+	// The data
+	const CFloatMatrixDesc matrix = problem->GetMatrix();
+	CFloatVectorDesc vector;
+	matrix.GetRow( index, vector );
+	for( int j = 0; j < vector.Size; ++j ) {
+		data[( vector.Indexes == nullptr ) ? j : vector.Indexes[j]] = static_cast<float>( vector.Values[j] );
+	}
+
+	// The labels
+	// Update labels
+	if( labelType == CT_Float ) {
+		if( outputBlobs[EB_Label]->GetChannelsCount() == 1 ) {
+			*labels = static_cast<float>( problem->GetBinaryClass( index ) );
+		} else {
+			const int classLabel = problem->GetClass( index );
+			NeoAssert( 0 <= classLabel && classLabel < outputBlobs[EB_Label]->GetChannelsCount() );
+			::memset( labels, 0, outputBlobs[EB_Label]->GetChannelsCount() * sizeof( float ) );
+			labels[classLabel] = 1;
+		}
 	} else {
-		NeoAssert( false );
+		static_assert( sizeof( float ) == sizeof( int ), "sizeof( float ) != sizeof( int )" );
+		NeoAssert( outputBlobs[EB_Label]->GetChannelsCount() == 1 );
+		*reinterpret_cast<int*>( labels ) = problem->GetClass( index );
 	}
+
+	// The weights
+	*weights = static_cast<float>( problem->GetVectorWeight( index ) );
+}
+
+// Creates CProblemSourceLayer with the name
+CProblemSourceLayer* ProblemSource( CDnn& dnn, const char* name,
+	TBlobType labelType, int batchSize, const CPtr<const IProblem>& problem, bool shuffle, unsigned seed )
+{
+	CPtr<CProblemSourceLayer> result = new CProblemSourceLayer( dnn.GetMathEngine() );
+	result->SetProblem( problem, shuffle, seed );
+	result->SetLabelType( labelType );
+	result->SetBatchSize( batchSize );
+	result->SetName( name );
+	dnn.AddLayer( *result );
+	return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
+//---------------------------------------------------------------------------------------------------------------------
+
 const char* const CDnnModelWrapper::SourceLayerName = "CCnnModelWrapper::SourceLayer";
 const char* const CDnnModelWrapper::SinkLayerName = "CCnnModelWrapper::SinkLayer";
 
@@ -207,83 +378,78 @@ CDnnModelWrapper::CDnnModelWrapper(IMathEngine& _mathEngine, unsigned int seed)
 	SinkLayer->SetName(SinkLayerName);
 }
 
-int CDnnModelWrapper::GetClassCount() const
-{
-	return ClassCount;
-}
-
 bool CDnnModelWrapper::Classify(const CFloatVectorDesc& desc, CClassificationResult& result) const
 {
-	NeoAssert(SourceBlob.Ptr() != 0);
-	NeoPresume(SourceBlob.Ptr() == SourceLayer->GetBlob().Ptr());
+	NeoAssert( SourceBlob != nullptr );
+	NeoPresume( SourceBlob == SourceLayer->GetBlob() );
 
-	exchangeBuffer.SetSize(SourceBlob->GetDataSize());
-
-	for(int i = 0; i < exchangeBuffer.Size(); ++i) {
-		exchangeBuffer[i] = SourceEmptyFill;
+	exchangeBuffer.SetSize( SourceBlob->GetDataSize() );
+	if( SourceEmptyFill == 0.f ) {
+		::memset( exchangeBuffer.GetPtr(), 0, exchangeBuffer.Size() * sizeof( float ) );
+	} else {
+		for( int i = 0; i < exchangeBuffer.Size(); ++i ) {
+			exchangeBuffer[i] = SourceEmptyFill;
+		}
 	}
 
 	for(int i = 0; i < desc.Size; ++i) {
-		exchangeBuffer[desc.Indexes == nullptr ? i : desc.Indexes[i]] = desc.Values[i];
+		exchangeBuffer[( desc.Indexes == nullptr ) ? i : desc.Indexes[i]] = desc.Values[i];
 	}
-	SourceBlob->CopyFrom(exchangeBuffer.GetPtr());
+	SourceBlob->CopyFrom( exchangeBuffer.GetPtr() );
 
 	return classify( result );
 }
 
-static const int DnnModelWrapperVersion = 2000;
+constexpr int DnnModelWrapperVersion = 2001;
 
-void CDnnModelWrapper::Serialize(CArchive& archive)
+void CDnnModelWrapper::Serialize( CArchive& archive )
 {
-	archive.SerializeVersion( DnnModelWrapperVersion, CDnn::ArchiveMinSupportedVersion );
-
-	if( archive.IsStoring() ) {
-		archive << ClassCount << Random << Dnn;
-		archive << CString(SourceLayer->GetName());
-		archive << CString(SinkLayer->GetName());
-		CBlobDesc sourceDesc( CT_Float );
-		sourceDesc.SetDimSize(BD_BatchWidth, 0);
-		if( SourceBlob.Ptr() != 0 ) {
-			sourceDesc = SourceBlob->GetDesc();
-		}
-		for( int i = 0; i < CBlobDesc::MaxDimensions; i++ ) {
-			archive << sourceDesc.DimSize(i);
-		}
-	} else if( archive.IsLoading() ) {
-		archive >> ClassCount >> Random >> Dnn;
+	const int version = archive.SerializeVersion( DnnModelWrapperVersion, CDnn::ArchiveMinSupportedVersion );
 
-		CString name;
+	archive.Serialize( ClassCount );
+	if( version >= 2001 ) {
+		archive.Serialize( SourceEmptyFill );
+	} else { // loading
+		SourceEmptyFill = 0;
+	}
+	archive.Serialize( Random );
+	archive.Serialize( Dnn );
+
+	CString sourceName = SourceLayer->GetName();
+	archive.Serialize( sourceName );
+	CString sinkName = SinkLayer->GetName();
+	archive.Serialize( sinkName );
+
+	CBlobDesc sourceDesc( CT_Float );
+	sourceDesc.SetDimSize( BD_BatchWidth, 0 ); // set zero
+	if( SourceBlob != nullptr ) {
+		sourceDesc = SourceBlob->GetDesc();
+	}
+	for( int i = 0; i < CBlobDesc::MaxDimensions; ++i ) {
+		int size = sourceDesc.DimSize( i );
+		archive.Serialize( size );
+		sourceDesc.SetDimSize( i, size );
+	}
 
-		archive >> name;
-		if( Dnn.HasLayer(name) ) {
-			SourceLayer = CheckCast<CSourceLayer>(Dnn.GetLayer(name).Ptr());
+	if( archive.IsLoading() ) {
+		if( Dnn.HasLayer( sourceName ) ) {
+			SourceLayer = CheckCast<CSourceLayer>( Dnn.GetLayer( sourceName ).Ptr() );
 		} else {
-			SourceLayer->SetName(name);
+			SourceLayer->SetName( sourceName );
 		}
-
-		archive >> name;
-		if( Dnn.HasLayer(name) ) {
-			SinkLayer = CheckCast<CSinkLayer>(Dnn.GetLayer(name).Ptr());
+		if( Dnn.HasLayer( sinkName ) ) {
+			SinkLayer = CheckCast<CSinkLayer>( Dnn.GetLayer( sinkName ).Ptr() );
 		} else {
-			SinkLayer->SetName(name);
+			SinkLayer->SetName( sinkName );
 		}
-		CBlobDesc sourceDesc( CT_Float );
-		for( int i = 0; i < CBlobDesc::MaxDimensions; i++ ) {
-			int size;
-			archive >> size;
-			sourceDesc.SetDimSize(i, size);
-		}
-		if( sourceDesc.BlobSize() == 0 ) {
-			SourceBlob = 0;
+		if( sourceDesc.BlobSize() == 0 ) { // is zero
+			SourceBlob = nullptr;
 		} else {
 			SourceBlob = CDnnBlob::CreateBlob(mathEngine, CT_Float, sourceDesc);
 			SourceLayer->SetBlob(SourceBlob);
 		}
-
 		exchangeBuffer.SetSize(0);
 		tempExp.SetSize(0);
-	} else {
-		NeoAssert( false );
 	}
 }
 
@@ -339,8 +505,8 @@ bool CDnnModelWrapper::classify( CClassificationResult& result ) const
 	return true;
 }
 
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
+//---------------------------------------------------------------------------------------------------------------------
+
 CPtr<IModel> CDnnTrainingModelWrapper::Train(const IProblem& trainingClassificationData)
 {
 	CPtr<CDnnModelWrapper> model = FINE_DEBUG_NEW CDnnModelWrapper( mathEngine );
diff --git a/NeoML/src/TraditionalML/Shuffler.cpp b/NeoML/src/TraditionalML/Shuffler.cpp
index 563ea6c05..75873dd74 100644
--- a/NeoML/src/TraditionalML/Shuffler.cpp
+++ b/NeoML/src/TraditionalML/Shuffler.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,9 +20,11 @@ limitations under the License.
 
 namespace NeoML {
 
-CShuffler::CShuffler( CRandom& _random, int count )
-	: random( _random ), nextIndex( 0 )
+CShuffler::CShuffler( CRandom& _random, int count ) :
+	random( _random ),
+	nextIndex( 0 )
 {
+	NeoAssert( count > 1 );
 	indices.SetSize( count );
 	for( int i = 0; i < indices.Size(); ++i ) {
 		indices[i] = i;
@@ -32,19 +34,15 @@ CShuffler::CShuffler( CRandom& _random, int count )
 inline int CShuffler::getSwapIndex( int swapIndex )
 {
 	if( swapIndex != nextIndex ) {
-		int tmp = indices[swapIndex];
-		indices[swapIndex] = indices[nextIndex];
-		indices[nextIndex] = tmp;
+		FObj::swap( indices[swapIndex], indices[nextIndex] ); // put the chosen element at position nextIndex, which is returned below
 	}
-
 	return indices[nextIndex++];
 }
 
 int CShuffler::Next()
 {
 	NeoPresume( nextIndex < indices.Size() );
-
-	int swapIndex = random.UniformInt( nextIndex, indices.Size() - 1 );
+	const int swapIndex = random.UniformInt( nextIndex, indices.Size() - 1 ); // random position from nextIndex up to the last index (presumably inclusive bounds)
 	return getSwapIndex( swapIndex );
 }
 
@@ -64,7 +62,6 @@ int CShuffler::SetNext( int index )
 		}
 		NeoAssert( swapIndex != NotFound );
 	}
-
 	return getSwapIndex( swapIndex );
 }
 
@@ -73,8 +70,7 @@ const CArray<int>& CShuffler::GetAllIndices()
 	while( nextIndex < indices.Size() ) {
 		Next();
 	}
-
 	return indices;
 }
 
-}
+} // namespace NeoML
diff --git a/NeoML/test/src/CMakeLists.txt b/NeoML/test/src/CMakeLists.txt
index 7599ee194..6076daa4d 100644
--- a/NeoML/test/src/CMakeLists.txt
+++ b/NeoML/test/src/CMakeLists.txt
@@ -29,6 +29,7 @@ target_sources(${PROJECT_NAME} INTERFACE
     ${CMAKE_CURRENT_SOURCE_DIR}/OptimizerFunctionsTest.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/ParameterLayerTest.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/PCATest.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/ProblemSourceLayerTest.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/RandomProblem.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/RandomProblem.h
     ${CMAKE_CURRENT_SOURCE_DIR}/ReferenceDnnTest.cpp
diff --git a/NeoML/test/src/ProblemSourceLayerTest.cpp b/NeoML/test/src/ProblemSourceLayerTest.cpp
new file mode 100644
index 000000000..dcd2a11e1
--- /dev/null
+++ b/NeoML/test/src/ProblemSourceLayerTest.cpp
@@ -0,0 +1,127 @@
+﻿/* Copyright © 2024 ABBYY
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--------------------------------------------------------------------------------------------------------------*/
+
+#include <common.h>
+#pragma hdrstop
+
+#include <TestFixture.h>
+
+using namespace NeoML;
+using namespace NeoMLTest;
+
+namespace NeoMLTest {
+
+template<class T> // T is the layer class under test; a new instance is built from IMathEngine& below
+static void recreateLayer( IMathEngine& mathEngine, CPtr<T>& layer,
+	const char* name, int batchSize, TBlobType type, CMemoryProblem* problem, CDnn& dnn )
+{
+	layer->SetName( name );
+	layer->SetBatchSize( batchSize );
+	// Round trip through an in-memory archive: store the configured layer, then load it into a fresh instance
+	CMemoryFile file;
+	CArchive archive( &file, CArchive::SD_Storing );
+	layer->Serialize( archive );
+	archive.Close();
+	layer = new T( mathEngine ); // the caller's pointer now refers to the new, not yet loaded, layer
+	file.SeekToBegin();
+	archive.Open( &file, CArchive::SD_Loading );
+	layer->Serialize( archive );
+	// `type` is the label type the caller expects the loaded layer to report
+	EXPECT_EQ( type, layer->GetLabelType() );
+	layer->SetProblem( problem );
+	dnn.AddLayer( *layer );
+}
+
+// Checks on every run that the float labels equal EnumBinarization applied to the int labels.
+// T may be CFullyConnectedSourceLayer or CProblemSourceLayer; both layers are given the same problem.
+template<class T>
+static void testLabelTypes( CDnn& dnn, CPtr<T> intLayer, CPtr<T> floatLayer )
+{
+	const int featureCount = 5;
+	const int vectorCount = 10;
+	const int classCount = 3;
+	const int runCount = 10;
+	const int batchSize = ( vectorCount / 2 ) - 1;
+
+	static_assert( vectorCount >= classCount, "" );
+	static_assert( classCount > 2, "" );
+	// Each vector holds a single 1.f at a random feature index; classes are assigned round-robin
+	CPtr<CMemoryProblem> problem = new CMemoryProblem( featureCount, classCount );
+	for( int i = 0; i < vectorCount; ++i ) {
+		CSparseFloatVector vector;
+		const int index = dnn.Random().UniformInt( 0, featureCount - 1 );
+		vector.SetAt( index, 1.f );
+		problem->Add( vector, i % classCount );
+	}
+	// Only intLayer gets an explicit label type; floatLayer is expected to report CT_Float without one
+	intLayer->SetLabelType( CT_Int );
+	recreateLayer( dnn.GetMathEngine(), intLayer, "intLayer", batchSize, CT_Int, problem.Ptr(), dnn );
+	recreateLayer( dnn.GetMathEngine(), floatLayer, "floatLayer", batchSize, CT_Float, problem.Ptr(), dnn );
+
+	CPtr<CEnumBinarizationLayer> enumBin = EnumBinarization( classCount )( CDnnLayerLink( intLayer.Ptr(), 1 ) );
+
+	Sink( CDnnLayerLink( floatLayer.Ptr(), 0 ), "floatData" );
+	CPtr<CSinkLayer> floatLabel = Sink( CDnnLayerLink( floatLayer.Ptr(), 1 ), "floatLabel" );
+	Sink( CDnnLayerLink( floatLayer.Ptr(), 2 ), "floatWeights" );
+
+	Sink( CDnnLayerLink( intLayer.Ptr(), 0 ), "intData" );
+	CPtr<CSinkLayer> intLabel = Sink( enumBin.Ptr(), "intLabel" );
+	Sink( CDnnLayerLink( intLayer.Ptr(), 2 ), "intWeights" );
+
+	for( int run = 0; run < runCount; ++run ) {
+		dnn.RunOnce();
+
+		CArray<float> expected;
+		expected.SetSize( floatLabel->GetBlob()->GetDataSize() );
+		floatLabel->GetBlob()->CopyTo( expected.GetPtr() );
+
+		CArray<float> result;
+		result.SetSize( intLabel->GetBlob()->GetDataSize() );
+		intLabel->GetBlob()->CopyTo( result.GetPtr() );
+		// A size mismatch must stop the helper: the loop below indexes both arrays up to expected.Size()
+		EXPECT_EQ( classCount * batchSize, expected.Size() );
+		ASSERT_EQ( expected.Size(), result.Size() );
+		for( int i = 0; i < expected.Size(); ++i ) {
+			EXPECT_FLOAT_EQ( expected[i], result[i] );
+		}
+	}
+}
+
+} // namespace NeoMLTest
+
+//---------------------------------------------------------------------------------------------------------------------
+
+TEST( CDnnProblemTest, LabelTypes )
+{
+	// Both networks below are constructed from this one CRandom instance
+	CRandom random( 0x0CA );
+	{
+		// Scenario 1: a pair of CProblemSourceLayer
+		CDnn dnn( random, MathEngine() );
+		CPtr<CProblemSourceLayer> intLabeled = new CProblemSourceLayer( MathEngine() );
+		CPtr<CProblemSourceLayer> floatLabeled = new CProblemSourceLayer( MathEngine() );
+		testLabelTypes( dnn, intLabeled, floatLabeled );
+	}
+	{
+		// Scenario 2: a pair of CFullyConnectedSourceLayer with the same number of elements
+		const int elementCount = 3;
+		CDnn dnn( random, MathEngine() );
+		CPtr<CFullyConnectedSourceLayer> intLabeled = new CFullyConnectedSourceLayer( MathEngine() );
+		intLabeled->SetNumberOfElements( elementCount );
+		CPtr<CFullyConnectedSourceLayer> floatLabeled = new CFullyConnectedSourceLayer( MathEngine() );
+		floatLabeled->SetNumberOfElements( elementCount );
+		testLabelTypes( dnn, intLabeled, floatLabeled );
+	}
+}