diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea6b093
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,33 @@
+
+#Ignore thumbnails created by Windows
+Thumbs.db
+#Ignore files built by Visual Studio
+*.obj
+*.exe
+*.pdb
+*.user
+*.aps
+*.pch
+*.vspscc
+*_i.c
+*_p.c
+*.ncb
+*.suo
+*.tlb
+*.tlh
+*.bak
+*.cache
+*.ilk
+*.log
+*.binlog
+[Bb]in
+[Dd]ebug*/
+*.lib
+*.sbr
+obj/
+[Rr]elease*/
+_ReSharper*/
+[Tt]est[Rr]esult*
+.vs/
+#Nuget packages folder
+packages/
diff --git a/.vs/GitHub/v15/.suo b/.vs/GitHub/v15/.suo
deleted file mode 100644
index 4d741b4..0000000
Binary files a/.vs/GitHub/v15/.suo and /dev/null differ
diff --git a/.vs/ProjectSettings.json b/.vs/ProjectSettings.json
deleted file mode 100644
index f8b4888..0000000
--- a/.vs/ProjectSettings.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "CurrentProjectSetting": null
-}
\ No newline at end of file
diff --git a/.vs/VSWorkspaceState.json b/.vs/VSWorkspaceState.json
deleted file mode 100644
index 6b61141..0000000
--- a/.vs/VSWorkspaceState.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "ExpandedNodes": [
- ""
- ],
- "PreviewInSolutionExplorer": false
-}
\ No newline at end of file
diff --git a/.vs/slnx.sqlite b/.vs/slnx.sqlite
deleted file mode 100644
index bba64e6..0000000
Binary files a/.vs/slnx.sqlite and /dev/null differ
diff --git a/PAL2/.vs/PAL_Solution/v15/.suo b/PAL2/.vs/PAL_Solution/v15/.suo
deleted file mode 100644
index e8eb191..0000000
Binary files a/PAL2/.vs/PAL_Solution/v15/.suo and /dev/null differ
diff --git a/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/db.lock b/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/db.lock
deleted file mode 100644
index e69de29..0000000
diff --git a/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/storage.ide b/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/storage.ide
deleted file mode 100644
index 394cf31..0000000
Binary files a/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/storage.ide and /dev/null differ
diff --git a/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/storage.ide-shm b/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/storage.ide-shm
deleted file mode 100644
index fe9ac28..0000000
Binary files a/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/storage.ide-shm and /dev/null differ
diff --git a/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/storage.ide-wal b/PAL2/.vs/PAL_Solution/v15/Server/sqlite3/storage.ide-wal
deleted file mode 100644
index e69de29..0000000
diff --git a/PAL2/PALFunctions/bin/Debug/PALFunctions.dll b/PAL2/PALFunctions/bin/Debug/PALFunctions.dll
deleted file mode 100644
index 6fe8a10..0000000
Binary files a/PAL2/PALFunctions/bin/Debug/PALFunctions.dll and /dev/null differ
diff --git a/PAL2/PALFunctions/bin/Debug/PALFunctions.pdb b/PAL2/PALFunctions/bin/Debug/PALFunctions.pdb
deleted file mode 100644
index aa0c10f..0000000
Binary files a/PAL2/PALFunctions/bin/Debug/PALFunctions.pdb and /dev/null differ
diff --git a/PAL2/PALFunctions/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache b/PAL2/PALFunctions/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache
deleted file mode 100644
index 2a6709a..0000000
Binary files a/PAL2/PALFunctions/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache and /dev/null differ
diff --git a/PAL2/PALFunctions/obj/Debug/PALFunctions.Resources.resources b/PAL2/PALFunctions/obj/Debug/PALFunctions.Resources.resources
deleted file mode 100644
index 6c05a97..0000000
Binary files a/PAL2/PALFunctions/obj/Debug/PALFunctions.Resources.resources and /dev/null differ
diff --git a/PAL2/PALFunctions/obj/Debug/PALFunctions.dll b/PAL2/PALFunctions/obj/Debug/PALFunctions.dll
deleted file mode 100644
index 6fe8a10..0000000
Binary files a/PAL2/PALFunctions/obj/Debug/PALFunctions.dll and /dev/null differ
diff --git a/PAL2/PALFunctions/obj/Debug/PALFunctions.pdb b/PAL2/PALFunctions/obj/Debug/PALFunctions.pdb
deleted file mode 100644
index aa0c10f..0000000
Binary files a/PAL2/PALFunctions/obj/Debug/PALFunctions.pdb and /dev/null differ
diff --git a/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.CoreCompileInputs.cache b/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.CoreCompileInputs.cache
deleted file mode 100644
index b9843cd..0000000
--- a/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.CoreCompileInputs.cache
+++ /dev/null
@@ -1 +0,0 @@
-7b1716855409b5d87fc995fac1e5195765f96a61
diff --git a/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.FileListAbsolute.txt b/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.FileListAbsolute.txt
deleted file mode 100644
index 8547226..0000000
--- a/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.FileListAbsolute.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.dll
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.pdb
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.xml
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\obj\Debug\ResolveAssemblyReference.cache
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.Resources.resources
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.vbproj.GenerateResource.Cache
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.dll
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.xml
-C:\Users\clinth\Documents\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.pdb
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.dll
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.pdb
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.xml
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\obj\Debug\ResolveAssemblyReference.cache
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.Resources.resources
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.vbproj.GenerateResource.Cache
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.dll
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.xml
-C:\Users\clinth\Documents\MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.pdb
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.dll
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.pdb
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\bin\Debug\PALFunctions.xml
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\obj\Debug\ResolveAssemblyReference.cache
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.Resources.resources
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.vbproj.GenerateResource.Cache
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.dll
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.xml
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL\PALFunctions\obj\Debug\PALFunctions.pdb
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\bin\Debug\PALFunctions.dll
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\bin\Debug\PALFunctions.pdb
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\bin\Debug\PALFunctions.xml
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\obj\Debug\ResolveAssemblyReference.cache
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\obj\Debug\PALFunctions.Resources.resources
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\obj\Debug\PALFunctions.vbproj.GenerateResource.Cache
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\obj\Debug\PALFunctions.dll
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\obj\Debug\PALFunctions.xml
-C:\Users\clinth\Documents\~MyDocs\~Projects\PAL_PS\PALFunctions\obj\Debug\PALFunctions.pdb
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\bin\Debug\PALFunctions.dll
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\bin\Debug\PALFunctions.pdb
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\bin\Debug\PALFunctions.xml
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\obj\Debug\ResolveAssemblyReference.cache
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\obj\Debug\PALFunctions.Resources.resources
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\obj\Debug\PALFunctions.vbproj.GenerateResource.Cache
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\obj\Debug\PALFunctions.dll
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\obj\Debug\PALFunctions.xml
-C:\Users\Clint Huffman\Documents\PAL2\PALFunctions\obj\Debug\PALFunctions.pdb
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\bin\Debug\PALFunctions.dll
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\bin\Debug\PALFunctions.pdb
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\bin\Debug\PALFunctions.xml
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\ResolveAssemblyReference.cache
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.Resources.resources
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.vbproj.GenerateResource.Cache
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.dll
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.xml
-C:\Users\Clint Huffman\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.pdb
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\bin\Debug\PALFunctions.dll
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\bin\Debug\PALFunctions.pdb
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\bin\Debug\PALFunctions.xml
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\ResolveAssemblyReference.cache
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.Resources.resources
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.vbproj.GenerateResource.Cache
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.dll
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.xml
-C:\Users\Clint Huffman\Documents\SkyDrive\Projects\PAL2\PALFunctions\obj\Debug\PALFunctions.pdb
-C:\Users\clint\Desktop\PALPro\PALFunctions\bin\Debug\PALFunctions.dll
-C:\Users\clint\Desktop\PALPro\PALFunctions\bin\Debug\PALFunctions.pdb
-C:\Users\clint\Desktop\PALPro\PALFunctions\bin\Debug\PALFunctions.xml
-C:\Users\clint\Desktop\PALPro\PALFunctions\obj\Debug\PALFunctions.Resources.resources
-C:\Users\clint\Desktop\PALPro\PALFunctions\obj\Debug\PALFunctions.vbproj.GenerateResource.Cache
-C:\Users\clint\Desktop\PALPro\PALFunctions\obj\Debug\PALFunctions.vbproj.CoreCompileInputs.cache
-C:\Users\clint\Desktop\PALPro\PALFunctions\obj\Debug\PALFunctions.dll
-C:\Users\clint\Desktop\PALPro\PALFunctions\obj\Debug\PALFunctions.xml
-C:\Users\clint\Desktop\PALPro\PALFunctions\obj\Debug\PALFunctions.pdb
diff --git a/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.GenerateResource.Cache b/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.GenerateResource.Cache
deleted file mode 100644
index 8e9fec1..0000000
Binary files a/PAL2/PALFunctions/obj/Debug/PALFunctions.vbproj.GenerateResource.Cache and /dev/null differ
diff --git a/PAL2/PALFunctions/obj/Debug/PALFunctions.xml b/PAL2/PALFunctions/obj/Debug/PALFunctions.xml
deleted file mode 100644
index 38dfc2f..0000000
--- a/PAL2/PALFunctions/obj/Debug/PALFunctions.xml
+++ /dev/null
@@ -1,26 +0,0 @@
-
- For Web farms in production, it is recommended that a server be removed from rotation prior to updating content for best performance and reliability. For a single Web server in production, content can be updated while the server is under load. The hotfix described in Knowledge Base Article 810281 is of interest to anyone experiencing errors after an application restarts, such as sharing violations with an error similar to "Cannot access file <FileName> because it is being used by another process."
-
- An issue involving anti-virus software and applications restarts is fixed in Knowledge Base Article 820746: FIX: Some Antivirus Programs May Cause Web Applications to Restart Unexpectedly for v1.0, and in Knowledge Base Article 821438 for v1.1. In a perfect world, the application domain will survive for the life of the process. Excessive values should be investigated, and a new threshold should be set as necessary.
- Thresholds: This analysis throws a warning alert if more than 1 application restart occurs per hour and throw a critical alert if more than 5 application restarts per hour occurs.
- For Web farms in production, it is recommended that a server be removed from rotation prior to updating content for best performance and reliability. For a single Web server in production, content can be updated while the server is under load. The hotfix described in Knowledge Base Article 810281 is of interest to anyone experiencing errors after an application restarts, such as sharing violations with an error similar to "Cannot access file <FileName> because it is being used by another process."
+
+ An issue involving anti-virus software and applications restarts is fixed in Knowledge Base Article 820746: FIX: Some Antivirus Programs May Cause Web Applications to Restart Unexpectedly for v1.0, and in Knowledge Base Article 821438 for v1.1. In a perfect world, the application domain will survive for the life of the process. Excessive values should be investigated, and a new threshold should be set as necessary.
+ Thresholds: This analysis throws a warning alert if more than 1 application restart occurs per hour and throw a critical alert if more than 5 application restarts per hour occurs.
+ 1 2 3 4 5 6 Adapter receives message and
- submits it to the engine, work done in adapter before message is given to
- engine not captured in these perf counters. Engine receives message from
- adapter, executes receive pipeline, map, subscription evaluation, persist
- message in DB. Orchestration or
- Solicit-Response port runs and generates a response message. Response message is dequeued in messaging engine, execute the send pipeline,
- map. Messaging engine gives
- response message to adapter. Adapter informs engine message
- is all done. Inbound Latency Request Response Latency Outbound Latency Outbound Adapter Latency 1 2 3 4 5 6 Adapter receives message and
- submits it to the engine, work done in adapter before message is given to
- engine not captured in these perf counters. Engine receives message from
- adapter, executes receive pipeline, map, subscription evaluation, persist
- message in DB. Orchestration or
- Solicit-Response port runs and generates a response message. Response message is dequeued in messaging engine, execute the send pipeline,
- map. Messaging engine gives
- response message to adapter. Adapter informs engine message
- is all done. Inbound Latency Request Response Latency Outbound Latency Outbound Adapter Latency 1 2 3 4 5 6 Adapter receives message and
- submits it to the engine, work done in adapter before message is given to
- engine not captured in these perf counters. Engine receives message from
- adapter, executes receive pipeline, map, subscription evaluation, persist
- message in DB. Orchestration or
- Solicit-Response port runs and generates a response message. Response message is dequeued in messaging engine, execute the send pipeline,
- map. Messaging engine gives
- response message to adapter. Adapter informs engine message
- is all done. Inbound Latency Request Response Latency Outbound Latency Outbound Adapter Latency 1 2 3 4 5 6 Adapter receives message and
+ submits it to the engine, work done in adapter before message is given to
+ engine not captured in these perf counters. Engine receives message from
+ adapter, executes receive pipeline, map, subscription evaluation, persist
+ message in DB. Orchestration or
+ Solicit-Response port runs and generates a response message. Response message is dequeued in messaging engine, execute the send pipeline,
+ map. Messaging engine gives
+ response message to adapter. Adapter informs engine message
+ is all done. Inbound Latency Request Response Latency Outbound Latency Outbound Adapter Latency 1 2 3 4 5 6 Adapter receives message and
+ submits it to the engine, work done in adapter before message is given to
+ engine not captured in these perf counters. Engine receives message from
+ adapter, executes receive pipeline, map, subscription evaluation, persist
+ message in DB. Orchestration or
+ Solicit-Response port runs and generates a response message. Response message is dequeued in messaging engine, execute the send pipeline,
+ map. Messaging engine gives
+ response message to adapter. Adapter informs engine message
+ is all done. Inbound Latency Request Response Latency Outbound Latency Outbound Adapter Latency 1 2 3 4 5 6 Adapter receives message and
+ submits it to the engine, work done in adapter before message is given to
+ engine not captured in these perf counters. Engine receives message from
+ adapter, executes receive pipeline, map, subscription evaluation, persist
+ message in DB. Orchestration or
+ Solicit-Response port runs and generates a response message. Response message is dequeued in messaging engine, execute the send pipeline,
+ map. Messaging engine gives
+ response message to adapter. Adapter informs engine message
+ is all done. Inbound Latency Request Response Latency Outbound Latency Outbound Adapter Latency
- Excessive managed memory usage is commonly caused by:
-
+ Excessive managed memory usage is commonly caused by:
+ The Log Generation Checkpoint Depth performance counter reports the number of transaction log files that have not yet been saved to the database. This number represents the number of transaction log files that must be replayed to the database if the Microsoft Exchange Information Store service process (Store.exe) stops and needs to be restarted. As the log file generation depth increases, the Exchange Information Store startup time increases. If the transaction log file depth of a storage group reaches 5,000, the Extensible Storage Engine (ESE) dismounts all the databases that are in the affected storage group. This alert indicates that the checkpoint depth is greater than 2500. The transaction log file depth may grow during periods when the server is busy. However, large values typically occur when there is a failure or when a backup fails. User Action:
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
- Reference:
- ASP.NET Performance
- ]]>
-
-
-
-
- Reference:
- ASP.NET Performance
- ]]>
-
-
- Reference:
-
-processModel Element (ASP.NET Settings Schema)
-
-http://msdn2.microsoft.com/en-us/library/7w2sway1(VS.80).aspx
-
-If the number of requests allowed in the queue is exceeded, ASP.NET begins returning "503 – Server Too Busy" errors to new requests. When this limit is reached, the system is operating in abnormal conditions. This might be a symptom of a malfunction or, maybe, just high load. Restarting (bouncing) the worker process might become the only way to prevent further problems.
-
-]]>
-
-
-
-
-
-
-
-
+
+
+
+
+ Reference:
+ ASP.NET Performance
+ ]]>
+
+
+
+
+ Reference:
+ ASP.NET Performance
+ ]]>
+
+
+ Reference:
+
+processModel Element (ASP.NET Settings Schema)
+
+http://msdn2.microsoft.com/en-us/library/7w2sway1(VS.80).aspx
+
+If the number of requests allowed in the queue is exceeded, ASP.NET begins returning "503 – Server Too Busy" errors to new requests. When this limit is reached, the system is operating in abnormal conditions. This might be a symptom of a malfunction or, maybe, just high load. Restarting (bouncing) the worker process might become the only way to prevent further problems.
+
+]]>
+
+
+
+
+
+
+
+
-0: Not throttling
-2: Throttling due to imbalanced message publishing rate (input rate exceeds output rate)
-4: Throttling due to process memory pressure
-5: Throttling due to system memory pressure
-6: Throttling due to database growth
-8: Throttling due to high session count
-9: Throttling due to high thread count
-11: Throttling due to user override on publishing
-
-This analysis checks for each of these values and has a specific alert for each of them.
-
-References:
-Host Throttling Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx]]>
-
-
-
-
-
-
-
-
-0: Not throttling
-1: Throttling due to imbalanced message delivery rate (input rate exceeds output rate)
-3: Throttling due to high in-process message count
-4: Throttling due to process memory pressure
-5: Throttling due to system memory pressure
-9: Throttling due to high thread count
-10: Throttling due to user override on delivery
-
-This analysis checks for each of these values and has a specific alert for each of them.
-
-References:
-Host Throttling Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx]]>
-
-
-
-
-
-
-
-Database connection per CPUÂis the maximum number of concurrent database sessions (per CPU) allowed before throttling begins. The idle database sessions in the common per-host session pool do not add to this count, and this check is made strictly on the number of sessions actually being used by the host instance. This option is disabled by default; typically this setting should only be enabled if the database server is a bottleneck or for low-end database servers in the BizTalk Server system. You can monitor the number of active Database connections by using the Database session performance counter under the BizTalk:Message Agent performance object category. This parameter only affects outbound message throttling. Enter a value of 0 to disable throttling that is based on the number of database sessions. The default value is 0.
-
-Note: The MaxWorkerThreads registry key has influence on the number threads available to BizTalk and may help in the case where most of BizTalk's threads are busy with database connections.
-
-References:
-
-Host Throttling Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-
-Threads, DB sessions, and throttling
-http://blogs.msdn.com/biztalkperformance/archive/2007/09/28/threads-db-sessions-and-throttling.aspx
-
-Configuration Parameters that Affect Adapter Performance http://msdn2.microsoft.com/en-us/library/aa561380.aspx ]]>
-
-
-If the host is restarted, statistics held in memory are lost. Since there is some overhead involved, BizTalk Server will resume gathering statistics only when there are at least 100 publishes with 5% of the total publishes within the restarted host process.
-
-This counter will be set to a value of one if either of the conditions listed for the Message count in database threshold occurs. This threshold is documented in the topic How to Modify the Default Host Throttling Settings. By default the host Message count in database throttling threshold is set to a value of 50,000, which will trigger a throttling condition under the following circumstances:
-The total number of messages published by the host instance to the work, state, and suspended queues of the subscribing hosts exceeds 50,000.
-The number of messages in the spool table or the tracking table exceeds 500,000 messages.
-
-Since suspended messages are included in the Message count in database calculation, throttling of message publishing can occur even if the BizTalk server is experiencing low or no load.
-
-This analysis checks for a value of 1. If this occurs, then consider a course of action that will reduce the number of messages in the database. For example, ensure the BizTalk SQL Server jobs are running without error and use the Group Hub in the BizTalk Administration console to determine if message build up is caused by large numbers of suspended messages.
-
-References:
-Suspended Messages are Included in the Message Count in Database Throttling Threshold
-How to Modify the Default Host Throttling Settings
-http://msdn2.microsoft.com/en-us/library/aa559628.aspx
-Host Throttling Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-]]>
-
-
-This parameter can be set to a smaller value for large message scenarios, where either the average message size is high, or the processing of messages may require a large amount of messages. Such a case would be evident if a scenario experiences memory-based throttling too often and if the memory threshold gets auto-adjusted to a substantially low value. Such behavior would indicate that the outbound transport should process fewer messages concurrently to avoid excessive memory usage. Also, for scenarios where the adapter is more efficient when processing a few messages at a time (for example, when sending to a server that limits concurrent connections), this parameter may be tuned to a lower value than the default.
-
-This analysis checks the High In-Process Message Count counter to determine if this kind of throttling is occurring. If so, consider adjusting the In-Process messages per CPUÂ setting. This parameter only affects outbound message throttling. Enter a value of 0 in the In-Process messages per CPUÂ setting to disable throttling based on the number of in-process messages per CPU. The default value for the In-Process messages per CPUÂsetting is 1,000. Note modifying this value can also have an impact on low latency of messages and/or the efficiency of BizTalk resources.
-
-References:
-Host Throttling Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-
-How to Modify the Default Host Throttling Settings
-http://msdn2.microsoft.com/en-us/library/aa559628.aspx
-]]>
-
-
-Outbound throttling can cause delayed message delivery and messages may build up in the in-memory queue and cause de-queue threads to be blocked until the throttling condition is mitigated. When de-queue threads are blocked no additional messages are pulled from the MessageBox into the in-memory queue for outbound delivery.
-
-This analysis checks for a value of 1 in the High Message Delivery Rate counter. High message delivery rates can be caused by high processing complexity, slow outbound adapters, or a momentary shortage of system resources.
-
-References:
-Host Throttling Performance Counters\Message Processing Throttling Settings Dialog Box
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx
-]]>
-
-
-This analysis checks for a value of 1 in the High Message Publishing Rate counter. If this occurs, then the database cannot keep up with the publishing rate of messages to the BizTalk messagebox database.
-
-References:
-Host Throttling Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx
-
-Message Publishing Throttling Settings Dialog Box
-What is Host Throttling?
-]]>
-
-
-This analysis checks for a value of 1 in the High Process Memory counter. If his occurs, then try to determine the cause of the memory increase by using Debug Diag in memory leak analysis. Note that is it normal for processes to consume a large portion of memory during startup and this may initially appear as a memory leak, but a true memory leak occurs when it fails to release memory that it no longer needs, thereby reducing the amount of available memory over time. See the How to Capture a Memory Dump of a Process that is Leaking Memory reference below and/or the Memory Leak Detection analysis in PAL for more information on how to generically analyze process memory leaks in BizTalk.
-
-High process memory throttling can occur if the batch to be published has steep memory requirements or too many threads are processing messages. If the system appears to be over-throttling, consider increasing the value associated with the Process memory usage threshold for the host and verify that the host instance does not generate an "out of memory" error. If an "out of memory" error is raised by increasing the Process memory usage threshold, then consider reducing the values for the Internal message queue size and In-process messages per CPU thresholds. This strategy is particularly relevant in large message processing scenarios. In addition, this value should be set to a low value for scenarios having large memory requirement per message. Setting a low value will kick in throttling early on and prevent a memory explosion within the process.
-
-If your BizTalk server regularly runs out of virtual memory, then consider BizTalk Server 64-bit. Each Process on 64-bit servers can address up to 4TB's of virtual memory versus the 2GB™s in 32-bit. In general, 64-bit BizTalk and 64-bit SQL Server is highly recommended. See the BizTalk Server 64-bit Support reference for more information.
-
-By default, the BizTalk Process Memory Usage throttling setting is 25.
-
-References:
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx
-
-How to Modify the Default Host Throttling Settings
-http://msdn2.microsoft.com/en-us/library/aa559628.aspx
-How to Capture a Memory Dump of a Process that is Leaking Memory
-http://msdn2.microsoft.com/en-us/library/aa560560.aspx
-BizTalk Server 64-bit Support
-http://msdn2.microsoft.com/en-us/library/aa560166.aspx]]>
-
-
-This analysis checks for a value of 1 in the High System Memory counter. Since this measures total system memory, a throttling condition may be triggered if non-BizTalk Server processes are consuming an extensive amount of system memory. Therefore if this occurs, the best approach is to identify which processes are consuming the most physical memory and/or add additional physical memory to the server. Also, consider reducing load by reducing the default size of the EPM thread pool, and/or the size of adapter batches. For more information, see the Memory Leak DetectionÂ.
-
-References:
-Host Throttling Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-
-How to Modify the Default Host Throttling Settings
-http://msdn2.microsoft.com/en-us/library/aa559628.aspx
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx
-]]>
-
-
-Note: The user-specified value is used as a guideline, and the host may dynamically self-tune this threshold value based on the memory usage patterns and thread requirements of the process.
-
-This analysis checks for a value of 1 in the High Thread Count counter. Consider adjusting the different thread pool sizes to ensure that the system does not create a large number of threads. This analysis can be correlated with Context Switches per Second analysis to determine if the operating system is saturated with too many threads, but in most cases high thread counts cause more contention on the backend database than on the BizTalk server. For more information about modifying the thread pool sizes see How to Modify the Default Host Throttling Settings.
-
-References:
-Host Throttling Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa578302.aspx
-
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx
-
-How to Modify the Default Host Throttling Settings
-http://msdn2.microsoft.com/en-us/library/aa559628.aspx
-
-Configuration Parameters that Affect Adapter Performance
-http://msdn2.microsoft.com/en-us/library/aa561380.aspx
-
-Threads, DB sessions, and throttling
-http://blogs.msdn.com/biztalkperformance/archive/2007/09/28/threads-db-sessions-and-throttling.aspx
-]]>
-
-
-This analysis checks for the existance of a message delivery delay as a warning and a delay of 5 seconds or more as a critical. Long message delivery delays may indicate heavy throttling due to high load.
-
-If either of these alerts occur, then identify the throttling condition and determine if the throttling condition is desirable. Adjustment of the BizTalk throttling settings may be needed.
-
-References:
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx ]]>
-
-
-
-
-This analysis checks for the existance of a message publishing delay as a warning and a delay of 5 seconds or more as a critical. Long message delivery delays may indicate heavy throttling due to high load.
-
-If either of these alerts occur, then identify the throttling condition and determine if the throttling condition is desirable. Adjustment of the BizTalk throttling settings may be needed.
-
-References:
-How BizTalk Server Implements Host Throttling
-http://msdn2.microsoft.com/en-us/library/aa559893.aspx ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Request/Response Messaging
-http://msdn2.microsoft.com/en-us/library/aa559029.aspx
-BizTalk Server 2006: Scalability Case Study Using the SOAP Adapter in BizTalk Server 2006
-http://msdn2.microsoft.com/en-us/library/aa972198.aspx
]]>
-
-
-
-
-Reducing latency is important to some users of BizTalk, therefore tracking how much time documents spend in the inbound adapter is important.
-
-Here is a chart showing how latency is measured.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Assuming a low latency environment, this analysis checks if the document spent more than 5 seconds in the inbound adapter. This may indicate a processing delay in the transport of messages through inbound adapters in this host instance. If multiple inbound adapters exist in this host instance, then consider separating them into their own hosts in order to determine which inbound adapter has high latency.
-
-References:
-BizTalk Server Database Optimization
-http://msdn2.microsoft.com/en-us/library/bb743398.aspx
-Request/Response Messaging
-http://msdn2.microsoft.com/en-us/library/aa559029.aspx
-BizTalk Server 2006: Scalability Case Study Using the SOAP Adapter in BizTalk Server 2006
-http://msdn2.microsoft.com/en-us/library/aa972198.aspx
-Identifying Bottlenecks in the BizTalk Tier
-http://msdn2.microsoft.com/en-us/library/aa561922.aspx
-BizTalk Server 2004: Performance Tuning for Low Latency Messaging
-http://msdn2.microsoft.com/en-us/library/aa475435.aspx]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Assuming a low latency environment, this analysis checks for latency in the outbound adapter of greater than 5 seconds on average. This may indicate a processing delay in the transport of messages through outbound adapters in this host instance. If multiple outbound adapters exist in this host instance, then consider separating them into their own hosts in order to determine which outbound adapter has high latency.
-
-References:
-Request/Response Messaging
-http://msdn2.microsoft.com/en-us/library/aa559029.aspx
-BizTalk Server 2006: Scalability Case Study Using the SOAP Adapter in BizTalk Server 2006
-http://msdn2.microsoft.com/en-us/library/aa972198.aspx ]]>
-
-
-
-
-Note: When the BizTalk spool tables size in on an increasing trend, then throttling due to imbalanced message delivery rate (input rate exceeds output rate) or throttling due to Database size may occur.
-
-This analysis checks for an increasing trend in the BizTalk Spool Table Size.
-
-References:
-Understanding BizTalk Server 2004 SP1 Throughput and Capacity
-http://blogs.msdn.com/biztalkperformance/archive/2005/04/07/406343.aspx
-Sustainable Load Test
-http://msdn2.microsoft.com/en-us/library/aa577964.aspx
-Recommendations When Testing Engine Performance
-http://msdn2.microsoft.com/en-us/library/aa547236.aspx]]>
-
-This analysis checks for an increasing trend of more than 5MBâ„¢s per hour in the tracking data size.
-
-References:
-Archiving and Purging the BizTalk Tracking Database
-http://msdn2.microsoft.com/en-us/library/aa560754.aspx]]>
-
-Note: The Host Queue Length is a weighted Queue length by aggregating the record count of all the Queues (Work Q, State Q, Suspended Q) of the target host.
-
-This analysis checks for average queue lengths greater than 1.
-
-References:
-BizTalk Server 2006: Managing a Successful Performance Lab
-http://msdn2.microsoft.com/en-us/library/aa972201.aspx]]>
-
-
-The suspended message queue is a queue that contains work items for which an error or failure was encountered during processing. A suspended queue stores the messages until they can be corrected and reprocessed, or deleted.
-
-This analysis checks for any occurrence of suspended messages. An increasing trend could indicate severe processing errors.
-
-References:
-BizTalk Server 2004: Monitoring and Troubleshooting
-http://msdn2.microsoft.com/en-us/library/ms942197.aspx]]>
-
-
-Dehydration is the process of serializing the state of an orchestration into a SQL Server database. Rehydration is the reverse of this process: deserializing the last running state of an orchestration from the database. Dehydration is used to minimize the use of system resources by reducing the number of orchestrations that have to be instantiated in memory at one time. Therefore, dehyrations save memory consumption, but are relatively expensive operations to perform.
-
-This analysis checks for dehydrations of 10 or more occurring. If so, BizTalk may be running out of memory (either virtual or physical), there are a high number of orchestrations waiting on messages, or the dehydration settings are not set properly.
-
-References:
-Orchestration Dehydration and Rehydration
-http://msdn2.microsoft.com/en-us/library/aa995563.aspx
-]]>
-
-
-
-
-Dehydration is the process of serializing the state of an orchestration into a SQL Server database. Rehydration is the reverse of this process: deserializing the last running state of an orchestration from the database. Dehydration is used to minimize the use of system resources by reducing the number of orchestrations that have to be instantiated in memory at one time. The engine dehydrates the instance by saving the state, and frees up the memory required by the instance. By dehydrating dormant orchestration instances, the engine makes it possible for a large number of long-running business processes to run concurrently on the same computer.
-
-This analysis checks for an increasing trend of 1 idle orchestration per hour.
-
-References:
-Orchestration Dehydration and Rehydration
-http://msdn2.microsoft.com/en-us/library/aa995563.aspx]]>
-
-
-
-
-This analysis determines if any of the host instances are consuming a large size of the system's memory and if the host instance is increasing in memory consumption over time. A host instance consuming large portions of memory is okay as long as the it returns the memory back to the system. Look for increasing trends in the chart. An increasing trend over a long period of time could indicate a memory leak. Private Bytes is the current size, in bytes, of memory that a process has allocated that cannot be shared with other processes.
-
-This analysis checks for a 10MBâ„¢s per hour increasing trend. Use this analysis in correlation with the Available Memory analysis and the Memory Leak Analysis.
-
-Also, keep in mind that newly started host instances will initially appear as a memory leak when it is simply normal start up behavior. A memory leak is when a process continues to consume memory and not releasing memory over a long period of time.
-
-If you suspect a memory leak condition, then read the Memory Growth in BizTalk Messaging article below. Otherwise, install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
-
-References:
-Debug Diagnostic Tool v1.1
-http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en
-Memory Growth in BizTalk Messaging
-http://blogs.msdn.com/biztalkperformance/archive/2005/04/16/408866.aspx
-
-]]>
-
-
-This analysis checks for a 10MBâ„¢s per hour increasing trend in virtual bytes. Use this analysis in correlation with the Available Memory analysis and the Memory Leak Analysis.
-
-Also, keep in mind that newly started host instances will initially appear as a memory leak when it is simply normal start up behavior. A memory leak is when a process continues to consume memory and not releasing memory over a long period of time.
-
-If you suspect a memory leak condition, then read the Memory Growth in BizTalk Messaging article below. Otherwise, install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
-
-References:
-Memory Growth in BizTalk Messaging
-http://blogs.msdn.com/biztalkperformance/archive/2005/04/16/408866.aspx
-Debug Diagnostic Tool v1.1
-http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
-
-
-DBNetLib (Database Network Library) errors occur when the BizTalk Server runtime is unable to communicate with either the MessageBox or Management databases. When this occurs, the BizTalk Server runtime instance that catches the exception shuts down and then cycles every minute to check to see if the database is available. See the references section for more information on this topic.
-
-When a client initiates a TCP/IP socket connection to a server, the client typically connects to a specific port on the server and requests that the server respond to the client over an ephemeral, or short lived, TCP or UDP port. On Windows Server 2003 and Windows XP the default range of ephemeral ports used by client applications is from 1025 through 5000. Under certain conditions it is possible that the available ports in the default range will be exhausted. See the references section for more information on this topic.
-
-This analysis checks for any occurrence of database connection failures. Database connection failures are critical because BizTalk cannot function without the database. If the cause of the database connection failure is unknown, then consider the topics listed above and/or contact Microsoft Support to determine the nature of the connectivity failure.
-
-References:
-Scaled-Out Databases
-http://msdn2.microsoft.com/en-us/library/aa561513.aspx
-Avoiding DBNETLIB Exceptions
-http://msdn2.microsoft.com/en-us/library/aa560429.aspx
-Avoiding TCP/IP Port Exhaustion
-http://msdn2.microsoft.com/en-us/library/aa560610.aspx]]>
-
-
-This analysis checks for any discarded messages. See the references section for more information regarding discarded messages.
-
-References:
-BizTalk Core Engine's WebLog
-http://blogs.msdn.com/biztalk_core_engine/archive/2004/06/30/169430.aspx]]>
-
-
-While spikes or bursts of orchestrations resident in memory may be considered normal an increasing trend could indicate a pile up of orchestrations in memory. An increasing trend over time may occur when BizTalk is unable to dehydrate messages/orchestration instances, therefore try to correlate this counter with XLANG/s Orchestrations(?)\Dehydratable orchestrations where the question mark (?) is the same counter instance as this counter. If there is a high number of orchestrations resident in memory and if there is a low number of dehydratable orchestrations, then your orchestrations are likely Idle in memory and may cause a memory leak condition. Use this analysis in correlation with \XLANG/s Orchestrations(*)\Idle orchestrations if present. An increasing trend in BizTalk Idle Orchestrations is a better indicator of memory leaks due to the inability to dehydrate orchestration instances.
-
-This analysis checks for an increasing trend in orchestrations resident in memory and if more than 50% of the orchestrations resident in memory are not dehydratable.
-
-References:
-Orchestration Engine Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa561431.aspx
-Orchestration Dehydration and Rehydration
-http://msdn2.microsoft.com/en-us/library/aa995563.aspx]]>
-
-
-This analysis checks for any suspended messages/orchestrations.
-
-References:
-BizTalk Server 2004: Monitoring and Troubleshooting
-http://msdn2.microsoft.com/en-us/library/ms942197.aspx]]>
-
-
-This analysis checks only shows statistics for this counter.
-
-References:
-Orchestration Engine Performance Counters
-http://msdn2.microsoft.com/en-us/library/aa561431.aspx]]>
-
-
-The orchestration engine saves the state of a running orchestration instance at various points. If it needs to rehydrate the orchestration instance, start up from a controlled shutdown, or recover from an unexpected shutdown, it will run the orchestration instance from the last persistence point, as though nothing else had occurred. In order to persist an orchestration instance, all object instances that your orchestration refers to directly or indirectly (as through other objects) must be serializable for your orchestration state to be persisted. As message-persistence frequency (the number of times that data needs to be persisted) increases, overall performance decreases. In effect, each persistence point is a round trip to the database, so whenever possible reduce the frequency of persistence points by avoiding or consolidating persistence points when possible. See the references below for more information regarding when persistence points occur.
-
-This analysis checks for more than 10 persistence points per second on average. This is a general starting point.
-
-References:
-Persistence in Orchestrations
-http://msdn2.microsoft.com/en-us/library/aa559440.aspx
-Persistence and the Orchestration Engine
-http://msdn2.microsoft.com/en-us/library/aa547090.aspx]]>
-
-
-
-
-Transactional scope aborts should not normally occur in a production environment, therefore this analysis checks for the occurrence of any transactional scopes aborted.
-
-References:
-Transactions Across BizTalk Server 2004
-http://msdn2.microsoft.com/en-us/library/ms942198.aspx]]>
-
-
-Transactional scope compensations should not normally occur in a production environment, therefore this analysis checks for the occurrence of any transactional scopes aborted.
-
-References:
-Transactions Across BizTalk Server 2004
-http://msdn2.microsoft.com/en-us/library/ms942198.aspx]]>
-
-
-This analysis provides statistics only.
-
-References:
-BizTalk Server 2006: Scalability Case Study Using the SOAP Adapter in BizTalk Server 2006
-http://msdn2.microsoft.com/en-us/library/aa972198.aspx]]>
+0: Not throttling
+2: Throttling due to imbalanced message publishing rate (input rate exceeds output rate)
+4: Throttling due to process memory pressure
+5: Throttling due to system memory pressure
+6: Throttling due to database growth
+8: Throttling due to high session count
+9: Throttling due to high thread count
+11: Throttling due to user override on publishing
+
+This analysis checks for each of these values and has a specific alert for each of them.
+
+References:
+Host Throttling Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx]]>
+
+
+
+
+
+
+
+
+0: Not throttling
+1: Throttling due to imbalanced message delivery rate (input rate exceeds output rate)
+3: Throttling due to high in-process message count
+4: Throttling due to process memory pressure
+5: Throttling due to system memory pressure
+9: Throttling due to high thread count
+10: Throttling due to user override on delivery
+
+This analysis checks for each of these values and has a specific alert for each of them.
+
+References:
+Host Throttling Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx]]>
+
+
+
+
+
+
+
+Database connection per CPU is the maximum number of concurrent database sessions (per CPU) allowed before throttling begins. The idle database sessions in the common per-host session pool do not add to this count, and this check is made strictly on the number of sessions actually being used by the host instance. This option is disabled by default; typically this setting should only be enabled if the database server is a bottleneck or for low-end database servers in the BizTalk Server system. You can monitor the number of active Database connections by using the Database session performance counter under the BizTalk:Message Agent performance object category. This parameter only affects outbound message throttling. Enter a value of 0 to disable throttling that is based on the number of database sessions. The default value is 0.
+
+Note: The MaxWorkerThreads registry key has influence on the number threads available to BizTalk and may help in the case where most of BizTalk's threads are busy with database connections.
+
+References:
+
+Host Throttling Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+
+Threads, DB sessions, and throttling
+http://blogs.msdn.com/biztalkperformance/archive/2007/09/28/threads-db-sessions-and-throttling.aspx
+
+Configuration Parameters that Affect Adapter Performance http://msdn2.microsoft.com/en-us/library/aa561380.aspx ]]>
+
+
+If the host is restarted, statistics held in memory are lost. Since there is some overhead involved, BizTalk Server will resume gathering statistics only when there are at least 100 publishes with 5% of the total publishes within the restarted host process.
+
+This counter will be set to a value of one if either of the conditions listed for the Message count in database threshold occurs. This threshold is documented in the topic How to Modify the Default Host Throttling Settings. By default the host Message count in database throttling threshold is set to a value of 50,000, which will trigger a throttling condition under the following circumstances:
+The total number of messages published by the host instance to the work, state, and suspended queues of the subscribing hosts exceeds 50,000.
+The number of messages in the spool table or the tracking table exceeds 500,000 messages.
+
+Since suspended messages are included in the Message count in database calculation, throttling of message publishing can occur even if the BizTalk server is experiencing low or no load.
+
+This analysis checks for a value of 1. If this occurs, then consider a course of action that will reduce the number of messages in the database. For example, ensure the BizTalk SQL Server jobs are running without error and use the Group Hub in the BizTalk Administration console to determine if message build up is caused by large numbers of suspended messages.
+
+References:
+Suspended Messages are Included in the Message Count in Database Throttling Threshold
+How to Modify the Default Host Throttling Settings
+http://msdn2.microsoft.com/en-us/library/aa559628.aspx
+Host Throttling Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+]]>
+
+
+This parameter can be set to a smaller value for large message scenarios, where either the average message size is high, or the processing of messages may require a large amount of memory. Such a case would be evident if a scenario experiences memory-based throttling too often and if the memory threshold gets auto-adjusted to a substantially low value. Such behavior would indicate that the outbound transport should process fewer messages concurrently to avoid excessive memory usage. Also, for scenarios where the adapter is more efficient when processing a few messages at a time (for example, when sending to a server that limits concurrent connections), this parameter may be tuned to a lower value than the default.
+
+This analysis checks the High In-Process Message Count counter to determine if this kind of throttling is occurring. If so, consider adjusting the In-Process messages per CPU setting. This parameter only affects outbound message throttling. Enter a value of 0 in the In-Process messages per CPU setting to disable throttling based on the number of in-process messages per CPU. The default value for the In-Process messages per CPU setting is 1,000. Note modifying this value can also have an impact on low latency of messages and/or the efficiency of BizTalk resources.
+
+References:
+Host Throttling Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+
+How to Modify the Default Host Throttling Settings
+http://msdn2.microsoft.com/en-us/library/aa559628.aspx
+]]>
+
+
+Outbound throttling can cause delayed message delivery and messages may build up in the in-memory queue and cause de-queue threads to be blocked until the throttling condition is mitigated. When de-queue threads are blocked no additional messages are pulled from the MessageBox into the in-memory queue for outbound delivery.
+
+This analysis checks for a value of 1 in the High Message Delivery Rate counter. High message delivery rates can be caused by high processing complexity, slow outbound adapters, or a momentary shortage of system resources.
+
+References:
+Host Throttling Performance Counters\Message Processing Throttling Settings Dialog Box
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx
+]]>
+
+
+This analysis checks for a value of 1 in the High Message Publishing Rate counter. If this occurs, then the database cannot keep up with the publishing rate of messages to the BizTalk messagebox database.
+
+References:
+Host Throttling Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx
+
+Message Publishing Throttling Settings Dialog Box
+What is Host Throttling?
+]]>
+
+
+This analysis checks for a value of 1 in the High Process Memory counter. If this occurs, then try to determine the cause of the memory increase by using Debug Diag in memory leak analysis. Note that it is normal for processes to consume a large portion of memory during startup and this may initially appear as a memory leak, but a true memory leak occurs when it fails to release memory that it no longer needs, thereby reducing the amount of available memory over time. See the How to Capture a Memory Dump of a Process that is Leaking Memory reference below and/or the Memory Leak Detection analysis in PAL for more information on how to generically analyze process memory leaks in BizTalk.
+
+High process memory throttling can occur if the batch to be published has steep memory requirements or too many threads are processing messages. If the system appears to be over-throttling, consider increasing the value associated with the Process memory usage threshold for the host and verify that the host instance does not generate an "out of memory" error. If an "out of memory" error is raised by increasing the Process memory usage threshold, then consider reducing the values for the Internal message queue size and In-process messages per CPU thresholds. This strategy is particularly relevant in large message processing scenarios. In addition, this value should be set to a low value for scenarios having large memory requirement per message. Setting a low value will kick in throttling early on and prevent a memory explosion within the process.
+
+If your BizTalk server regularly runs out of virtual memory, then consider BizTalk Server 64-bit. Each process on 64-bit servers can address up to 4 TB of virtual memory versus the 2 GB in 32-bit. In general, 64-bit BizTalk and 64-bit SQL Server is highly recommended. See the BizTalk Server 64-bit Support reference for more information.
+
+By default, the BizTalk Process Memory Usage throttling setting is 25.
+
+References:
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx
+
+How to Modify the Default Host Throttling Settings
+http://msdn2.microsoft.com/en-us/library/aa559628.aspx
+How to Capture a Memory Dump of a Process that is Leaking Memory
+http://msdn2.microsoft.com/en-us/library/aa560560.aspx
+BizTalk Server 64-bit Support
+http://msdn2.microsoft.com/en-us/library/aa560166.aspx]]>
+
+
+This analysis checks for a value of 1 in the High System Memory counter. Since this measures total system memory, a throttling condition may be triggered if non-BizTalk Server processes are consuming an extensive amount of system memory. Therefore if this occurs, the best approach is to identify which processes are consuming the most physical memory and/or add additional physical memory to the server. Also, consider reducing load by reducing the default size of the EPM thread pool, and/or the size of adapter batches. For more information, see the Memory Leak Detection analysis.
+
+References:
+Host Throttling Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+
+How to Modify the Default Host Throttling Settings
+http://msdn2.microsoft.com/en-us/library/aa559628.aspx
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx
+]]>
+
+
+Note: The user-specified value is used as a guideline, and the host may dynamically self-tune this threshold value based on the memory usage patterns and thread requirements of the process.
+
+This analysis checks for a value of 1 in the High Thread Count counter. Consider adjusting the different thread pool sizes to ensure that the system does not create a large number of threads. This analysis can be correlated with Context Switches per Second analysis to determine if the operating system is saturated with too many threads, but in most cases high thread counts cause more contention on the backend database than on the BizTalk server. For more information about modifying the thread pool sizes see How to Modify the Default Host Throttling Settings.
+
+References:
+Host Throttling Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa578302.aspx
+
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx
+
+How to Modify the Default Host Throttling Settings
+http://msdn2.microsoft.com/en-us/library/aa559628.aspx
+
+Configuration Parameters that Affect Adapter Performance
+http://msdn2.microsoft.com/en-us/library/aa561380.aspx
+
+Threads, DB sessions, and throttling
+http://blogs.msdn.com/biztalkperformance/archive/2007/09/28/threads-db-sessions-and-throttling.aspx
+]]>
+
+
+This analysis checks for the existence of a message delivery delay as a warning and a delay of 5 seconds or more as a critical. Long message delivery delays may indicate heavy throttling due to high load.
+
+If either of these alerts occur, then identify the throttling condition and determine if the throttling condition is desirable. Adjustment of the BizTalk throttling settings may be needed.
+
+References:
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx ]]>
+
+
+
+
+This analysis checks for the existence of a message publishing delay as a warning and a delay of 5 seconds or more as a critical. Long message publishing delays may indicate heavy throttling due to high load.
+
+If either of these alerts occur, then identify the throttling condition and determine if the throttling condition is desirable. Adjustment of the BizTalk throttling settings may be needed.
+
+References:
+How BizTalk Server Implements Host Throttling
+http://msdn2.microsoft.com/en-us/library/aa559893.aspx ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Request/Response Messaging
+http://msdn2.microsoft.com/en-us/library/aa559029.aspx
+BizTalk Server 2006: Scalability Case Study Using the SOAP Adapter in BizTalk Server 2006
+http://msdn2.microsoft.com/en-us/library/aa972198.aspx
]]>
+
+
+
+
+Reducing latency is important to some users of BizTalk, therefore tracking how much time documents spend in the inbound adapter is important.
+
+Here is a chart showing how latency is measured.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Assuming a low latency environment, this analysis checks if the document spent more than 5 seconds in the inbound adapter. This may indicate a processing delay in the transport of messages through inbound adapters in this host instance. If multiple inbound adapters exist in this host instance, then consider separating them into their own hosts in order to determine which inbound adapter has high latency.
+
+References:
+BizTalk Server Database Optimization
+http://msdn2.microsoft.com/en-us/library/bb743398.aspx
+Request/Response Messaging
+http://msdn2.microsoft.com/en-us/library/aa559029.aspx
+BizTalk Server 2006: Scalability Case Study Using the SOAP Adapter in BizTalk Server 2006
+http://msdn2.microsoft.com/en-us/library/aa972198.aspx
+Identifying Bottlenecks in the BizTalk Tier
+http://msdn2.microsoft.com/en-us/library/aa561922.aspx
+BizTalk Server 2004: Performance Tuning for Low Latency Messaging
+http://msdn2.microsoft.com/en-us/library/aa475435.aspx]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Assuming a low latency environment, this analysis checks for latency in the outbound adapter of greater than 5 seconds on average. This may indicate a processing delay in the transport of messages through outbound adapters in this host instance. If multiple outbound adapters exist in this host instance, then consider separating them into their own hosts in order to determine which outbound adapter has high latency.
+
+References:
+Request/Response Messaging
+http://msdn2.microsoft.com/en-us/library/aa559029.aspx
+BizTalk Server 2006: Scalability Case Study Using the SOAP Adapter in BizTalk Server 2006
+http://msdn2.microsoft.com/en-us/library/aa972198.aspx ]]>
+
+
+
+
+Note: When the BizTalk spool table size is on an increasing trend, then throttling due to imbalanced message delivery rate (input rate exceeds output rate) or throttling due to Database size may occur.
+
+This analysis checks for an increasing trend in the BizTalk Spool Table Size.
+
+References:
+Understanding BizTalk Server 2004 SP1 Throughput and Capacity
+http://blogs.msdn.com/biztalkperformance/archive/2005/04/07/406343.aspx
+Sustainable Load Test
+http://msdn2.microsoft.com/en-us/library/aa577964.aspx
+Recommendations When Testing Engine Performance
+http://msdn2.microsoft.com/en-us/library/aa547236.aspx]]>
+
+This analysis checks for an increasing trend of more than 5 MB per hour in the tracking data size.
+
+References:
+Archiving and Purging the BizTalk Tracking Database
+http://msdn2.microsoft.com/en-us/library/aa560754.aspx]]>
+
+Note: The Host Queue Length is a weighted Queue length by aggregating the record count of all the Queues (Work Q, State Q, Suspended Q) of the target host.
+
+This analysis checks for average queue lengths greater than 1.
+
+References:
+BizTalk Server 2006: Managing a Successful Performance Lab
+http://msdn2.microsoft.com/en-us/library/aa972201.aspx]]>
+
+
+The suspended message queue is a queue that contains work items for which an error or failure was encountered during processing. A suspended queue stores the messages until they can be corrected and reprocessed, or deleted.
+
+This analysis checks for any occurrence of suspended messages. An increasing trend could indicate severe processing errors.
+
+References:
+BizTalk Server 2004: Monitoring and Troubleshooting
+http://msdn2.microsoft.com/en-us/library/ms942197.aspx]]>
+
+
+Dehydration is the process of serializing the state of an orchestration into a SQL Server database. Rehydration is the reverse of this process: deserializing the last running state of an orchestration from the database. Dehydration is used to minimize the use of system resources by reducing the number of orchestrations that have to be instantiated in memory at one time. Therefore, dehydrations save memory consumption, but are relatively expensive operations to perform.
+
+This analysis checks for dehydrations of 10 or more occurring. If so, BizTalk may be running out of memory (either virtual or physical), there are a high number of orchestrations waiting on messages, or the dehydration settings are not set properly.
+
+References:
+Orchestration Dehydration and Rehydration
+http://msdn2.microsoft.com/en-us/library/aa995563.aspx
+]]>
+
+
+
+
+Dehydration is the process of serializing the state of an orchestration into a SQL Server database. Rehydration is the reverse of this process: deserializing the last running state of an orchestration from the database. Dehydration is used to minimize the use of system resources by reducing the number of orchestrations that have to be instantiated in memory at one time. The engine dehydrates the instance by saving the state, and frees up the memory required by the instance. By dehydrating dormant orchestration instances, the engine makes it possible for a large number of long-running business processes to run concurrently on the same computer.
+
+This analysis checks for an increasing trend of 1 idle orchestration per hour.
+
+References:
+Orchestration Dehydration and Rehydration
+http://msdn2.microsoft.com/en-us/library/aa995563.aspx]]>
+
+
+
+
+This analysis determines if any of the host instances are consuming a large size of the system's memory and if the host instance is increasing in memory consumption over time. A host instance consuming large portions of memory is okay as long as it returns the memory back to the system. Look for increasing trends in the chart. An increasing trend over a long period of time could indicate a memory leak. Private Bytes is the current size, in bytes, of memory that a process has allocated that cannot be shared with other processes.
+
+This analysis checks for a 10MB per hour increasing trend. Use this analysis in correlation with the Available Memory analysis and the Memory Leak Analysis.
+
+Also, keep in mind that newly started host instances will initially appear as a memory leak when it is simply normal start up behavior. A memory leak is when a process continues to consume memory and not releasing memory over a long period of time.
+
+If you suspect a memory leak condition, then read the Memory Growth in BizTalk Messaging article below. Otherwise, install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
+
+References:
+Debug Diagnostic Tool v1.1
+http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en
+Memory Growth in BizTalk Messaging
+http://blogs.msdn.com/biztalkperformance/archive/2005/04/16/408866.aspx
+
+]]>
+
+
+This analysis checks for a 10MB per hour increasing trend in virtual bytes. Use this analysis in correlation with the Available Memory analysis and the Memory Leak Analysis.
+
+Also, keep in mind that newly started host instances will initially appear as a memory leak when it is simply normal start up behavior. A memory leak is when a process continues to consume memory and not releasing memory over a long period of time.
+
+If you suspect a memory leak condition, then read the Memory Growth in BizTalk Messaging article below. Otherwise, install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
+
+References:
+Memory Growth in BizTalk Messaging
+http://blogs.msdn.com/biztalkperformance/archive/2005/04/16/408866.aspx
+Debug Diagnostic Tool v1.1
+http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
+
+
+DBNetLib (Database Network Library) errors occur when the BizTalk Server runtime is unable to communicate with either the MessageBox or Management databases. When this occurs, the BizTalk Server runtime instance that catches the exception shuts down and then cycles every minute to check to see if the database is available. See the references section for more information on this topic.
+
+When a client initiates a TCP/IP socket connection to a server, the client typically connects to a specific port on the server and requests that the server respond to the client over an ephemeral, or short lived, TCP or UDP port. On Windows Server 2003 and Windows XP the default range of ephemeral ports used by client applications is from 1025 through 5000. Under certain conditions it is possible that the available ports in the default range will be exhausted. See the references section for more information on this topic.
+
+This analysis checks for any occurrence of database connection failures. Database connection failures are critical because BizTalk cannot function without the database. If the cause of the database connection failure is unknown, then consider the topics listed above and/or contact Microsoft Support to determine the nature of the connectivity failure.
+
+References:
+Scaled-Out Databases
+http://msdn2.microsoft.com/en-us/library/aa561513.aspx
+Avoiding DBNETLIB Exceptions
+http://msdn2.microsoft.com/en-us/library/aa560429.aspx
+Avoiding TCP/IP Port Exhaustion
+http://msdn2.microsoft.com/en-us/library/aa560610.aspx]]>
+
+
+This analysis checks for any discarded messages. See the references section for more information regarding discarded messages.
+
+References:
+BizTalk Core Engine's WebLog
+http://blogs.msdn.com/biztalk_core_engine/archive/2004/06/30/169430.aspx]]>
+
+
+While spikes or bursts of orchestrations resident in memory may be considered normal, an increasing trend could indicate a pile up of orchestrations in memory. An increasing trend over time may occur when BizTalk is unable to dehydrate messages/orchestration instances, therefore try to correlate this counter with XLANG/s Orchestrations(?)\Dehydratable orchestrations where the question mark (?) is the same counter instance as this counter. If there is a high number of orchestrations resident in memory and if there is a low number of dehydratable orchestrations, then your orchestrations are likely Idle in memory and may cause a memory leak condition. Use this analysis in correlation with \XLANG/s Orchestrations(*)\Idle orchestrations if present. An increasing trend in BizTalk Idle Orchestrations is a better indicator of memory leaks due to the inability to dehydrate orchestration instances.
+
+This analysis checks for an increasing trend in orchestrations resident in memory and if more than 50% of the orchestrations resident in memory are not dehydratable.
+
+References:
+Orchestration Engine Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa561431.aspx
+Orchestration Dehydration and Rehydration
+http://msdn2.microsoft.com/en-us/library/aa995563.aspx]]>
+
+
+This analysis checks for any suspended messages/orchestrations.
+
+References:
+BizTalk Server 2004: Monitoring and Troubleshooting
+http://msdn2.microsoft.com/en-us/library/ms942197.aspx]]>
+
+
+This analysis only shows statistics for this counter.
+
+References:
+Orchestration Engine Performance Counters
+http://msdn2.microsoft.com/en-us/library/aa561431.aspx]]>
+
+
+The orchestration engine saves the state of a running orchestration instance at various points. If it needs to rehydrate the orchestration instance, start up from a controlled shutdown, or recover from an unexpected shutdown, it will run the orchestration instance from the last persistence point, as though nothing else had occurred. In order to persist an orchestration instance, all object instances that your orchestration refers to directly or indirectly (as through other objects) must be serializable for your orchestration state to be persisted. As message-persistence frequency (the number of times that data needs to be persisted) increases, overall performance decreases. In effect, each persistence point is a round trip to the database, so whenever possible reduce the frequency of persistence points by avoiding or consolidating persistence points when possible. See the references below for more information regarding when persistence points occur.
+
+This analysis checks for more than 10 persistence points per second on average. This is a general starting point.
+
+References:
+Persistence in Orchestrations
+http://msdn2.microsoft.com/en-us/library/aa559440.aspx
+Persistence and the Orchestration Engine
+http://msdn2.microsoft.com/en-us/library/aa547090.aspx]]>
+
+
+
+
+Transactional scope aborts should not normally occur in a production environment, therefore this analysis checks for the occurrence of any transactional scopes aborted.
+
+References:
+Transactions Across BizTalk Server 2004
+http://msdn2.microsoft.com/en-us/library/ms942198.aspx]]>
+
+
+Transactional scope compensations should not normally occur in a production environment, therefore this analysis checks for the occurrence of any transactional scopes compensated.
+
+References:
+Transactions Across BizTalk Server 2004
+http://msdn2.microsoft.com/en-us/library/ms942198.aspx]]>
+
+
+This analysis provides statistics only.
+
+References:
+BizTalk Server 2006: Scalability Case Study Using the SOAP Adapter in BizTalk Server 2006
+http://msdn2.microsoft.com/en-us/library/aa972198.aspx]]>
-
-
-
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
- Thresholds: This analysis throws a warning alert if .NET CLR Exceptions per second is greater than 10 and a critical alert if .NET CLR Exceptions per second is greater than 50.
-
- Next Steps:
-
-
-
- Need Assistance? Microsoft Premier Field Engineering (PFE) offers onsite assistance with issues like this. Contact your Microsoft Technical Account Manager (TAM) for more information. If you do not have a Microsoft Premier Support Agreement, then go to http://www.microsoft.com/services/microsoftservices/srv_premier.mspx for more info.
-
- References:
-
- ]]>
-
-
-
-
-
-
- Reference:
-
- ]]>
-
-
+
+
+
+
+ Thresholds: This analysis throws a warning alert if .NET CLR Exceptions per second is greater than 10 and a critical alert if .NET CLR Exceptions per second is greater than 50.
+
+ Next Steps:
+
+
+
+ Need Assistance? Microsoft Premier Field Engineering (PFE) offers onsite assistance with issues like this. Contact your Microsoft Technical Account Manager (TAM) for more information. If you do not have a Microsoft Premier Support Agreement, then go to http://www.microsoft.com/services/microsoftservices/srv_premier.mspx for more info.
+
+ References:
+
+ ]]>
+
+
+
+
+
+
+ Reference:
+
+ ]]>
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-If the response times are greater than .010 (10 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
-
-If the response times are greater than .020 (20 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
-
-Reference:
-Ruling Out Disk-Bound Problems
-http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
]]>
-
-
-
-
-
-If the response times are greater than .010 (15 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
-
-If the response times are greater than .020 (25 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
-
-Reference:
-Ruling Out Disk-Bound Problems
-http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
-
]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This is used for information purposes only.
-
-]]>
-This is used for information purposes only.
-
-]]>
-This is used for information purposes only.
-
-]]>
-
-
-
-
-
-This counter is used to report load on the server.
-
-]]>
-This counter is used to report load on the server.
-
-]]>
-This counter is used to report load on the server.
-
]]>
-
-
-
-
-
-
-
-
-
-
-This counter is indicitive of an overloaded domain controller or there are network connectivity problems to the DC.
-
-]]>
-
-
-
-
-This counter is useful for determining domain controller latencies that may affect overall Exchange performance.
-
-]]>
-
-
-
-
-This counter is useful to determine if there is an increase in LDAP calls to the domain controllers possibly affecting overall latency on the server.
-
-]]>
-To resolve this error, do one or more of the following:
-
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-If there are messages queued for this counter, this this means that the HUB server(s) is not picking up mail in a timely fashion.
-
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
If this counter is sustained over 20, store version buckets is very high, and RPC requests flatlines at the same time, then 623 events might be occurring due to a queued transaction inside Store. See http://blogs.technet.com/mikelag/archive/2008/08/22/search-folder-performance-problem-detection.aspx for additional information]]>
- The Log Generation Checkpoint Depth performance counter reports the number of transaction log files that have not yet been saved to the database. This number represents the number of transaction log files that must be replayed to the database if the Microsoft Exchange Information Store service process (Store.exe) stops and needs to be restarted. As the log file generation depth increases, the Exchange Information Store startup time increases. If the transaction log file depth of a storage group reaches 5,000, the Extensible Storage Engine (ESE) dismounts all the databases that are in the affected storage group. This alert indicates that the checkpoint depth is greater than 2500. The transaction log file depth may grow during periods when the server is busy. However, large values typically occur when there is a failure or when a backup fails. User Action:
+
+
+
+
+If the response times are greater than .010 (10 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
+
+If the response times are greater than .020 (20 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
]]>
+
+
+
+
+
+If the response times are greater than .015 (15 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
+
+If the response times are greater than .025 (25 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This is used for information purposes only.
+
+]]>
+This is used for information purposes only.
+
+]]>
+This is used for information purposes only.
+
+]]>
+
+
+
+
+
+This counter is used to report load on the server.
+
+]]>
+This counter is used to report load on the server.
+
+]]>
+This counter is used to report load on the server.
+
]]>
+
+
+
+
+
+
+
+
+
+
+This counter is indicative of an overloaded domain controller or network connectivity problems to the DC.
+
+]]>
+
+
+
+
+This counter is useful for determining domain controller latencies that may affect overall Exchange performance.
+
+]]>
+
+
+
+
+This counter is useful to determine if there is an increase in LDAP calls to the domain controllers possibly affecting overall latency on the server.
+
+]]>
+To resolve this error, do one or more of the following:
+
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+If there are messages queued for this counter, this means that the HUB server(s) is not picking up mail in a timely fashion.
+
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
If this counter is sustained over 20, store version buckets is very high, and RPC requests flatlines at the same time, then 623 events might be occurring due to a queued transaction inside Store. See http://blogs.technet.com/mikelag/archive/2008/08/22/search-folder-performance-problem-detection.aspx for additional information]]>
+
-
-
-
-
-If the response times are greater than .010 (10 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
-
-If the response times are greater than .020 (20 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
-
-Reference:
-Ruling Out Disk-Bound Problems
-http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
-Monitoring Mailbox Servers
-http://technet.microsoft.com/en-us/library/bb201689(EXCHG.80).aspx
]]>
-
-
-
-
-
-If the response times are greater than .010 (15 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
-
-If the response times are greater than .020 (25 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
-
-Reference:
-Ruling Out Disk-Bound Problems
-http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
-Monitoring Mailbox Servers
-http://technet.microsoft.com/en-us/library/bb201689(EXCHG.80).aspx
-
]]>
-
-
-
-
-The average value of the Log Record Stalls/sec performance counter should be under 10 per second. The maximum value should not exceed 100 per second.
- ]]>
-
-
-The Client Out Queue Length (SMTP) performance counter indicates the number of messages in the queue that contains Simple Mail Transfer Protocol (SMTP) messages sent by the Inetinfo.exe process. The queues should be under 50 at all times unless there is a mail queue.
-
-]]>
-The Store Out Queue Length (SMTP) performance counter indicates the number of messages in the queue that contains Simple Mail Transfer Protocol (SMTP) messages sent by the Store.exe process. The queues should be under 50 at all times unless there is a mail queue.]]>
-This counter should remain at zero during normal operations. ]]>
-This counter should remain at zero during normal operations. ]]>
-This counter should remain at zero during normal operations. ]]>
-This counter should remain at zero during normal operations.
-]]>
-This value should not go above 100. ]]>
-
-
-
-
-If these values indicate a warning or critical error, then Desktop search engines might be having an adverse affect on Exchange server performance. If this is the case, then have the clients change their profiles to cached mode instead of online mode. Doing this will take the processing time off of the server and put it on the client by using the local desktop search instead of querying the server directly that generates a lot of RPC requests to the server.]]>
-
-
-
-
-
-
-The average value of the RPC Requests performance counter should be under 30 at all times. By default, the maximum value for RPC requests is 100. Therefore, unless it is configured otherwise, the Microsoft Exchange Information Store service can service only 100 RPC Requests at the same time before it rejects client requests.
-
-]]>
-
-
-
-
-
-
-
-
-If the Virus Scan Queue Length counter increases with RPC requests for MSExchangeIS, then a potential bottleneck could be the Antivirus software
-
-If this value is at a warning or critical stage, then this can indicate performance issues related to the following:
-
-- Decreased server performance, usually caused by a CPU bottleneck.
-- Larger than usual server load.
-- An out-of-date virus scanning engine.
-- An incorrectly configured virus scanning engine.
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-If this value is high, then we might be creating a lot of search restrictions instead of reusing cached views in the store causing performance delays at the client and server.
-
-If users are opening other users calendars with more than 5,000 calendar items and the Outlook versions being utilized is Outlook 2003 SP1 or earlier, then applying http://support.microsoft.com/?id=896418 may help this situation.
-
-Another possible solution is to use the "Reset Views" registry key on the server per http://support.microsoft.com/?id=216076 to see if performance increases at that point. If that helps, then lowering the "Aging Keep Time" on the server may help.
-
-If the delay is still occurring, then we may have to increase msExchMaxCachedViews per https://technet.microsoft.com/en-us/library/aa996115.aspx to allow more search restrictions to be created in the store. It is recommended to increase this value to a maximum of 20 search folders. If a larger setting is required, it is recommended to move mailboxes that are exhibiting this problem to their own Storage Group to help alleviate some of the pain.
-
-For a client side fix for a single folder, you can use MFCMAPI to modify the PR_MAX_CACHED_VIEWS on the folder itself increasing the amount of cached views available for that folder]]>
-The counter increments when we have a DEEP traversal of the folders in a users mailbox (search for all subfolders) searching for a string in the inbox or subject.]]>
-
-
-
-
-The categorizer performs the following core operations:
-
-- Resolves and validates all recipients.
-- Determines limits and restrictions for all recipients.
-- Expands distribution lists if that is required.
-- Determines whether the message is for local or remote delivery
-
-This error indicates that there may be problems with server or network performance, Directory Service Access (DSAccess), the Active Directory® directory service, or outages with key servers in the Exchange Server organization. An increase in the Categorizer Queue Length counter may indicate a either a slow response or no response in consulting Active Directory or in handing off messages for local delivery or SMTP. This might cause delayed mail delivery.
-
-]]>
-
-
-If you are seeing high queues for Local Delivery, then there may be performance issues on the server causing slow message delivery locally to the store. Check other counters for Disk Latencies, High CPU, LDAP latencies, and any Antivirus software that might be interacting with the store.
-
-If this server houses a journaling mailbox, then ensure that these mailboxes do not contain high items counts as having millions of messages in this mailbox could cause slow performance with no errors being logged.]]>
-
-
-
-
-
-
-The average value should be under 50 milliseconds. Maximum values should not exceed 100 milliseconds.
-
-High LDAP read latencies can be caused by high remote procedure call (RPC) latencies and by increasing queues. High LDAP read latencies generally indicate one of the following problems:
-
-- A performance problem with the network connection to the domain controller.
-- Performance problems with the domain controller itself.
-- Network cards, switches, etc. are not hard coded to a particular speed/duplex setting.
]]>
-
-
-
-
-The average value should be under 50 milliseconds. Maximum values should not exceed 100 milliseconds.
-
-High LDAP search latencies can be caused by high remote procedure call (RPC) latencies and by increasing queues. High LDAP search latencies generally indicate one of the following problems:
-
-- A performance problem with the network connection to the domain controller.
-- Performance problems with the domain controller itself.
-- Network cards, switches, etc. are not hard coded to a particular speed/duplex setting.
-]]>
-
-
-
-
-The average value should be under 50 milliseconds. Maximum values should not exceed 100 milliseconds.
-
-High LDAP search latencies can be caused by high remote procedure call (RPC) latencies and by increasing queues. High LDAP search latencies generally indicate one of the following problems:
-
-- A performance problem with the network connection to the domain controller.
-- Performance problems with the domain controller itself.
-- Network cards, switches, etc. are not hard coded to a particular speed/duplex setting.
]]>
-
-
-The average value should be under 50 milliseconds. Maximum values should not exceed 100 milliseconds.
-
-High LDAP read latencies can be caused by high remote procedure call (RPC) latencies and by increasing queues. High LDAP read latencies generally indicate one of the following problems:
-
-- A performance problem with the network connection to the domain controller.
-- Performance problems with the domain controller itself.
-- Network cards, switches, etc. are not hard coded to a particular speed/duplex setting.
]]>
-
-
-
-
-An unoptimized search for a string in the textcontext or subject will result a hit of the counter. For example, an OWA user searching in the subject or body field of a particular folder.
-
-If the server is experiencing high CPU utilization for the Information Store process, disk activity is high, RPC requests has a plateau, and this counter is high, you may have an issue where Search folders are causing performance bottlenecks on the server. If users have high items counts and 3rd party application make use of search folders in a users mailbox, this could be a cause. Running an isinteg -dump against an affected mailbox store will help show if there is a Search FID problem.]]>
-This counter can be enabled by setting "Search Flags" to 0 in the registry under each mailbox store. Note: Setting this flag will turn off CI and Query processing causing slower search code to be hit.
-
- This is normally disabled for testing/troubleshooting purposes and this registry key should be removed after the problem has been identified
-
]]>
+
+
+
+
+If the response times are greater than .010 (10 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
+
+If the response times are greater than .020 (20 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+Monitoring Mailbox Servers
+http://technet.microsoft.com/en-us/library/bb201689(EXCHG.80).aspx
]]>
+
+
+
+
+
+If the response times are greater than .015 (15 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
+
+If the response times are greater than .025 (25 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+Monitoring Mailbox Servers
+http://technet.microsoft.com/en-us/library/bb201689(EXCHG.80).aspx
+
]]>
+
+
+
+
+The average value of the Log Record Stalls/sec performance counter should be under 10 per second. The maximum value should not exceed 100 per second.
+ ]]>
+
+
+The Client Out Queue Length (SMTP) performance counter indicates the number of messages in the queue that contains Simple Mail Transfer Protocol (SMTP) messages sent by the Inetinfo.exe process. The queues should be under 50 at all times unless there is a mail queue.
+
+]]>
+The Store Out Queue Length (SMTP) performance counter indicates the number of messages in the queue that contains Simple Mail Transfer Protocol (SMTP) messages sent by the Store.exe process. The queues should be under 50 at all times unless there is a mail queue.]]>
+This counter should remain at zero during normal operations. ]]>
+This counter should remain at zero during normal operations. ]]>
+This counter should remain at zero during normal operations. ]]>
+This counter should remain at zero during normal operations.
+]]>
+This value should not go above 100. ]]>
+
+
+
+
+If these values indicate a warning or critical error, then Desktop search engines might be having an adverse effect on Exchange server performance. If this is the case, then have the clients change their profiles to cached mode instead of online mode. Doing this will take the processing time off of the server and put it on the client by using the local desktop search instead of querying the server directly that generates a lot of RPC requests to the server.]]>
+
+
+
+
+
+
+The average value of the RPC Requests performance counter should be under 30 at all times. By default, the maximum value for RPC requests is 100. Therefore, unless it is configured otherwise, the Microsoft Exchange Information Store service can service only 100 RPC Requests at the same time before it rejects client requests.
+
+]]>
+
+
+
+
+
+
+
+
+If the Virus Scan Queue Length counter increases with RPC requests for MSExchangeIS, then a potential bottleneck could be the Antivirus software
+
+If this value is at a warning or critical stage, then this can indicate performance issues related to the following:
+
+- Decreased server performance, usually caused by a CPU bottleneck.
+- Larger than usual server load.
+- An out-of-date virus scanning engine.
+- An incorrectly configured virus scanning engine.
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+If this value is high, then we might be creating a lot of search restrictions instead of reusing cached views in the store causing performance delays at the client and server.
+
+If users are opening other users calendars with more than 5,000 calendar items and the Outlook versions being utilized is Outlook 2003 SP1 or earlier, then applying http://support.microsoft.com/?id=896418 may help this situation.
+
+Another possible solution is to use the "Reset Views" registry key on the server per http://support.microsoft.com/?id=216076 to see if performance increases at that point. If that helps, then lowering the "Aging Keep Time" on the server may help.
+
+If the delay is still occurring, then we may have to increase msExchMaxCachedViews per https://technet.microsoft.com/en-us/library/aa996115.aspx to allow more search restrictions to be created in the store. It is recommended to increase this value to a maximum of 20 search folders. If a larger setting is required, it is recommended to move mailboxes that are exhibiting this problem to their own Storage Group to help alleviate some of the pain.
+
+For a client side fix for a single folder, you can use MFCMAPI to modify the PR_MAX_CACHED_VIEWS on the folder itself increasing the amount of cached views available for that folder]]>
+The counter increments when we have a DEEP traversal of the folders in a users mailbox (search for all subfolders) searching for a string in the inbox or subject.]]>
+
+
+
+
+The categorizer performs the following core operations:
+
+- Resolves and validates all recipients.
+- Determines limits and restrictions for all recipients.
+- Expands distribution lists if that is required.
+- Determines whether the message is for local or remote delivery
+
+This error indicates that there may be problems with server or network performance, Directory Service Access (DSAccess), the Active Directory® directory service, or outages with key servers in the Exchange Server organization. An increase in the Categorizer Queue Length counter may indicate either a slow response or no response in consulting Active Directory or in handing off messages for local delivery or SMTP. This might cause delayed mail delivery.
+
+]]>
+
+
+If you are seeing high queues for Local Delivery, then there may be performance issues on the server causing slow message delivery locally to the store. Check other counters for Disk Latencies, High CPU, LDAP latencies, and any Antivirus software that might be interacting with the store.
+
+If this server houses a journaling mailbox, then ensure that these mailboxes do not contain high item counts, as having millions of messages in this mailbox could cause slow performance with no errors being logged.]]>
+
+
+
+
+
+
+The average value should be under 50 milliseconds. Maximum values should not exceed 100 milliseconds.
+
+High LDAP read latencies can be caused by high remote procedure call (RPC) latencies and by increasing queues. High LDAP read latencies generally indicate one of the following problems:
+
+- A performance problem with the network connection to the domain controller.
+- Performance problems with the domain controller itself.
+- Network cards, switches, etc. are not hard coded to a particular speed/duplex setting.
]]>
+
+
+
+
+The average value should be under 50 milliseconds. Maximum values should not exceed 100 milliseconds.
+
+High LDAP search latencies can be caused by high remote procedure call (RPC) latencies and by increasing queues. High LDAP search latencies generally indicate one of the following problems:
+
+- A performance problem with the network connection to the domain controller.
+- Performance problems with the domain controller itself.
+- Network cards, switches, etc. are not hard coded to a particular speed/duplex setting.
+]]>
+
+
+
+
+The average value should be under 50 milliseconds. Maximum values should not exceed 100 milliseconds.
+
+High LDAP search latencies can be caused by high remote procedure call (RPC) latencies and by increasing queues. High LDAP search latencies generally indicate one of the following problems:
+
+- A performance problem with the network connection to the domain controller.
+- Performance problems with the domain controller itself.
+- Network cards, switches, etc. are not hard coded to a particular speed/duplex setting.
]]>
+
+
+The average value should be under 50 milliseconds. Maximum values should not exceed 100 milliseconds.
+
+High LDAP read latencies can be caused by high remote procedure call (RPC) latencies and by increasing queues. High LDAP read latencies generally indicate one of the following problems:
+
+- A performance problem with the network connection to the domain controller.
+- Performance problems with the domain controller itself.
+- Network cards, switches, etc. are not hard coded to a particular speed/duplex setting.
]]>
+
+
+
+
+An unoptimized search for a string in the textcontext or subject will result in a hit of the counter. For example, an OWA user searching in the subject or body field of a particular folder.
+
+If the server is experiencing high CPU utilization for the Information Store process, disk activity is high, RPC requests have plateaued, and this counter is high, you may have an issue where Search folders are causing performance bottlenecks on the server. If users have high item counts and 3rd party applications make use of search folders in a user's mailbox, this could be a cause. Running an isinteg -dump against an affected mailbox store will help show if there is a Search FID problem.]]>
+This counter can be enabled by setting "Search Flags" to 0 in the registry under each mailbox store. Note: Setting this flag will turn off CI and Query processing causing slower search code to be hit.
+
+ This is normally disabled for testing/troubleshooting purposes and this registry key should be removed after the problem has been identified
+
]]>
Availability Requests per second is the number of requests serviced per second. The request can be only for free busy or include suggestions. One request may contain multiple mailboxes.
-TechNet Exchange Counter Description:
-Shows the number of requests serviced per second. The request can be only for free/busy or include suggestions. One request may contain multiple mailboxes.
-Determines the rate at which Availability service requests are occurring.
-TechNet Exchange Guidance:
-Not applicable.
]]> - -
- Average Time to Process a Free Busy Request is the average time to process a free busy request in seconds. One request may contain multiple mailboxes. Free busy responses do not have meeting suggestions.
-TechNet Exchange Counter Description:
-Shows the average time to process a free/busy request in seconds. One request may contain multiple mailboxes. Free/busy responses do not have meeting suggestions.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should always be less than 5.
]]>
- Disk Reads/sec is the rate of read operations on the disk.
-TechNet Exchange Counter Description:
-Indicates that a paging situation may exist because data was read from disk instead of memory.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
-Disk Writes/sec + Disk Reads/sec = < 50
-NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>
- Disk Writes/sec is the rate of write operations on the disk.
-TechNet Exchange Counter Description:
-Indicates that a paging situation may exist because data was written to disk instead of being stored in memory.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
-Disk Writes/sec + Disk Reads/sec = < 50
-NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>
- Average Response Time is the average time (in milliseconds) that elapsed between the beginning and end of an OEH or ASPX request.
-TechNet Exchange Counter Description:
-Shows the average time (in milliseconds) that elapsed between the beginning and end of an OEH or ASPX request.
-Used to determine the latency that a client is experiencing.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than 100 ms at all times.
-Higher values may indicate high user load or higher than normal CPU time.
]]>
- Average Search Time is the average time that elapsed while waiting for a search to complete.
-TechNet Exchange Counter Description:
-Shows the average time that elapsed while waiting for a search to complete.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than 31,000 ms at all times.
]]>Average Request Time is the average time, in milliseconds, that elapsed while waiting for a request to complete. The transaction is tracked from the time Exchange ActiveSync receives the request and sends the response. The network latency between the device or client and Exchange ActiveSync on the Client Access server is not factored into this calculation.
-TechNet Exchange Counter Description:
-Shows the average time that elapsed while waiting for a request to complete.
-Includes Ping Request Time, which can increase the general response time of this counter. Adding ping counters helps clarify where performance is being impacted.
-Determines the rate at which Availability service requests are occurring.
-TechNet Exchange Guidance:
-Not applicable.
]]>Requests/sec is the number of HTTP requests that are received from the client via ASP.NET per second.
-TechNet Exchange Counter Description:
-Shows the number of HTTP requests that are received from the client via ASP.NET per second.
-Determines the current Exchange ActiveSync request rate.
-Stats Only to determine current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Ping Commands Pending is the number of Ping commands that are currently pending on the server.
-TechNet Exchange Counter Description:
-Shows the number of ping commands that are currently pending in the queue.
-Determines current ping commands pending in the queue.
-TechNet Exchange Guidance:
-Not applicable.
]]>Sync Commands/sec is the number of Sync commands that are processed per second. Clients use this command to synchronize items within a folder.
-TechNet Exchange Counter Description:
-Shows the number of Sync commands that are processed per second. Clients use this command to synchronize items within a folder.
-Stats Only to determine current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Current Unique Users is the number of unique users currently logged on to Outlook Web Access. This value monitors the number of unique active user sessions, so that users are only removed from this count after they log off or their session times out.
-TechNet Exchange Counter Description:
-Shows the number of unique users currently logged on to Outlook Web Access. This value monitors the number of unique active user sessions, so that users are only removed from this counter after they log off or their session times out.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Requests/sec is the number of requests handled by Outlook Web Access per second.
-TechNet Exchange Counter Description:
-Shows the number of requests handled by Outlook Web Access per second.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Requests/sec is the number of Autodiscover requests that have been processed each second.
-TechNet Exchange Counter Description:
-Shows the number of Autodiscover service requests that have been processed each second.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Requests per second is the number of requests processed each second.
-TechNet Exchange Counter Description:
-Shows the number of requests processed each second.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Current number of front-end HTTP connections.
-TechNet Exchange Counter Description:
-Shows the current number of front-end HTTP connections.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Number of unique users currently connected to a back-end server via RPC/HTTP.
-TechNet Exchange Counter Description:
-Shows the number of unique users currently connected to a back-end server via RPC/HTTP.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Rate of RPC/HTTP requests sent to the back-end servers.
-TechNet Exchange Counter Description:
-Shows the rate of RPC/HTTP requests sent to the back-end servers.
-Determines current Outlook Anywhere load.
-TechNet Exchange Guidance:
-Not applicable.
]]>
- Rate at which the RPC Proxy attemts but fails to establish a connection to a back-end.
-TechNet Exchange Counter Description:
-Shows the rate at which the RPC proxy attempts are occurring but fail to establish a connection to a back-end server.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be 0 at all times.
]]>
- Download Task Queued is '1' if task is queued for execution, otherwise '0.'
-TechNet Exchange Counter Description:
-Shows a value of 1 if the task is queued for execution, otherwise shows 0.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be 0 at all times.
-Values greater than 0 indicate a failure to copy OAB data files from Mailbox servers.
]]>Download Tasks Completed is the number of OAB download tasks completed.
-TechNet Exchange Counter Description:
-Shows the number of OAB download tasks completed since the File Distribution service started. The default value is every 480 minutes or 8 hours.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than or equal to 3 per day. Values greater than 3 per day indicate the schedule for the Client Access server to download updated OAB files is not a default schedule.
]]>Availability Requests per second is the number of requests serviced per second. The request can be only for free busy or include suggestions. One request may contain multiple mailboxes.
+TechNet Exchange Counter Description:
+Shows the number of requests serviced per second. The request can be only for free/busy or include suggestions. One request may contain multiple mailboxes.
+Determines the rate at which Availability service requests are occurring.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ Average Time to Process a Free Busy Request is the average time to process a free busy request in seconds. One request may contain multiple mailboxes. Free busy responses do not have meeting suggestions.
+TechNet Exchange Counter Description:
+Shows the average time to process a free/busy request in seconds. One request may contain multiple mailboxes. Free/busy responses do not have meeting suggestions.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should always be less than 5.
]]>
+ Disk Reads/sec is the rate of read operations on the disk.
+TechNet Exchange Counter Description:
+Indicates that a paging situation may exist because data was read from disk instead of memory.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
+Disk Writes/sec + Disk Reads/sec = < 50
+NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>
+ Disk Writes/sec is the rate of write operations on the disk.
+TechNet Exchange Counter Description:
+Indicates that a paging situation may exist because data was written to disk instead of being stored in memory.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
+Disk Writes/sec + Disk Reads/sec = < 50
+NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>
+ Average Response Time is the average time (in milliseconds) that elapsed between the beginning and end of an OEH or ASPX request.
+TechNet Exchange Counter Description:
+Shows the average time (in milliseconds) that elapsed between the beginning and end of an OEH or ASPX request.
+Used to determine the latency that a client is experiencing.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than 100 ms at all times.
+Higher values may indicate high user load or higher than normal CPU time.
]]>
+ Average Search Time is the average time that elapsed while waiting for a search to complete.
+TechNet Exchange Counter Description:
+Shows the average time that elapsed while waiting for a search to complete.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than 31,000 ms at all times.
]]>Average Request Time is the average time, in milliseconds, that elapsed while waiting for a request to complete. The transaction is tracked from the time Exchange ActiveSync receives the request and sends the response. The network latency between the device or client and Exchange ActiveSync on the Client Access server is not factored into this calculation.
+TechNet Exchange Counter Description:
+Shows the average time that elapsed while waiting for a request to complete.
+Includes Ping Request Time, which can increase the general response time of this counter. Adding ping counters helps clarify where performance is being impacted.
+Determines the rate at which Availability service requests are occurring.
+TechNet Exchange Guidance:
+Not applicable.
]]>Requests/sec is the number of HTTP requests that are received from the client via ASP.NET per second.
+TechNet Exchange Counter Description:
+Shows the number of HTTP requests that are received from the client via ASP.NET per second.
+Determines the current Exchange ActiveSync request rate.
+Stats Only to determine current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Ping Commands Pending is the number of Ping commands that are currently pending on the server.
+TechNet Exchange Counter Description:
+Shows the number of ping commands that are currently pending in the queue.
+Determines current ping commands pending in the queue.
+TechNet Exchange Guidance:
+Not applicable.
]]>Sync Commands/sec is the number of Sync commands that are processed per second. Clients use this command to synchronize items within a folder.
+TechNet Exchange Counter Description:
+Shows the number of Sync commands that are processed per second. Clients use this command to synchronize items within a folder.
+Stats Only to determine current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Current Unique Users is the number of unique users currently logged on to Outlook Web Access. This value monitors the number of unique active user sessions, so that users are only removed from this count after they log off or their session times out.
+TechNet Exchange Counter Description:
+Shows the number of unique users currently logged on to Outlook Web Access. This value monitors the number of unique active user sessions, so that users are only removed from this counter after they log off or their session times out.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Requests/sec is the number of requests handled by Outlook Web Access per second.
+TechNet Exchange Counter Description:
+Shows the number of requests handled by Outlook Web Access per second.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Requests/sec is the number of Autodiscover requests that have been processed each second.
+TechNet Exchange Counter Description:
+Shows the number of Autodiscover service requests that have been processed each second.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Requests per second is the number of requests processed each second.
+TechNet Exchange Counter Description:
+Shows the number of requests processed each second.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Current number of front-end HTTP connections.
+TechNet Exchange Counter Description:
+Shows the current number of front-end HTTP connections.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Number of unique users currently connected to a back-end server via RPC/HTTP.
+TechNet Exchange Counter Description:
+Shows the number of unique users currently connected to a back-end server via RPC/HTTP.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Rate of RPC/HTTP requests sent to the back-end servers.
+TechNet Exchange Counter Description:
+Shows the rate of RPC/HTTP requests sent to the back-end servers.
+Determines current Outlook Anywhere load.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ Rate at which the RPC Proxy attempts but fails to establish a connection to a back-end.
+TechNet Exchange Counter Description:
+Shows the rate at which the RPC proxy attempts are occurring but fail to establish a connection to a back-end server.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be 0 at all times.
]]>
+ Download Task Queued is '1' if task is queued for execution, otherwise '0.'
+TechNet Exchange Counter Description:
+Shows a value of 1 if the task is queued for execution, otherwise shows 0.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be 0 at all times.
+Values greater than 0 indicate a failure to copy OAB data files from Mailbox servers.
]]>Download Tasks Completed is the number of OAB download tasks completed.
+TechNet Exchange Counter Description:
+Shows the number of OAB download tasks completed since the File Distribution service started. The default value is every 480 minutes or 8 hours.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than or equal to 3 per day. Values greater than 3 per day indicate the schedule for the Client Access server to download updated OAB files is not a default schedule.
]]>
- % Processor Time is the percentage of elapsed time that the processor spends to execute a non-Idle thread. It is calculated by measuring the percentage of time that the processor spends executing the idle thread and then subtracting that value from 100%. (Each processor has an idle thread that consumes cycles when no other threads are ready to run). This counter is the primary indicator of processor activity, and displays the average percentage of busy time observed during the sample interval. It should be noted that the accounting calculation of whether the processor is idle is performed at an internal sampling interval of the system clock (10ms). On todays fast processors, % Processor Time can therefore underestimate the processor utilization as the processor may be spending a lot of time servicing threads between the system clock sampling interval. Workload based timer applications are one example of applications which are more likely to be measured inaccurately as timers are signaled just after the sample is taken.
-TechNet Exchange Counter Description:
-Shows the percentage of time that the processor is executing application or operating system processes. This is when the processor is not idle.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be less than 75% on average.
]]>
- % User Time is the percentage of elapsed time the processor spends in the user mode. User mode is a restricted processing mode designed for applications, environment subsystems, and integral subsystems. The alternative, privileged mode, is designed for operating system components and allows direct access to hardware and all memory. The operating system switches application threads to privileged mode to access operating system services. This counter displays the average busy time as a percentage of the sample time.
-TechNet Exchange Counter Description:
-Shows the percentage of processor time that is spent in user mode.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should remain below 75%.
]]>
- % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service in called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
-TechNet Exchange Counter Description:
-Shows the percentage of processor time that is spent in privileged mode. Privileged mode is a processing mode designed for operating system components and hardware-manipulating drivers. It allows direct access to hardware and all memory.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should remain below 75%.
]]>% Processor Time is the percentage of elapsed time that all of process threads used the processor to execution instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this count.
-TechNet Exchange Counter Description:
-Shows the percentage of elapsed processor time that all process threads used to execute instructions. An instruction is the basic unit of execution in a computer; a thread is the object that executes instructions; and a process is the object created when a program is run. Code executed to handle some hardware interruptions and trap conditions are included in this count.
-If total processor time is high, use this counter to determine which process is causing high CPU.
-TechNet Exchange Guidance:
-Not applicable.
]]>
- Available MBytes is the amount of physical memory, in Megabytes, immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free and zero page lists. For a full explanation of the memory manager, refer to MSDN and/or the System Performance and Troubleshooting Guide chapter in the Windows Server 2003 Resource Kit.
-TechNet Exchange Counter Description:
-Shows the amount of physical memory, in megabytes (MB), immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free, and zero page lists. For a full explanation of the memory manager, refer to Microsoft Developer Network (MSDN) or "System Performance and Troubleshooting Guide" in the Windows Server 2003 Resource Kit.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should remain above 100 MB at all times.
]]>Pool Nonpaged Bytes is the size, in bytes, of the nonpaged pool, an area of system memory (physical memory used by the operating system) for objects that cannot be written to disk, but must remain in physical memory as long as they are allocated. Memory\\Pool Nonpaged Bytes is calculated differently than Process\\Pool Nonpaged Bytes, so it might not equal Process\\Pool Nonpaged Bytes\\_Total. This counter displays the last observed value only; it is not an average.
-TechNet Exchange Counter Description:
-Consists of system virtual addresses that are guaranteed to be resident in physical memory at all times and can thus be accessed from any address space without incurring paging input/output (I/O). Like paged pool, nonpaged pool is created during system initialization and is used by kernel-mode components to allocate system memory.
-Normally not looked at, unless connection counts are very high because each TCP connection consumes nonpaged pool memory.
-TechNet Exchange Guidance:
-Not applicable.
]]>TechNet Exchange Counter Description:
-Shows the portion of shared system memory that can be paged to the disk paging file. Paged pool is created during system initialization and is used by kernel-mode components to allocate system memory.
-Monitor for increases in pool paged bytes indicating a possible memory leak.
-TechNet Exchange Guidance:
-Not applicable.
]]>Cache Bytes is the sum of the Memory\\System Cache Resident Bytes, Memory\\System Driver Resident Bytes, Memory\\System Code Resident Bytes, and Memory\\Pool Paged Resident Bytes counters. This counter displays the last observed value only; it is not an average.
-TechNet Exchange Counter Description:
-Shows the current size, in bytes, of the file system cache. By default, the cache uses up to 50 percent of available physical memory. The counter value is the sum of Memory\System Cache Resident Bytes, Memory\System Driver Resident Bytes, Memory\System Code Resident Bytes, and Memory\Pool Paged Resident Bytes.
-Should remain steady after applications cache their memory usage. Check for large dips in this counter, which could attribute to working set trimming and excessive paging.
-Used by the content index catalog and continuous replication log copying.
-TechNet Exchange Guidance:
-Not applicable.
]]>Committed Bytes is the amount of committed virtual memory, in bytes. Committed memory is the physical memory which has space reserved on the disk paging file(s). There can be one or more paging files on each physical drive. This counter displays the last observed value only; it is not an average.
-TechNet Exchange Counter Description:
-Shows the amount of committed virtual memory, in bytes. Committed memory is the physical memory that has space reserved on the disk paging files. There can be one or more paging files on each physical drive. This counter displays the last observed value only; it is not an average.
-Determines the amount of committed bytes in use.
-TechNet Exchange Guidance:
-Not applicable.
]]>
- % Committed Bytes In Use is the ratio of Memory\\Committed Bytes to the Memory\\Commit Limit. Committed memory is the physical memory in use for which space has been reserved in the paging file should it need to be written to disk. The commit limit is determined by the size of the paging file. If the paging file is enlarged, the commit limit increases, and the ratio is reduced). This counter displays the current percentage value only; it is not an average.
-TechNet Exchange Counter Description:
-Shows the ratio of Memory\Committed Bytes to the Memory\Commit Limit. Committed memory is the physical memory in use for which space has been reserved in the paging file should it need to be written to disk. The commit limit is determined by the size of the paging file. If the paging file is enlarged, the commit limit increases, and the ratio is reduced. This counter displays the current percentage value only; it is not an average.
-If this value is very high (more than 90 percent), you may begin to see commit failures. This is a clear indication that the system is under memory pressure.
-TechNet Exchange Guidance:
-Not applicable.
]]>
- Transition Pages RePurposed is the rate at which the number of transition cache pages were reused for a different purpose. These pages would have otherwise remained in the page cache to provide a (fast) soft fault (instead of retrieving it from backing store) in the event the page was accessed in the future. Note these pages can contain private or sharable memory.
-TechNet Exchange Counter Description:
-Indicates system cache pressure.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be less than 100 on average with spikes less than 1,000.
]]>
- Page Reads/sec is the rate at which the disk was read to resolve hard page faults. It shows the number of reads operations, without regard to the number of pages retrieved in each operation. Hard page faults occur when a process references a page in virtual memory that is not in working set or elsewhere in physical memory, and must be retrieved from disk. This counter is a primary indicator of the kinds of faults that cause system-wide delays. It includes read operations to satisfy faults in the file system cache (usually requested by applications) and in non-cached mapped memory files. Compare the value of Memory\\Pages Reads/sec to the value of Memory\\Pages Input/sec to determine the average number of pages read during each operation.
-TechNet Exchange Counter Description:
-Indicates data must be read from the disk instead of memory. Indicates there is not enough memory and paging is beginning. A value of more than 30 per second means the server is no longer keeping up with the load.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be less than 100 on average.
]]>
- Pages/sec is the rate at which pages are read from or written to disk to resolve hard page faults. This counter is a primary indicator of the kinds of faults that cause system-wide delays. It is the sum of Memory\\Pages Input/sec and Memory\\Pages Output/sec. It is counted in numbers of pages, so it can be compared to other counts of pages, such as Memory\\Page Faults/sec, without conversion. It includes pages retrieved to satisfy faults in the file system cache (usually requested by applications) non-cached mapped memory files.
-TechNet Exchange Counter Description:
-Shows the rate at which pages are read from or written to disk to resolve hard page faults. This counter is a primary indicator of the kinds of faults that cause system-wide delays. It is the sum of Memory\Pages Input/sec and Memory\Pages Output/sec. It is counted in numbers of pages, so it can be compared to other counts of pages, such as Memory\Page Faults/sec, without conversion. It includes pages retrieved to satisfy faults in the file system cache (usually requested by applications) and non-cached mapped memory files.
-The values that are returned by the Pages/sec counter may be more than you expect. These values may not be related to either paging file activity or cache activity. Instead, these values may be caused by an application that is sequentially reading a memory-mapped file.
-Use Memory\Pages Input/sec and Memory\Pages Output/sec to determine page file I/O.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 1,000 on average.
]]>
- Pages Input/sec is the rate at which pages are read from disk to resolve hard page faults. Hard page faults occur when a process refers to a page in virtual memory that is not in its working set or elsewhere in physical memory, and must be retrieved from disk. When a page is faulted, the system tries to read multiple contiguous pages into memory to maximize the benefit of the read operation. Compare the value of Memory\\Pages Input/sec to the value of Memory\\Page Reads/sec to determine the average number of pages read into memory during each read operation.
-TechNet Exchange Counter Description:
-Shows the rate at which pages are read from disk to resolve hard page faults. Hard page faults occur when a process refers to a page in virtual memory that is not in its working set or elsewhere in physical memory, and must be retrieved from disk. When a page is faulted, the system tries to read multiple contiguous pages into memory to maximize the benefit of the read operation. Compare the value of Memory\Pages Input/sec to the value of Memory\Page Reads/sec to determine the average number of pages read into memory during each read operation.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 1,000 on average.
]]>
- Pages Output/sec is the rate at which pages are written to disk to free up space in physical memory. Pages are written back to disk only if they are changed in physical memory, so they are likely to hold data, not code. A high rate of pages output might indicate a memory shortage. Windows writes more pages back to disk to free up space when physical memory is in short supply. This counter shows the number of pages, and can be compared to other counts of pages, without conversion.
-TechNet Exchange Counter Description:
-Shows the rate at which pages are written to disk to free space in physical memory. Pages are written back to disk only if they are changed in physical memory, so they are likely to hold data, and not code. A high rate of pages output might indicate a memory shortage. Microsoft Windows writes more pages back to disk to free up space when physical memory is in short supply. This counter shows the number of pages, and can be compared to other counts of pages, without conversion.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 1,000 on average.
]]>Private Bytes is the current size, in bytes, of memory that this process has allocated that cannot be shared with other processes.
-TechNet Exchange Counter Description:
-Shows the current number of bytes this process has allocated that cannot be shared with other processes.
-This counter can be used for determining any memory leaks against processes.
-For the information store process, compare this counter value with database cache size to determine if there is a memory leak in the information store process. An increase in information store private bytes, together with the same increase in database cache, equals correct behavior (no memory leak).
-TechNet Exchange Guidance:
-Not applicable.
]]>Virtual Bytes is the current size, in bytes, of the virtual address space the process is using. Use of virtual address space does not necessarily imply corresponding use of either disk or main memory pages. Virtual space is finite, and the process can limit its ability to load libraries.
-TechNet Exchange Counter Description:
-Represents (in bytes) how much virtual address space the process is currently consuming.
-Used to determine if processes are consuming a large amount of virtual memory.
-TechNet Exchange Guidance:
-Not applicable.
]]>Working Set is the current size, in bytes, of the Working Set of this process. The Working Set is the set of memory pages touched recently by the threads in the process. If free memory in the computer is above a threshold, pages are left in the Working Set of a process even if they are not in use. When free memory falls below a threshold, pages are trimmed from Working Sets. If they are needed they will then be soft-faulted back into the Working Set before leaving main memory.
-TechNet Exchange Counter Description:
-Shows the current size, in bytes, of the working set of this process. The working set is the set of memory pages touched recently by the threads in the process. If free memory in the computer is above a threshold, pages are left in the working set of a process event if they are not in use. When free memory falls below a threshold, pages are trimmed from working sets. If they are needed, they will be soft-faulted back to the working set before leaving main memory.
-Large increases or decreases in working sets causes paging.
-Ensure that the paging file is set to the recommended value of RAM+10. If working sets are being trimmed, add Process(*)\Working set to see what processes are affected. This counter could indicate either system-wide or process-wide issues. Cross-reference this counter with Memory\System Cache Resident Bytes to see if system-wide working set trimming is occurring
-TechNet Exchange Guidance:
-Not applicable.
]]>The total number of handles currently open by this process. This number is equal to the sum of the handles currently open by each thread in this process.
-TechNet Exchange Counter Description:
-Shows the total number of handles currently open by this process. This number is the sum of the handles currently open by each thread in this process.
-An increase in handle counts for a particular process may be the symptom of a faulty process with handle leaks, which is causing performance issues on the server. This is not necessarily a problem, but is something to monitor over time to determine if a handle leak is occurring.
-TechNet Exchange Guidance:
-Not applicable.
]]>
- % Time in GC is the percentage of elapsed time that was spent in performing a garbage collection (GC) since the last GC cycle. This counter is usually an indicator of the work done by the Garbage Collector on behalf of the application to collect and compact memory. This counter is updated only at the end of every GC and the counter value reflects the last observed value; its not an average.
-TechNet Exchange Counter Description:
-Shows when garbage collection has occurred. When the counter exceeds the threshold, it indicates that CPU is cleaning up and is not being used efficiently for load. Adding memory to the server would improve this situation.
-If this counter increases to a high value, there might be some objects that are surviving Gen 1 garbage collections and being promoted to Gen 2. Gen 2 collections require a full global catalog for clean up. Add other .NET memory counters to determine if this is the case.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 10% on average.
]]>
- This counter displays the number of exceptions thrown per second. These include both .NET exceptions and unmanaged exceptions that get converted into .NET exceptions e.g. null pointer reference exception in unmanaged code would get re-thrown in managed code as a .NET System.NullReferenceException; this counter includes both handled and unhandled exceptions. Exceptions should only occur in rare situations and not in the normal control flow of the program; this counter was designed as an indicator of potential performance problems due to large (>100s) rate of exceptions thrown. This counter is not an average over time; it displays the difference between the values observed in the last two samples divided by the duration of the sample interval.
-TechNet Exchange Counter Description:
-Displays the number of exceptions thrown per second. These include both .NET exceptions and unmanaged exceptions that get converted into .NET exceptions. For example, the null pointer reference exception in unmanaged code would get thrown again in managed code as a .NET System.NullReferenceException; this counter includes both handled and unhandled exceptions. Exceptions should only occur in rare situations and not in the normal control flow of the program. This counter was designed as an indicator of potential performance problems due to a large (>100 sec) rate of exceptions thrown. This counter is not an average over time; it displays the difference between the values observed in the last two samples divided by the duration of the sample interval.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be less than 5% of total RPS (Web Service(_Total)\Connection Attempts/sec * .05).
]]>The rate that connections to the Web service are being attempted.
-TechNet Exchange Counter Description:
-Shows the rate that connections to the Web service are being attempted.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>This counter is the sum of four other counters; Gen 0 Heap Size; Gen 1 Heap Size; Gen 2 Heap Size and the Large Object Heap Size. This counter indicates the current memory allocated in bytes on the GC Heaps.
-TechNet Exchange Counter Description:
-Shows the sum of four other counters: Gen 0 Heap Size, Gen 1 Heap Size, Gen 2 Heap Size, and the Large Object Heap Size. This counter indicates the current memory allocated in bytes on the GC Heaps.
-These regions of memory are of type MEM_COMMIT. (For details, see Platform SDK documentation for VirtualAlloc.) The value of this counter is always less than the value of Process\Private Bytes, which counts all MEM_COMMIT regions for the process. Private Bytes minus # Bytes in all Heaps is the number of bytes committed by unmanaged objects.
-Used to monitor possible memory leaks or excessive memory usage of managed or unmanaged objects.
-TechNet Exchange Guidance:
-Not applicable.
]]>Bytes Total/sec is the rate at which bytes are sent and received over each network adapter, including framing characters. Network Interface\Bytes Total/sec is a sum of Network Interface\Bytes Received/sec and Network Interface\Bytes Sent/sec.
-TechNet Exchange Counter Description:
-Indicates the rate at which the network adapter is processing data bytes.
-This counter includes all application and file data, in addition to protocol information such as packet headers.
-TechNet Exchange Guidance:
-For a 100-Mbps network adapter, should be below 6 to 7 Mbps.
-For a 1000-Mbps network adapter, should be below 60 to 70 Mbps.
]]>
- Packets Outbound Errors is the number of outbound packets that could not be transmitted because of errors.
-TechNet Exchange Counter Description:
-Indicates the number of outbound packets that could not be transmitted because of errors.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be 0 at all times.
]]>Datagrams/sec is the rate, in incidents per second, at which IP datagrams were received from or sent to the interfaces, including those in error. Forwarded datagrams are not included in this rate.
-TechNet Exchange Counter Description:
-Shows the rate, in incidents per second, at which IP datagrams were received from or sent to the interfaces, including those in error. Forwarded datagrams are not included in this rate.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Datagrams/sec is the rate, in incidents per second, at which IP datagrams were received from or sent to the interfaces, including those in error. Forwarded datagrams are not included in this rate.
-TechNet Exchange Counter Description:
-Shows the rate, in incidents per second, at which IP datagrams were received from or sent to the interfaces, including those in error. Forwarded datagrams are not included in this rate.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Connections Established is the number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT.
-TechNet Exchange Counter Description:
-Shows the number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT.
-The number of TCP connections that can be established is constrained by the size of the nonpaged pool. When the nonpaged pool is depleted, no new connections can be established.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Connections Established is the number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT.
-TechNet Exchange Counter Description:
-Shows the number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT.
-The number of TCP connections that can be established is constrained by the size of the nonpaged pool. When the nonpaged pool is depleted, no new connections can be established.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Segments Received/sec is the rate at which segments are received, including those received in error. This count includes segments received on currently established connections.
-TechNet Exchange Counter Description:
-Shows the rate at which segments are received, including those received in error. This count includes segments received on currently established connections.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Segments Received/sec is the rate at which segments are received, including those received in error. This count includes segments received on currently established connections.
-TechNet Exchange Counter Description:
-Shows the rate at which segments are received, including those received in error. This count includes segments received on currently established connections.
-Determines current user load.
-TechNet Exchange Guidance:
-Not applicable.
]]>Connection Failures is the number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state.
-TechNet Exchange Counter Description:
-Shows the number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state.
-TechNet Exchange Guidance:
-An increasing number of failures, or a consistently increasing rate of failures, can indicate a bandwidth shortage.
]]>Connection Failures is the number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state.
-TechNet Exchange Counter Description:
-Shows the number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state.
-TechNet Exchange Guidance:
-An increasing number of failures, or a consistently increasing rate of failures, can indicate a bandwidth shortage.
]]>Connections Reset is the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state.
-TechNet Exchange Counter Description:
-Shows the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state.
-Some browsers send TCP reset (RST) packets, so be cautious when using this counter to determine reset rate.
-TechNet Exchange Guidance:
-An increasing number of resets or a consistently increasing rate of resets can indicate a bandwidth shortage.
]]>Connections Reset is the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state.
-TechNet Exchange Counter Description:
-Shows the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state.
-Some browsers send TCP reset (RST) packets, so be cautious when using this counter to determine reset rate.
-TechNet Exchange Guidance:
-An increasing number of resets or a consistently increasing rate of resets can indicate a bandwidth shortage.
]]>LDAP Searches/Sec is the number of LDAP search requests issued per second.
-TechNet Exchange Counter Description:
-Shows the number of Lightweight Directory Access Protocol (LDAP) search requests issued per second.
-Used to determine current LDAP search rate.
-TechNet Exchange Guidance:
-Not applicable.
]]>
- LDAP Read Time for a given DC is the time (in ms) taken to send an LDAP read request to the specified DC and receive a response
-TechNet Exchange Counter Description:
-Shows the time in milliseconds (ms) to send an LDAP read request to the specified domain controller and receive a response.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 50 ms on average with spikes (maximum values) of less than 100 ms.
]]>
- LDAP Search Time is the time (in ms) taken to send an LDAP search request and receive a response
-TechNet Exchange Counter Description:
-Shows the time (in ms) to send an LDAP search request and receive a response.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 50 ms on average with spikes (maximum values) of less than 100 ms.
]]>
- LDAP Read Time is the time (in ms) taken to send an LDAP read request and receive a response
-TechNet Exchange Counter Description:
-Shows the time (in ms) to send an LDAP read request to the specified domain controller and receive a response.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 50 ms on average with spikes (maximum values) of less than 100 ms.
]]>
- LDAP Search Time is the time (in ms) taken to send an LDAP search request and receive a response
-TechNet Exchange Counter Description:
-Shows the time (in ms) to send an LDAP search request and receive a response.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 50 ms on average with spikes (maximum values) of less than 100 ms.
]]>
- LDAP Searches timed out per minute is the number of LDAP searches returned LDAP_TIMEOUT during the last minute
-TechNet Exchange Counter Description:
-Shows the number of LDAP searches that returned LDAP_Timeout during the last minute.
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be below 10 at all times for all roles.
-Higher values may indicate issues with Active Directory resources.
]]>
- Long running LDAP operations/Min is the number of LDAP operations on this DC that took longer than the specified threshold per minute (default threshold is 15 seconds)
-TechNet Exchange Counter Description:
-Shows the number of LDAP operations on this domain controller that took longer than the specified threshold per minute. (Default threshold is 15 seconds.)
-TechNet Exchange Guidance:
-For all Exchange Server 2007 roles, this value should be less than 50 at all times.
-Higher values may indicate issues with Active Directory resources.
]]>
+ % Processor Time is the percentage of elapsed time that the processor spends to execute a non-Idle thread. It is calculated by measuring the percentage of time that the processor spends executing the idle thread and then subtracting that value from 100%. (Each processor has an idle thread that consumes cycles when no other threads are ready to run). This counter is the primary indicator of processor activity, and displays the average percentage of busy time observed during the sample interval. It should be noted that the accounting calculation of whether the processor is idle is performed at an internal sampling interval of the system clock (10ms). On today's fast processors, % Processor Time can therefore underestimate the processor utilization as the processor may be spending a lot of time servicing threads between the system clock sampling interval. Workload based timer applications are one example of applications which are more likely to be measured inaccurately as timers are signaled just after the sample is taken.
+TechNet Exchange Counter Description:
+Shows the percentage of time that the processor is executing application or operating system processes. This is when the processor is not idle.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be less than 75% on average.
]]>
+ % User Time is the percentage of elapsed time the processor spends in the user mode. User mode is a restricted processing mode designed for applications, environment subsystems, and integral subsystems. The alternative, privileged mode, is designed for operating system components and allows direct access to hardware and all memory. The operating system switches application threads to privileged mode to access operating system services. This counter displays the average busy time as a percentage of the sample time.
+TechNet Exchange Counter Description:
+Shows the percentage of processor time that is spent in user mode.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should remain below 75%.
]]>
+ % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
+TechNet Exchange Counter Description:
+Shows the percentage of processor time that is spent in privileged mode. Privileged mode is a processing mode designed for operating system components and hardware-manipulating drivers. It allows direct access to hardware and all memory.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should remain below 75%.
]]>% Processor Time is the percentage of elapsed time that all of process threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this count.
+TechNet Exchange Counter Description:
+Shows the percentage of elapsed processor time that all process threads used to execute instructions. An instruction is the basic unit of execution in a computer; a thread is the object that executes instructions; and a process is the object created when a program is run. Code executed to handle some hardware interruptions and trap conditions are included in this count.
+If total processor time is high, use this counter to determine which process is causing high CPU.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ Available MBytes is the amount of physical memory, in Megabytes, immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free and zero page lists. For a full explanation of the memory manager, refer to MSDN and/or the System Performance and Troubleshooting Guide chapter in the Windows Server 2003 Resource Kit.
+TechNet Exchange Counter Description:
+Shows the amount of physical memory, in megabytes (MB), immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free, and zero page lists. For a full explanation of the memory manager, refer to Microsoft Developer Network (MSDN) or "System Performance and Troubleshooting Guide" in the Windows Server 2003 Resource Kit.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should remain above 100 MB at all times.
]]>Pool Nonpaged Bytes is the size, in bytes, of the nonpaged pool, an area of system memory (physical memory used by the operating system) for objects that cannot be written to disk, but must remain in physical memory as long as they are allocated. Memory\\Pool Nonpaged Bytes is calculated differently than Process\\Pool Nonpaged Bytes, so it might not equal Process\\Pool Nonpaged Bytes\\_Total. This counter displays the last observed value only; it is not an average.
+TechNet Exchange Counter Description:
+Consists of system virtual addresses that are guaranteed to be resident in physical memory at all times and can thus be accessed from any address space without incurring paging input/output (I/O). Like paged pool, nonpaged pool is created during system initialization and is used by kernel-mode components to allocate system memory.
+Normally not looked at, unless connection counts are very high because each TCP connection consumes nonpaged pool memory.
+TechNet Exchange Guidance:
+Not applicable.
]]>TechNet Exchange Counter Description:
+Shows the portion of shared system memory that can be paged to the disk paging file. Paged pool is created during system initialization and is used by kernel-mode components to allocate system memory.
+Monitor for increases in pool paged bytes indicating a possible memory leak.
+TechNet Exchange Guidance:
+Not applicable.
]]>Cache Bytes is the sum of the Memory\\System Cache Resident Bytes, Memory\\System Driver Resident Bytes, Memory\\System Code Resident Bytes, and Memory\\Pool Paged Resident Bytes counters. This counter displays the last observed value only; it is not an average.
+TechNet Exchange Counter Description:
+Shows the current size, in bytes, of the file system cache. By default, the cache uses up to 50 percent of available physical memory. The counter value is the sum of Memory\System Cache Resident Bytes, Memory\System Driver Resident Bytes, Memory\System Code Resident Bytes, and Memory\Pool Paged Resident Bytes.
+Should remain steady after applications cache their memory usage. Check for large dips in this counter, which could be attributed to working set trimming and excessive paging.
+Used by the content index catalog and continuous replication log copying.
+TechNet Exchange Guidance:
+Not applicable.
]]>Committed Bytes is the amount of committed virtual memory, in bytes. Committed memory is the physical memory which has space reserved on the disk paging file(s). There can be one or more paging files on each physical drive. This counter displays the last observed value only; it is not an average.
+TechNet Exchange Counter Description:
+Shows the amount of committed virtual memory, in bytes. Committed memory is the physical memory that has space reserved on the disk paging files. There can be one or more paging files on each physical drive. This counter displays the last observed value only; it is not an average.
+Determines the amount of committed bytes in use.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ % Committed Bytes In Use is the ratio of Memory\\Committed Bytes to the Memory\\Commit Limit. Committed memory is the physical memory in use for which space has been reserved in the paging file should it need to be written to disk. The commit limit is determined by the size of the paging file. If the paging file is enlarged, the commit limit increases, and the ratio is reduced. This counter displays the current percentage value only; it is not an average.
+TechNet Exchange Counter Description:
+Shows the ratio of Memory\Committed Bytes to the Memory\Commit Limit. Committed memory is the physical memory in use for which space has been reserved in the paging file should it need to be written to disk. The commit limit is determined by the size of the paging file. If the paging file is enlarged, the commit limit increases, and the ratio is reduced. This counter displays the current percentage value only; it is not an average.
+If this value is very high (more than 90 percent), you may begin to see commit failures. This is a clear indication that the system is under memory pressure.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ Transition Pages RePurposed is the rate at which the number of transition cache pages were reused for a different purpose. These pages would have otherwise remained in the page cache to provide a (fast) soft fault (instead of retrieving it from backing store) in the event the page was accessed in the future. Note these pages can contain private or sharable memory.
+TechNet Exchange Counter Description:
+Indicates system cache pressure.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be less than 100 on average with spikes less than 1,000.
]]>
+ Page Reads/sec is the rate at which the disk was read to resolve hard page faults. It shows the number of read operations, without regard to the number of pages retrieved in each operation. Hard page faults occur when a process references a page in virtual memory that is not in working set or elsewhere in physical memory, and must be retrieved from disk. This counter is a primary indicator of the kinds of faults that cause system-wide delays. It includes read operations to satisfy faults in the file system cache (usually requested by applications) and in non-cached mapped memory files. Compare the value of Memory\\Pages Reads/sec to the value of Memory\\Pages Input/sec to determine the average number of pages read during each operation.
+TechNet Exchange Counter Description:
+Indicates data must be read from the disk instead of memory. Indicates there is not enough memory and paging is beginning. A value of more than 30 per second means the server is no longer keeping up with the load.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be less than 100 on average.
]]>
+ Pages/sec is the rate at which pages are read from or written to disk to resolve hard page faults. This counter is a primary indicator of the kinds of faults that cause system-wide delays. It is the sum of Memory\\Pages Input/sec and Memory\\Pages Output/sec. It is counted in numbers of pages, so it can be compared to other counts of pages, such as Memory\\Page Faults/sec, without conversion. It includes pages retrieved to satisfy faults in the file system cache (usually requested by applications) and non-cached mapped memory files.
+TechNet Exchange Counter Description:
+Shows the rate at which pages are read from or written to disk to resolve hard page faults. This counter is a primary indicator of the kinds of faults that cause system-wide delays. It is the sum of Memory\Pages Input/sec and Memory\Pages Output/sec. It is counted in numbers of pages, so it can be compared to other counts of pages, such as Memory\Page Faults/sec, without conversion. It includes pages retrieved to satisfy faults in the file system cache (usually requested by applications) and non-cached mapped memory files.
+The values that are returned by the Pages/sec counter may be more than you expect. These values may not be related to either paging file activity or cache activity. Instead, these values may be caused by an application that is sequentially reading a memory-mapped file.
+Use Memory\Pages Input/sec and Memory\Pages Output/sec to determine page file I/O.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 1,000 on average.
]]>
+ Pages Input/sec is the rate at which pages are read from disk to resolve hard page faults. Hard page faults occur when a process refers to a page in virtual memory that is not in its working set or elsewhere in physical memory, and must be retrieved from disk. When a page is faulted, the system tries to read multiple contiguous pages into memory to maximize the benefit of the read operation. Compare the value of Memory\\Pages Input/sec to the value of Memory\\Page Reads/sec to determine the average number of pages read into memory during each read operation.
+TechNet Exchange Counter Description:
+Shows the rate at which pages are read from disk to resolve hard page faults. Hard page faults occur when a process refers to a page in virtual memory that is not in its working set or elsewhere in physical memory, and must be retrieved from disk. When a page is faulted, the system tries to read multiple contiguous pages into memory to maximize the benefit of the read operation. Compare the value of Memory\Pages Input/sec to the value of Memory\Page Reads/sec to determine the average number of pages read into memory during each read operation.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 1,000 on average.
]]>
+ Pages Output/sec is the rate at which pages are written to disk to free up space in physical memory. Pages are written back to disk only if they are changed in physical memory, so they are likely to hold data, not code. A high rate of pages output might indicate a memory shortage. Windows writes more pages back to disk to free up space when physical memory is in short supply. This counter shows the number of pages, and can be compared to other counts of pages, without conversion.
+TechNet Exchange Counter Description:
+Shows the rate at which pages are written to disk to free space in physical memory. Pages are written back to disk only if they are changed in physical memory, so they are likely to hold data, and not code. A high rate of pages output might indicate a memory shortage. Microsoft Windows writes more pages back to disk to free up space when physical memory is in short supply. This counter shows the number of pages, and can be compared to other counts of pages, without conversion.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 1,000 on average.
]]>Private Bytes is the current size, in bytes, of memory that this process has allocated that cannot be shared with other processes.
+TechNet Exchange Counter Description:
+Shows the current number of bytes this process has allocated that cannot be shared with other processes.
+This counter can be used for determining any memory leaks against processes.
+For the information store process, compare this counter value with database cache size to determine if there is a memory leak in the information store process. An increase in information store private bytes, together with the same increase in database cache, equals correct behavior (no memory leak).
+TechNet Exchange Guidance:
+Not applicable.
]]>Virtual Bytes is the current size, in bytes, of the virtual address space the process is using. Use of virtual address space does not necessarily imply corresponding use of either disk or main memory pages. Virtual space is finite, and the process can limit its ability to load libraries.
+TechNet Exchange Counter Description:
+Represents (in bytes) how much virtual address space the process is currently consuming.
+Used to determine if processes are consuming a large amount of virtual memory.
+TechNet Exchange Guidance:
+Not applicable.
]]>Working Set is the current size, in bytes, of the Working Set of this process. The Working Set is the set of memory pages touched recently by the threads in the process. If free memory in the computer is above a threshold, pages are left in the Working Set of a process even if they are not in use. When free memory falls below a threshold, pages are trimmed from Working Sets. If they are needed they will then be soft-faulted back into the Working Set before leaving main memory.
+TechNet Exchange Counter Description:
+Shows the current size, in bytes, of the working set of this process. The working set is the set of memory pages touched recently by the threads in the process. If free memory in the computer is above a threshold, pages are left in the working set of a process even if they are not in use. When free memory falls below a threshold, pages are trimmed from working sets. If they are needed, they will be soft-faulted back to the working set before leaving main memory.
+Large increases or decreases in working sets causes paging.
+Ensure that the paging file is set to the recommended value of RAM+10. If working sets are being trimmed, add Process(*)\Working set to see what processes are affected. This counter could indicate either system-wide or process-wide issues. Cross-reference this counter with Memory\System Cache Resident Bytes to see if system-wide working set trimming is occurring
+TechNet Exchange Guidance:
+Not applicable.
]]>The total number of handles currently open by this process. This number is equal to the sum of the handles currently open by each thread in this process.
+TechNet Exchange Counter Description:
+Shows the total number of handles currently open by this process. This number is the sum of the handles currently open by each thread in this process.
+An increase in handle counts for a particular process may be the symptom of a faulty process with handle leaks, which is causing performance issues on the server. This is not necessarily a problem, but is something to monitor over time to determine if a handle leak is occurring.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ % Time in GC is the percentage of elapsed time that was spent in performing a garbage collection (GC) since the last GC cycle. This counter is usually an indicator of the work done by the Garbage Collector on behalf of the application to collect and compact memory. This counter is updated only at the end of every GC and the counter value reflects the last observed value; it's not an average.
+TechNet Exchange Counter Description:
+Shows when garbage collection has occurred. When the counter exceeds the threshold, it indicates that CPU is cleaning up and is not being used efficiently for load. Adding memory to the server would improve this situation.
+If this counter increases to a high value, there might be some objects that are surviving Gen 1 garbage collections and being promoted to Gen 2. Gen 2 collections require a full global catalog for clean up. Add other .NET memory counters to determine if this is the case.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 10% on average.
]]>
+ This counter displays the number of exceptions thrown per second. These include both .NET exceptions and unmanaged exceptions that get converted into .NET exceptions e.g. null pointer reference exception in unmanaged code would get re-thrown in managed code as a .NET System.NullReferenceException; this counter includes both handled and unhandled exceptions. Exceptions should only occur in rare situations and not in the normal control flow of the program; this counter was designed as an indicator of potential performance problems due to large (>100s) rate of exceptions thrown. This counter is not an average over time; it displays the difference between the values observed in the last two samples divided by the duration of the sample interval.
+TechNet Exchange Counter Description:
+Displays the number of exceptions thrown per second. These include both .NET exceptions and unmanaged exceptions that get converted into .NET exceptions. For example, the null pointer reference exception in unmanaged code would get thrown again in managed code as a .NET System.NullReferenceException; this counter includes both handled and unhandled exceptions. Exceptions should only occur in rare situations and not in the normal control flow of the program. This counter was designed as an indicator of potential performance problems due to a large (>100 sec) rate of exceptions thrown. This counter is not an average over time; it displays the difference between the values observed in the last two samples divided by the duration of the sample interval.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be less than 5% of total RPS (Web Service(_Total)\Connection Attempts/sec * .05).
]]>The rate that connections to the Web service are being attempted.
+TechNet Exchange Counter Description:
+Shows the rate that connections to the Web service are being attempted.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>This counter is the sum of four other counters; Gen 0 Heap Size; Gen 1 Heap Size; Gen 2 Heap Size and the Large Object Heap Size. This counter indicates the current memory allocated in bytes on the GC Heaps.
+TechNet Exchange Counter Description:
+Shows the sum of four other counters: Gen 0 Heap Size, Gen 1 Heap Size, Gen 2 Heap Size, and the Large Object Heap Size. This counter indicates the current memory allocated in bytes on the GC Heaps.
+These regions of memory are of type MEM_COMMIT. (For details, see Platform SDK documentation for VirtualAlloc.) The value of this counter is always less than the value of Process\Private Bytes, which counts all MEM_COMMIT regions for the process. Private Bytes minus # Bytes in all Heaps is the number of bytes committed by unmanaged objects.
+Used to monitor possible memory leaks or excessive memory usage of managed or unmanaged objects.
+TechNet Exchange Guidance:
+Not applicable.
]]>Bytes Total/sec is the rate at which bytes are sent and received over each network adapter, including framing characters. Network Interface\Bytes Total/sec is a sum of Network Interface\Bytes Received/sec and Network Interface\Bytes Sent/sec.
+TechNet Exchange Counter Description:
+Indicates the rate at which the network adapter is processing data bytes.
+This counter includes all application and file data, in addition to protocol information such as packet headers.
+TechNet Exchange Guidance:
+For a 100-Mbps network adapter, should be below 6 to 7 Mbps.
+For a 1000-Mbps network adapter, should be below 60 to 70 Mbps.
]]>
+ Packets Outbound Errors is the number of outbound packets that could not be transmitted because of errors.
+TechNet Exchange Counter Description:
+Indicates the number of outbound packets that could not be transmitted because of errors.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be 0 at all times.
]]>Datagrams/sec is the rate, in incidents per second, at which IP datagrams were received from or sent to the interfaces, including those in error. Forwarded datagrams are not included in this rate.
+TechNet Exchange Counter Description:
+Shows the rate, in incidents per second, at which IP datagrams were received from or sent to the interfaces, including those in error. Forwarded datagrams are not included in this rate.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Datagrams/sec is the rate, in incidents per second, at which IP datagrams were received from or sent to the interfaces, including those in error. Forwarded datagrams are not included in this rate.
+TechNet Exchange Counter Description:
+Shows the rate, in incidents per second, at which IP datagrams were received from or sent to the interfaces, including those in error. Forwarded datagrams are not included in this rate.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Connections Established is the number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT.
+TechNet Exchange Counter Description:
+Shows the number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT.
+The number of TCP connections that can be established is constrained by the size of the nonpaged pool. When the nonpaged pool is depleted, no new connections can be established.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Connections Established is the number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT.
+TechNet Exchange Counter Description:
+Shows the number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT.
+The number of TCP connections that can be established is constrained by the size of the nonpaged pool. When the nonpaged pool is depleted, no new connections can be established.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Segments Received/sec is the rate at which segments are received, including those received in error. This count includes segments received on currently established connections.
+TechNet Exchange Counter Description:
+Shows the rate at which segments are received, including those received in error. This count includes segments received on currently established connections.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Segments Received/sec is the rate at which segments are received, including those received in error. This count includes segments received on currently established connections.
+TechNet Exchange Counter Description:
+Shows the rate at which segments are received, including those received in error. This count includes segments received on currently established connections.
+Determines current user load.
+TechNet Exchange Guidance:
+Not applicable.
]]>Connection Failures is the number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state.
+TechNet Exchange Counter Description:
+Shows the number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state.
+TechNet Exchange Guidance:
+An increasing number of failures, or a consistently increasing rate of failures, can indicate a bandwidth shortage.
]]>Connection Failures is the number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state.
+TechNet Exchange Counter Description:
+Shows the number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state.
+TechNet Exchange Guidance:
+An increasing number of failures, or a consistently increasing rate of failures, can indicate a bandwidth shortage.
]]>Connections Reset is the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state.
+TechNet Exchange Counter Description:
+Shows the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state.
+Some browsers send TCP reset (RST) packets, so be cautious when using this counter to determine reset rate.
+TechNet Exchange Guidance:
+An increasing number of resets or a consistently increasing rate of resets can indicate a bandwidth shortage.
]]>Connections Reset is the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state.
+TechNet Exchange Counter Description:
+Shows the number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state.
+Some browsers send TCP reset (RST) packets, so be cautious when using this counter to determine reset rate.
+TechNet Exchange Guidance:
+An increasing number of resets or a consistently increasing rate of resets can indicate a bandwidth shortage.
]]>LDAP Searches/Sec is the number of LDAP search requests issued per second.
+TechNet Exchange Counter Description:
+Shows the number of Lightweight Directory Access Protocol (LDAP) search requests issued per second.
+Used to determine current LDAP search rate.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ LDAP Read Time for a given DC is the time (in ms) taken to send an LDAP read request to the specified DC and receive a response
+TechNet Exchange Counter Description:
+Shows the time in milliseconds (ms) to send an LDAP read request to the specified domain controller and receive a response.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 50 ms on average with spikes (maximum values) of less than 100 ms.
]]>
+ LDAP Search Time is the time (in ms) taken to send an LDAP search request and receive a response
+TechNet Exchange Counter Description:
+Shows the time (in ms) to send an LDAP search request and receive a response.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 50 ms on average with spikes (maximum values) of less than 100 ms.
]]>
+ LDAP Read Time is the time (in ms) taken to send an LDAP read request and receive a response
+TechNet Exchange Counter Description:
+Shows the time (in ms) to send an LDAP read request to the specified domain controller and receive a response.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 50 ms on average with spikes (maximum values) of less than 100 ms.
]]>
+ LDAP Search Time is the time (in ms) taken to send an LDAP search request and receive a response
+TechNet Exchange Counter Description:
+Shows the time (in ms) to send an LDAP search request and receive a response.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 50 ms on average with spikes (maximum values) of less than 100 ms.
]]>
+ LDAP Searches timed out per minute is the number of LDAP searches returned LDAP_TIMEOUT during the last minute
+TechNet Exchange Counter Description:
+Shows the number of LDAP searches that returned LDAP_Timeout during the last minute.
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be below 10 at all times for all roles.
+Higher values may indicate issues with Active Directory resources.
]]>
+ Long running LDAP operations/Min is the number of LDAP operations on this DC that took longer than the specified threshold per minute (default threshold is 15 seconds)
+TechNet Exchange Counter Description:
+Shows the number of LDAP operations on this domain controller that took longer than the specified threshold per minute. (Default threshold is 15 seconds.)
+TechNet Exchange Guidance:
+For all Exchange Server 2007 roles, this value should be less than 50 at all times.
+Higher values may indicate issues with Active Directory resources.
]]>
-
- Aggregate Delivery Queue Length (All Queues) is the number of messages queued for delivery in all queues.
-TechNet Exchange Counter Description:
-The number of messages queued for delivery in all queues.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 3,000 and not more than 5,000.
]]>
-
- Active Remote Delivery Queue Length is the number of messages in the active remote delivery queues.
-TechNet Exchange Counter Description:
-The number of messages in the active remote delivery queues.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 200. A value of 250 or more indicates a critical issue.
]]>
-
- Submission Queue Length is the number of messages in the Submission queue.
-TechNet Exchange Counter Description:
-The number of messages in the Submission queue.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 100. A value of 250 or more indicates a critical issue.
]]>
-
- Retry Remote Delivery Queue Length is the number of messages in retry in the remote delivery queues.
-TechNet Exchange Counter Description:
-The number of messages in the remote delivery queues that are waiting to be retried.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 100. A value of 250 or more indicates a critical issue. If this value is high, we recommend that you check the next hop to determine the causes for the long queues.
]]>
-
- Unreachable Queue Length is the number of messages in the Unreachable queue.
-TechNet Exchange Counter Description:
-The number of messages in the Unreachable queue.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 100. A value of 250 or more indicates a critical issue.
]]>
-
- Largest Delivery Queue Length is the number of messages in the largest delivery queue.
-TechNet Exchange Counter Description:
-The number of messages in the largest delivery queue.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 200 for the Edge Transport server. A value of 250 or more indicates a critical issue.
]]>
-
- Poison Message Queue Length is the number of messages in the poison message queue.
-TechNet Exchange Counter Description:
-The number of messages in the poison message queue.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be 0 at all times. A value of 5 or more indicates a critical issue.
]]>I/O Log Writes/sec is the rate of logfile write operations completed.
-TechNet Exchange Counter Description:
-The rate at which log file write operations are completed.
-TechNet Exchange Guidance:
-Not applicable.
]]>I/O Log Reads/sec is the rate of logfile read operations completed.
-TechNet Exchange Counter Description:
-The rate at which log file read operations are completed.
-TechNet Exchange Guidance:
-Not applicable.
]]>
-
- Log Generation Checkpoint Depth represents the amount of work, in count of log files, that will need to be redone or undone to the database file(s) if the process crashes.
-TechNet Exchange Counter Description:
-Represents the amount of work that will need to be redone or undone to the database files if the process fails. This is documented in the log file count.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 1,000 at all times. A value of 60,000 or more indicates a critical issue.
]]>
- Total number of version buckets allocated
-TechNet Exchange Counter Description:
-Shows the total number of version buckets that have been allocated for the database.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 200 at all times.
]]>I/O Database Reads/sec is the rate of database read operations completed.
-TechNet Exchange Counter Description:
-The rate at which database read operations are completed.
-TechNet Exchange Guidance:
-Not applicable.
]]>I/O Database Writes/sec is the rate of database write operations completed.
-TechNet Exchange Counter Description:
-The rate at which the database write operations are completed.
-TechNet Exchange Guidance:
-Not applicable.
]]>
- Log Record Stalls/sec is the number of log records that cannot be added to the log buffers per second because they are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
-TechNet Exchange Counter Description:
-Shows the number of log records that cannot be added each second to the log buffers because the log buffers are full. If this counter is not zero most of the time, the log buffer size may be a bottleneck.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 10 per second on average with spikes (maximum values) of less than 100 per second.
]]>
- Log Threads Waiting is the number of threads waiting for their data to be written to the log in order to complete an update of the database. If this number is too high, the log may be a bottleneck.
-TechNet Exchange Counter Description:
-Shows the number of threads waiting for their data to be written to the log to complete an update of the database. If this number is too high, the log may be a bottleneck.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 10 threads waiting on average.
]]>Database Cache Size (MB) is the amount of system memory (in MegaBytes) used by the database cache manager to hold commonly used information from the database file(s) to prevent file operations. If the database cache size seems to be too small for optimal performance and there is very little available memory on the system (see Memory/Available Bytes), adding more memory to the system may increase performance. If there is a lot of available memory on the system and the database cache size is not growing beyond a certain point, the database cache size may be capped at an artificially low limit. Increasing this limit may increase performance.
-TechNet Exchange Counter Description:
-Shows the amount of system memory, in megabytes, that is used by the database cache manager to hold commonly used information from the database files to prevent file operations. If the database cache size seems too small for optimal performance, and there is little available memory on the system, check the value of Memory/Available Bytes. If you add more memory to the system, it may increase performance. If there is enough memory on the system and if the database cache size is not growing, the database cache size may be capped at an artificially low limit. You can increase the database cache size limit to try and increase performance.
-TechNet Exchange Guidance:
-Not applicable.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, it takes to read data from the hard disk.
-NOTE: When looking at hard disks using Perfmon.exe, an understanding of the underlying hard disk subsystem is required to determine which counters (physical disk or logical disk) to look at.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, it takes to read data from the hard disk.
-NOTE: When looking at hard disks using Perfmon.exe, an understanding of the underlying hard disk subsystem is required to determine which counters (physical disk or logical disk) to look at.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is required to determine which counters (physical disk or logical disk) to look at.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is required to determine which counters (physical disk or logical disk) to look at.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Transfer is the time, in seconds, of the average disk transfer.
-TechNet Exchange Counter Description:
-Indicates how fast data is being moved (in seconds). Measures the average time of each data transfer, regardless of the number of bytes read or written.
-TechNet Exchange Guidance:
-For healthy hard disks, this counter shows approximately 20 ms. Counter values larger than 20 ms, or with large spikes, indicate a possible hard disk issue (for example, failure or slow speed).
]]>
- Avg. Disk sec/Transfer is the time, in seconds, of the average disk transfer.
-TechNet Exchange Counter Description:
-Indicates how fast data is being moved (in seconds). Measures the average time of each data transfer, regardless of the number of bytes read or written.
-TechNet Exchange Guidance:
-For healthy hard disks, this counter shows approximately 20 ms. Counter values larger than 20 ms, or with large spikes, indicate a possible hard disk issue (for example, failure or slow speed).
]]>Avg. Disk Queue Length is the average number of both read and write requests that were queued for the selected disk during the sample interval.
-TechNet Exchange Counter Description:
-The average number of read and write requests that were queried for the selected disk during the sample interval.
-TechNet Exchange Guidance:
-Not applicable.
]]>Avg. Disk Queue Length is the average number of both read and write requests that were queued for the selected disk during the sample interval.
-TechNet Exchange Counter Description:
-The average number of read and write requests that were queried for the selected disk during the sample interval.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Evaluated per second is the number of messages the rule has evaluated per second.
-TechNet Exchange Counter Description:
-Shows the number of messages the rule has evaluated per second.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Processed per second is the number of messages the rule has processed per second.
-TechNet Exchange Counter Description:
-Shows the number of messages the rule has processed per second.
-TechNet Exchange Guidance:
-Not applicable.
]]>LDAP Searches/sec is the rate at which LDAP clients perform search operations.
-TechNet Exchange Counter Description:
-Shows the rate at which LDAP clients perfom search operations.
-TechNet Exchange Guidance:
-Not applicable.
]]>LDAP Writes/sec is the rate at which LDAP clients perform write operations.
-TechNet Exchange Counter Description:
-Shows the rate at which LDAP clients perform write operations.
-TechNet Exchange Guidance:
-Not applicable.
]]>LDAP Searches/sec is the rate at which LDAP clients perform search operations.
-TechNet Exchange Counter Description:
-Shows the rate at which LDAP clients perfom search operations.
-TechNet Exchange Guidance:
-Not applicable.
]]>LDAP Writes/sec is the rate at which LDAP clients perform write operations.
-TechNet Exchange Counter Description:
-Shows the rate at which LDAP clients perform write operations.
-TechNet Exchange Guidance:
-Not applicable.
]]>Edge objects added/sec is the rate of Edge objects added per second by EdgeSync.
-TechNet Exchange Counter Description:
-Shows the rate of Edge objects added per second by EdgeSync.
-TechNet Exchange Guidance:
-Not applicable.
]]>Edge objects deleted/sec is the rate of Edge objects deleted per second by EdgeSync.
-TechNet Exchange Counter Description:
-Shows the rate of Edge objects deleted per second by EdgeSync.
-TechNet Exchange Guidance:
-Not applicable.
]]>Edge objects updated/sec is the rate of Edge objects updated per second by EdgeSync.
-TechNet Exchange Counter Description:
-Shows the rate of Edge objects updated per second by EdgeSync.
-TechNet Exchange Guidance:
-Not applicable.
]]>Scan jobs completed successfully total is the total number of scan jobs completed successfully by EdgeSync.
-TechNet Exchange Counter Description:
-Shows the total number of scan jobs completed successfully by EdgeSync.
-TechNet Exchange Guidance:
-Not applicable.
]]>Scan jobs failed because could not extend lock total is the total number of EdgeSync scan jobs that failed because EdgeSync could not extend its lease of an Edge Transport server.
-TechNet Exchange Counter Description:
-Shows the total number of EdgeSync scan jobs that failed because EdgeSync could not extend its lease of an Edge Transport server.
-TechNet Exchange Guidance:
-Not applicable.
]]>Scan jobs failed because of directory error total is the total number of EdgeSync directory errors.
-TechNet Exchange Counter Description:
-Shows the total number of EdgeSync directory errors.
-TechNet Exchange Guidance:
-Not applicable.
]]>Source objects scanned/sec is the rate of Active Directory objects scanned per second by EdgeSync.
-TechNet Exchange Counter Description:
-Shows the rate of Active Directory objects scanned per second by EdgeSync.
-TechNet Exchange Guidance:
-Not applicable.
]]>Target objects scanned/sec is the rate of Edge objects scanned per second by EdgeSync.
-TechNet Exchange Counter Description:
-Shows the rate of Edge objects scanned per second by EdgeSync.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Submitted Per Second is the number of messages that have been queued in the Submission queue per second.
-TechNet Exchange Counter Description:
-Shows the number of messages in the Submission queue per second.
-This counter determines current load, which can then be compared to the historical baselines.
-TechNet Exchange Guidance:
-To calculate the approximate expected values of these counters, use the following formula:
-Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Messages Completed Delivery Per Second is the number of messages that are delivered per second.
-TechNet Exchange Counter Description:
-Shows the number of messages that are delivered per second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-To calculate the approximate expected values of these counters, use the following formula:
-Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Items Completed Delivery Per Second is the number of items processed to completion. All recipients either succeeded or failed.
-TechNet Exchange Counter Description:
-Show the number of items processed to completion. All recipients either succeeded or failed.
-TechNet Exchange Guidance:
-To calculate the approximate expected values of these counters, use the following formula:
-Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Average Bytes/Message is the average number of message bytes per inbound message received.
-TechNet Exchange Counter Description:
-Shows the average number of message bytes per inbound message received.
-Determines sizes of messages being received for an Edge receive connector.
-TechNet Exchange Guidance:
-To calculate the approximate expected values of these counters, use the following formula:
-Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Messages Received/sec is the number of messages received by the SMTP server each second.
-TechNet Exchange Counter Description:
-Shows the number of messages received by the SMTP receive connector each second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-To calculate the approximate expected values of these counters, use the following formula:
-Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Messages Sent/sec is the number of messages sent by the SMTP Send connector each second.
-TechNet Exchange Counter Description:
-Shows the number of messages sent by the SMTP send connector each second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-To calculate the approximate expected values of these counters, use the following formula:
-Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>
- Average agent processing time in seconds per event.
-TechNet Exchange Counter Description:
-Shows the average agent processing time in seconds per event.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Edge Transport servers, this value should be less than 20 at all times.
-Sustained higher latencies may indicate that an agent is not responding.
]]>Recipients Rejected by Recipient Validation per second is the number of recipients rejected by recipient validation per second.
-TechNet Exchange Counter Description:
-Show the number of recipients rejected by recipient validation per second.
-TechNet Exchange Guidance:
-Not applicable.
]]>Recipients Rejected by Block List per second is the number of recipients rejected by block list per second.
-TechNet Exchange Counter Description:
-Show the number of recipients rejected by block list per second.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Filtered by Sender Filter per second is the number of messages filtered by the Sender Filter agent per second.
-TechNet Exchange Counter Description:
-Show the number of messages filtered by the Sender Filter agent per second.
-TechNet Exchange Guidance:
-Not applicable.
]]>DNS Queries per second is the number of DNS queries per second performed by the Sender Id agent.
-TechNet Exchange Counter Description:
-Shows the number of DNS queries per second performed by the Sender Id agent.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Attachment Filtered is the number of messages that were either blocked, attachment-stripped or silent-deleted (as per configuration) by the attachment filtering agent.
-TechNet Exchange Counter Description:
-Shows the number of messages that were blocked, stripped of attachments, or silently deleted (as per configuration) by the attachment filtering agent.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Filtered per second is the number of messages per second that the attachment filtering agent blocked, attachment-stripped or silent deleted. If this rate rises greatly beyond what is "normal" for the Exchange server, it may indicate that the organization is being flooded with malicious e-mail.
-TechNet Exchange Counter Description:
-Shows the number of messages per second that the attachment filtering agent blocked, stripped of attachments, or silently deleted. If this rate rises greatly beyond what is “normal†for the Exchange server, it may indicate that the organization is being flooded with malicious e-mail.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Deleted is the total number of messages that were deleted by Content Filter Agent.
-TechNet Exchange Counter Description:
-Shows the total number of messages that were deleted by Content Filter Agent.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Quarantined is the total number of messages that were quarantined by Content Filter Agent.
-TechNet Exchange Counter Description:
-Shows the total number of messages that were quarantined by Content Filter Agent.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages Rejected is the total number of messages that were rejected by Content Filter Agent.
-TechNet Exchange Counter Description:
-Shows the total number of messages that were rejected by Content Filter Agent.
-TechNet Exchange Guidance:
-Not applicable.
]]>The total number of messages that bypass scanning
-TechNet Exchange Counter Description:
-Shows the total number of messages that bypass scanning.
-TechNet Exchange Guidance:
-Not applicable.
]]>The number of messages scanned per second
-TechNet Exchange Counter Description:
-Shows the number of messages scanned per second.
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 0.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 1.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 2.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 3.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 4.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 5.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 6.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 7.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 8.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 9.
-TechNet Exchange Counter Description:
-Shows the total number of messages assigned to a particular SCL level
-TechNet Exchange Guidance:
-Not applicable.
]]>Block Senders is the total number of blocked senders.
-TechNet Exchange Counter Description:
-Shows the total number of blocked senders.
-TechNet Exchange Guidance:
-Not applicable.
]]>
+
+ Aggregate Delivery Queue Length (All Queues) is the number of messages queued for delivery in all queues.
+TechNet Exchange Counter Description:
+The number of messages queued for delivery in all queues.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 3,000 and not more than 5,000.
]]>
+
+ Active Remote Delivery Queue Length is the number of messages in the active remote delivery queues.
+TechNet Exchange Counter Description:
+The number of messages in the active remote delivery queues.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 200. A value of 250 or more indicates a critical issue.
]]>
+
+ Submission Queue Length is the number of messages in the Submission queue.
+TechNet Exchange Counter Description:
+The number of messages in the Submission queue.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 100. A value of 250 or more indicates a critical issue.
]]>
+
+ Retry Remote Delivery Queue Length is the number of messages in retry in the remote delivery queues.
+TechNet Exchange Counter Description:
+The number of messages in the remote delivery queues that are waiting to be retried.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 100. A value of 250 or more indicates a critical issue. If this value is high, we recommend that you check the next hop to determine the causes for the long queues.
]]>
+
+ Unreachable Queue Length is the number of messages in the Unreachable queue.
+TechNet Exchange Counter Description:
+The number of messages in the Unreachable queue.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 100. A value of 250 or more indicates a critical issue.
]]>
+
+ Largest Delivery Queue Length is the number of messages in the largest delivery queue.
+TechNet Exchange Counter Description:
+The number of messages in the largest delivery queue.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 200 for the Edge Transport server. A value of 250 or more indicates a critical issue.
]]>
+
+ Poison Message Queue Length is the number of messages in the poison message queue.
+TechNet Exchange Counter Description:
+The number of messages in the poison message queue.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be 0 at all times. A value of 5 or more indicates a critical issue.
]]>I/O Log Writes/sec is the rate of logfile write operations completed.
+TechNet Exchange Counter Description:
+The rate at which log file write operations are completed.
+TechNet Exchange Guidance:
+Not applicable.
]]>I/O Log Reads/sec is the rate of logfile read operations completed.
+TechNet Exchange Counter Description:
+The rate at which log file read operations are completed.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+
+ Log Generation Checkpoint Depth represents the amount of work, in count of log files, that will need to be redone or undone to the database file(s) if the process crashes.
+TechNet Exchange Counter Description:
+Represents the amount of work that will need to be redone or undone to the database files if the process fails. This is documented in the log file count.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 1,000 at all times. A value of 60,000 or more indicates a critical issue.
]]>
+ Total number of version buckets allocated
+TechNet Exchange Counter Description:
+Shows the total number of version buckets that have been allocated for the database.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 200 at all times.
]]>I/O Database Reads/sec is the rate of database read operations completed.
+TechNet Exchange Counter Description:
+The rate at which database read operations are completed.
+TechNet Exchange Guidance:
+Not applicable.
]]>I/O Database Writes/sec is the rate of database write operations completed.
+TechNet Exchange Counter Description:
+The rate at which the database write operations are completed.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ Log Record Stalls/sec is the number of log records that cannot be added to the log buffers per second because they are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
+TechNet Exchange Counter Description:
+Shows the number of log records that cannot be added each second to the log buffers because the log buffers are full. If this counter is not zero most of the time, the log buffer size may be a bottleneck.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 10 per second on average with spikes (maximum values) of less than 100 per second.
]]>
+ Log Threads Waiting is the number of threads waiting for their data to be written to the log in order to complete an update of the database. If this number is too high, the log may be a bottleneck.
+TechNet Exchange Counter Description:
+Shows the number of threads waiting for their data to be written to the log to complete an update of the database. If this number is too high, the log may be a bottleneck.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 10 threads waiting on average.
]]>Database Cache Size (MB) is the amount of system memory (in MegaBytes) used by the database cache manager to hold commonly used information from the database file(s) to prevent file operations. If the database cache size seems to be too small for optimal performance and there is very little available memory on the system (see Memory/Available Bytes), adding more memory to the system may increase performance. If there is a lot of available memory on the system and the database cache size is not growing beyond a certain point, the database cache size may be capped at an artificially low limit. Increasing this limit may increase performance.
+TechNet Exchange Counter Description:
+Shows the amount of system memory, in megabytes, that is used by the database cache manager to hold commonly used information from the database files to prevent file operations. If the database cache size seems too small for optimal performance, and there is little available memory on the system, check the value of Memory/Available Bytes. If you add more memory to the system, it may increase performance. If there is enough memory on the system and if the database cache size is not growing, the database cache size may be capped at an artificially low limit. You can increase the database cache size limit to try and increase performance.
+TechNet Exchange Guidance:
+Not applicable.
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, it takes to read data from the hard disk.
+NOTE: When looking at hard disks using Perfmon.exe, an understanding of the underlying hard disk subsystem is required to determine which counters (physical disk or logical disk) to look at.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, it takes to read data from the hard disk.
+NOTE: When looking at hard disks using Perfmon.exe, an understanding of the underlying hard disk subsystem is required to determine which counters (physical disk or logical disk) to look at.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is required to determine which counters (physical disk or logical disk) to look at.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is required to determine which counters (physical disk or logical disk) to look at.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Transfer is the time, in seconds, of the average disk transfer.
+TechNet Exchange Counter Description:
+Indicates how fast data is being moved (in seconds). Measures the average time of each data transfer, regardless of the number of bytes read or written.
+TechNet Exchange Guidance:
+For healthy hard disks, this counter shows approximately 20 ms. Counter values larger than 20 ms, or with large spikes, indicate a possible hard disk issue (for example, failure or slow speed).
]]>
+ Avg. Disk sec/Transfer is the time, in seconds, of the average disk transfer.
+TechNet Exchange Counter Description:
+Indicates how fast data is being moved (in seconds). Measures the average time of each data transfer, regardless of the number of bytes read or written.
+TechNet Exchange Guidance:
+For healthy hard disks, this counter shows approximately 20 ms. Counter values larger than 20 ms, or with large spikes, indicate a possible hard disk issue (for example, failure or slow speed).
]]>Avg. Disk Queue Length is the average number of both read and write requests that were queued for the selected disk during the sample interval.
+TechNet Exchange Counter Description:
+The average number of read and write requests that were queued for the selected disk during the sample interval.
+TechNet Exchange Guidance:
+Not applicable.
]]>Avg. Disk Queue Length is the average number of both read and write requests that were queued for the selected disk during the sample interval.
+TechNet Exchange Counter Description:
+The average number of read and write requests that were queued for the selected disk during the sample interval.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Evaluated per second is the number of messages the rule has evaluated per second.
+TechNet Exchange Counter Description:
+Shows the number of messages the rule has evaluated per second.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Processed per second is the number of messages the rule has processed per second.
+TechNet Exchange Counter Description:
+Shows the number of messages the rule has processed per second.
+TechNet Exchange Guidance:
+Not applicable.
]]>LDAP Searches/sec is the rate at which LDAP clients perform search operations.
+TechNet Exchange Counter Description:
+Shows the rate at which LDAP clients perform search operations.
+TechNet Exchange Guidance:
+Not applicable.
]]>LDAP Writes/sec is the rate at which LDAP clients perform write operations.
+TechNet Exchange Counter Description:
+Shows the rate at which LDAP clients perform write operations.
+TechNet Exchange Guidance:
+Not applicable.
]]>LDAP Searches/sec is the rate at which LDAP clients perform search operations.
+TechNet Exchange Counter Description:
+Shows the rate at which LDAP clients perform search operations.
+TechNet Exchange Guidance:
+Not applicable.
]]>LDAP Writes/sec is the rate at which LDAP clients perform write operations.
+TechNet Exchange Counter Description:
+Shows the rate at which LDAP clients perform write operations.
+TechNet Exchange Guidance:
+Not applicable.
]]>Edge objects added/sec is the rate of Edge objects added per second by EdgeSync.
+TechNet Exchange Counter Description:
+Shows the rate of Edge objects added per second by EdgeSync.
+TechNet Exchange Guidance:
+Not applicable.
]]>Edge objects deleted/sec is the rate of Edge objects deleted per second by EdgeSync.
+TechNet Exchange Counter Description:
+Shows the rate of Edge objects deleted per second by EdgeSync.
+TechNet Exchange Guidance:
+Not applicable.
]]>Edge objects updated/sec is the rate of Edge objects updated per second by EdgeSync.
+TechNet Exchange Counter Description:
+Shows the rate of Edge objects updated per second by EdgeSync.
+TechNet Exchange Guidance:
+Not applicable.
]]>Scan jobs completed successfully total is the total number of scan jobs completed successfully by EdgeSync.
+TechNet Exchange Counter Description:
+Shows the total number of scan jobs completed successfully by EdgeSync.
+TechNet Exchange Guidance:
+Not applicable.
]]>Scan jobs failed because could not extend lock total is the total number of EdgeSync scan jobs that failed because EdgeSync could not extend its lease of an Edge Transport server.
+TechNet Exchange Counter Description:
+Shows the total number of EdgeSync scan jobs that failed because EdgeSync could not extend its lease of an Edge Transport server.
+TechNet Exchange Guidance:
+Not applicable.
]]>Scan jobs failed because of directory error total is the total number of EdgeSync directory errors.
+TechNet Exchange Counter Description:
+Shows the total number of EdgeSync directory errors.
+TechNet Exchange Guidance:
+Not applicable.
]]>Source objects scanned/sec is the rate of Active Directory objects scanned per second by EdgeSync.
+TechNet Exchange Counter Description:
+Shows the rate of Active Directory objects scanned per second by EdgeSync.
+TechNet Exchange Guidance:
+Not applicable.
]]>Target objects scanned/sec is the rate of Edge objects scanned per second by EdgeSync.
+TechNet Exchange Counter Description:
+Shows the rate of Edge objects scanned per second by EdgeSync.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Submitted Per Second is the number of messages that have been queued in the Submission queue per second.
+TechNet Exchange Counter Description:
+Shows the number of messages in the Submission queue per second.
+This counter determines current load, which can then be compared to the historical baselines.
+TechNet Exchange Guidance:
+To calculate the approximate expected values of these counters, use the following formula:
+Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Messages Completed Delivery Per Second is the number of messages that are delivered per second.
+TechNet Exchange Counter Description:
+Shows the number of messages that are delivered per second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+To calculate the approximate expected values of these counters, use the following formula:
+Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Items Completed Delivery Per Second is the number of items processed to completion. All recipients either succeeded or failed.
+TechNet Exchange Counter Description:
+Shows the number of items processed to completion. All recipients either succeeded or failed.
+TechNet Exchange Guidance:
+To calculate the approximate expected values of these counters, use the following formula:
+Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Average Bytes/Message is the average number of message bytes per inbound message received.
+TechNet Exchange Counter Description:
+Shows the average number of message bytes per inbound message received.
+Determines sizes of messages being received for an Edge receive connector.
+TechNet Exchange Guidance:
+To calculate the approximate expected values of these counters, use the following formula:
+Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Messages Received/sec is the number of messages received by the SMTP server each second.
+TechNet Exchange Counter Description:
+Shows the number of messages received by the SMTP receive connector each second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+To calculate the approximate expected values of these counters, use the following formula:
+Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>Messages Sent/sec is the number of messages sent by the SMTP Send connector each second.
+TechNet Exchange Counter Description:
+Shows the number of messages sent by the SMTP send connector each second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+To calculate the approximate expected values of these counters, use the following formula:
+Queue threshold = (SLA in seconds) x (average number of messages handled per second)
]]>
+ Average agent processing time in seconds per event.
+TechNet Exchange Counter Description:
+Shows the average agent processing time in seconds per event.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Edge Transport servers, this value should be less than 20 at all times.
+Sustained higher latencies may indicate that an agent is not responding.
]]>Recipients Rejected by Recipient Validation per second is the number of recipients rejected by recipient validation per second.
+TechNet Exchange Counter Description:
+Shows the number of recipients rejected by recipient validation per second.
+TechNet Exchange Guidance:
+Not applicable.
]]>Recipients Rejected by Block List per second is the number of recipients rejected by block list per second.
+TechNet Exchange Counter Description:
+Shows the number of recipients rejected by block list per second.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Filtered by Sender Filter per second is the number of messages filtered by the Sender Filter agent per second.
+TechNet Exchange Counter Description:
+Shows the number of messages filtered by the Sender Filter agent per second.
+TechNet Exchange Guidance:
+Not applicable.
]]>DNS Queries per second is the number of DNS queries per second performed by the Sender ID agent.
+TechNet Exchange Counter Description:
+Shows the number of DNS queries per second performed by the Sender ID agent.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Attachment Filtered is the number of messages that were either blocked, attachment-stripped or silent-deleted (as per configuration) by the attachment filtering agent.
+TechNet Exchange Counter Description:
+Shows the number of messages that were blocked, stripped of attachments, or silently deleted (as per configuration) by the attachment filtering agent.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Filtered per second is the number of messages per second that the attachment filtering agent blocked, attachment-stripped or silent deleted. If this rate rises greatly beyond what is "normal" for the Exchange server, it may indicate that the organization is being flooded with malicious e-mail.
+TechNet Exchange Counter Description:
+Shows the number of messages per second that the attachment filtering agent blocked, stripped of attachments, or silently deleted. If this rate rises greatly beyond what is "normal" for the Exchange server, it may indicate that the organization is being flooded with malicious e-mail.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Deleted is the total number of messages that were deleted by Content Filter Agent.
+TechNet Exchange Counter Description:
+Shows the total number of messages that were deleted by Content Filter Agent.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Quarantined is the total number of messages that were quarantined by Content Filter Agent.
+TechNet Exchange Counter Description:
+Shows the total number of messages that were quarantined by Content Filter Agent.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages Rejected is the total number of messages that were rejected by Content Filter Agent.
+TechNet Exchange Counter Description:
+Shows the total number of messages that were rejected by Content Filter Agent.
+TechNet Exchange Guidance:
+Not applicable.
]]>The total number of messages that bypass scanning
+TechNet Exchange Counter Description:
+Shows the total number of messages that bypass scanning.
+TechNet Exchange Guidance:
+Not applicable.
]]>The number of messages scanned per second
+TechNet Exchange Counter Description:
+Shows the number of messages scanned per second.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 0.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 1.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 2.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 3.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 4.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 5.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 6.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 7.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 8.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Messages with SCL 0 is the total number of messages assigned an SCL rating of 9.
+TechNet Exchange Counter Description:
+Shows the total number of messages assigned to a particular SCL level.
+TechNet Exchange Guidance:
+Not applicable.
]]>Block Senders is the total number of blocked senders.
+TechNet Exchange Counter Description:
+Shows the total number of blocked senders.
+TechNet Exchange Guidance:
+Not applicable.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 20 ms on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 20 ms on average with spikes (maximum values) of less than 50 ms.
]]>
-
- Aggregate Delivery Queue Length (All Queues) is the number of messages queued for delivery in all queues.
-TechNet Exchange Counter Description:
-Shows the number of messages queued for delivery in all queues.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 3,000 and not more than 5,000.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Active Remote Delivery Queue Length is the number of messages in the active remote delivery queues.
-TechNet Exchange Counter Description:
-Shows the number of messages in the active remote delivery queues.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 250 at all times.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Active Mailbox Delivery Queue Length is the number of messages in the active mailbox queues.
-TechNet Exchange Counter Description:
-Shows the number of messages in the active mailbox queues.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 250 at all times.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Submission Queue Length is the number of messages in the Submission queue.
-TechNet Exchange Counter Description:
-Shows the number of messages in the submission queue.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should not exceed 100.
-If sustained high values are occurring, investigate Active Directory and Mailbox servers for bottlenecks or performance-related issues.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Active Non-SMTP Delivery Queue Length is the number of messages in the Drop directory that is used by a Foreign connector.
-TechNet Exchange Counter Description:
-Shows the number of messages in the Drop directory that is used by a Foreign connector.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 250 at all times.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Retry Mailbox Delivery Queue Length is the number of messages in retry.
-TechNet Exchange Counter Description:
-Shows the number of messages in a retry state attempting to deliver a message to a remote mailbox.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 100 at all times.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Retry Non-SMTP Delivery Queue Length is the number of messages in retry in the non-SMTP gateway delivery queues.
-TechNet Exchange Counter Description:
-Shows the number of messages in a retry state in the non-Simple Mail Transfer Protocol (SMTP) gateway delivery queues.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should not exceed 100.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Retry Remote Delivery Queue Length is the number of messages in retry in the remote delivery queues.
-TechNet Exchange Counter Description:
-Shows the number of messages in a retry state in the remote delivery queues.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should not exceed 100.
-We recommend that you check the next hop to determine the causes for queuing.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Unreachable Queue Length is the number of messages in the Unreachable queue.
-TechNet Exchange Counter Description:
-Shows the number of messages in the Unreachable queue.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should not exceed 100.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
- Largest Delivery Queue Length is the number of messages in the largest delivery queue.
-TechNet Exchange Counter Description:
-Shows the number of messages in the largest delivery queues.
-TechNet Exchange Guidance:
-Tthis value should be less than 200 for the Edge Transport and Hub Transport server roles.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>Poison Message Queue Length is the number of messages in the poison message queue.
-TechNet Exchange Counter Description:
-Shows the number of messages in the poison message queue.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be 0 at all times.
-NOTE:
-When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
-For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
-Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>Messages Submitted Per Second is the number of messages that have been queued in the Submission queue per second.
-TechNet Exchange Counter Description:
-Shows the number of messages that have been queued in the Submission queue per second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Messages Completed Delivery Per Second is the number of messages that are delivered per second.
-TechNet Exchange Counter Description:
-Shows the number of messages that are delivered per second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Inbound: Local Delivery Calls Per Second is the number of local delivery attempts per second.
-TechNet Exchange Counter Description:
-Shows the number of local delivery attempts per second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Outbound: Submitted Mail Items Per Second is the number of mail items per second being submitted.
-TechNet Exchange Counter Description:
-Shows the number of mail items per second being submitted.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Average Bytes/Message is the average number of message bytes per inbound message received.
-TechNet Exchange Counter Description:
-Shows the average number of message bytes per inbound message received.
-Determines sizes of messages being received for an SMTP receive connector.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Messages Received/sec is the number of messages received by the SMTP server each second.
-TechNet Exchange Counter Description:
-Shows the number of messages received by the SMTP server each second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Messages Sent/sec is the number of messages sent by the SMTP Send connector each second.
-TechNet Exchange Counter Description:
-Shows the number of messages sent by the SMTP send connector each second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Inbound: Message Delivery Attempts Per Second is the number of attempts for delivering transport mailitem per second.
-TechNet Exchange Counter Description:
-Shows the number of attempts for delivering transport mail items per second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Recipients delivered per second
-TechNet Exchange Counter Description:
-Shows the number of inbound recipients delivered per second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Messages Queued for Delivery Per Second is the number of messages that have been queued for delivery per second.
-TechNet Exchange Counter Description:
-Shows the number of messages that have been queued for delivery per second.
-Determines current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Item Size is the total size (in bytes) of mail items that are currently in the Transport Dumpster on this server.
-TechNet Exchange Counter Description:
-Shows the total size (in bytes) of mail items that are currently in the transport dumpster on this server.
-Shows overall size of the transport dumpster.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Insert Rate is the rate at which items are inserted into the Transport Dumpster on this server.
-TechNet Exchange Counter Description:
-Shows the rate at which items are inserted into the transport dumpster on this server.
-Determines the current rate of transport dumpster inserts.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Item Count is the total number of mail items that are currently in the Transport Dumpster on this server.
-TechNet Exchange Counter Description:
-Shows the total number of mail items that are currently in the transport dumpster on this server.
-Shows the current amount of items that are being held in the transport dumpster.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Delete Rate is the rate at which items are deleted from the Transport Dumpster on this server.
-TechNet Exchange Counter Description:
-Shows the rate at which items are deleted from the transport dumpster on this server.
-Determines the current rate of transport dumpster deletions.
-TechNet Exchange Guidance:
-Not Applicable.
]]>I/O Log Writes/sec is the rate of logfile write operations completed.
-TechNet Exchange Counter Description:
-Shows the rate of log file write operations completed.
-Determines the current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>I/O Log Reads/sec is the rate of logfile read operations completed.
-TechNet Exchange Counter Description:
-Shows the rate of log file read operations completed.
-Determines the current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>InstancesLogGenerationCheckpointDepth -Operator 'ge' -Threshold 1000]]>
- Log Generation Checkpoint Depth represents the amount of work, in count of log files, that will need to be redone or undone to the database file(s) if the process crashes.
-TechNet Exchange Counter Description:
-Represents the amount of work, in count of log files, that need to be redone or undone to the database files if the process fails.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 1,000 at all times.
]]>InstancesVersionbucketsallocated -Operator 'ge' -Threshold 200]]>
- Total number of version buckets allocated
-TechNet Exchange Counter Description:
-Total number of version buckets allocated
-Shows the default backpressure values as listed in the edgetransport.exe.config file.
-Exchange 2007 release to manufacturing (RTM) version - Medium=60, High=100
-Exchange 2007 SP1 version – Medium=120, High=200
-For more database cache size guidance, see New maximum database cache size guidance for Exchange 2007 Hub Transport server role.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 200 at all times.
]]>I/O Database Reads/sec is the rate of database read operations completed.
-TechNet Exchange Counter Description:
-Shows the rate of database read operations completed.
-Determines the current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>I/O Database Writes/sec is the rate of database write operations completed.
-TechNet Exchange Counter Description:
-Shows the rate of database write operations completed.
-Determines the current load. Compare values to historical baselines.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
- Log Record Stalls/sec is the number of log records that cannot be added to the log buffers per second because they are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
-TechNet Exchange Counter Description:
-Shows the number of log records that cannot be added to the log buffers per second because they are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 10 per second on average with spikes (maximum values) of less than 100 per second.
]]>
- Log Threads Waiting is the number of threads waiting for their data to be written to the log in order to complete an update of the database. If this number is too high, the log may be a bottleneck.
-TechNet Exchange Counter Description:
-Shows the number of threads waiting for their data to be written to the log to complete an update of the database. If this number is too high, the log may be a bottleneck.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 10 threads waiting on average.
]]>
- Average agent processing time in seconds per event.
-TechNet Exchange Counter Description:
-Shows the average agent processing time in seconds per event.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Hub Transport servers, this value should be less than 20 at all times.
-Sustained higher latencies may indicate a hung agent.
]]>Total number of agent invocations since the last restart.
-TechNet Exchange Counter Description:
-Shows the total number of invocations since the last restart.
-Shows the current invocation rate.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 20 ms on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 20 milliseconds (ms) on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 20 ms on average with spikes (maximum values) of less than 50 ms.
]]>
+
+ Aggregate Delivery Queue Length (All Queues) is the number of messages queued for delivery in all queues.
+TechNet Exchange Counter Description:
+Shows the number of messages queued for delivery in all queues.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 3,000 and not more than 5,000.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Active Remote Delivery Queue Length is the number of messages in the active remote delivery queues.
+TechNet Exchange Counter Description:
+Shows the number of messages in the active remote delivery queues.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 250 at all times.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Active Mailbox Delivery Queue Length is the number of messages in the active mailbox queues.
+TechNet Exchange Counter Description:
+Shows the number of messages in the active mailbox queues.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 250 at all times.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Submission Queue Length is the number of messages in the Submission queue.
+TechNet Exchange Counter Description:
+Shows the number of messages in the submission queue.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should not exceed 100.
+If sustained high values are occurring, investigate Active Directory and Mailbox servers for bottlenecks or performance-related issues.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Active Non-SMTP Delivery Queue Length is the number of messages in the Drop directory that is used by a Foreign connector.
+TechNet Exchange Counter Description:
+Shows the number of messages in the Drop directory that is used by a Foreign connector.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 250 at all times.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Retry Mailbox Delivery Queue Length is the number of messages in retry.
+TechNet Exchange Counter Description:
+Shows the number of messages in a retry state attempting to deliver a message to a remote mailbox.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 100 at all times.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Retry Non-SMTP Delivery Queue Length is the number of messages in retry in the non-SMTP gateway delivery queues.
+TechNet Exchange Counter Description:
+Shows the number of messages in a retry state in the non-Simple Mail Transfer Protocol (SMTP) gateway delivery queues.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should not exceed 100.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Retry Remote Delivery Queue Length is the number of messages in retry in the remote delivery queues.
+TechNet Exchange Counter Description:
+Shows the number of messages in a retry state in the remote delivery queues.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should not exceed 100.
+We recommend that you check the next hop to determine the causes for queuing.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Unreachable Queue Length is the number of messages in the Unreachable queue.
+TechNet Exchange Counter Description:
+Shows the number of messages in the Unreachable queue.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should not exceed 100.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>
+ Largest Delivery Queue Length is the number of messages in the largest delivery queue.
+TechNet Exchange Counter Description:
+Shows the number of messages in the largest delivery queues.
+TechNet Exchange Guidance:
+This value should be less than 200 for the Edge Transport and Hub Transport server roles.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>Poison Message Queue Length is the number of messages in the poison message queue.
+TechNet Exchange Counter Description:
+Shows the number of messages in the poison message queue.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be 0 at all times.
+NOTE:
+When determining thresholds for queue lengths, you should consider server throughput as well as acceptable delivery delays before you need to be informed. The recommended maximum of 5,000 for the \MSExchangeTransport Queues(_total)\Aggregate Delivery Queue Length (All Queues) counter is for typical medium-size servers and typical maximum delays. You can consider customizing both the aggregate and other limits to optimize performance for your environment.
+For example, if the server normally processes 25 messages per second and you want the average message delay to be no more than 5 minutes before you are alerted, you would set the threshold to be 25 messages/second x 5 minutes x 60 seconds/minute = 7,500 messages. However, if your server only processes an average of 5 messages/second, you would set the alert to be only 1,500 messages.
+Keep in mind that this calculation does not mean that messages will always be delivered in under 5 minutes. Instead, the average delivery time for one particular server should be under 5 minutes. (Consider how many hops your messages may need to determine actual delivery time.) Also, consider the server hardware in your calculations because having large message queues can be a drain on server resources.
]]>Messages Submitted Per Second is the number of messages that have been queued in the Submission queue per second.
+TechNet Exchange Counter Description:
+Shows the number of messages that have been queued in the Submission queue per second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Messages Completed Delivery Per Second is the number of messages that are delivered per second.
+TechNet Exchange Counter Description:
+Shows the number of messages that are delivered per second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Inbound: Local Delivery Calls Per Second is the number of local delivery attempts per second.
+TechNet Exchange Counter Description:
+Shows the number of local delivery attempts per second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Outbound: Submitted Mail Items Per Second is the number of mail items per second being submitted.
+TechNet Exchange Counter Description:
+Shows the number of mail items per second being submitted.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Average Bytes/Message is the average number of message bytes per inbound message received.
+TechNet Exchange Counter Description:
+Shows the average number of message bytes per inbound message received.
+Determines sizes of messages being received for an SMTP receive connector.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Messages Received/sec is the number of messages received by the SMTP server each second.
+TechNet Exchange Counter Description:
+Shows the number of messages received by the SMTP server each second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Messages Sent/sec is the number of messages sent by the SMTP Send connector each second.
+TechNet Exchange Counter Description:
+Shows the number of messages sent by the SMTP send connector each second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Inbound: Message Delivery Attempts Per Second is the number of attempts for delivering transport mail items per second.
+TechNet Exchange Counter Description:
+Shows the number of attempts for delivering transport mail items per second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Recipients delivered per second
+TechNet Exchange Counter Description:
+Shows the number of inbound recipients delivered per second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Messages Queued for Delivery Per Second is the number of messages that have been queued for delivery per second.
+TechNet Exchange Counter Description:
+Shows the number of messages that have been queued for delivery per second.
+Determines current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Item Size is the total size (in bytes) of mail items that are currently in the Transport Dumpster on this server.
+TechNet Exchange Counter Description:
+Shows the total size (in bytes) of mail items that are currently in the transport dumpster on this server.
+Shows overall size of the transport dumpster.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Insert Rate is the rate at which items are inserted into the Transport Dumpster on this server.
+TechNet Exchange Counter Description:
+Shows the rate at which items are inserted into the transport dumpster on this server.
+Determines the current rate of transport dumpster inserts.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Item Count is the total number of mail items that are currently in the Transport Dumpster on this server.
+TechNet Exchange Counter Description:
+Shows the total number of mail items that are currently in the transport dumpster on this server.
+Shows the current amount of items that are being held in the transport dumpster.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Delete Rate is the rate at which items are deleted from the Transport Dumpster on this server.
+TechNet Exchange Counter Description:
+Shows the rate at which items are deleted from the transport dumpster on this server.
+Determines the current rate of transport dumpster deletions.
+TechNet Exchange Guidance:
+Not Applicable.
]]>I/O Log Writes/sec is the rate of logfile write operations completed.
+TechNet Exchange Counter Description:
+Shows the rate of log file write operations completed.
+Determines the current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>I/O Log Reads/sec is the rate of logfile read operations completed.
+TechNet Exchange Counter Description:
+Shows the rate of log file read operations completed.
+Determines the current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>InstancesLogGenerationCheckpointDepth -Operator 'ge' -Threshold 1000]]>
+ Log Generation Checkpoint Depth represents the amount of work, in count of log files, that will need to be redone or undone to the database file(s) if the process crashes.
+TechNet Exchange Counter Description:
+Represents the amount of work, in count of log files, that need to be redone or undone to the database files if the process fails.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 1,000 at all times.
]]>InstancesVersionbucketsallocated -Operator 'ge' -Threshold 200]]>
+ Total number of version buckets allocated
+TechNet Exchange Counter Description:
+Total number of version buckets allocated
+Shows the default backpressure values as listed in the edgetransport.exe.config file.
+Exchange 2007 release to manufacturing (RTM) version - Medium=60, High=100
+Exchange 2007 SP1 version – Medium=120, High=200
+For more database cache size guidance, see New maximum database cache size guidance for Exchange 2007 Hub Transport server role.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 200 at all times.
]]>I/O Database Reads/sec is the rate of database read operations completed.
+TechNet Exchange Counter Description:
+Shows the rate of database read operations completed.
+Determines the current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>I/O Database Writes/sec is the rate of database write operations completed.
+TechNet Exchange Counter Description:
+Shows the rate of database write operations completed.
+Determines the current load. Compare values to historical baselines.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+ Log Record Stalls/sec is the number of log records that cannot be added to the log buffers per second because they are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
+TechNet Exchange Counter Description:
+Shows the number of log records that cannot be added to the log buffers per second because they are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 10 per second on average with spikes (maximum values) of less than 100 per second.
]]>
+ Log Threads Waiting is the number of threads waiting for their data to be written to the log in order to complete an update of the database. If this number is too high, the log may be a bottleneck.
+TechNet Exchange Counter Description:
+Shows the number of threads waiting for their data to be written to the log to complete an update of the database. If this number is too high, the log may be a bottleneck.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 10 threads waiting on average.
]]>
+ Average agent processing time in seconds per event.
+TechNet Exchange Counter Description:
+Shows the average agent processing time in seconds per event.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Hub Transport servers, this value should be less than 20 at all times.
+Sustained higher latencies may indicate a hung agent.
]]>Total number of agent invocations since the last restart.
+TechNet Exchange Counter Description:
+Shows the total number of invocations since the last restart.
+Shows the current invocation rate.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
-
-
-
-
-
-
-
-
- 75 Then
- IsAvgThresholdBroken = True
- End If
-
- If MaxProcessor > 75 Then
- IsMaxThresholdBroken = True
- End If
- ]]>
- 90 Then
- IsAvgThresholdBroken = True
- End If
-
- If MaxProcessor > 90 Then
- IsMaxThresholdBroken = True
- End If
- ]]>
- 0 AND AvgNetworkInterfaceCurrentBandwidth > 0 Then
- PercentNetworkUtilizationThreshold = 50
- BitsPerSec = Round(AvgNetworkInterfaceBytesTotalPerSec) * 8
- PercentOfNetworkUtilized = (BitsPerSec * 100) / AvgNetworkInterfaceCurrentBandwidth
- If AvgNetworkInterfaceBytesTotalPerSec > 0 Then
- If PercentOfNetworkUtilized > PercentNetworkUtilizationThreshold Then
- IsAvgThresholdBroken = True
- End If
- End If
-End If]]>
- 0 AND AvgNetworkInterfaceCurrentBandwidth > 0 Then
- PercentNetworkUtilizationThreshold = 80
- BitsPerSec = Round(AvgNetworkInterfaceBytesTotalPerSec) * 8
- PercentOfNetworkUtilized = (BitsPerSec * 100) / AvgNetworkInterfaceCurrentBandwidth
- If AvgNetworkInterfaceBytesTotalPerSec > 0 Then
- If PercentOfNetworkUtilized > PercentNetworkUtilizationThreshold Then
- IsAvgThresholdBroken = True
- End If
- End If
-End If]]>
- (NumberOfProcessors * 10) Then
- IsAvgThresholdBroken = True
-End If
-If MaxQueue => (NumberOfProcessors * 10) Then
- IsMaxThresholdBroken = True
-End If
- ]]>
- (NumberOfProcessors * 20) Then
- IsAvgThresholdBroken = True
- End If
- If MaxQueue => (NumberOfProcessors * 20) Then
- IsMaxThresholdBroken = True
- End If
- ]]>
- 0 Then
- PercentageOfPrivilegedModeCPUToTotalCPU = (AvgPrivProcessor * 100) / AvgProcessor
- If PercentageOfPrivilegedModeCPUToTotalCPU > 30 AND AvgProcessor > 50 Then
- IsAvgThresholdBroken = True
- End If
-End If]]>
- 0 Then
- PercentageOfPrivilegedModeCPUToTotalCPU = (MaxPrivProcessor * 100) / MaxProcessor
- If PercentageOfPrivilegedModeCPUToTotalCPU > 30 AND MaxProcessor > 50 Then
- IsMaxThresholdBroken = True
- End If
-End If]]>
- 1 Then
- IsAvgThresholdBroken = True
-End If]]>
- 2 Then
- IsAvgThresholdBroken = True
-End If
-
- ]]>
- 0 AND AvgProcessor > 0 AND AvgContextSwitchesPerSec > 0 AND NumberOfProcessors > 0 Then
- PercentageOfPrivilegedModeCPUToTotalCPU = (AvgPrivProcessor * 100) / AvgProcessor
- If PercentageOfPrivilegedModeCPU > 20 AND AvgProcessor > 50 AND (AvgContextSwitchesPerSec / NumberOfProcessors) > 5000 Then
- IsAvgThresholdBroken = True
- End If
- End If
- ]]>
- 0 AND AvgProcessor > 0 AND AvgContextSwitchesPerSec > 0 AND NumberOfProcessors > 0 Then
- PercentageOfPrivilegedModeCPUToTotalCPU = (AvgPrivProcessor * 100) / AvgProcessor
- If PercentageOfPrivilegedModeCPUToTotalCPU > 20 AND AvgProcessor > 50 AND (AvgContextSwitchesPerSec / NumberOfProcessors) > 15000 Then
- IsAvgThresholdBroken = True
- End If
- End If
- ]]>
- 0 AND MaxPercentProcessorTime > 0 AND NumberOfProcessors > 0 Then
- AvgProcessorTimeDividedByNumberOfProcessors = AvgPercentProcessorTime / NumberOfProcessors
- MaxProcessorTimeDividedByNumberOfProcessors = MaxPercentProcessorTime / NumberOfProcessors
- If AvgProcessorTimeDividedByNumberOfProcessors > 60 Then
- IsAvgThresholdBroken = True
- End If
- If MaxProcessorTimeDividedByNumberOfProcessors > 60 Then
- IsMaxThresholdBroken = True
- End If
-End If
- ]]>
- 0 AND MaxPercentProcessorTime > 0 AND NumberOfProcessors > 0 Then
- AvgProcessorTimeDividedByNumberOfProcessors = AvgPercentProcessorTime / NumberOfProcessors
- MaxProcessorTimeDividedByNumberOfProcessors = MaxPercentProcessorTime / NumberOfProcessors
- If AvgProcessorTimeDividedByNumberOfProcessors > 80 Then
- IsAvgThresholdBroken = True
- End If
- If MaxProcessorTimeDividedByNumberOfProcessors > 80 Then
- IsMaxThresholdBroken = True
- End If
-End If]]>
- 30 Then
- IsAvgThreshold = True
-End If]]>
- 50 Then
- IsAvgThreshold = True
-End If]]>
- 70 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxPagingFile_PercentUsage > 70 Then
- IsMaxThresholdBroken = True
-End If]]>
- 70 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxPagingFile_PercentUsagePeak > 70 Then
- IsMaxThresholdBroken = True
-End If]]>
- 50 Then
-IsAvgThresholdBroken = True
-End If
-
-]]>
- 100 Then
-IsAvgThresholdBroken = True
-End If
-]]>
- 500 Then
-IsMaxThresholdBroken = True
-End If]]>
- 50 Then
-IsAvgThresholdBroken = True
-End If
-]]>
- 100 Then
-IsAvgThresholdBroken = True
-End If
-]]>
- 500 Then
-IsMaxThresholdBroken = True
-End If]]>
- 10 Then
-IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeADAccessDomainControllers_LDAPSearchestimedoutperminute >10 Then
-IsMaxThresholdBroken = True
-End If
-]]>
- 20 Then
-IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeADAccessDomainControllers_LDAPSearchestimedoutperminute >20 Then
-IsMaxThresholdBroken = True
-End If
-]]>
- 50 Then
-IsAvgThresholdBroken = True
-End If
-
-If AvgMSExchangeADAccessDomainControllers_LongrunningLDAPoperationsMin >50 Then
-IsMaxThresholdBroken = True
-End If
-]]>
- 100 Then
-IsAvgThresholdBroken = True
-End If
-
-If AvgMSExchangeADAccessDomainControllers_LongrunningLDAPoperationsMin >100 Then
-IsMaxThresholdBroken = True
-End If]]>
- 1 Then
-IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase_DatabasePageFaultStallssec > 1 Then
-IsMaxThresholdBroken = True
-End If
-]]>
- If this value is non-zero, this indicates that the database is not able to flush dirty pages to the database file fast enough to make pages free for new page allocations.
]]> 249 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_ActiveMailboxDeliveryQueueLength > 249 Then
- IsMaxThresholdBroken = True
-End If]]>
- 199 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_ActiveMailboxDeliveryQueueLength > 199 Then
- IsMaxThresholdBroken = True
-End If]]>
- 249 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_ActiveNon-SmtpDeliveryQueueLength > 249 Then
- IsMaxThresholdBroken = True
-End If]]>
- 199 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_ActiveNon-SmtpDeliveryQueueLength > 199 Then
- IsMaxThresholdBroken = True
-End If]]>
- 199 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_ActiveRemoteDeliveryQueueLength > 199 Then
- IsMaxThresholdBroken = True
-End If]]>
- 349 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_ActiveRemoteDeliveryQueueLength > 349 Then
- IsMaxThresholdBroken = True
-End If]]>
- To resolve this problem, do one or more of the following:
- 99 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_RetryMailboxDeliveryQueueLength > 99 Then
- IsMaxThresholdBroken = True
-End If]]>
- 249 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_RetryMailboxDeliveryQueueLength > 249 Then
- IsMaxThresholdBroken = True
-End If]]>
- Messages that cannot be successfully delivered are subject to various retry, resubmit, and expiration deadlines based on the message's source and destination. Retry is a renewed connection attempt with the destination domain, smart host, or Mailbox server. Resubmit is the act of sending messages back to the Submission queue for the categorizer to reprocess. The message is said to "time-out", or expire, after all delivery efforts have failed during a specified period of time. After a message expires, the sender is notified of the delivery failure. Then the message is deleted from the queue.
-In all three cases of retry, resubmit, or expire, you can manually intervene before the automatic actions are performed on the messages.
-This problem may be caused by any of the following:
-Determine the root cause of the issue by doing one or more of the following:
- 99 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_RetryNon-SmtpDeliveryQueueLength > 99 Then
- IsMaxThresholdBroken = True
-End If]]>
- 99 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_RetryNon-SmtpDeliveryQueueLength > 99 Then
- IsMaxThresholdBroken = True
-End If]]>
- 249 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_RetryRemoteDeliveryQueueLength > 249 Then
- IsMaxThresholdBroken = True
-End If]]>
- 99 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_RetryRemoteDeliveryQueueLength > 99 Then
- IsMaxThresholdBroken = True
-End If]]>
- 249 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_SubmissionQueueLength > 249 Then
- IsMaxThresholdBroken = True
-End If]]>
- 99 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_SubmissionQueueLength > 99 Then
- IsMaxThresholdBroken = True
-End If]]>
- A sustained high Submission Queue Length value may indicate that an excessive amount of inbound messages have over-loaded the categorizer. It may also indicate that there is an issue with message categorization. Message resubmission sends undelivered messages back to the submission queue to be processed again by the categorizer.
-A sustained high Submission Queue Length may be caused by one or more of the following:
- 99 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_UnreachableQueueLength > 99 Then
- IsMaxThresholdBroken = True
-End If]]>
- 249 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_UnreachableQueueLength > 249 Then
- IsMaxThresholdBroken = True
-End If]]>
- By default, the messages in the unreachable queue have the status of Ready. Messages remain in the unreachable queue until they are manually resubmitted by an administrator, removed by an administrator, an Active Directory Exchange topology configuration change is detected or the value specified in the MessageExpirationTimeOut parameter passes.
-To resolve this problem, determine what messages are in the queue and then determine their destination. Using this information correct any configuration issues and ensure that a delivery path exists for these messages. Finally, resubmit the messages in the unreachable message queue. You can do this by using Exchange Queue Viewer or by running the Resume-Message cmdlet in the Exchange Management Shell.
]]> 20 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeExtensibilityAgents_AverageAgentProcessingTimesec > 20 Then
- IsMaxThresholdBroken = True
-End If]]>
- 2999 Then
- IsMaxThresholdBroken = True
-End If
-
-If AvgMSExchangeTransportQueues_AggregateDeliveryQueueLengthAllQueues > 2999 Then
- IsAvgThresholdBroken = True
-End If
-]]>
- 4999 Then
- IsMaxThresholdBroken = True
-End If
-
-If AvgMSExchangeTransportQueues_AggregateDeliveryQueueLengthAllQueues > 4999 Then
- IsAvgThresholdBroken = True
-End If]]>
- 200 Then
- IsMaxThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_LargestDeliveryQueueLength > 200 Then
- IsAvgThresholdBroken = True
-End If]]>
- When this value is high, the server cannot establish a SMTP session to the other Hub Transport or Edge Transport server. Other symptoms you may experience when this threshold is reached are reduced intra-site, inter-site, and external mail flow. This alert may be caused by one or more of the following conditions:
-To resolve this problem, do one or more of the following:
- 0 Then
- IsMaxThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_PoisonQueueLength > 0 Then
- IsAvgThresholdBroken = True
-End If]]>
- 5 Then
- IsMaxThresholdBroken = True
-End If
-
-If MaxMSExchangeTransportQueues_PoisonQueueLength > 5 Then
- IsAvgThresholdBroken = True
-End If]]>
- The poison message queue contains messages that are determined to be potentially harmful to the Microsoft Exchange Server 2007 server after causing a server failure. The messages may be genuinely harmful in their content and format. Alternatively, they may be the results of a poorly-written agent that has caused the Exchange server to fail when it processed the supposedly bad messages.
-Messages remain in the poison message queue until they are manually resumed or removed by an administrator. The messages in the poison message queue are never automatically resumed or expired.
-To resolve this problem, resubmit or remove the messages from the poison message queue.. You can resubmit the messages by using the Exchange Queue Viewer or by running the Resume-Message cmdlet. You can remove messages by using Queue Viewer or by running the Remove-Message cmdlet.
]]> 119 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase_Instances_Versionbucketsallocated > 119 Then
- IsMaxThresholdBroken = True
-End If]]>
- 199 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase_Instances_Versionbucketsallocated > 199 Then
- IsMaxThresholdBroken = True
-End If]]>
- This alert indicates that you have more than 120 version buckets allocated. This alert usually indicates that you have a database transaction that is taking a long time to be saved to disk.
-The number of version buckets may increase to unacceptably high levels because of virus issues, problems with the message queue database integrity, or hard disk drive performance.
]]> 10 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase_Instances_LogRecordStallssec > 10 Then
- IsMaxThresholdBroken = True
-End If
-]]>
- 100 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase_Instances_LogRecordStallssec >100 Then
- IsMaxThresholdBroken = True
-End If
-]]>
- 10 Then
- IsAvgThresholdBroken = True
-End If
-
-If AvgMSExchangeDatabase_Instances_LogThreadsWaiting > 10 Then
- IsMaxThresholdBroken = True
-End If]]>
- 500 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase_Instances_LogGenerationCheckpointDepth > 500 Then
- IsMaxThresholdBroken = True
-End If]]>
- 1000 Then
- IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase_Instances_LogGenerationCheckpointDepth > 1000 Then
- IsMaxThresholdBroken = True
-End If]]>
- To resolve this problem, do one or more of the following:
-1 Then
- IsAvgThresholdBroken = True
-End If
-If MaxNetworkInterface_PacketsOutboundErrors >1 Then
- IsMaxThresholdBroken = True
-End If
-
-]]>
- Instances_LogGenerationCheckpointDepth > 250 Then
-IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase==>Instances_LogGenerationCheckpointDepth > 250 Then
-IsMaxThresholdBroken = True
-End If
-
-]]>
- Instances_LogGenerationCheckpointDepth > 500 Then
-IsAvgThresholdBroken = True
-End If
-
-If MaxMSExchangeDatabase==>Instances_LogGenerationCheckpointDepth > 500 Then
-IsMaxThresholdBroken = True
-End If
-]]>
- The Log Generation Checkpoint Depth performance counter reports the number of transaction log files that have not yet been saved to the database. This number represents the number of transaction log files that must be replayed to the database if the Microsoft Exchange Information Store service process (Store.exe) stops and needs to be restarted. As the log file generation depth increases, the Exchange Information Store startup time increases. If the transaction log file depth of a storage group reaches 5,000, the Extensible Storage Engine (ESE) dismounts all the databases that are in the affected storage group.
-This alert indicates that the checkpoint depth is greater than 2500. The transaction log file depth may grow during periods when the server is busy. However, large values typically occur when there is a failure or when a backup fails.
-User Action:
-To resolve this error, do one or more of the following:
+
+
+
+
+
+
+
+
+ 75 Then
+ IsAvgThresholdBroken = True
+ End If
+
+ If MaxProcessor > 75 Then
+ IsMaxThresholdBroken = True
+ End If
+ ]]>
+ 90 Then
+ IsAvgThresholdBroken = True
+ End If
+
+ If MaxProcessor > 90 Then
+ IsMaxThresholdBroken = True
+ End If
+ ]]>
+ 0 AND AvgNetworkInterfaceCurrentBandwidth > 0 Then
+ PercentNetworkUtilizationThreshold = 50
+ BitsPerSec = Round(AvgNetworkInterfaceBytesTotalPerSec) * 8
+ PercentOfNetworkUtilized = (BitsPerSec * 100) / AvgNetworkInterfaceCurrentBandwidth
+ If AvgNetworkInterfaceBytesTotalPerSec > 0 Then
+ If PercentOfNetworkUtilized > PercentNetworkUtilizationThreshold Then
+ IsAvgThresholdBroken = True
+ End If
+ End If
+End If]]>
+ 0 AND AvgNetworkInterfaceCurrentBandwidth > 0 Then
+ PercentNetworkUtilizationThreshold = 80
+ BitsPerSec = Round(AvgNetworkInterfaceBytesTotalPerSec) * 8
+ PercentOfNetworkUtilized = (BitsPerSec * 100) / AvgNetworkInterfaceCurrentBandwidth
+ If AvgNetworkInterfaceBytesTotalPerSec > 0 Then
+ If PercentOfNetworkUtilized > PercentNetworkUtilizationThreshold Then
+ IsAvgThresholdBroken = True
+ End If
+ End If
+End If]]>
+ (NumberOfProcessors * 10) Then
+ IsAvgThresholdBroken = True
+End If
+If MaxQueue => (NumberOfProcessors * 10) Then
+ IsMaxThresholdBroken = True
+End If
+ ]]>
+ (NumberOfProcessors * 20) Then
+ IsAvgThresholdBroken = True
+ End If
+ If MaxQueue => (NumberOfProcessors * 20) Then
+ IsMaxThresholdBroken = True
+ End If
+ ]]>
+ 0 Then
+ PercentageOfPrivilegedModeCPUToTotalCPU = (AvgPrivProcessor * 100) / AvgProcessor
+ If PercentageOfPrivilegedModeCPUToTotalCPU > 30 AND AvgProcessor > 50 Then
+ IsAvgThresholdBroken = True
+ End If
+End If]]>
+ 0 Then
+ PercentageOfPrivilegedModeCPUToTotalCPU = (MaxPrivProcessor * 100) / MaxProcessor
+ If PercentageOfPrivilegedModeCPUToTotalCPU > 30 AND MaxProcessor > 50 Then
+ IsMaxThresholdBroken = True
+ End If
+End If]]>
+ 1 Then
+ IsAvgThresholdBroken = True
+End If]]>
+ 2 Then
+ IsAvgThresholdBroken = True
+End If
+
+ ]]>
+ 0 AND AvgProcessor > 0 AND AvgContextSwitchesPerSec > 0 AND NumberOfProcessors > 0 Then
+ PercentageOfPrivilegedModeCPUToTotalCPU = (AvgPrivProcessor * 100) / AvgProcessor
+ If PercentageOfPrivilegedModeCPU > 20 AND AvgProcessor > 50 AND (AvgContextSwitchesPerSec / NumberOfProcessors) > 5000 Then
+ IsAvgThresholdBroken = True
+ End If
+ End If
+ ]]>
+ 0 AND AvgProcessor > 0 AND AvgContextSwitchesPerSec > 0 AND NumberOfProcessors > 0 Then
+ PercentageOfPrivilegedModeCPUToTotalCPU = (AvgPrivProcessor * 100) / AvgProcessor
+ If PercentageOfPrivilegedModeCPUToTotalCPU > 20 AND AvgProcessor > 50 AND (AvgContextSwitchesPerSec / NumberOfProcessors) > 15000 Then
+ IsAvgThresholdBroken = True
+ End If
+ End If
+ ]]>
+ 0 AND MaxPercentProcessorTime > 0 AND NumberOfProcessors > 0 Then
+ AvgProcessorTimeDividedByNumberOfProcessors = AvgPercentProcessorTime / NumberOfProcessors
+ MaxProcessorTimeDividedByNumberOfProcessors = MaxPercentProcessorTime / NumberOfProcessors
+ If AvgProcessorTimeDividedByNumberOfProcessors > 60 Then
+ IsAvgThresholdBroken = True
+ End If
+ If MaxProcessorTimeDividedByNumberOfProcessors > 60 Then
+ IsMaxThresholdBroken = True
+ End If
+End If
+ ]]>
+ 0 AND MaxPercentProcessorTime > 0 AND NumberOfProcessors > 0 Then
+ AvgProcessorTimeDividedByNumberOfProcessors = AvgPercentProcessorTime / NumberOfProcessors
+ MaxProcessorTimeDividedByNumberOfProcessors = MaxPercentProcessorTime / NumberOfProcessors
+ If AvgProcessorTimeDividedByNumberOfProcessors > 80 Then
+ IsAvgThresholdBroken = True
+ End If
+ If MaxProcessorTimeDividedByNumberOfProcessors > 80 Then
+ IsMaxThresholdBroken = True
+ End If
+End If]]>
+ 30 Then
+ IsAvgThreshold = True
+End If]]>
+ 50 Then
+ IsAvgThreshold = True
+End If]]>
+ 70 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxPagingFile_PercentUsage > 70 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 70 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxPagingFile_PercentUsagePeak > 70 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 50 Then
+IsAvgThresholdBroken = True
+End If
+
+]]>
+ 100 Then
+IsAvgThresholdBroken = True
+End If
+]]>
+ 500 Then
+IsMaxThresholdBroken = True
+End If]]>
+ 50 Then
+IsAvgThresholdBroken = True
+End If
+]]>
+ 100 Then
+IsAvgThresholdBroken = True
+End If
+]]>
+ 500 Then
+IsMaxThresholdBroken = True
+End If]]>
+ 10 Then
+IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeADAccessDomainControllers_LDAPSearchestimedoutperminute >10 Then
+IsMaxThresholdBroken = True
+End If
+]]>
+ 20 Then
+IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeADAccessDomainControllers_LDAPSearchestimedoutperminute >20 Then
+IsMaxThresholdBroken = True
+End If
+]]>
+ 50 Then
+IsAvgThresholdBroken = True
+End If
+
+If AvgMSExchangeADAccessDomainControllers_LongrunningLDAPoperationsMin >50 Then
+IsMaxThresholdBroken = True
+End If
+]]>
+ 100 Then
+IsAvgThresholdBroken = True
+End If
+
+If AvgMSExchangeADAccessDomainControllers_LongrunningLDAPoperationsMin >100 Then
+IsMaxThresholdBroken = True
+End If]]>
+ 1 Then
+IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase_DatabasePageFaultStallssec > 1 Then
+IsMaxThresholdBroken = True
+End If
+]]>
+ If this value is non-zero, this indicates that the database is not able to flush dirty pages to the database file fast enough to make pages free for new page allocations.
]]> 249 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_ActiveMailboxDeliveryQueueLength > 249 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 199 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_ActiveMailboxDeliveryQueueLength > 199 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 249 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_ActiveNon-SmtpDeliveryQueueLength > 249 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 199 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_ActiveNon-SmtpDeliveryQueueLength > 199 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 199 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_ActiveRemoteDeliveryQueueLength > 199 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 349 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_ActiveRemoteDeliveryQueueLength > 349 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ To resolve this problem, do one or more of the following:
+ 99 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_RetryMailboxDeliveryQueueLength > 99 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 249 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_RetryMailboxDeliveryQueueLength > 249 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ Messages that cannot be successfully delivered are subject to various retry, resubmit, and expiration deadlines based on the message's source and destination. Retry is a renewed connection attempt with the destination domain, smart host, or Mailbox server. Resubmit is the act of sending messages back to the Submission queue for the categorizer to reprocess. The message is said to "time-out", or expire, after all delivery efforts have failed during a specified period of time. After a message expires, the sender is notified of the delivery failure. Then the message is deleted from the queue.
+In all three cases of retry, resubmit, or expire, you can manually intervene before the automatic actions are performed on the messages.
+This problem may be caused by any of the following:
+Determine the root cause of the issue by doing one or more of the following:
+ 99 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_RetryNon-SmtpDeliveryQueueLength > 99 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 99 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_RetryNon-SmtpDeliveryQueueLength > 99 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 249 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_RetryRemoteDeliveryQueueLength > 249 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 99 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_RetryRemoteDeliveryQueueLength > 99 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 249 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_SubmissionQueueLength > 249 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 99 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_SubmissionQueueLength > 99 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ A sustained high Submission Queue Length value may indicate that an excessive amount of inbound messages have over-loaded the categorizer. It may also indicate that there is an issue with message categorization. Message resubmission sends undelivered messages back to the submission queue to be processed again by the categorizer.
+A sustained high Submission Queue Length may be caused by one or more of the following:
+ 99 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_UnreachableQueueLength > 99 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 249 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_UnreachableQueueLength > 249 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ By default, the messages in the unreachable queue have the status of Ready. Messages remain in the unreachable queue until they are manually resubmitted by an administrator, removed by an administrator, an Active Directory Exchange topology configuration change is detected or the value specified in the MessageExpirationTimeOut parameter passes.
+To resolve this problem, determine what messages are in the queue and then determine their destination. Using this information, correct any configuration issues and ensure that a delivery path exists for these messages. Finally, resubmit the messages in the unreachable message queue. You can do this by using Exchange Queue Viewer or by running the Resume-Message cmdlet in the Exchange Management Shell.
]]> 20 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeExtensibilityAgents_AverageAgentProcessingTimesec > 20 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 2999 Then
+ IsMaxThresholdBroken = True
+End If
+
+If AvgMSExchangeTransportQueues_AggregateDeliveryQueueLengthAllQueues > 2999 Then
+ IsAvgThresholdBroken = True
+End If
+]]>
+ 4999 Then
+ IsMaxThresholdBroken = True
+End If
+
+If AvgMSExchangeTransportQueues_AggregateDeliveryQueueLengthAllQueues > 4999 Then
+ IsAvgThresholdBroken = True
+End If]]>
+ 200 Then
+ IsMaxThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_LargestDeliveryQueueLength > 200 Then
+ IsAvgThresholdBroken = True
+End If]]>
+ When this value is high, the server cannot establish an SMTP session to the other Hub Transport or Edge Transport server. Other symptoms you may experience when this threshold is reached are reduced intra-site, inter-site, and external mail flow. This alert may be caused by one or more of the following conditions:
+To resolve this problem, do one or more of the following:
+ 0 Then
+ IsMaxThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_PoisonQueueLength > 0 Then
+ IsAvgThresholdBroken = True
+End If]]>
+ 5 Then
+ IsMaxThresholdBroken = True
+End If
+
+If MaxMSExchangeTransportQueues_PoisonQueueLength > 5 Then
+ IsAvgThresholdBroken = True
+End If]]>
+ The poison message queue contains messages that are determined to be potentially harmful to the Microsoft Exchange Server 2007 server after causing a server failure. The messages may be genuinely harmful in their content and format. Alternatively, they may be the results of a poorly-written agent that has caused the Exchange server to fail when it processed the supposedly bad messages.
+Messages remain in the poison message queue until they are manually resumed or removed by an administrator. The messages in the poison message queue are never automatically resumed or expired.
+To resolve this problem, resubmit or remove the messages from the poison message queue. You can resubmit the messages by using the Exchange Queue Viewer or by running the Resume-Message cmdlet. You can remove messages by using Queue Viewer or by running the Remove-Message cmdlet.
]]> 119 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase_Instances_Versionbucketsallocated > 119 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 199 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase_Instances_Versionbucketsallocated > 199 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ This alert indicates that you have more than 120 version buckets allocated. This alert usually indicates that you have a database transaction that is taking a long time to be saved to disk.
+The number of version buckets may increase to unacceptably high levels because of virus issues, problems with the message queue database integrity, or hard disk drive performance.
]]> 10 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase_Instances_LogRecordStallssec > 10 Then
+ IsMaxThresholdBroken = True
+End If
+]]>
+ 100 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase_Instances_LogRecordStallssec >100 Then
+ IsMaxThresholdBroken = True
+End If
+]]>
+ 10 Then
+ IsAvgThresholdBroken = True
+End If
+
+If AvgMSExchangeDatabase_Instances_LogThreadsWaiting > 10 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 500 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase_Instances_LogGenerationCheckpointDepth > 500 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ 1000 Then
+ IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase_Instances_LogGenerationCheckpointDepth > 1000 Then
+ IsMaxThresholdBroken = True
+End If]]>
+ To resolve this problem, do one or more of the following:
+1 Then
+ IsAvgThresholdBroken = True
+End If
+If MaxNetworkInterface_PacketsOutboundErrors >1 Then
+ IsMaxThresholdBroken = True
+End If
+
+]]>
+ Instances_LogGenerationCheckpointDepth > 250 Then
+IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase==>Instances_LogGenerationCheckpointDepth > 250 Then
+IsMaxThresholdBroken = True
+End If
+
+]]>
+ Instances_LogGenerationCheckpointDepth > 500 Then
+IsAvgThresholdBroken = True
+End If
+
+If MaxMSExchangeDatabase==>Instances_LogGenerationCheckpointDepth > 500 Then
+IsMaxThresholdBroken = True
+End If
+]]>
+ The Log Generation Checkpoint Depth performance counter reports the number of transaction log files that have not yet been saved to the database. This number represents the number of transaction log files that must be replayed to the database if the Microsoft Exchange Information Store service process (Store.exe) stops and needs to be restarted. As the log file generation depth increases, the Exchange Information Store startup time increases. If the transaction log file depth of a storage group reaches 5,000, the Extensible Storage Engine (ESE) dismounts all the databases that are in the affected storage group.
+This alert indicates that the checkpoint depth is greater than 2500. The transaction log file depth may grow during periods when the server is busy. However, large values typically occur when there is a failure or when a backup fails.
+User Action:
+To resolve this error, do one or more of the following:
Disk Reads/sec is the rate of read operations on the disk.
-TechNet Exchange Counter Description:
-Indicates that a paging situation may exist because data was read from disk instead of memory.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
-Disk Writes/sec + Disk Reads/sec = < 50
-NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>Disk Writes/sec is the rate of write operations on the disk.
-TechNet Exchange Counter Description:
-Indicates that a paging situation may exist because data was written to disk instead of being stored in memory.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
-Disk Writes/sec + Disk Reads/sec = < 50
-NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>Disk Reads/sec is the rate of read operations on the disk.
+TechNet Exchange Counter Description:
+Indicates that a paging situation may exist because data was read from disk instead of memory.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
+Disk Writes/sec + Disk Reads/sec = < 50
+NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>Disk Writes/sec is the rate of write operations on the disk.
+TechNet Exchange Counter Description:
+Indicates that a paging situation may exist because data was written to disk instead of being stored in memory.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
+Disk Writes/sec + Disk Reads/sec = < 50
+NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>
-
-
-
-
-
-
-
-
- - Excessive managed memory usage is commonly caused by: -
If this value is non-zero, this indicates that the database is not able to flush dirty pages to the database file fast enough to make pages free for new page allocations.
]]>To resolve this problem, do one or more of the following:
-Messages that cannot be successfully delivered are subject to various retry, resubmit, and expiration deadlines based on the message's source and destination. Retry is a renewed connection attempt with the destination domain, smart host, or Mailbox server. Resubmit is the act of sending messages back to the Submission queue for the categorizer to reprocess. The message is said to "time-out", or expire, after all delivery efforts have failed during a specified period of time. After a message expires, the sender is notified of the delivery failure. Then the message is deleted from the queue.
-In all three cases of retry, resubmit, or expire, you can manually intervene before the automatic actions are performed on the messages.
-This problem may be caused by any of the following:
-Determine the root cause of the issue by doing one or more of the following:
-A sustained high Submission Queue Length value may indicate that an excessive amount of inbound messages have over-loaded the categorizer. It may also indicate that there is an issue with message categorization. Message resubmission sends undelivered messages back to the submission queue to be processed again by the categorizer.
-A sustained high Submission Queue Length may be caused by one or more of the following:
-By default, the messages in the unreachable queue have the status of Ready. Messages remain in the unreachable queue until they are manually resubmitted by an administrator, removed by an administrator, an Active Directory Exchange topology configuration change is detected or the value specified in the MessageExpirationTimeOut parameter passes.
-To resolve this problem, determine what messages are in the queue and then determine their destination. Using this information correct any configuration issues and ensure that a delivery path exists for these messages. Finally, resubmit the messages in the unreachable message queue. You can do this by using Exchange Queue Viewer or by running the Resume-Message cmdlet in the Exchange Management Shell.
]]>When this value is high, the server cannot establish a SMTP session to the other Hub Transport or Edge Transport server. Other symptoms you may experience when this threshold is reached are reduced intra-site, inter-site, and external mail flow. This alert may be caused by one or more of the following conditions:
-To resolve this problem, do one or more of the following:
-The poison message queue contains messages that are determined to be potentially harmful to the Microsoft Exchange Server 2007 server after causing a server failure. The messages may be genuinely harmful in their content and format. Alternatively, they may be the results of a poorly-written agent that has caused the Exchange server to fail when it processed the supposedly bad messages.
-Messages remain in the poison message queue until they are manually resumed or removed by an administrator. The messages in the poison message queue are never automatically resumed or expired.
-To resolve this problem, resubmit or remove the messages from the poison message queue.. You can resubmit the messages by using the Exchange Queue Viewer or by running the Resume-Message cmdlet. You can remove messages by using Queue Viewer or by running the Remove-Message cmdlet.
]]>This alert indicates that you have more than 120 version buckets allocated. This alert usually indicates that you have a database transaction that is taking a long time to be saved to disk.
-The number of version buckets may increase to unacceptably high levels because of virus issues, problems with the message queue database integrity, or hard disk drive performance.
]]>To resolve this problem, do one or more of the following:
-For Web farms in production, it is recommended that a server be removed from rotation prior to updating content for best performance and reliability. For a single Web server in production, content can be updated while the server is under load. The hotfix described in Knowledge Base Article 810281 is of interest to anyone experiencing errors after an application restarts, such as sharing violations with an error similar to "Cannot access file <FileName> because it is being used by another process." - -
An issue involving anti-virus software and applications restarts is fixed in Knowledge Base Article 820746: FIX: Some Antivirus Programs May Cause Web Applications to Restart Unexpectedly for v1.0, and in Knowledge Base Article 821438 for v1.1. - -
Threshold: 0. In a perfect world, the application domain will survive for the life of the process. Excessive values should be investigated, and a new threshold should be set as necessary.
-
- Reference:
- ASP.NET Performance
- ]]>
+
+
+
+
+
+
+
+
+ + Excessive managed memory usage is commonly caused by: +
If this value is non-zero, this indicates that the database is not able to flush dirty pages to the database file fast enough to make pages free for new page allocations.
]]>To resolve this problem, do one or more of the following:
+Messages that cannot be successfully delivered are subject to various retry, resubmit, and expiration deadlines based on the message's source and destination. Retry is a renewed connection attempt with the destination domain, smart host, or Mailbox server. Resubmit is the act of sending messages back to the Submission queue for the categorizer to reprocess. The message is said to "time-out", or expire, after all delivery efforts have failed during a specified period of time. After a message expires, the sender is notified of the delivery failure. Then the message is deleted from the queue.
+In all three cases of retry, resubmit, or expire, you can manually intervene before the automatic actions are performed on the messages.
+This problem may be caused by any of the following:
+Determine the root cause of the issue by doing one or more of the following:
+A sustained high Submission Queue Length value may indicate that an excessive amount of inbound messages have over-loaded the categorizer. It may also indicate that there is an issue with message categorization. Message resubmission sends undelivered messages back to the submission queue to be processed again by the categorizer.
+A sustained high Submission Queue Length may be caused by one or more of the following:
+By default, the messages in the unreachable queue have the status of Ready. Messages remain in the unreachable queue until they are manually resubmitted or removed by an administrator, until an Active Directory Exchange topology configuration change is detected, or until the time specified in the MessageExpirationTimeOut parameter passes.
+To resolve this problem, determine what messages are in the queue and then determine their destination. Using this information correct any configuration issues and ensure that a delivery path exists for these messages. Finally, resubmit the messages in the unreachable message queue. You can do this by using Exchange Queue Viewer or by running the Resume-Message cmdlet in the Exchange Management Shell.
]]>When this value is high, the server cannot establish a SMTP session to the other Hub Transport or Edge Transport server. Other symptoms you may experience when this threshold is reached are reduced intra-site, inter-site, and external mail flow. This alert may be caused by one or more of the following conditions:
+To resolve this problem, do one or more of the following:
+The poison message queue contains messages that are determined to be potentially harmful to the Microsoft Exchange Server 2007 server after causing a server failure. The messages may be genuinely harmful in their content and format. Alternatively, they may be the results of a poorly-written agent that has caused the Exchange server to fail when it processed the supposedly bad messages.
+Messages remain in the poison message queue until they are manually resumed or removed by an administrator. The messages in the poison message queue are never automatically resumed or expired.
+To resolve this problem, resubmit or remove the messages from the poison message queue. You can resubmit the messages by using the Exchange Queue Viewer or by running the Resume-Message cmdlet. You can remove messages by using Queue Viewer or by running the Remove-Message cmdlet.
]]>This alert indicates that you have more than 120 version buckets allocated. This alert usually indicates that you have a database transaction that is taking a long time to be saved to disk.
+The number of version buckets may increase to unacceptably high levels because of virus issues, problems with the message queue database integrity, or hard disk drive performance.
]]>To resolve this problem, do one or more of the following:
+For Web farms in production, it is recommended that a server be removed from rotation prior to updating content for best performance and reliability. For a single Web server in production, content can be updated while the server is under load. The hotfix described in Knowledge Base Article 810281 is of interest to anyone experiencing errors after an application restarts, such as sharing violations with an error similar to "Cannot access file <FileName> because it is being used by another process." + +
An issue involving anti-virus software and applications restarts is fixed in Knowledge Base Article 820746: FIX: Some Antivirus Programs May Cause Web Applications to Restart Unexpectedly for v1.0, and in Knowledge Base Article 821438 for v1.1. + +
Threshold: 0. In a perfect world, the application domain will survive for the life of the process. Excessive values should be investigated, and a new threshold should be set as necessary.
+
+ Reference:
+ ASP.NET Performance
+ ]]>
Disk Reads/sec is the rate of read operations on the disk.
-TechNet Exchange Counter Description:
-Indicates that a paging situation may exist because data was read from disk instead of memory.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
-Disk Writes/sec + Disk Reads/sec = < 50
-NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>Disk Writes/sec is the rate of write operations on the disk.
-TechNet Exchange Counter Description:
-Indicates that a paging situation may exist because data was written to disk instead of being stored in memory.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
-Disk Writes/sec + Disk Reads/sec = < 50
-NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>Disk Reads/sec is the rate of read operations on the disk.
+TechNet Exchange Counter Description:
+Indicates that a paging situation may exist because data was read from disk instead of memory.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
+Disk Writes/sec + Disk Reads/sec = < 50
+NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>Disk Writes/sec is the rate of write operations on the disk.
+TechNet Exchange Counter Description:
+Indicates that a paging situation may exist because data was written to disk instead of being stored in memory.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Client Access Servers, this value should be less than 50 at all times.
+Disk Writes/sec + Disk Reads/sec = < 50
+NOTE: These counters assume that the server is performing only the Client Access server role. These counters are inappropriate for servers performing multiple role functions, because their profile is markedly different.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 20 milliseconds (ms) at all times on average.
-For servers with more than 1,000 users, 20-ms disk times may not be fast enough to return responses to the client to accommodate user load. Check remote procedure call (RPC) averaged latencies to ensure these are within recommended values and adjust the disk subsystem for increased I/Os.
-NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is key to determining which counters (physical disk or logical disk) to look at. Windows Clustering can use volume mount points to overcome the 26-drive limitation of the operating system, so drives may show up as numbers indicating physical disks rather than having drive letters. For more information about volume mount points, see Volume Mount Points and File Systems.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 20 ms on average.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 100 ms at all times on average.
-If disk writes are high, it is possible that read latencies are also affected as a direct correlation with high write times.
-NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is key to determining which counters (physical disk or logical disk) to look at. Windows Clustering can use volume mount points to overcome the 26-drive limitation of the operating system, so drives may show up as numbers indicating physical disks rather than having drive letters. For more information about volume mount points, see Volume Mount Points and File Systems.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average.
-NOTE: Processes such as sync replication can increase latencies for this counter.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 20 milliseconds (ms) at all times on average.
-For servers with more than 1,000 users, 20-ms disk times may not be fast enough to return responses to the client to accommodate user load. Check remote procedure call (RPC) averaged latencies to ensure these are within recommended values and adjust the disk subsystem for increased I/Os.
-NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is key to determining which counters (physical disk or logical disk) to look at. Windows Clustering can use volume mount points to overcome the 26-drive limitation of the operating system, so drives may show up as numbers indicating physical disks rather than having drive letters. For more information about volume mount points, see Volume Mount Points and File Systems.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 100 ms at all times on average.
-If disk writes are high, it is possible that read latencies are also affected as a direct correlation with high write times.
-NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is key to determining which counters (physical disk or logical disk) to look at. Windows Clustering can use volume mount points to overcome the 26-drive limitation of the operating system, so drives may show up as numbers indicating physical disks rather than having drive letters. For more information about volume mount points, see Volume Mount Points and File Systems.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a read of data from the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Counter Description:
-Shows the average time, in seconds, of a write of data to the disk.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average with spikes (maximum values) of less than 50 ms.
]]>
- Avg. Disk sec/Transfer is the time, in seconds, of the average disk transfer.
-TechNet Exchange Counter Description:
-For healthy disks, this counter shows approximately 20 ms. Counter values larger than 20 ms, or with large spikes, indicate a possible disk issue (for example, failure or slow speed).
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 20 ms on average with spikes (maximum values) of less than 50 ms.
]]>
- RPC Requests is the number of client requests that are currently being processed by the information store.
-TechNet Exchange Counter Description:
-Indicates the overall RPC requests that are currently executing within the information store process.
-The maximum value in Exchange 2007 is 500 RPC requests that can execute at any designated time before the information store starts rejecting any new connections from clients.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 70 at all times.
]]>
- RPC latency in milliseconds averaged for the past 1024 packets.
-TechNet Exchange Counter Description:
-Indicates the RPC latency, in milliseconds, averaged for all operations in the last 1,024 packets.
-For information about how clients are affected when overall server RPC averaged latencies increase, see RPC Client Throttling.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should not be higher than 25 ms on average.
-To determine if certain protocols are causing overall RPC latencies, monitor MSExchangeIS Client (*)\RPC Average Latency to separate latencies based on client protocol.
-Cross-reference MSExchangeIS\RPC Client Backoff/sec to ensure higher latencies are not causing client throttling.
]]>RPC Operations/sec is the rate that RPC operations occur.
-TechNet Exchange Counter Description:
-Indicates the current number of RPC operations that are occurring per second.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should closely correspond to historical baselines. Values much higher than expected indicate that the workload has changed, while values much lower than expected indicate a bottleneck preventing client requests from reaching the server.
-For online mode clients, between .75 and 1 IOPS/Mailbox would be considered a moderate user. For more information about how to calculate this value, see the Mailbox Server Storage Design information in the "Understanding IOPS" section of the How to Measure IOPS per Mailbox topic.
-NOTE: Cached Exchange Mode clients have a slightly higher rate due to other sync-related functions.
]]>
- Number of RPC packets in the past 1024 that have latencies longer than 2 seconds.
-TechNet Exchange Counter Description:
-Shows the number of RPC packets in the past 1,024 packets that have latencies longer than 2 seconds.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 1 on average, and should be less than 3 at all times.
]]>
- RPC Average Latency is a server RPC latency in milliseconds averaged for the past 1024 packets.
-TechNet Exchange Counter Description:
-Shows a server RPC latency, in milliseconds, averaged for the past 1,024 packets for a particular client protocol.
-The following is a list of client protocols that can be gathered:
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 50 ms on average.
-Wide disparities between different client types, such as IMAP4, Outlook Anywhere, or Other Clients (MAPI), can help direct troubleshooting to appropriate subcomponents.
]]>RPC Operations/sec is the rate that RPC operations occur.
-TechNet Exchange Counter Description:
-Shows which client protocol is performing an excessive amount of RPC Operations/sec.
-High IMAP4, POP3, or Outlook Anywhere latency can indicate problems with Client Access servers rather than Mailbox servers. This is especially true when Other Clients (which includes MAPI) latency is lower in comparison.
-In some instances, high IMAP latencies could indicate a bottleneck on the Mailbox server in addition to the latencies that the Client Access server is experiencing.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Messages Delivered/sec is the rate that messages are delivered to all recipients.
-TechNet Exchange Counter Description:
-Shows the rate that messages are delivered to all recipients.
-Indicates current message delivery rate to the store.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Messages Sent/sec is the rate that messages are sent to the transport.
-TechNet Exchange Counter Description:
-Shows the rate that messages are sent to transport.
-TechNet Exchange Guidance:
-Used to determine current messages sent to transport.
]]>Messages Submitted/sec is the rate that messages are submitted by clients.
-TechNet Exchange Counter Description:
-Shows the rate that messages are submitted by clients.
-TechNet Exchange Guidance:
-Used to determine current rate that messages are being submitted by clients.
]]>JET Log Records/sec is the rate that database log records are generated while processing requests for the client.
-TechNet Exchange Counter Description:
-Shows the rate that database log records are generated while processing requests for the client.
-TechNet Exchange Guidance:
-Used to determine current load.
]]>JET Pages Read/sec is the rate that database pages are read from disk while processing requests for the client.
-TechNet Exchange Counter Description:
-Shows the rate that database pages are read from disk while processing requests for the client.
-TechNet Exchange Guidance:
-Used to determine current load.
]]>Directory Access: LDAP Reads/sec is the rate LDAP reads occur while processing requests for the client.
-
TechNet Exchange Counter Description:
-Shows the current rate that the Lightweight Directory Access Protocol (LDAP) reads occur while processing requests for the client.
-TechNet Exchange Guidance:
-Used to determine the current LDAP read rate per protocol.
]]>Directory Access: LDAP Searches/sec is the rate LDAP searches occur while processing requests for the client.
-TechNet Exchange Counter Description:
-Shows the current rate that the LDAP searches occur while processing requests for the client.
-TechNet Exchange Guidance:
-Used to determine the current LDAP search rate per protocol.
]]>RPC Client Backoff/sec is the rate that server notifies a client to back off.
-TechNet Exchange Counter Description:
-Shows the rate that the server notifies the client to back off.
-Indicates the rate at which client backoffs are occurring.
-Higher values may indicate that the server may be incurring a higher load resulting in an increase in overall averaged RPC latencies, causing client throttling to occur.
-This can also occur when certain client user actions are being performed. Depending on what the client is doing and the rate at which RPC operations are occurring, it may be normal to see backoffs occurring.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
- The client-reported rate of failed RPCs (since the store was started) due to the Server Too Busy RPC error.
-TechNet Exchange Counter Description:
-Shows the client-reported rate of failed RPCs (since the store was started) due to the Server Too Busy RPC error.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
-Higher values may indicate RPC threads are exhausted or client throttling is occurring for clients running versions of Outlook earlier than Microsoft Office Outlook 2007.
-]]>
- The client-reported number of failed RPCs (since the store was started) due to the Server Too Busy RPC error.
-TechNet Exchange Counter Description:
-The client-reported number of failed RPCs (since the store was started) due to the Server Too Busy RPC error.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
-]]>
- Messages Queued For Submission is the current number of submitted messages which are not yet processed by transport.
-TechNet Exchange Counter Description:
-Shows the current number of submitted messages that are not yet processed by the transport layer.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be below 50 at all times and should not be sustained for more than 15 minutes.
-This may indicate that there are connectivity issues to the transport servers or that backpressure is occurring.
]]>
- Messages Queued For Submission is the current number of submitted messages which are not yet processed by transport.
-TechNet Exchange Counter Description:
-Shows the current number of submitted messages that are not yet processed by the transport layer.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 20 at all times.
]]>
-
- Log Generation Checkpoint Depth represents the amount of work, in count of log files, that will need to be redone or undone to the database file(s) if the process crashes.
-TechNet Exchange Counter Description:
-Represents the amount of work in the log file count that will need to be redone or undone to the database files if the process fails.
-TechNet Exchange Guidance:
-Should be below 500 at all times for the Mailbox server role. A healthy server should indicate between 20 and 30 for each storage group instance.
-If checkpoint depth increases continually for a sustained period, this is an indicator of either a long-running transaction (which will impact the version store) or of a bottleneck involving the database disks.
-Should be below 1,000 at all times for the Edge Transport server role.
]]>
- Database Page Fault Stalls/sec is the rate of page faults that cannot be serviced because there are no pages available for allocation from the database cache. If this counter is non-zero most of the time, the clean threshold may be too low.
-TechNet Exchange Counter Description:
-Shows the rate that database file page requests require of the database cache manager to allocate a new page from the database cache.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
-If this value is non-zero, this indicates that the database is not able to flush dirty pages to the database file fast enough to make pages free for new page allocations.
]]>
- Log Record Stalls/sec is the number of log records that cannot be added to the log buffers per second because they are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
-TechNet Exchange Counter Description:
-Shows the number of log records that cannot be added to the log buffers per second because the log buffers are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
-If I/O log write latencies are high, check for RAID5 or sync replication on log devices.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value average should be below 10 per second with spikes (maximum values) of less than 100 per second.
]]>
- Log Threads Waiting is the number of threads waiting for their data to be written to the log in order to complete an update of the database. If this number is too high, the log may be a bottleneck.
-TechNet Exchange Counter Description:
-Shows the number of threads waiting for their data to be written to the log to complete an update of the database. If this number is too high, the log may be a bottleneck.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 10 on average.
-Regular spikes concurrent with log record stall spikes indicate that the transaction log disks are a bottleneck.
-If the value for log threads waiting is more than the spindles available for the logs, there is a bottleneck on the log disks.
]]>
- Total number of version buckets allocated
-TechNet Exchange Counter Description:
-Shows the total number of version buckets allocated.
-The maximum default version is 16,384. If version buckets reach 70 percent of maximum, the server is at risk of running out of the version store.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 12,000 at all times.
]]>
- I/O Database Reads Average Latency is the average length of time, in milliseconds, per database read operation.
-TechNet Exchange Counter Description:
-Shows the average length of time, in milliseconds, per database read operation.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 20 ms on average with spikes of less than 50 ms.
]]>
- I/O Database Writes Average Latency is the average length of time, in milliseconds, per database write operation.
-TechNet Exchange Counter Description:
-Shows the average length of time, in milliseconds, per database write operation.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 50 ms on average. Spikes of up to 100 ms are acceptable if not accompanied by database page fault stalls.
]]>Database Cache Size (MB) is the amount of system memory (in MegaBytes) used by the database cache manager to hold commonly used information from the database file(s) to prevent file operations. If the database cache size seems to be too small for optimal performance and there is very little available memory on the system (see Memory/Available Bytes), adding more memory to the system may increase performance. If there is a lot of available memory on the system and the database cache size is not growing beyond a certain point, the database cache size may be capped at an artificially low limit. Increasing this limit may increase performance.
-TechNet Exchange Counter Description:
-Shows the amount of system memory, in megabytes, used by the database cache manager to hold commonly used information from the database files to prevent file operations. If the database cache size seems too small for optimal performance and there is little available memory on the system (check the value of Memory/Available Bytes), adding more memory to the system may increase performance. If there is ample memory on the system and the database cache size is not growing beyond a certain point, the database cache size may be capped at an artificially low limit. Increasing this limit may increase performance.
-TechNet Exchange Guidance:
-Maximum value is RAM-2GB (RAM-3GB for servers with sync replication enabled). This and Database Cache Hit % are extremely useful counters for gauging whether a server's performance problems might be resolved by adding more physical memory.
-Use this counter along with store private bytes to determine if there are store memory leaks.
]]>
-
- Database Cache % Hit is the percentage of database file page requests that were fulfilled by the database cache without causing a file operation. If this percentage is too low, the database cache size may be too small.
-TechNet Exchange Counter Description:
-Shows the percentage of database file page requests that were fulfilled by the database cache without causing a file operation. If this percentage is too low, the database cache size may be too small.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be over 90% for companies with majority online mode clients, and should be over 99% for companies with majority cached mode clients.
-If the hit ratio is less than these numbers, the database cache may be insufficient.
]]>
- Log Bytes Write per second is the rate bytes are written to the log.
-TechNet Exchange Counter Description:
-Shows the rate bytes are written to the log.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 10,000,000 at all times.
-With each log file being 1,000,000 bytes in size, 10,000,000 bytes/sec would yield 10 logs/sec. This may indicate a large message being sent or a looping message.
]]>User Count is the number of users connected to the information store.
-TechNet Exchange Counter Description:
-Shows the number of users connected to the information store.
-Used to determine current user load.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
- Replication Receive Queue Size is the number of replication messages waiting to be processed.
-TechNet Exchange Counter Description:
-Shows the number of replication messages waiting to be processed.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 100 at all times.
-This value should return to a minimum value between replication intervals.
]]>
- Slow FindRow Rate is the rate at which the slower FindRow needs to be used in the mailbox store.
-TechNet Exchange Counter Description:
-Shows the rate at which the slower FindRow needs to be used in the mailbox store.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be no more than 10 for any specific mailbox store.
-Higher values indicate applications are crawling or searching mailboxes, which is affecting server performance. These include desktop search engines, customer relationship management (CRM), or other third-party applications.
]]>
- Number of search tasks created per second.
-TechNet Exchange Counter Description:
-Shows the number of search tasks created per second.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 10 at all times.
]]>
- Number of Query Processor threads currently running unoptimized queries.
-TechNet Exchange Counter Description:
-Shows the number of query processor threads currently running queries that are not optimized.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 10 at all times.
]]>
- Number of Search threads currently running unoptimized queries.
-TechNet Exchange Counter Description:
-Shows the number of search threads currently running queries that are not optimized.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 10 at all times.
]]>Categorization Count is the number of categorizations that exist in the mailbox store. Categorizations are created when a user creates a filtered view or performs a search. When the information store must maintain an excessive number of categorizations, performance can be affected.
-TechNet Exchange Counter Description:
-Shows the categorization count in the number of categorizations that exist in the mailbox store. Categorizations are created when a user creates a filtered view or performs a search. When the information store must maintain an excessive number of categorizations, performance can be affected.
-Indicates an overall number of restricted search folders and regular search folders in the system. Sharp increases, especially after implementing any third-party application that takes advantage of MAPI interfaces, should be checked.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
- % Processor Time is the percentage of elapsed time that all of process threads used the processor to execution instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this count.
-TechNet Exchange Counter Description:
-Shows the amount of processor time that is currently being consumed by the Exchange Search service.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 1% of overall CPU typically and not sustained above 5%
]]>
- % Processor Time is the percentage of elapsed time that all of process threads used the processor to execution instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this count.
-TechNet Exchange Counter Description:
-Shows the amount of processor time that is being consumed to update content indexing within the store process.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 10% of what the store process is during steady state.
-Full crawls will increase overall processing time, but should never exceed overall store CPU capacity. Check throttling counters to determine if throttling is occurring due to server performance bottlenecks.
]]>Recent Average Latency of RPCs Used to Obtain Content is the average latency (in milliseconds) of the most recent RPCs to the Exchange Information Store service. These RPCs are used to get content for the filter daemon for the given database.
-TechNet Exchange Counter Description:
-Shows the average latency, in milliseconds, of the most recent RPCs to the Microsoft Exchange Information Store service. These RPCs are used to get content for the filter daemon for the specified database.
-TechNet Exchange Guidance:
-Should coincide with the latencies that Outlook clients are experiencing.
]]>Throttling Delay Value is the total Total time (in milliseconds) a worker thread sleeps before it retrieves a document from the Exchange Information Store service. This is set by the throttling monitor thread.
-TechNet Exchange Counter Description:
-Shows the total time, in milliseconds, a worker thread sleeps before it retrieves a document from the Microsoft Exchange Information Store service. This is set by the throttling monitor thread.
-Indicates the current throttling delay value. If this value is non-zero, this indicates a potential server bottleneck causing delay values to be introduced to throttle the rate at which indexing is occurring.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
- Average Document Indexing Time is the average (in milliseconds) of how long it takes to index documents.
-TechNet Exchange Counter Description:
-Shows the average, in milliseconds, of how long it takes to index documents.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 30 seconds at all time.
]]>Full Crawl Mode Status indicates whether this MDB is going through a full crawl (value=1) or not (value=0).
-TechNet Exchange Counter Description:
-Indicates whether this .mdb file is going through a full crawl (value=1) or not (value=0).
-Used to determine if a full crawl is occurring for any specified database.
-If CPU resources are high, it is possible content indexing is occurring for a database or set of databases.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
- % Processor Time is the percentage of elapsed time that all of process threads used the processor to execution instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this count.
-TechNet Exchange Counter Description:
-Shows the amount of processor time that is being consumed by mailbox assistants.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 5% of overall CPU capacity.
]]>Mailboxes processed/sec is the rate of mailboxes processed by time-based assistants per second.
-TechNet Exchange Counter Description:
-Shows the rate of mailboxes processed by time-based assistants per second.
-Determines current load statistics for this counter.
-TechNet Exchange Guidance:
-Not Applicable.
]]>The number of events polled per second.
-TechNet Exchange Counter Description:
-Shows the number of events polled per second.
-Determines current load statistics for this counter.
-TechNet Exchange Guidance:
-Not Applicable.
]]>Events in queue is the current number of events in the in-memory queue waiting to be processed by the assistants.
-TechNet Exchange Counter Description:
-Shows the number of events in the in-memory queue waiting to be processed by the assistants.
-TechNet Exchange Guidance:
-Should be a low value at all times. High values may indicate a performance bottleneck.
]]>
- Average processing time of the events chosen as interesting.
-TechNet Exchange Counter Description:
-Shows the average processing time of the events chosen.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 2 at all times.
]]>Average Resource Booking Processing Time is the average time to process an event in the Resource Booking Attendant.
-TechNet Exchange Counter Description:
-Shows the average time to process an event in the Resource Booking Attendant.
-TechNet Exchange Guidance:
-Should be a low value at all times. High values may indicate a performance bottleneck.
]]>
- Requests Failed is the total number of failures that occurred while Resource Booking Attendant was processing events.
-TechNet Exchange Counter Description:
-Shows the total number of failures that occurred while the Resource Booking Attendant was processing events.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>Average Calendar Attendant Processing Time is the average time to process an event in the Calendar Attendant.
-TechNet Exchange Counter Description:
-Shows the average time to process an event in the Calendar Attendant.
-TechNet Exchange Guidance:
-Should be a low value at all times. High values may indicate a performance bottleneck.
]]>
- Requests Failed is the total number of failures that occurred while Calendar Attendant was processing events.
-TechNet Exchange Counter Description:
-Shows the total number of failures that occurred while the Calendar Attendant was processing events.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>
- RPC Latency average (msec) is the average latency in milliseconds of RPC requests. Average is calculated over all RPCs since exrpc32 was loaded.
-TechNet Exchange Counter Description:
-Shows the average latency, in milliseconds, of RPC requests. The average is calculated over all RPCs since exrpc32 was loaded.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 100 ms at all times.
]]>
- RPC Requests outstanding is the current number of outstanding RPC requests.
-TechNet Exchange Counter Description:
-Shows the current number of outstanding RPC requests.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>ROP Requests outstanding is the total number of outstanding ROP requests.
-TechNet Exchange Counter Description:
-Shows the total number of outstanding remote operations (ROP) requests.
-Used for determining current load.
-TechNet Exchange Guidance:
-Not Applicable.
]]>RPC Requests outstanding is the current number of outstanding RPC requests.
-TechNet Exchange Counter Description:
-Shows the total number of outstanding RPC requests.
-Used for determining current load.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
- RPC Requests failed (%) is the percent of failed requests in total number of RPC requests. Here, failed means the sum of failed with error code plus failed with exception.
-TechNet Exchange Counter Description:
-Shows the percentage of failed requests in the total number of RPC requests. Here, failed means the sum of failed with error code plus failed with exception.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>RPC Requests sent/sec is the current rate of initiated RPC requests per second.
-TechNet Exchange Counter Description:
-Shows the current rate of initiated RPC requests per second.
-Used for determining current load.
-TechNet Exchange Guidance:
-Not Applicable.
-]]>
- RPC Slow requests (%) is the percent of slow RPC requests among all RPC requests.
-TechNet Exchange Counter Description:
-Shows the percentage of slow RPC requests among all RPC requests.
-A slow RPC request is one that has taken more than 500 ms.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 1 at all times.
]]>RPC Slow requests latency average (msec) is the average latency in milliseconds of slow RPC requests.
-TechNet Exchange Counter Description:
-Shows the average latency, in milliseconds, of slow requests.
-Used for determining the average latencies of RPC slow requests.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
- Hub Servers In Retry is the number of hub servers in retry.
-TechNet Exchange Counter Description:
-Shows the number of Hub Transport servers in retry mode.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>Successful Submissions Per Second
-TechNet Exchange Counter Description:
-Determines current mail submission rate.
-TechNet Exchange Guidance:
-Not Applicable.
-]]>
- Failed Submissions Per Second
-TechNet Exchange Counter Description:
- -TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>
- Temporary Submission Failures per second is the number of temporary submission failures per second.
-TechNet Exchange Counter Description:
-Shows the number of temporary submission failures per second.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>Replay Queue Length is the number of log generations waiting to be replayed.
-TechNet Exchange Counter Description:
-Shows the number of transaction log files waiting to be replayed into the passive copy.
-NOTE: Both nodes of the CCR clusters should be monitored for this counter depending on the passive node.
-TechNet Exchange Guidance:
-Indicates the current replay queue length. Higher values cause longer store mount times when a handoff, failover, or activation is performed.
]]>Seeding Finished % is the finished percentage of seeding. Its value is from 0 to 100 percent.
-TechNet Exchange Counter Description:
-Shows the finished percentage of seeding. Its value is from 0 to 100 percent.
-Used to determine if seeding is occurring for a particular database, which is possibly affecting overall server performance or current network bandwidth.
-TechNet Exchange Guidance:
-Not Applicable.
]]>
-
- Copy Queue Length is the number of log generations waiting to be both copied and inspected successfully.
-TechNet Exchange Counter Description:
-Shows the number of transaction log files waiting to be copied to the passive copy log file folder. A copy is not considered complete until it has been checked for corruption.
-NOTE: Both nodes of the cluster continuous replication (CCR) clusters should be monitored for this counter depending on the passive node.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Mailbox servers, this value should be less than 10 at all times for CCR, and should be less than 1 at all times for local continuous replication (LCR).
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 20 milliseconds (ms) at all times on average.
+For servers with more than 1,000 users, 20-ms disk times may not be fast enough to return responses to the client to accommodate user load. Check remote procedure call (RPC) averaged latencies to ensure these are within recommended values and adjust the disk subsystem for increased I/Os.
+NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is key to determining which counters (physical disk or logical disk) to look at. Windows Clustering can use volume mount points to overcome the 26-drive limitation of the operating system, so drives may show up as numbers indicating physical disks rather than having drive letters. For more information about volume mount points, see Volume Mount Points and File Systems.
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 20 ms on average.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 100 ms at all times on average.
+If disk writes are high, it is possible that read latencies are also affected as a direct correlation with high write times.
+NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is key to determining which counters (physical disk or logical disk) to look at. Windows Clustering can use volume mount points to overcome the 26-drive limitation of the operating system, so drives may show up as numbers indicating physical disks rather than having drive letters. For more information about volume mount points, see Volume Mount Points and File Systems.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average.
+NOTE: Processes such as sync replication can increase latencies for this counter.
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 20 milliseconds (ms) at all times on average.
+For servers with more than 1,000 users, 20-ms disk times may not be fast enough to return responses to the client to accommodate user load. Check remote procedure call (RPC) averaged latencies to ensure these are within recommended values and adjust the disk subsystem for increased I/Os.
+NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is key to determining which counters (physical disk or logical disk) to look at. Windows Clustering can use volume mount points to overcome the 26-drive limitation of the operating system, so drives may show up as numbers indicating physical disks rather than having drive letters. For more information about volume mount points, see Volume Mount Points and File Systems.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 100 ms at all times on average.
+If disk writes are high, it is possible that read latencies are also affected as a direct correlation with high write times.
+NOTE: When looking at disks using Perfmon.exe, an understanding of the underlying disk subsystem is key to determining which counters (physical disk or logical disk) to look at. Windows Clustering can use volume mount points to overcome the 26-drive limitation of the operating system, so drives may show up as numbers indicating physical disks rather than having drive letters. For more information about volume mount points, see Volume Mount Points and File Systems.
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Read is the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a read of data from the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Write is the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Counter Description:
+Shows the average time, in seconds, of a write of data to the disk.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 10 ms on average with spikes (maximum values) of less than 50 ms.
]]>
+ Avg. Disk sec/Transfer is the time, in seconds, of the average disk transfer.
+TechNet Exchange Counter Description:
+For healthy disks, this counter shows approximately 20 ms. Counter values larger than 20 ms, or with large spikes, indicate a possible disk issue (for example, failure or slow speed).
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 20 ms on average with spikes (maximum values) of less than 50 ms.
]]>
+ RPC Requests is the number of client requests that are currently being processed by the information store.
+TechNet Exchange Counter Description:
+Indicates the overall RPC requests that are currently executing within the information store process.
+The maximum value in Exchange 2007 is 500 RPC requests that can execute at any designated time before the information store starts rejecting any new connections from clients.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 70 at all times.
]]>
+ RPC latency in milliseconds averaged for the past 1024 packets.
+TechNet Exchange Counter Description:
+Indicates the RPC latency, in milliseconds, averaged for all operations in the last 1,024 packets.
+For information about how clients are affected when overall server RPC averaged latencies increase, see RPC Client Throttling.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should not be higher than 25 ms on average.
+To determine if certain protocols are causing overall RPC latencies, monitor MSExchangeIS Client (*)\RPC Average Latency to separate latencies based on client protocol.
+Cross-reference MSExchangeIS\RPC Client Backoff/sec to ensure higher latencies are not causing client throttling.
]]>RPC Operations/sec is the rate that RPC operations occur.
+TechNet Exchange Counter Description:
+Indicates the current number of RPC operations that are occurring per second.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should closely correspond to historical baselines. Values much higher than expected indicate that the workload has changed, while values much lower than expected indicate a bottleneck preventing client requests from reaching the server.
+For online mode clients, between .75 and 1 IOPS/Mailbox would be considered a moderate user. For more information about how to calculate this value, see the Mailbox Server Storage Design information in the "Understanding IOPS" section of the How to Measure IOPS per Mailbox topic.
+NOTE: Cached Exchange Mode clients have a slightly higher rate due to other sync-related functions.
]]>
+ Number of RPC packets in the past 1024 that have latencies longer than 2 seconds.
+TechNet Exchange Counter Description:
+Shows the number of RPC packets in the past 1,024 packets that have latencies longer than 2 seconds.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 1 on average, and should be less than 3 at all times.
]]>
+ RPC Average Latency is a server RPC latency in milliseconds averaged for the past 1024 packets.
+TechNet Exchange Counter Description:
+Shows a server RPC latency, in milliseconds, averaged for the past 1,024 packets for a particular client protocol.
+The following is a list of client protocols that can be gathered:
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 50 ms on average.
+Wide disparities between different client types, such as IMAP4, Outlook Anywhere, or Other Clients (MAPI), can help direct troubleshooting to appropriate subcomponents.
]]>RPC Operations/sec is the rate that RPC operations occur.
+TechNet Exchange Counter Description:
+Shows which client protocol is performing an excessive amount of RPC Operations/sec.
+High IMAP4, POP3, or Outlook Anywhere latency can indicate problems with Client Access servers rather than Mailbox servers. This is especially true when Other Clients (which includes MAPI) latency is lower in comparison.
+In some instances, high IMAP latencies could indicate a bottleneck on the Mailbox server in addition to the latencies that the Client Access server is experiencing.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Messages Delivered/sec is the rate that messages are delivered to all recipients.
+TechNet Exchange Counter Description:
+Shows the rate that messages are delivered to all recipients.
+Indicates current message delivery rate to the store.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Messages Sent/sec is the rate that messages are sent to the transport.
+TechNet Exchange Counter Description:
+Shows the rate that messages are sent to transport.
+TechNet Exchange Guidance:
+Used to determine current messages sent to transport.
]]>Messages Submitted/sec is the rate that messages are submitted by clients.
+TechNet Exchange Counter Description:
+Shows the rate that messages are submitted by clients.
+TechNet Exchange Guidance:
+Used to determine current rate that messages are being submitted by clients.
]]>JET Log Records/sec is the rate that database log records are generated while processing requests for the client.
+TechNet Exchange Counter Description:
+Shows the rate that database log records are generated while processing requests for the client.
+TechNet Exchange Guidance:
+Used to determine current load.
]]>JET Pages Read/sec is the rate that database pages are read from disk while processing requests for the client.
+TechNet Exchange Counter Description:
+Shows the rate that database pages are read from disk while processing requests for the client.
+TechNet Exchange Guidance:
+Used to determine current load.
]]>Directory Access: LDAP Reads/sec is the rate LDAP reads occur while processing requests for the client.
+
TechNet Exchange Counter Description:
+Shows the current rate that the Lightweight Directory Access Protocol (LDAP) reads occur while processing requests for the client.
+TechNet Exchange Guidance:
+Used to determine the current LDAP read rate per protocol.
]]>Directory Access: LDAP Searches/sec is the rate LDAP searches occur while processing requests for the client.
+TechNet Exchange Counter Description:
+Shows the current rate that the LDAP searches occur while processing requests for the client.
+TechNet Exchange Guidance:
+Used to determine the current LDAP search rate per protocol.
]]>RPC Client Backoff/sec is the rate that server notifies a client to back off.
+TechNet Exchange Counter Description:
+Shows the rate that the server notifies the client to back off.
+Indicates the rate at which client backoffs are occurring.
+Higher values may indicate that the server may be incurring a higher load resulting in an increase in overall averaged RPC latencies, causing client throttling to occur.
+This can also occur when certain client user actions are being performed. Depending on what the client is doing and the rate at which RPC operations are occurring, it may be normal to see backoffs occurring.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+ The client-reported rate of failed RPCs (since the store was started) due to the Server Too Busy RPC error.
+TechNet Exchange Counter Description:
+Shows the client-reported rate of failed RPCs (since the store was started) due to the Server Too Busy RPC error.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
+Higher values may indicate RPC threads are exhausted or client throttling is occurring for clients running versions of Outlook earlier than Microsoft Office Outlook 2007.
+]]>
+ The client-reported number of failed RPCs (since the store was started) due to the Server Too Busy RPC error.
+TechNet Exchange Counter Description:
+The client-reported number of failed RPCs (since the store was started) due to the Server Too Busy RPC error.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
+]]>
+ Messages Queued For Submission is the current number of submitted messages which are not yet processed by transport.
+TechNet Exchange Counter Description:
+Shows the current number of submitted messages that are not yet processed by the transport layer.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be below 50 at all times and should not be sustained for more than 15 minutes.
+This may indicate that there are connectivity issues to the transport servers or that backpressure is occurring.
]]>
+ Messages Queued For Submission is the current number of submitted messages which are not yet processed by transport.
+TechNet Exchange Counter Description:
+Shows the current number of submitted messages that are not yet processed by the transport layer.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 20 at all times.
]]>
+
+ Log Generation Checkpoint Depth represents the amount of work, in count of log files, that will need to be redone or undone to the database file(s) if the process crashes.
+TechNet Exchange Counter Description:
+Represents the amount of work in the log file count that will need to be redone or undone to the database files if the process fails.
+TechNet Exchange Guidance:
+Should be below 500 at all times for the Mailbox server role. A healthy server should indicate between 20 and 30 for each storage group instance.
+If checkpoint depth increases continually for a sustained period, this is an indicator of either a long-running transaction (which will impact the version store) or of a bottleneck involving the database disks.
+Should be below 1,000 at all times for the Edge Transport server role.
]]>
+ Database Page Fault Stalls/sec is the rate of page faults that cannot be serviced because there are no pages available for allocation from the database cache. If this counter is non-zero most of the time, the clean threshold may be too low.
+TechNet Exchange Counter Description:
+Shows the rate that database file page requests require of the database cache manager to allocate a new page from the database cache.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
+If this value is non-zero, this indicates that the database is not able to flush dirty pages to the database file fast enough to make pages free for new page allocations.
]]>
+ Log Record Stalls/sec is the number of log records that cannot be added to the log buffers per second because they are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
+TechNet Exchange Counter Description:
+Shows the number of log records that cannot be added to the log buffers per second because the log buffers are full. If this counter is non-zero most of the time, the log buffer size may be a bottleneck.
+If I/O log write latencies are high, check for RAID5 or sync replication on log devices.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value average should be below 10 per second with spikes (maximum values) of less than 100 per second.
]]>
+ Log Threads Waiting is the number of threads waiting for their data to be written to the log in order to complete an update of the database. If this number is too high, the log may be a bottleneck.
+TechNet Exchange Counter Description:
+Shows the number of threads waiting for their data to be written to the log to complete an update of the database. If this number is too high, the log may be a bottleneck.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 10 on average.
+Regular spikes concurrent with log record stall spikes indicate that the transaction log disks are a bottleneck.
+If the value for log threads waiting is more than the spindles available for the logs, there is a bottleneck on the log disks.
]]>
+ Total number of version buckets allocated
+TechNet Exchange Counter Description:
+Shows the total number of version buckets allocated.
+The maximum default version is 16,384. If version buckets reach 70 percent of maximum, the server is at risk of running out of the version store.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 12,000 at all times.
]]>
+ I/O Database Reads Average Latency is the average length of time, in milliseconds, per database read operation.
+TechNet Exchange Counter Description:
+Shows the average length of time, in milliseconds, per database read operation.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 20 ms on average with spikes of less than 50 ms.
]]>
+ I/O Database Writes Average Latency is the average length of time, in milliseconds, per database write operation.
+TechNet Exchange Counter Description:
+Shows the average length of time, in milliseconds, per database write operation.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 50 ms on average. Spikes of up to 100 ms are acceptable if not accompanied by database page fault stalls.
]]>Database Cache Size (MB) is the amount of system memory (in MegaBytes) used by the database cache manager to hold commonly used information from the database file(s) to prevent file operations. If the database cache size seems to be too small for optimal performance and there is very little available memory on the system (see Memory/Available Bytes), adding more memory to the system may increase performance. If there is a lot of available memory on the system and the database cache size is not growing beyond a certain point, the database cache size may be capped at an artificially low limit. Increasing this limit may increase performance.
+TechNet Exchange Counter Description:
+Shows the amount of system memory, in megabytes, used by the database cache manager to hold commonly used information from the database files to prevent file operations. If the database cache size seems too small for optimal performance and there is little available memory on the system (check the value of Memory/Available Bytes), adding more memory to the system may increase performance. If there is ample memory on the system and the database cache size is not growing beyond a certain point, the database cache size may be capped at an artificially low limit. Increasing this limit may increase performance.
+TechNet Exchange Guidance:
+Maximum value is RAM-2GB (RAM-3GB for servers with sync replication enabled). This and Database Cache Hit % are extremely useful counters for gauging whether a server's performance problems might be resolved by adding more physical memory.
+Use this counter along with store private bytes to determine if there are store memory leaks.
]]>
+
+ Database Cache % Hit is the percentage of database file page requests that were fulfilled by the database cache without causing a file operation. If this percentage is too low, the database cache size may be too small.
+TechNet Exchange Counter Description:
+Shows the percentage of database file page requests that were fulfilled by the database cache without causing a file operation. If this percentage is too low, the database cache size may be too small.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be over 90% for companies with majority online mode clients, and should be over 99% for companies with majority cached mode clients.
+If the hit ratio is less than these numbers, the database cache may be insufficient.
]]>
+ Log Bytes Write per second is the rate bytes are written to the log.
+TechNet Exchange Counter Description:
+Shows the rate bytes are written to the log.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 10,000,000 at all times.
+With each log file being 1,000,000 bytes in size, 10,000,000 bytes/sec would yield 10 logs/sec. This may indicate a large message being sent or a looping message.
]]>User Count is the number of users connected to the information store.
+TechNet Exchange Counter Description:
+Shows the number of users connected to the information store.
+Used to determine current user load.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+ Replication Receive Queue Size is the number of replication messages waiting to be processed.
+TechNet Exchange Counter Description:
+Shows the number of replication messages waiting to be processed.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 100 at all times.
+This value should return to a minimum value between replication intervals.
]]>
+ Slow FindRow Rate is the rate at which the slower FindRow needs to be used in the mailbox store.
+TechNet Exchange Counter Description:
+Shows the rate at which the slower FindRow needs to be used in the mailbox store.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be no more than 10 for any specific mailbox store.
+Higher values indicate applications are crawling or searching mailboxes, which is affecting server performance. These include desktop search engines, customer relationship management (CRM), or other third-party applications.
]]>
+ Number of search tasks created per second.
+TechNet Exchange Counter Description:
+Shows the number of search tasks created per second.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 10 at all times.
]]>
+ Number of Query Processor threads currently running unoptimized queries.
+TechNet Exchange Counter Description:
+Shows the number of query processor threads currently running queries that are not optimized.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 10 at all times.
]]>
+ Number of Search threads currently running unoptimized queries.
+TechNet Exchange Counter Description:
+Shows the number of search threads currently running queries that are not optimized.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 10 at all times.
]]>Categorization Count is the number of categorizations that exist in the mailbox store. Categorizations are created when a user creates a filtered view or performs a search. When the information store must maintain an excessive number of categorizations, performance can be affected.
+TechNet Exchange Counter Description:
+Shows the categorization count in the number of categorizations that exist in the mailbox store. Categorizations are created when a user creates a filtered view or performs a search. When the information store must maintain an excessive number of categorizations, performance can be affected.
+Indicates an overall number of restricted search folders and regular search folders in the system. Sharp increases, especially after implementing any third-party application that takes advantage of MAPI interfaces, should be checked.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+ % Processor Time is the percentage of elapsed time that all process threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions is included in this count.
+TechNet Exchange Counter Description:
+Shows the amount of processor time that is currently being consumed by the Exchange Search service.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 1% of overall CPU typically and not sustained above 5%.
]]>
+ % Processor Time is the percentage of elapsed time that all process threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions is included in this count.
+TechNet Exchange Counter Description:
+Shows the amount of processor time that is being consumed to update content indexing within the store process.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 10% of what the store process is during steady state.
+Full crawls will increase overall processing time, but should never exceed overall store CPU capacity. Check throttling counters to determine if throttling is occurring due to server performance bottlenecks.
]]>Recent Average Latency of RPCs Used to Obtain Content is the average latency (in milliseconds) of the most recent RPCs to the Exchange Information Store service. These RPCs are used to get content for the filter daemon for the given database.
+TechNet Exchange Counter Description:
+Shows the average latency, in milliseconds, of the most recent RPCs to the Microsoft Exchange Information Store service. These RPCs are used to get content for the filter daemon for the specified database.
+TechNet Exchange Guidance:
+Should coincide with the latencies that Outlook clients are experiencing.
]]>Throttling Delay Value is the total time (in milliseconds) a worker thread sleeps before it retrieves a document from the Exchange Information Store service. This is set by the throttling monitor thread.
+TechNet Exchange Counter Description:
+Shows the total time, in milliseconds, a worker thread sleeps before it retrieves a document from the Microsoft Exchange Information Store service. This is set by the throttling monitor thread.
+Indicates the current throttling delay value. If this value is non-zero, this indicates a potential server bottleneck causing delay values to be introduced to throttle the rate at which indexing is occurring.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+ Average Document Indexing Time is the average (in milliseconds) of how long it takes to index documents.
+TechNet Exchange Counter Description:
+Shows the average, in milliseconds, of how long it takes to index documents.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 30 seconds at all times.
]]>Full Crawl Mode Status indicates whether this MDB is going through a full crawl (value=1) or not (value=0).
+TechNet Exchange Counter Description:
+Indicates whether this .mdb file is going through a full crawl (value=1) or not (value=0).
+Used to determine if a full crawl is occurring for any specified database.
+If CPU resources are high, it is possible content indexing is occurring for a database or set of databases.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+ % Processor Time is the percentage of elapsed time that all process threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions is included in this count.
+TechNet Exchange Counter Description:
+Shows the amount of processor time that is being consumed by mailbox assistants.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 5% of overall CPU capacity.
]]>Mailboxes processed/sec is the rate of mailboxes processed by time-based assistants per second.
+TechNet Exchange Counter Description:
+Shows the rate of mailboxes processed by time-based assistants per second.
+Determines current load statistics for this counter.
+TechNet Exchange Guidance:
+Not Applicable.
]]>The number of events polled per second.
+TechNet Exchange Counter Description:
+Shows the number of events polled per second.
+Determines current load statistics for this counter.
+TechNet Exchange Guidance:
+Not Applicable.
]]>Events in queue is the current number of events in the in-memory queue waiting to be processed by the assistants.
+TechNet Exchange Counter Description:
+Shows the number of events in the in-memory queue waiting to be processed by the assistants.
+TechNet Exchange Guidance:
+Should be a low value at all times. High values may indicate a performance bottleneck.
]]>
+ Average processing time of the events chosen as interesting.
+TechNet Exchange Counter Description:
+Shows the average processing time of the events chosen.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 2 at all times.
]]>Average Resource Booking Processing Time is the average time to process an event in the Resource Booking Attendant.
+TechNet Exchange Counter Description:
+Shows the average time to process an event in the Resource Booking Attendant.
+TechNet Exchange Guidance:
+Should be a low value at all times. High values may indicate a performance bottleneck.
]]>
+ Requests Failed is the total number of failures that occurred while Resource Booking Attendant was processing events.
+TechNet Exchange Counter Description:
+Shows the total number of failures that occurred while the Resource Booking Attendant was processing events.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>Average Calendar Attendant Processing Time is the average time to process an event in the Calendar Attendant.
+TechNet Exchange Counter Description:
+Shows the average time to process an event in the Calendar Attendant.
+TechNet Exchange Guidance:
+Should be a low value at all times. High values may indicate a performance bottleneck.
]]>
+ Requests Failed is the total number of failures that occurred while Calendar Attendant was processing events.
+TechNet Exchange Counter Description:
+Shows the total number of failures that occurred while the Calendar Attendant was processing events.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>
+ RPC Latency average (msec) is the average latency in milliseconds of RPC requests. Average is calculated over all RPCs since exrpc32 was loaded.
+TechNet Exchange Counter Description:
+Shows the average latency, in milliseconds, of RPC requests. The average is calculated over all RPCs since exrpc32 was loaded.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 100 ms at all times.
]]>
+ RPC Requests outstanding is the current number of outstanding RPC requests.
+TechNet Exchange Counter Description:
+Shows the current number of outstanding RPC requests.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>ROP Requests outstanding is the total number of outstanding ROP requests.
+TechNet Exchange Counter Description:
+Shows the total number of outstanding remote operations (ROP) requests.
+Used for determining current load.
+TechNet Exchange Guidance:
+Not Applicable.
]]>RPC Requests outstanding is the current number of outstanding RPC requests.
+TechNet Exchange Counter Description:
+Shows the total number of outstanding RPC requests.
+Used for determining current load.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+ RPC Requests failed (%) is the percent of failed requests in total number of RPC requests. Here, failed means the sum of failed with error code plus failed with exception.
+TechNet Exchange Counter Description:
+Shows the percentage of failed requests in the total number of RPC requests. Here, failed means the sum of failed with error code plus failed with exception.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>RPC Requests sent/sec is the current rate of initiated RPC requests per second.
+TechNet Exchange Counter Description:
+Shows the current rate of initiated RPC requests per second.
+Used for determining current load.
+TechNet Exchange Guidance:
+Not Applicable.
+]]>
+ RPC Slow requests (%) is the percent of slow RPC requests among all RPC requests.
+TechNet Exchange Counter Description:
+Shows the percentage of slow RPC requests among all RPC requests.
+A slow RPC request is one that has taken more than 500 ms.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 1 at all times.
]]>RPC Slow requests latency average (msec) is the average latency in milliseconds of slow RPC requests.
+TechNet Exchange Counter Description:
+Shows the average latency, in milliseconds, of slow requests.
+Used for determining the average latencies of RPC slow requests.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+ Hub Servers In Retry is the number of hub servers in retry.
+TechNet Exchange Counter Description:
+Shows the number of Hub Transport servers in retry mode.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>Successful Submissions Per Second
+TechNet Exchange Counter Description:
+Determines current mail submission rate.
+TechNet Exchange Guidance:
+Not Applicable.
+]]>
+ Failed Submissions Per Second
+TechNet Exchange Counter Description:
+ +TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>
+ Temporary Submission Failures per second is the number of temporary submission failures per second.
+TechNet Exchange Counter Description:
+Shows the number of temporary submission failures per second.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be 0 at all times.
]]>Replay Queue Length is the number of log generations waiting to be replayed.
+TechNet Exchange Counter Description:
+Shows the number of transaction log files waiting to be replayed into the passive copy.
+NOTE: Both nodes of the CCR clusters should be monitored for this counter depending on the passive node.
+TechNet Exchange Guidance:
+Indicates the current replay queue length. Higher values cause longer store mount times when a handoff, failover, or activation is performed.
]]>Seeding Finished % is the finished percentage of seeding. Its value is from 0 to 100 percent.
+TechNet Exchange Counter Description:
+Shows the finished percentage of seeding. Its value is from 0 to 100 percent.
+Used to determine if seeding is occurring for a particular database, which is possibly affecting overall server performance or current network bandwidth.
+TechNet Exchange Guidance:
+Not Applicable.
]]>
+
+ Copy Queue Length is the number of log generations waiting to be both copied and inspected successfully.
+TechNet Exchange Counter Description:
+Shows the number of transaction log files waiting to be copied to the passive copy log file folder. A copy is not considered complete until it has been checked for corruption.
+NOTE: Both nodes of the cluster continuous replication (CCR) clusters should be monitored for this counter depending on the passive node.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Mailbox servers, this value should be less than 10 at all times for CCR, and should be less than 1 at all times for local continuous replication (LCR).
]]>
-
-
-
-
-
-
-
-
- - Excessive managed memory usage is commonly caused by: -
The Messages Queued for Submission performance counter indicates the number of messages in the mailbox store that are ready to be sent and are waiting to be submitted to a transport server.
-High Queuing can be caused by problems with the Microsoft Exchange Mail Submission service on the Mailbox server, higher than normal load on the transport server, or a resource bottleneck on the transport server.
-To resolve this warning, do one or more of the following:
-If the RPC Averaged Latency performance counter has stayed greater than 50 ms for a sustained period, users may experience delays. Another symptom you may experience when this threshold is reached is a popup window on the Microsoft Outlook client stating that Outlook is retrieving data from the Microsoft Exchange server. This alert may be caused by increased remote procedure call (RPC) load, or a bottleneck in one or more resources.
-User Action:
-To resolve this problem, determine the resources that are creating a bottleneck and attempt to mitigate the problem. Possible bottlenecks include disk reads or writes, processor time, available memory, and network configuration.
-
If this counter is sustained over 10, store version buckets is very high, and RPC requests flatlines at the same time, then 623 events might be occurring due to a queued transaction inside Store. See http://blogs.technet.com/mikelag/archive/2008/08/22/search-folder-performance-problem-detection.aspx for additional information
]]>The Log Generation Checkpoint Depth performance counter reports the number of transaction log files that have not yet been saved to the database. This number represents the number of transaction log files that must be replayed to the database if the Microsoft Exchange Information Store service process (Store.exe) stops and needs to be restarted. As the log file generation depth increases, the Exchange Information Store startup time increases. If the transaction log file depth of a storage group reaches 5,000, the Extensible Storage Engine (ESE) dismounts all the databases that are in the affected storage group.
-This alert indicates that the checkpoint depth is greater than 2500. The transaction log file depth may grow during periods when the server is busy. However, large values typically occur when there is a failure or when a backup fails.
-User Action:
-To resolve this error, do one or more of the following:
The Messages Queued for Submission performance counter indicates the number of messages in the public folder store that are ready to be sent and are waiting to be submitted to a transport server.
-High Queuing can be caused by problems with the Microsoft Exchange Mail Submission service on the Mailbox server, higher than normal load on the transport server, or a resource bottleneck on the transport server.
-To resolve this warning, do one or more of the following:
-If this value is non-zero, this indicates that the database is not able to flush dirty pages to the database file fast enough to make pages free for new page allocations.
]]>
+
+
+
+
+
+
+
+
+ + Excessive managed memory usage is commonly caused by: +
The Messages Queued for Submission performance counter indicates the number of messages in the mailbox store that are ready to be sent and are waiting to be submitted to a transport server.
+High Queuing can be caused by problems with the Microsoft Exchange Mail Submission service on the Mailbox server, higher than normal load on the transport server, or a resource bottleneck on the transport server.
+To resolve this warning, do one or more of the following:
+If the RPC Averaged Latency performance counter has stayed greater than 50 ms for a sustained period, users may experience delays. Another symptom you may experience when this threshold is reached is a popup window on the Microsoft Outlook client stating that Outlook is retrieving data from the Microsoft Exchange server. This alert may be caused by increased remote procedure call (RPC) load, or a bottleneck in one or more resources.
+User Action:
+To resolve this problem, determine the resources that are creating a bottleneck and attempt to mitigate the problem. Possible bottlenecks include disk reads or writes, processor time, available memory, and network configuration.
+
If this counter is sustained over 10, store version buckets is very high, and RPC requests flatlines at the same time, then 623 events might be occurring due to a queued transaction inside Store. See http://blogs.technet.com/mikelag/archive/2008/08/22/search-folder-performance-problem-detection.aspx for additional information
]]>The Log Generation Checkpoint Depth performance counter reports the number of transaction log files that have not yet been saved to the database. This number represents the number of transaction log files that must be replayed to the database if the Microsoft Exchange Information Store service process (Store.exe) stops and needs to be restarted. As the log file generation depth increases, the Exchange Information Store startup time increases. If the transaction log file depth of a storage group reaches 5,000, the Extensible Storage Engine (ESE) dismounts all the databases that are in the affected storage group.
+This alert indicates that the checkpoint depth is greater than 2500. The transaction log file depth may grow during periods when the server is busy. However, large values typically occur when there is a failure or when a backup fails.
+User Action:
+To resolve this error, do one or more of the following:
The Messages Queued for Submission performance counter indicates the number of messages in the public folder store that are ready to be sent and are waiting to be submitted to a transport server.
+High Queuing can be caused by problems with the Microsoft Exchange Mail Submission service on the Mailbox server, higher than normal load on the transport server, or a resource bottleneck on the transport server.
+To resolve this warning, do one or more of the following:
+If this value is non-zero, this indicates that the database is not able to flush dirty pages to the database file fast enough to make pages free for new page allocations.
]]>
- Call Answer Queued Messages is the number of messages created and not yet submitted for delivery.
-TechNet Exchange Counter Description:
-Shows the number of messages created and not yet submitted for delivery.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Unified Messaging servers, this value should be less than 50 at all times.
]]>
- Directory Access Failures is the number of times that attempts to access Active Directory failed.
-TechNet Exchange Counter Description:
-Shows the number of times that attempts to access Active Directory failed.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
- Hub Transport Access Failures is the number of times that attempts to access a Hub Transport server failed. This number is only incremented if all Hub Transport servers were unavailable.
-TechNet Exchange Counter Description:
-Shows the number of times that attempts to access a Hub Transport server failed. This number is only incremented if all Hub Transport servers were unavailable.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
- Calls Disconnected on Irrecoverable Internal Error Per Second is the number of calls that were disconnected after an internal system error occurred in the last second.
-TechNet Exchange Counter Description:
-Shows the number of calls that were disconnected after an internal system error occurred in the last second.
-NOTE: The name of this performance counter (Calls Disconnected on Irrecoverable Internal Error Per Second) does not match the name listed in its description.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
- Queued OCS User Event Notifications is the number of notifications that have been created and not yet submitted for delivery.
-TechNet Exchange Counter Description:
-Shows the number of notifications that have been created and not yet submitted for delivery.
-Represents the number of missed call notifications that have been generated in the Office Communications Server environment and have not been submitted for delivery.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
- Operations over Six Seconds is the number of all UM operations that took more than 6 seconds to complete. This is the time during which a caller was waiting for UM to respond.
-TechNet Exchange Counter Description:
-Shows the number of all Unified Messaging operations that took more than six seconds to complete. This is the time during which a caller was waiting for Unified Messaging to respond.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
- Mailbox Server Access Failures is the number of times the system did not access a Mailbox server.
-TechNet Exchange Counter Description:
-Shows the number of times the system did not access a Mailbox server.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
-A non-zero value indicates that Unified Messaging is having problems with MAPI connectivity to mbx servers.
]]>
- Calls Disconnected by Callers During UM Audio Hourglass is the number of calls during which the caller disconnected while Unified Messaging was playing the audio hourglass tones.
-TechNet Exchange Counter Description:
-Shows the number of calls during which the caller disconnected while Unified Messaging was playing the audio hourglass tones.
-TechNet Exchange Guidance:
-For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
-A non-zero value suggests excessive latency between a Unified Messaging server and targeted domain controller.
]]>
+ Call Answer Queued Messages is the number of messages created and not yet submitted for delivery.
+TechNet Exchange Counter Description:
+Shows the number of messages created and not yet submitted for delivery.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Unified Messaging servers, this value should be less than 50 at all times.
]]>
+ Directory Access Failures is the number of times that attempts to access Active Directory failed.
+TechNet Exchange Counter Description:
+Shows the number of times that attempts to access Active Directory failed.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
+ Hub Transport Access Failures is the number of times that attempts to access a Hub Transport server failed. This number is only incremented if all Hub Transport servers were unavailable.
+TechNet Exchange Counter Description:
+Shows the number of times that attempts to access a Hub Transport server failed. This number is only incremented if all Hub Transport servers were unavailable.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
+ Calls Disconnected on Irrecoverable Internal Error Per Second is the number of calls that were disconnected after an internal system error occurred in the last second.
+TechNet Exchange Counter Description:
+Shows the number of calls that were disconnected after an internal system error occurred in the last second.
+NOTE: The name of this performance counter (Calls Disconnected on Irrecoverable Internal Error Per Second) does not match the name listed in its description.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
+ Queued OCS User Event Notifications is the number of notifications that have been created and not yet submitted for delivery.
+TechNet Exchange Counter Description:
+Shows the number of notifications that have been created and not yet submitted for delivery.
+Represents the number of missed call notifications that have been generated in the Office Communications Server environment and have not been submitted for delivery.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
+ Operations over Six Seconds is the number of all UM operations that took more than 6 seconds to complete. This is the time during which a caller was waiting for UM to respond.
+TechNet Exchange Counter Description:
+Shows the number of all Unified Messaging operations that took more than six seconds to complete. This is the time during which a caller was waiting for Unified Messaging to respond.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
]]>
+ Mailbox Server Access Failures is the number of times the system did not access a Mailbox server.
+TechNet Exchange Counter Description:
+Shows the number of times the system did not access a Mailbox server.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
+A non-zero value indicates that Unified Messaging is having problems with MAPI connectivity to mbx servers.
]]>
+ Calls Disconnected by Callers During UM Audio Hourglass is the number of calls during which the caller disconnected while Unified Messaging was playing the audio hourglass tones.
+TechNet Exchange Counter Description:
+Shows the number of calls during which the caller disconnected while Unified Messaging was playing the audio hourglass tones.
+TechNet Exchange Guidance:
+For Exchange Server 2007 Unified Messaging servers, this value should be 0 at all times.
+A non-zero value suggests excessive latency between a Unified Messaging server and targeted domain controller.
]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- InstancesIODatabaseReadsAverageLatencyALL -MinThreshold 20 -MaxThreshold 21.999 -IsOperatorGreaterThan $True -UseMaxValue $True]]>
- InstancesIODatabaseReadsAverageLatencyALL -Operator 'ge' -Threshold 20
-
-#// The -Operator parameter accepts gt for greater than, ge for greater than or equal to, lt for less than,
-#// and le for less than or equal to. -Threshold is the static value for the threshold.]]>
- InstancesIODatabaseWritesAverageLatencyALL -MinThreshold 50 -MaxThreshold 59.999 -IsOperatorGreaterThan $True -UseMaxValue $True]]>
- InstancesIODatabaseWritesAverageLatencyALL -Operator 'gt' -Threshold 50
-
-#// The -Operator parameter accepts gt for greater than, ge for greater than or equal to, lt for less than,
-#// and le for less than or equal to. -Threshold is the static value for the threshold.]]>
-
-
-
-
-
-
-
-
-
-
-
-
- If this counter is sustained over 20, store version buckets is very high, and RPC requests flatlines at the same time, then 623 events might be occurring due to a queued transaction inside Store. See http://blogs.technet.com/mikelag/archive/2008/08/22/search-folder-performance-problem-detection.aspx for additional information
http://technet.microsoft.com/en-us/library/ff367871(v=exchg.141).aspx]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ InstancesIODatabaseReadsAverageLatencyALL -MinThreshold 20 -MaxThreshold 21.999 -IsOperatorGreaterThan $True -UseMaxValue $True]]>
+ InstancesIODatabaseReadsAverageLatencyALL -Operator 'ge' -Threshold 20
+
+#// The -Operator parameter accepts gt for greater than, ge for greater than or equal to, lt for less than,
+#// and le for less than or equal to. -Threshold is the static value for the threshold.]]>
+ InstancesIODatabaseWritesAverageLatencyALL -MinThreshold 50 -MaxThreshold 59.999 -IsOperatorGreaterThan $True -UseMaxValue $True]]>
+ InstancesIODatabaseWritesAverageLatencyALL -Operator 'gt' -Threshold 50
+
+#// The -Operator parameter accepts gt for greater than, ge for greater than or equal to, lt for less than,
+#// and le for less than or equal to. -Threshold is the static value for the threshold.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ If this counter is sustained over 20, store version buckets is very high, and RPC requests flatlines at the same time, then 623 events might be occurring due to a queued transaction inside Store. See http://blogs.technet.com/mikelag/archive/2008/08/22/search-folder-performance-problem-detection.aspx for additional information
http://technet.microsoft.com/en-us/library/ff367871(v=exchg.141).aspx]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- InstancesIODatabaseReadsAttachedAverageLatency -MinThreshold 1 -MaxThreshold 20 -IsOperatorGreaterThan $True -UseMaxValue $False]]>
- InstancesIODatabaseReadsAttachedAverageLatency -MinThreshold 20 -MaxThreshold 49.999 -IsOperatorGreaterThan $True -UseMaxValue $True]]>
- InstancesIODatabaseReadsAttachedAverageLatency -Operator 'gt' -Threshold 20]]>
- InstancesIODatabaseReadsAttachedAverageLatency -Operator 'gt' -Threshold 75]]>
- InstancesIODatabaseWritesAttachedAverageLatency -Operator 'gt' -Threshold 50]]>
- InstancesIODatabaseWritesAttachedAverageLatency -Operator 'gt' -Threshold 100]]>
- InstancesIOLogWritesAverageLatency -Operator 'gt' -Threshold 10]]>
- InstancesIOLogWritesAverageLatency -Operator 'gt' -Threshold 30]]>
- InstancesIODatabaseReadsRecoveryAverageLatency -Operator 'gt' -Threshold 200]]>
- InstancesIODatabaseReadsRecoveryAverageLatency -Operator 'gt' -Threshold 400]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ InstancesIODatabaseReadsAttachedAverageLatency -MinThreshold 1 -MaxThreshold 20 -IsOperatorGreaterThan $True -UseMaxValue $False]]>
+ InstancesIODatabaseReadsAttachedAverageLatency -MinThreshold 20 -MaxThreshold 49.999 -IsOperatorGreaterThan $True -UseMaxValue $True]]>
+ InstancesIODatabaseReadsAttachedAverageLatency -Operator 'gt' -Threshold 20]]>
+ InstancesIODatabaseReadsAttachedAverageLatency -Operator 'gt' -Threshold 75]]>
+ InstancesIODatabaseWritesAttachedAverageLatency -Operator 'gt' -Threshold 50]]>
+ InstancesIODatabaseWritesAttachedAverageLatency -Operator 'gt' -Threshold 100]]>
+ InstancesIOLogWritesAverageLatency -Operator 'gt' -Threshold 10]]>
+ InstancesIOLogWritesAverageLatency -Operator 'gt' -Threshold 30]]>
+ InstancesIODatabaseReadsRecoveryAverageLatency -Operator 'gt' -Threshold 200]]>
+ InstancesIODatabaseReadsRecoveryAverageLatency -Operator 'gt' -Threshold 400]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- InstancesIODatabaseReadsAttachedAverageLatency -MinThreshold 1 -MaxThreshold 20 -IsOperatorGreaterThan $True -UseMaxValue $False]]>
- InstancesIODatabaseReadsAttachedAverageLatency -MinThreshold 20 -MaxThreshold 49.999 -IsOperatorGreaterThan $True -UseMaxValue $True]]>
- InstancesIODatabaseReadsAttachedAverageLatency -Operator 'gt' -Threshold 20]]>
- InstancesIODatabaseReadsAttachedAverageLatency -Operator 'gt' -Threshold 75]]>
- InstancesIODatabaseWritesAttachedAverageLatency -Operator 'gt' -Threshold 50]]>
- InstancesIODatabaseWritesAttachedAverageLatency -Operator 'gt' -Threshold 100]]>
- InstancesIOLogWritesAverageLatency -Operator 'gt' -Threshold 10]]>
- InstancesIOLogWritesAverageLatency -Operator 'gt' -Threshold 30]]>
- InstancesIODatabaseReadsRecoveryAverageLatency -Operator 'gt' -Threshold 200]]>
- InstancesIODatabaseReadsRecoveryAverageLatency -Operator 'gt' -Threshold 400]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ InstancesIODatabaseReadsAttachedAverageLatency -MinThreshold 1 -MaxThreshold 20 -IsOperatorGreaterThan $True -UseMaxValue $False]]>
+ InstancesIODatabaseReadsAttachedAverageLatency -MinThreshold 20 -MaxThreshold 49.999 -IsOperatorGreaterThan $True -UseMaxValue $True]]>
+ InstancesIODatabaseReadsAttachedAverageLatency -Operator 'gt' -Threshold 20]]>
+ InstancesIODatabaseReadsAttachedAverageLatency -Operator 'gt' -Threshold 75]]>
+ InstancesIODatabaseWritesAttachedAverageLatency -Operator 'gt' -Threshold 50]]>
+ InstancesIODatabaseWritesAttachedAverageLatency -Operator 'gt' -Threshold 100]]>
+ InstancesIOLogWritesAverageLatency -Operator 'gt' -Threshold 10]]>
+ InstancesIOLogWritesAverageLatency -Operator 'gt' -Threshold 30]]>
+ InstancesIODatabaseReadsRecoveryAverageLatency -Operator 'gt' -Threshold 200]]>
+ InstancesIODatabaseReadsRecoveryAverageLatency -Operator 'gt' -Threshold 400]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ > .\debugoutput.txt
- #// $CollectionOfPhysicalDiskDiskWritessecAll[$i].QuantizedAvg[$v] >> .\debugoutput.txt
- #// $iIOPS >> .\debugoutput.txt
- #// $iWriteMultiplier >> .\debugoutput.txt
- #// '=========================' >> .\debugoutput.txt
-
- If ($iIOPS -gt 0)
- {
- [void] $alValues.Add($iIOPS)
- }
- Else
- {
- [void] $alValues.Add(0)
- }
-
- }
- $sGeneratedInstanceName = "\\$($CollectionOfPhysicalDiskDiskReadssec[$i].CounterComputer)\$($CollectionOfPhysicalDiskDiskReadssec[$i].CounterObject)($($CollectionOfPhysicalDiskDiskReadssec[$i].CounterInstance))\Calculated IOPS"
- $CollectionOfPhysicalDiskCalculatedIOPSAll.Add($sGeneratedInstanceName,$alValues)
-}
- ]]>
- > .\debugoutput.txt
+ #// $CollectionOfPhysicalDiskDiskWritessecAll[$i].QuantizedAvg[$v] >> .\debugoutput.txt
+ #// $iIOPS >> .\debugoutput.txt
+ #// $iWriteMultiplier >> .\debugoutput.txt
+ #// '=========================' >> .\debugoutput.txt
+
+ If ($iIOPS -gt 0)
+ {
+ [void] $alValues.Add($iIOPS)
+ }
+ Else
+ {
+ [void] $alValues.Add(0)
+ }
+
+ }
+ $sGeneratedInstanceName = "\\$($CollectionOfPhysicalDiskDiskReadssec[$i].CounterComputer)\$($CollectionOfPhysicalDiskDiskReadssec[$i].CounterObject)($($CollectionOfPhysicalDiskDiskReadssec[$i].CounterInstance))\Calculated IOPS"
+ $CollectionOfPhysicalDiskCalculatedIOPSAll.Add($sGeneratedInstanceName,$alValues)
+}
+ ]]>
+
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ | ' >> $h
+ 'Error processing "' + $($Log) + '" ' >> $h + 'Report Generated at: ' + "$((get-date).tostring($global:sDateTimePattern))" + '' >> $h + ' | PALv2' >> $h + ' |
| ' >> $h
+ 'Analysis of "' + $(GetLogNameFromLogParameter) + '" ' >> $h + ## Updated to format with globalised date time JonnyG 2010-06-11 + 'Report Generated at: ' + "$((get-date).tostring($global:sDateTimePattern))" + '' >> $h + ' | PALv2' >> $h + ' |
' >> $h
+ 'Tool Parameters:' >> $h + '' >> $h + ' |
| Name | Value |
|---|---|
| Log Time Range: | ' + $(GetLogTimeRange) + ' |
| Log(s): | ' + $Log + ' |
| AnalysisInterval: | ' + $(ConvertAnalysisIntervalIntoHumanReadableTime) + ' |
| Threshold File: | ' + $($ThresholdFile) + ' |
| AllCounterStats: | ' + $($AllCounterStats) + ' |
| NumberOfThreads: | ' + $($NumberOfThreads) + ' |
| IsLowPriority: | ' + $($IsLowPriority) + ' |
| DisplayReport: | ' + $($DisplayReport) + ' |
| Script Execution Duration: | ' + $($Global:dDurationTime) + ' |
| ' + $sKey + ': | ' + $($global:oPal.QuestionVariables[$sKey]) + ' |
' >> $h
+ 'Alerts by Chronological Order' >> $h + '' >> $h + ' |
Description: This section displays all of the alerts in chronological order.' >> $h + '
| ' >> $h + 'An alert is generated if any of the thresholds were broken during one of the time ranges analyzed. The background of each of the values represents the highest priority threshold that the value broke. See each of the counter' + "'" + 's respective analysis section for more details about what the threshold means.' >> $h + ' |
| No Alerts Found |
|---|
| Time Range | ||||||
|---|---|---|---|---|---|---|
| ' + $TimeRange + ' | Condition | Counter | Min | Avg | Max | Hourly Trend |
| ' + [string] $sPart01 = Add-WhiteFont -Text $($XmlAlert.CONDITIONNAME) -Color $($XmlAlert.CONDITIONCOLOR) + [string] $sPart02 = ' | ' + $($XmlAlert.COUNTER) + ' | ' + [string] $sPart03 = Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlAlert.MIN) -Color $($XmlAlert.MINCOLOR) + [string] $sPart04 = ' | ' + [string] $sPart05 = Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlAlert.AVG) -Color $($XmlAlert.AVGCOLOR) + [string] $sPart06 = ' | ' + [string] $sPart07 = Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlAlert.MAX) -Color $($XmlAlert.MAXCOLOR) + [string] $sPart08 = ' | ' + [string] $sPart09 = Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlAlert.TREND) -Color $($XmlAlert.TRENDCOLOR) + [string] $sPart10 = ' |
' + '' + "$Category" + ' |
Description: ' + $sDescription + '' >> $h + '
| No data to chart |
| No data to chart |
| ' >> $h + #'Overall statistics of each of the counter instances. Min, Avg, and Max are the minimum, average, and Maximum values in the entire log. Hourly Trend is the calculated hourly slope of the entire log. 10%, 20%, and 30% of Outliers Removed is the average of the values after the percentage of outliers furthest away from the average have been removed. This is to help determine if a small percentage of the values are extreme which can skew the average.' >> $h + #' |
| Condition | ' + "$($XmlChart.DATASOURCE)" + ' | Min | Avg | Max | Hourly Trend | Std Deviation | 10% of Outliers Removed | 20% of Outliers Removed | 30% of Outliers Removed |
|---|---|---|---|---|---|---|---|---|---|
| ' + $(Add-WhiteFont -Text $XmlAlert.CONDITIONNAME -Color $XmlAlert.CONDITIONCOLOR) + ' | ' + $sCounterInstance + ' | ' + $(Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlCounterInstance.MIN) -Color $XmlAlert.MINCOLOR) + ' | ' + $(Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlCounterInstance.AVG) -Color $XmlAlert.AVGCOLOR) + ' | ' + $(Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlCounterInstance.MAX) -Color $XmlAlert.MAXCOLOR) + ' | ' + $(Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlCounterInstance.TREND) -Color $XmlAlert.TRENDCOLOR) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.STDDEV) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILENINETYTH) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILEEIGHTYTH) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILESEVENTYTH) + ' |
| OK | ' + $sCounterInstance + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.MIN) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.AVG) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.MAX) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.TREND) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.STDDEV) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILENINETYTH) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILEEIGHTYTH) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILESEVENTYTH) + ' |
| No Thresholds | ' + $sCounterInstance + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.MIN) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.AVG) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.MAX) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.TREND) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.STDDEV) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILENINETYTH) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILEEIGHTYTH) + ' | ' + $(AddThousandsSeparator -Value $XmlCounterInstance.PERCENTILESEVENTYTH) + ' |
| ' >> $h + #'An alert is generated if any of the thresholds were broken during one of the time ranges analyzed. The background of each of the values represents the highest priority threshold that the value broke. See each of the counter' + "'" + 's respective analysis section for more details about what the threshold means.' >> $h + #' |
| No Alerts Found |
|---|
| Time Range | ||||||
|---|---|---|---|---|---|---|
| ' + $TimeRange + ' | Condition | Counter | Min | Avg | Max | Hourly Trend |
| ' + $(Add-WhiteFont -Text $($XmlAlert.CONDITIONNAME) -Color $($XmlAlert.CONDITIONCOLOR)) + ' | ' + $($XmlAlert.COUNTER) + ' | ' + $(Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlAlert.MIN) -Color $($XmlAlert.MINCOLOR)) + ' | ' + $(Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlAlert.AVG) -Color $($XmlAlert.AVGCOLOR)) + ' | ' + $(Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlAlert.MAX) -Color $($XmlAlert.MAXCOLOR)) + ' | ' + $(Add-WhiteFont -Text $(AddThousandsSeparator -Value $XmlAlert.TREND) -Color $($XmlAlert.TRENDCOLOR)) + ' |
| Name | Status |
|---|---|
| ' + $oJob.Name + ' | ' + $sStatus + ' |
| All analyses | Completed |
| Disclaimer: This report was generated using the Performance Analysis of Logs (PAL) tool. The information provided in this report is provided "as-is" and is intended for information purposes only. The software is licensed "as-is". You bear the risk of using it. The contributors give no express warranties, guarantees or conditions. You may have additional consumer rights under your local laws which this license cannot change. To the extent permitted under your local laws, the contributors exclude the implied warranties of merchantability, fitness for a particular purpose and non-infringement. |
-
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+ - Excessive managed memory usage is commonly caused by: -
For Web farms in production, it is recommended that a server be removed from rotation prior to updating content for best performance and reliability. For a single Web server in production, content can be updated while the server is under load. The hotfix described in Knowledge Base Article 810281 is of interest to anyone experiencing errors after an application restarts, such as sharing violations with an error similar to "Cannot access file <FileName> because it is being used by another process." - -
An issue involving anti-virus software and applications restarts is fixed in Knowledge Base Article 820746: FIX: Some Antivirus Programs May Cause Web Applications to Restart Unexpectedly for v1.0, and in Knowledge Base Article 821438 for v1.1. - -
Threshold: 0. In a perfect world, the application domain will survive for the life of the process. Excessive values should be investigated, and a new threshold should be set as necessary.
-
+ Excessive managed memory usage is commonly caused by:
+ For Web farms in production, it is recommended that a server be removed from rotation prior to updating content for best performance and reliability. For a single Web server in production, content can be updated while the server is under load. The hotfix described in Knowledge Base Article 810281 is of interest to anyone experiencing errors after an application restarts, such as sharing violations with an error similar to "Cannot access file <FileName> because it is being used by another process."
+
+ An issue involving anti-virus software and applications restarts is fixed in Knowledge Base Article 820746: FIX: Some Antivirus Programs May Cause Web Applications to Restart Unexpectedly for v1.0, and in Knowledge Base Article 821438 for v1.1.
+
+ Threshold: 0. In a perfect world, the application domain will survive for the life of the process. Excessive values should be investigated, and a new threshold should be set as necessary.
+
- Reference:
- ASP.NET Performance
- ]]>
-
-This analysis checks if the amount of Commited memory is becoming close to the Commit Limit (RAM plus total page file sizes), If so, then identify if you have a memory leak. If no memory leak is identified, then consider adding more physical RAM or increase the size of your page files.]]>
+If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See the "How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment" article in the references section for more information.
+
+References:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx
+
+How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment http://www.codeplex.com/PerfTesting/Wiki/View.aspx?title=How%20To%3a%20Identify%20a%20Disk%20Performance%20Bottleneck%20Using%20SPA&referringTitle=How%20Tos ]]>
+This analysis converts Bytes Total/sec to bits and compares it to the current bandwidth of the network adapter to calculate network utilization. Next, it checks for utilization above 50%.
+
+Reference:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx
+
]]>
+This analysis determines if the average processor queue length exceeds the number of processors. If so, then this could indicate a processor bottleneck. Use this analysis in correlation with Privileged Mode CPU Analysis and the "Excessive Processor Use by Process" analysis.
+
+If there are more tasks ready to run than there are processors, threads queue up. The processor queue is the collection of threads that are ready but not able to be executed by the processor because another active thread is currently executing. A sustained or recurring queue of more threads than number of processors is a good indication of a processor bottleneck.
+
+You can use this counter in conjunction with the Processor\% Processor Time counter to determine if your application can benefit from more CPUs. There is a single queue for processor time, even on multiprocessor computers. Therefore, in a multiprocessor computer, divide the Processor Queue Length (PQL) value by the number of processors servicing the workload.
+
+If the CPU is very busy (90 percent and higher utilization) and the PQL average is consistently higher than the number of processors, then you may have a processor bottleneck that could benefit from additional CPUs. Or, you could reduce the number of threads and queue more at the application level. This will cause less context switching, and less context switching is good for reducing CPU load. The common reason for a high PQL with low CPU utilization is that requests for processor time arrive randomly and threads demand irregular amounts of time from the processor. This means that the processor is not a bottleneck but that it is your threading logic that needs to be improved.
+
+If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See the "How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment" article in the references section for more information.
+
+Reference:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx
+
+How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment http://www.codeplex.com/PerfTesting/Wiki/View.aspx?title=How%20To%3a%20Identify%20a%20Disk%20Performance%20Bottleneck%20Using%20SPA&referringTitle=How%20Tos ]]>
+High privileged mode CPU indicates that computer is spending too much time in system I/O versus real (user mode) work. % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service in called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
+
+This analysis checks to see if privileged mode CPU is consuming more than 30% of total CPU. If so, then the CPU consumption is likely caused by another bottleneck other than the processor such as network, memory, or disk I/O.
+
+References:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx]]>
+If the response times are greater than .015 (15 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
+
+If the response times are greater than .025 (25 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+
]]>
+If the response times are greater than .015 (15 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
+
+If the response times are greater than .025 (25 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+
]]>
+If the response times are greater than .015 (15 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
+
+If the response times are greater than .025 (25 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+
]]>
+If the response times are greater than .015 (15 milliseconds), then the disk subsystem is keeping up with demand, but does not have much overhead left.
+
+If the response times are greater than .025 (25 milliseconds), then noticeable slow downs and performance issues affecting users may be occurring.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+
]]>
+The Performance Monitor "Memory\Free System Page Table Entries" counter is inaccurate on installations of Windows Server 2003 without Service Pack 1. For more information about this counter, see Microsoft Knowledge Base article 894067 "The Performance tool does not accurately show the available Free System Page Table entries in Windows Server 2003" http://go.microsoft.com/fwlink/?linkid=3052&kbid=894067
+
+Fix for Windows Server 2003 SP1 systems with /3GB and low on PTEs: If the system is low on PTEs, running Windows 2003, and using the /3GB switch, then consider using the /USERVA switch to give back some of the memory to the kernel. Note, this only works for Free System PTE issues.
+
+For more information on the USERVA switch, go to:
+How to use the /userva switch with the /3GB switch to tune the User-mode space to a value between 2 GB and 3 GB
+http://support.microsoft.com/kb/316739
+
+Reference:
+Ruling Out Memory-Bound Problems
+http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx
+
+Microsoft Knowledge Base article 894067 "The Performance tool does not accurately show the available Free System Page Table entries in Windows Server 2003" http://go.microsoft.com/fwlink/?linkid=3052&kbid=894067
+
+"How to use the /userva switch with the /3GB switch to tune the User-mode space to a value between 2 GB and 3 GB" http://support.microsoft.com/kb/316739
+
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654
+]]>
+This analysis checks to see if the system is becoming close to the maximum Pool Nonpaged memory size. It does this by estimating the pool sizes taking into consideration /3GB, physical memory size, and 32-bit/64-bit, then determining if the value is higher than 60% of the estimated pool size. If the system becomes close to the maximum size, then the system could experience system wide hangs. Checks both 32-bit and 64-bit memory pools. Warning: The /3GB switch option in the boot.ini file significantly reduces the size of this memory pool.
+
+If the system is low on Paged Pool or non-Paged pool memory, then it is recommended to open a support case with Microsoft to address this. Alternatively, you can use a free and public tool called Poolmon.exe to see what DLLs are using kernel memory (see the article below). Most kernel memory leaks can be tracked back to a usermode process. To identify which user mode process is responsible, reboot the system (so you start off with a clean system), start a performance monitor log intending to run for a week or more capturing the Memory and Process objects, then analyze the perfmon log looking for memory leaks and/or handle leaks in one or more of the processes. In any case, migrating to a 64-bit version of Windows should alleviate this issue.
+
+References:
+How to Use Memory Pool Monitor (Poolmon.exe) to Troubleshoot Kernel Mode Memory Leaks
+http://support.microsoft.com/kb/177415
+
+Ruling Out Memory-Bound Problems
+http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx
+
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654]]>
+This analysis checks to see if the system is becoming close to the maximum Pool Paged memory size. It does this by estimating the pool sizes taking into consideration /3GB, physical memory size, and 32-bit/64-bit, then determining if the value is higher than 60% of the estimated pool size. If the system becomes close to the maximum size, then the system could experience system wide hangs. Checks both 32-bit and 64-bit memory pools. Warning: The /3GB switch option in the boot.ini file significantly reduces the size of this memory pool.
+
+If the system is low on Paged Pool or non-Paged pool memory, then it is recommended to open a support case with Microsoft to address this. Alternatively, you can use a free and public tool called Poolmon.exe to see what DLLs are using kernel memory (see the article below). Most kernel memory leaks can be tracked back to a usermode process. To identify which user mode process is responsible, reboot the system (so you start off with a clean system), start a performance monitor log intending to run for a week or more capturing the Memory and Process objects, then analyze the perfmon log looking for memory leaks and/or handle leaks in one or more of the processes. In any case, migrating to a 64-bit version of Windows should alleviate this issue.
+
+Reference:
+How to Use Memory Pool Monitor (Poolmon.exe) to Troubleshoot Kernel Mode Memory Leaks
+http://support.microsoft.com/kb/177415
+
+Ruling Out Memory-Bound Problems
+http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx
+
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654]]>
+Reference:
+Ruling Out Memory-Bound Problems
+http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx
+
+Detecting Memory Bottlenecks
+http://www.microsoft.com/resources/documentation/windowsnt/4/workstation/reskit/en-us/04memory.mspx?mfr=true ]]>
+This counter should always be below 1000, therefore this analysis checks for values above 1000. Use this analysis in correlation with Available Memory Analysis and Memory Leak Analysis. If all are throwing alerts at the same time, then this may indicate the system is running out of memory. Identify the suspected processes involved and follow the analysis steps mentioned in the Memory Leak analysis.
+
+Reference:
+Ruling Out Memory-Bound Problems
+http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx
+
+ ]]>
+Output Queue Length is the length of the output packet queue (in packets). If this is longer than two, there are delays and the bottleneck should be found and eliminated, if possible. Since the requests are queued by the Network Driver Interface Specification (NDIS) in this implementation, this will always be 0.]]>
+Threshold: As a general rule, context switching rates of less than 5,000 per second per processor are not worth worrying about. If context switching rates exceed 15,000 per second per processor, then there is a constraint.
+
+Significance: Context switching happens when a higher priority thread preempts a lower priority thread that is currently running or when a high priority thread blocks. High levels of context switching can occur when many threads share the same priority level. This often indicates that there are too many threads competing for the processors on the system. If you do not see much processor utilization and you see very low levels of context switching, it could indicate that threads are blocked.
+
+Reference:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx
+
]]>
+Also, keep in mind that newly started processes will initially appear as a memory leak when it is simply normal start-up behavior. A memory leak is when a process continues to consume memory without releasing it over a long period of time.
+
+Use this analysis in correlation with the Available Memory analysis. If you suspect a memory leak condition, then install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
+
+References:
+
+Debug Diagnostic Tool v1.1 http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
+This analysis checks all of the processes to determine if a process has more than 500 threads and if it is on an increasing trend of 50 threads per hour. A process with a large number of threads and/or an aggressive upward trend could indicate a thread leak which typically results in either a memory leak or high context switching. High context switching will result in high privileged mode CPU. ]]>
+If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See the "How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment" article in the references section for more information.
+
+References:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx
+
+How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment http://www.codeplex.com/PerfTesting/Wiki/View.aspx?title=How%20To%3a%20Identify%20a%20Disk%20Performance%20Bottleneck%20Using%20SPA&referringTitle=How%20Tos ]]>
+This analysis checks for % Interrupt Time greater than 30%. If this occurs, then consider updating devices drivers for hardware that correlates to this alert.
+
+References:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx]]>
+
+References:
+
+Detecting Memory Bottlenecks http://www.microsoft.com/resources/documentation/windowsnt/4/workstation/reskit/en-us/04memory.mspx?mfr=true ]]>
+This analysis checks for an increasing trend of 10 MB per hour. Under load, a server might use the System Cache in order to cache I/O activity such as disk. Use in correlation with Process IO Data Operations/sec and Process IO Other Operations/sec Analyses.
+
+References:
+File Cache Performance and Tuning http://technet.microsoft.com/en-us/library/bb742613.aspx
+]]>
+Reference:
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654
+
]]>
+This analysis checks if the percentage of usage is greater than 70%.
+
+Reference:
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654
+
]]>
+Reference:
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654
+
]]>
+Debug Diagnostic Tool v1.1 Download
+http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&DisplayLang=en]]>
+ASP.NET Performance ]]>
+ Reference:
+ ASP.NET Performance
+ ]]>
+ Reference:
+ ASP.NET Performance
+ ]]>
+
+
+ Reference:
+ ASP.NET Performance
+ ]]>
+ Reference:
+ ASP.NET Performance
+ ]]>
+This analysis checks if the amount of Committed memory is becoming close to the Commit Limit (RAM plus total page file sizes). If so, then identify if you have a memory leak. If no memory leak is identified, then consider adding more physical RAM or increasing the size of your page files.]]>
-
-
-
-
-
-References:
-
-
-
-
-]]>
-
-
-
-
-
-This analysis creates a Warning alert for utilization greater than 50% on any processor and creates a critical alert for utilization greater than 80%.
-
-
-If average processor utilization is high based on the thresholds witin this analysis, then check if it is high user mode CPU or high privileged mode. If high privileged mode CPU is suspected, then see the Privileged Mode CPU Analysis. If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment article in the references section for more information.
-
-References:
-
-
-
-
-]]>
-
-
-
-
-High privileged mode CPU indicates that computer is spending too much time in system I/O versus real (user mode) work. % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service in called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
-
-This analysis throws a warning alert if privileged mode CPU is consuming more than 20% of total CPU and a critical alert if consuming More than 30% of total CPU.
-
-
-Next steps
-The CPU consumption might be caused by another busy resource such as network, memory, or disk I/O. High privileged mode CPU can also by caused by high amounts of Context Switches/second. See the High Context Switches/second analysis. The KernRate (KrView) tool can be used to profile the kernel to see what component is consuming the most kernel resources. To see more information about how KernRate can be used to analyze high priviledge mode CPU problems, see Mark Russinovich's blog entry in the references section below.
-
-References:
-
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This analysis determines if the average processor queue length exceeds the number of processors. If so, then this could indicate a processor bottleneck. Use this analysis in correlation with Privileged Mode CPU Analysis and Excessive Processor Use by Process analysis.
-
-
-Note: Due to the way in which this counter is collected, ignore this counter and alerts for it when collected from a virtual computer.
-
-
-If there are more tasks ready to run than there are processors, threads queue up. The processor queue is the collection of threads that are ready but not able to be executed by the processor because another active thread is currently executing. A sustained or recurring queue of more threads than number of processors is a good indication of a processor bottleneck.
-
-
-
-You can use this counter in conjunction with the \Processor\% Processor Time counter to determine if your application can benefit from more CPUs.
-
-
-
-Reference:
-
- ]]>
-
-
-
-
-High context switching is only a problem if overall CPU is high as well. This analysis checks for high CPU, high privileged mode CPU, and high system context switches per sec.
-
-Threshold: As a general rule, context switching rates of greater than 5,000 per second per processor are considered a warning. If context switching rates exceed 10,000 per second per processor, then there is a constraint.
-
-Significance: Context switching happens when a higher priority thread preempts a lower priority thread that is currently running or when a high priority thread blocks. High levels of context switching can occur when many threads share the same priority level. This often indicates that there are too many threads competing for the processors on the system. If you do not see much processor utilization and you see very low levels of context switching, it could indicate that threads are blocked.
-
-Reference:
-Measuring .NET Application Performance
-http://msdn2.microsoft.com/en-us/library/ms998579.aspx
-
]]>
-
-Use this analysis in correlation with the Available Memory analysis. If you suspect a memory leak condition, then install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
-
-References:
-
-Debug Diagnostic Tool v1.1 http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
-
-
-This analysis checks all of the processes to determine if a process has more than 500 threads and if it is on an increasing trend of 50 threads per hour. A process with a large number of threads and/or an aggressive upward trend could indicate a thread leak which typically results in either a memory leak or high context switching. High context switching will result in high privileged mode CPU. ]]>
-
-
-
-
-
-This analysis takes into consideration the workload of the disk queue, the size of the IO, and the response times to compute a good or bad condition in regards to if the disk is overwhelmed or not. If Avg Disk Queue Length is greater than 1 and response times are greater than 25 ms for IO sizes of 64 KB or smaller or 35 ms for IO sizes greater than 64 KB, then the disk is overwhelmed. The reasoning is that the disk has a nearly constant IO demand (Avg Disk Queue Length is a calculation of Transfers/sec and sec/Transfer) and the response times are higher than what it would take a 7200 RPM disk drive to return the appropriate IO sizes. This analysis requires \LogicalDisk(*)\Avg. Disk Queue Length, \LogicalDisk(*)\Avg. Disk Bytes/Transfer, and \LogicalDisk(*)\Avg. Disk sec/Transfer counters to be in the counter log. Instances of _Total are ignored because they are aggregates of all counter instances.
-
-If the PAL generated counter of \LogicalDisk(*)\Disk Overwhelmed has a value of 1 (Warning), then it means that the Avg Disk Queue Length is greater than 1 and the response times (Avg. Disk sec/Transfer) are greater than 15 ms. If this counter has a value of 2 (Critical), then it means thatAvg Disk Queue Length is greater than 1 and the response times are greater than 25 ms for IO of 64 KB or smaller and 35 ms for IO sizes greater than 64 KB.]]>
-
-
-
-
-
-
-
-
-This analysis checks for % Interrupt Time greater than 30%. If this occurs, then consider updating devices drivers for hardware that correlates to this alert.
-
-References:
-Measuring .NET Application Performance
-http://msdn2.microsoft.com/en-us/library/ms998579.aspx]]>
-
-
-
-
-If the response times are greater than 0.015 (15 milliseconds), then the disk subsystem is keeping up with demand.
-
-If the response times are greater than 0.025 (25 milliseconds), then the disk subsystem is likely overwhelmed.
-
-Reference:
-Ruling Out Disk-Bound Problems
-http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
-
]]>
-
-
-
-
-If the response times are greater than 0.015 (15 milliseconds), then the disk subsystem is keeping up with demand.
-
-If the response times are greater than 0.025 (25 milliseconds), then the disk subsystem is likely overwhelmed.
-
-Reference:
-Ruling Out Disk-Bound Problems
-http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
-
]]>
-
-
-
-
-
-The following thresholds are based on the access times of 5400 RPM disk drives. Hard drives that are faster than 5400 RPM such as 7200 RPM and solid state drives should have faster response times. Occasional spikes above 25 ms are normal.
-
-If the response times are less than 0.015 (15 milliseconds), then the disk subsystem is keeping up with demand.
-
-If the response times are greater than 0.025 (25 milliseconds), then the disk subsystem is likely overwhelmed.
-
-Reference:
-Ruling Out Disk-Bound Problems
-http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
]]>
-
-
-
-
-
-The following thresholds are based on the access times of 5400 RPM disk drives. Hard drives that are faster than 5400 RPM such as 7200 RPM and solid state drives should have faster response times. Occasional spikes above 25 ms are normal.
-
-If the response times are less than 0.015 (15 milliseconds), then the disk subsystem is keeping up with demand.
-
-If the response times are greater than 0.025 (25 milliseconds), then the disk subsystem is likely overwhelmed.
-
-Reference:
-Ruling Out Disk-Bound Problems
-http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
]]>
-
-
-This analysis checks for a % Idle Time of less than 10. Zero (0) indicates that the disk contstanly has at least 1 outstanding I/O in the queue.
-
-Reference:
-]]>
-
-
-
-
-The Performance Monitor Memory\Free System Page Table Entries counter is inaccurate on installations of Windows Server 2003 without Service Pack 1. For more information about this counter, see Microsoft Knowledge Base article 894067. The Performance tool does not accurately show the available Free System Page Table entries in Windows Server 2003 http://go.microsoft.com/fwlink/?linkid=3052&kbid=894067
-
-Fix for Win2003 SP1 systems with /3GB and low on PTE's: If the system is low on PTE's, running Windows 2003, and using /3GB switch, then consider using the /USERVA switch to give back some of the memory to the kernel. Note, this only works for Free System PTE issues.
-
-For more information on the USERVA switch, go to:
-How to use the /userva switch with the /3GB switch to tune the User-mode space to a value between 2 GB and 3 GB
-
-Reference:
-
-Ruling Out Memory-Bound Problems
-
-Microsoft Knowledge Base article 894067 The Performance tool does not accurately show the available Free System Page Table entries in Windows Server 2003
-
-How to use the /userva switch with the /3GB switch to tune the User-mode space to a value between 2 GB and 3 GB
-
-How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
-http://support.microsoft.com/kb/889654
-]]>
-
-
-
-
-
-
-
-
-
-This analysis checks to see if the system is becoming close to the maximum Pool Nonpaged memory size. It does this by estimating the pool sizes taking into consideration /3GB, physical memory size, and 32-bit/64-bit, then determining if the value is higher than 60% of the estimated pool size. If the system becomes close to the maximum size, then the system could experience system wide hangs. Checks both 32-bit and 64-bit memory pools. Warning: The /3GB switch option in the boot.ini file significantly reduces the size of this memory pool.
-
-If the system is low on Paged Pool or non-Paged pool memory, then it is recommended to open a support case with Microsoft to address this. Alternatively, you can use a free and public tool called Poolmon.exe to see what DLL's are using kernel memory (see the article below). Most kernel memory leaks can be tracked back to a usermode process. To identify which user mode process is responsible, reboot the system (so you start off with a clean system), start a performance monitor log intending to run for a week or more capturing the Memory and Process objects, then analyze the perfmon log looking for memory leaks and/or handle leaks in one or more of the processes. In any case, migrating to a 64-bit version of Windows should alleviate this issue.
-
-
-References
-How to Use Memory Pool Monitor (Poolmon.exe) to Troubleshoot Kernel Mode Memory Leaks
-http://support.microsoft.com/kb/177415
-
-Ruling Out Memory-Bound Problems
-http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx]]>
-
-
-
-
-
-
-
-
-
-This analysis checks to see if the system is becoming close to the maximum Pool Paged memory size. It does this by estimating the pool sizes taking into consideration /3GB, physical memory size, and 32-bit/64-bit, then determining if the value is higher than 60% of the estimated pool size. If the system becomes close to the maximum size, then the system could experience system wide hangs. Checks both 32-bit and 64-bit memory pools. Warning: The /3GB switch option in the boot.ini file significantly reduces the size of this memory pool.
-
-If the system is low on Paged Pool or non-Paged pool memory, then it is recommended to open a support case with Microsoft to address this. Alternatively, you can use a free and public tool called Poolmon.exe to see what DLL's are using kernel memory (see the article below). Most kernel memory leaks can be tracked back to a usermode process. To identify which user mode process is responsible, reboot the system (so you start off with a clean system), start a performance monitor log intending to run for a week or more capturing the Memory and Process objects, then analyze the perfmon log looking for memory leaks and/or handle leaks in one or more of the processes. In any case, migrating to a 64-bit version of Windows should alleviate this issue.
-
-Reference:
-How to Use Memory Pool Monitor (Poolmon.exe) to Troubleshoot Kernel Mode Memory Leaks
-http://support.microsoft.com/kb/177415
-
-Ruling Out Memory-Bound Problems
-http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx
-
-How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
-http://support.microsoft.com/kb/889654]]>
-
-
-Pages/sec is the rate at which pages are read from or written to disk to resolve hard page faults. It is the sum of Memory\Pages Input/sec and Memory\Pages Output/sec. It is counted in numbers of pages, so it can be compared to other counts of pages, such as Memory\Page Faults/sec, without conversion. It includes pages retrieved to satisfy faults in the file system cache (usually requested by applications) non-cached mapped memory files.
-
-This counter should always be below 1000, therefore this analysis checks for values above 1000. Use this analysis in correlation with Available Memory Analysis and Memory Leak Analysis. If all are throwing alerts at the same time, then this may indicate the system is running out of memory and the suspected processes involved and follow analysis steps mentioned in the Memory Leak analysis.
-
-Reference
-
-
- ]]>
-
-
-Reference:
-
-]]>
-
-
-This analysis checks if System Cache Resident Bytes is consuming more than 25 percent of RAM. Under load, a server might use the System Cache in order to cache I/O activity such as disk. Use in correlation with Process IO Data Operations/sec and Process IO Other Operations/sec Analyses.
-
-References
-File Cache Performance and Tuning http://technet.microsoft.com/en-us/library/bb742613.aspx
-]]>
-
-
-
-
-This analysis checks if the percentage of usage is greater than 70%.
-
-Reference
-How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
-http://support.microsoft.com/kb/889654
-
]]>
-
-
-
-
-This analysis checks if the amount of Commited memory is becoming close to the Commit Limit (RAM plus total page file sizes), If so, then identify if you have a memory leak. If no memory leak is identified, then consider adding more physical RAM or increase the size of your page files..
-
-The following article covers how to identify and troubleshoot system committed memory problems:
-PerfGuide: Out of System Committed Memory]]>
-
-
-If errors are occuring during this analysis, network connectivity could be affected with a potential for random Outlook RPC dialog boxes. See http://technet.microsoft.com/en-us/library/aa997363.aspx and http://technet.microsoft.com/en-us/library/aa995850.asp for more information
]]>
-
-
-
-References:
-]]>
-
-
-Reference:
-]]>
-
-If the server is using an HBA (Host Bus Adapter: This is used to connect to a Storage Area Network SAN) and if the Current Disk Queue Length goes up to 32 frequently, then consider increasing the queue depth on the HBA to allow more concurrent I/O to the SAN. Please consult your SAN administrator before making any changes.]]>
-
-
-This counter typically has a threshold of number of spindles + 2. Due to disk virtualization, it is difficult to determine the true number of physical spindles behind a logical disk or LUN, therefore this threshold is not a direct indicator of a disk performance problem.
-
-This analysis uses a Warning alert for an average disk queue length greater than 2, but correlate this value with disk latency (Avg. Disk sec/Transfer).
-
-References:
-
-]]>
-
-
-
-
-
-This analysis checks if the number of I/O request packets (IRPs) in the disk queue are at 32 or higher. Many SAN vendors use 32 as a default setting for the Host Bus Adapter (HBA) which interfaces into the fibre channel network to connect to one or more SANs. If the queue depth (simultaneous in-flight I/O) is reached frequently, then the queue depth might need to be increased.
-
-Reference:
-]]>
-
-If the size of the Free and Zero page list is large, then it is a good indicator of too much RAM installed on the computer. A large amount of Free and Zero page list size is normal for computers that have been recently powered on or booted. As the system accesses the hard disk placing pages of memory into the working sets of processes, eventually many of those pages of memory will be discarded or paged out. When that happens, the memory is often placed on the Standby list. A large Standby list is preferable because it uses the extra RAM as a disk cache. Available memory is the sum of the Free, Zero, and Standby page lists, so a high amount of available memory with a low amount of Zero and Free is preferred because the system is using the extra RAM as disk cache.]]>
-
-
-
-This analysis provides statistics only. Threads running on a processor will be in either user mode measured using % User Time or in priviledge/kernel mode measured using % Privileged Time. High % User Time indicates a high amount of application code is being executed. This is desirable versus too much time in privileged mode. See the Processor % Privileged Time analysis for more information.]]>
-
-The lazy writer writes 20% of the dirty pages every second, but increases the number of lazy write flushes if it is unable to keep up with the rate of dirty pages.
-
-This analysis checks for more than 100 lazy write flushes/second which might indicate that the lazy writer is falling behind in writing to the disk.]]>
-
-
-
-
-
-
-
-
-
-
-
-
-The \Memory\Long-Term Average Standby Cache Lifetime (s) performance counter measures the average lifetime in seconds of pages in the standby list cache over a long period of time. A low life expectancy could indicate that the pages on the standby list are frequently used i.e. the system has to replenish the standby list with pages from the modified list (pages that must be written to disk first) and the modified list is replenished by the working sets of processes.
-
-The Standby page list is a list of physical pages that are no longer in use (they are available to be reused), but contain data that already exists on disk. If the data is needed again, then it can be served from the Standby list in physical memory instead of going to disk to get it. Therefore, it is part of the system available memory and it acts as disk cache - the larger the disk cache, the less demand on the disk. ]]>
-
-
-
-This analysis checks for % Interrupt Time greater than 30%. If this occurs, then consider updating devices drivers for hardware that correlates to this alert.
-
-References:
-Measuring .NET Application Performance
-http://msdn2.microsoft.com/en-us/library/ms998579.aspx]]>
-
-
-High privileged mode CPU indicates that computer is spending too much time in system I/O versus real (user mode) work. % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service in called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
-
-This analysis throws a warning alert if privileged mode CPU is consuming more than 20% of total CPU and a critical alert if consuming More than 30% of total CPU.
-
-
-Next steps
-The CPU consumption might be caused by another busy resource such as network, memory, or disk I/O. High privileged mode CPU can also by caused by high amounts of Context Switches/second. See the High Context Switches/second analysis. The KernRate (KrView) tool can be used to profile the kernel to see what component is consuming the most kernel resources. To see more information about how KernRate can be used to analyze high priviledge mode CPU problems, see Mark Russinovich's blog entry in the references section below.
-
-References:
-]]>
-
-
-This analysis creates a Warning alert for utilization greater than 50% on any processor and creates a critical alert for utilization greater than 80%.
-
-
-If average processor utilization is high based on the thresholds witin this analysis, then check if it is high user mode CPU or high privileged mode. If high privileged mode CPU is suspected, then see the Privileged Mode CPU Analysis. If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment article in the references section for more information.
-
-References:
-
-
-
]]>
-
-
-
-
-
-
-This analysis gets the instances of \Paging File(*)\% Usage to identify the logical disks that are hosting paging files. Next, it checks each disk for an Avg. Disk Queue Length of greater than 1 when the Avg. Disk sec/Transfer (otherwise known as "response times") is greater than 15 ms. If true, then this counter returns the value 1 (warning). If the response times are greater than 25 ms and if the IO sizes are 64 KB or smaller, then it returns a 2 (critical). If the IO sizes (Avg. Disk Bytes/Transfer) are greater than 64 KB, then it returns a value of 2 (critical) only if the Avg. Disk sec/Transfer is greater than 35 ms. If none of the criteria is met, then it return a 0 (OK).
-]]>
-
-
-
-
-
+
+
+
+
+
+References:
+
+
+
+
+]]>
+
+
+
+
+
+This analysis creates a Warning alert for utilization greater than 50% on any processor and creates a critical alert for utilization greater than 80%.
+
+
+If average processor utilization is high based on the thresholds within this analysis, then check if it is high user mode CPU or high privileged mode. If high privileged mode CPU is suspected, then see the Privileged Mode CPU Analysis. If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See the "How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment" article in the references section for more information.
+
+References:
+
+
+
+
+]]>
+
+
+
+
+High privileged mode CPU indicates that the computer is spending too much time in system I/O versus real (user mode) work. % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
+
+This analysis throws a warning alert if privileged mode CPU is consuming more than 20% of total CPU and a critical alert if consuming More than 30% of total CPU.
+
+
+Next steps
+The CPU consumption might be caused by another busy resource such as network, memory, or disk I/O. High privileged mode CPU can also be caused by high amounts of Context Switches/second. See the High Context Switches/second analysis. The KernRate (KrView) tool can be used to profile the kernel to see what component is consuming the most kernel resources. To see more information about how KernRate can be used to analyze high privileged mode CPU problems, see Mark Russinovich's blog entry in the references section below.
+
+References:
+
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This analysis determines if the average processor queue length exceeds the number of processors. If so, then this could indicate a processor bottleneck. Use this analysis in correlation with Privileged Mode CPU Analysis and Excessive Processor Use by Process analysis.
+
+
+Note: Due to the way in which this counter is collected, ignore this counter and alerts for it when collected from a virtual computer.
+
+
+If there are more tasks ready to run than there are processors, threads queue up. The processor queue is the collection of threads that are ready but not able to be executed by the processor because another active thread is currently executing. A sustained or recurring queue of more threads than number of processors is a good indication of a processor bottleneck.
+
+
+
+You can use this counter in conjunction with the \Processor\% Processor Time counter to determine if your application can benefit from more CPUs.
+
+
+
+Reference:
+
+ ]]>
+
+
+
+
+High context switching is only a problem if overall CPU is high as well. This analysis checks for high CPU, high privileged mode CPU, and high system context switches per sec.
+
+Threshold: As a general rule, context switching rates of greater than 5,000 per second per processor are considered a warning. If context switching rates exceed 10,000 per second per processor, then there is a constraint.
+
+Significance: Context switching happens when a higher priority thread preempts a lower priority thread that is currently running or when a high priority thread blocks. High levels of context switching can occur when many threads share the same priority level. This often indicates that there are too many threads competing for the processors on the system. If you do not see much processor utilization and you see very low levels of context switching, it could indicate that threads are blocked.
+
+Reference:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx
+
]]>
+
+Use this analysis in correlation with the Available Memory analysis. If you suspect a memory leak condition, then install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
+
+References:
+
+Debug Diagnostic Tool v1.1 http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
+
+
+This analysis checks all of the processes to determine if a process has more than 500 threads and if it is on an increasing trend of 50 threads per hour. A process with a large number of threads and/or an aggressive upward trend could indicate a thread leak which typically results in either a memory leak or high context switching. High context switching will result in high privileged mode CPU. ]]>
+
+
+
+
+
+This analysis takes into consideration the workload of the disk queue, the size of the IO, and the response times to compute a good or bad condition in regards to if the disk is overwhelmed or not. If Avg Disk Queue Length is greater than 1 and response times are greater than 25 ms for IO sizes of 64 KB or smaller or 35 ms for IO sizes greater than 64 KB, then the disk is overwhelmed. The reasoning is that the disk has a nearly constant IO demand (Avg Disk Queue Length is a calculation of Transfers/sec and sec/Transfer) and the response times are higher than what it would take a 7200 RPM disk drive to return the appropriate IO sizes. This analysis requires \LogicalDisk(*)\Avg. Disk Queue Length, \LogicalDisk(*)\Avg. Disk Bytes/Transfer, and \LogicalDisk(*)\Avg. Disk sec/Transfer counters to be in the counter log. Instances of _Total are ignored because they are aggregates of all counter instances.
+
+If the PAL generated counter of \LogicalDisk(*)\Disk Overwhelmed has a value of 1 (Warning), then it means that the Avg Disk Queue Length is greater than 1 and the response times (Avg. Disk sec/Transfer) are greater than 15 ms. If this counter has a value of 2 (Critical), then it means that Avg Disk Queue Length is greater than 1 and the response times are greater than 25 ms for IO of 64 KB or smaller and 35 ms for IO sizes greater than 64 KB.
+
+
+
+
+
+
+
+
+This analysis checks for % Interrupt Time greater than 30%. If this occurs, then consider updating devices drivers for hardware that correlates to this alert.
+
+References:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx]]>
+
+
+
+
+If the response times are less than 0.015 (15 milliseconds), then the disk subsystem is keeping up with demand.
+
+If the response times are greater than 0.025 (25 milliseconds), then the disk subsystem is likely overwhelmed.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+
]]>
+
+
+
+
+If the response times are less than 0.015 (15 milliseconds), then the disk subsystem is keeping up with demand.
+
+If the response times are greater than 0.025 (25 milliseconds), then the disk subsystem is likely overwhelmed.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
+
]]>
+
+
+
+
+
+The following thresholds are based on the access times of 5400 RPM disk drives. Hard drives that are faster than 5400 RPM such as 7200 RPM and solid state drives should have faster response times. Occasional spikes above 25 ms are normal.
+
+If the response times are less than 0.015 (15 milliseconds), then the disk subsystem is keeping up with demand.
+
+If the response times are greater than 0.025 (25 milliseconds), then the disk subsystem is likely overwhelmed.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
]]>
+
+
+
+
+
+The following thresholds are based on the access times of 5400 RPM disk drives. Hard drives that are faster than 5400 RPM such as 7200 RPM and solid state drives should have faster response times. Occasional spikes above 25 ms are normal.
+
+If the response times are less than 0.015 (15 milliseconds), then the disk subsystem is keeping up with demand.
+
+If the response times are greater than 0.025 (25 milliseconds), then the disk subsystem is likely overwhelmed.
+
+Reference:
+Ruling Out Disk-Bound Problems
+http://technet.microsoft.com/en-us/library/5bcdd349-dcc6-43eb-9dc3-54175f7061ad.aspx
]]>
+
+
+This analysis checks for a % Idle Time of less than 10. Zero (0) indicates that the disk constantly has at least 1 outstanding I/O in the queue.
+
+Reference:
+]]>
+
+
+
+
+The Performance Monitor Memory\Free System Page Table Entries counter is inaccurate on installations of Windows Server 2003 without Service Pack 1. For more information about this counter, see Microsoft Knowledge Base article 894067. The Performance tool does not accurately show the available Free System Page Table entries in Windows Server 2003 http://go.microsoft.com/fwlink/?linkid=3052&kbid=894067
+
+Fix for Win2003 SP1 systems with /3GB and low on PTE's: If the system is low on PTE's, running Windows 2003, and using /3GB switch, then consider using the /USERVA switch to give back some of the memory to the kernel. Note, this only works for Free System PTE issues.
+
+For more information on the USERVA switch, go to:
+How to use the /userva switch with the /3GB switch to tune the User-mode space to a value between 2 GB and 3 GB
+
+Reference:
+
+Ruling Out Memory-Bound Problems
+
+Microsoft Knowledge Base article 894067 The Performance tool does not accurately show the available Free System Page Table entries in Windows Server 2003
+
+How to use the /userva switch with the /3GB switch to tune the User-mode space to a value between 2 GB and 3 GB
+
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654
+]]>
+
+
+
+
+
+
+
+
+
+This analysis checks to see if the system is becoming close to the maximum Pool Nonpaged memory size. It does this by estimating the pool sizes taking into consideration /3GB, physical memory size, and 32-bit/64-bit, then determining if the value is higher than 60% of the estimated pool size. If the system becomes close to the maximum size, then the system could experience system wide hangs. Checks both 32-bit and 64-bit memory pools. Warning: The /3GB switch option in the boot.ini file significantly reduces the size of this memory pool.
+
+If the system is low on Paged Pool or non-Paged pool memory, then it is recommended to open a support case with Microsoft to address this. Alternatively, you can use a free and public tool called Poolmon.exe to see what DLL's are using kernel memory (see the article below). Most kernel memory leaks can be tracked back to a usermode process. To identify which user mode process is responsible, reboot the system (so you start off with a clean system), start a performance monitor log intending to run for a week or more capturing the Memory and Process objects, then analyze the perfmon log looking for memory leaks and/or handle leaks in one or more of the processes. In any case, migrating to a 64-bit version of Windows should alleviate this issue.
+
+
+References
+How to Use Memory Pool Monitor (Poolmon.exe) to Troubleshoot Kernel Mode Memory Leaks
+http://support.microsoft.com/kb/177415
+
+Ruling Out Memory-Bound Problems
+http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx]]>
+
+
+
+
+
+
+
+
+
+This analysis checks to see if the system is becoming close to the maximum Pool Paged memory size. It does this by estimating the pool sizes taking into consideration /3GB, physical memory size, and 32-bit/64-bit, then determining if the value is higher than 60% of the estimated pool size. If the system becomes close to the maximum size, then the system could experience system wide hangs. Checks both 32-bit and 64-bit memory pools. Warning: The /3GB switch option in the boot.ini file significantly reduces the size of this memory pool.
+
+If the system is low on Paged Pool or non-Paged pool memory, then it is recommended to open a support case with Microsoft to address this. Alternatively, you can use a free and public tool called Poolmon.exe to see what DLL's are using kernel memory (see the article below). Most kernel memory leaks can be tracked back to a usermode process. To identify which user mode process is responsible, reboot the system (so you start off with a clean system), start a performance monitor log intending to run for a week or more capturing the Memory and Process objects, then analyze the perfmon log looking for memory leaks and/or handle leaks in one or more of the processes. In any case, migrating to a 64-bit version of Windows should alleviate this issue.
+
+Reference:
+How to Use Memory Pool Monitor (Poolmon.exe) to Troubleshoot Kernel Mode Memory Leaks
+http://support.microsoft.com/kb/177415
+
+Ruling Out Memory-Bound Problems
+http://technet.microsoft.com/en-us/library/7a44b064-8872-4edf-aac7-36b2a17f662a.aspx
+
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654]]>
+
+
+Pages/sec is the rate at which pages are read from or written to disk to resolve hard page faults. It is the sum of Memory\Pages Input/sec and Memory\Pages Output/sec. It is counted in numbers of pages, so it can be compared to other counts of pages, such as Memory\Page Faults/sec, without conversion. It includes pages retrieved to satisfy faults in the file system cache (usually requested by applications) and non-cached mapped memory files.
+
+This counter should always be below 1000, therefore this analysis checks for values above 1000. Use this analysis in correlation with Available Memory Analysis and Memory Leak Analysis. If all are throwing alerts at the same time, then this may indicate the system is running out of memory; identify the suspected processes involved and follow the analysis steps mentioned in the Memory Leak analysis.
+
+Reference
+
+
+ ]]>
+
+
+Reference:
+
+]]>
+
+
+This analysis checks if System Cache Resident Bytes is consuming more than 25 percent of RAM. Under load, a server might use the System Cache in order to cache I/O activity such as disk. Use in correlation with Process IO Data Operations/sec and Process IO Other Operations/sec Analyses.
+
+References
+File Cache Performance and Tuning http://technet.microsoft.com/en-us/library/bb742613.aspx
+]]>
+
+
+
+
+This analysis checks if the percentage of usage is greater than 70%.
+
+Reference
+How to determine the appropriate page file size for 64-bit versions of Windows Server 2003 or Windows XP
+http://support.microsoft.com/kb/889654
+
]]>
+
+
+
+
+This analysis checks if the amount of Committed memory is becoming close to the Commit Limit (RAM plus total page file sizes). If so, then identify if you have a memory leak. If no memory leak is identified, then consider adding more physical RAM or increase the size of your page files.
+
+The following article covers how to identify and troubleshoot system committed memory problems:
+PerfGuide: Out of System Committed Memory]]>
+
+
+If errors are occurring during this analysis, network connectivity could be affected with a potential for random Outlook RPC dialog boxes. See http://technet.microsoft.com/en-us/library/aa997363.aspx and http://technet.microsoft.com/en-us/library/aa995850.asp for more information
]]>
+
+
+
+References:
+]]>
+
+
+Reference:
+]]>
+
+If the server is using an HBA (Host Bus Adapter: This is used to connect to a Storage Area Network SAN) and if the Current Disk Queue Length goes up to 32 frequently, then consider increasing the queue depth on the HBA to allow more concurrent I/O to the SAN. Please consult your SAN administrator before making any changes.]]>
+
+
+This counter typically has a threshold of number of spindles + 2. Due to disk virtualization, it is difficult to determine the true number of physical spindles behind a logical disk or LUN, therefore this threshold is not a direct indicator of a disk performance problem.
+
+This analysis uses a Warning alert for an average disk queue length greater than 2, but correlate this value with disk latency (Avg. Disk sec/Transfer).
+
+References:
+
+]]>
+
+
+
+
+
+This analysis checks if the number of I/O request packets (IRPs) in the disk queue are at 32 or higher. Many SAN vendors use 32 as a default setting for the Host Bus Adapter (HBA) which interfaces into the fibre channel network to connect to one or more SANs. If the queue depth (simultaneous in-flight I/O) is reached frequently, then the queue depth might need to be increased.
+
+Reference:
+]]>
+
+If the size of the Free and Zero page list is large, then it is a good indicator of too much RAM installed on the computer. A large amount of Free and Zero page list size is normal for computers that have been recently powered on or booted. As the system accesses the hard disk placing pages of memory into the working sets of processes, eventually many of those pages of memory will be discarded or paged out. When that happens, the memory is often placed on the Standby list. A large Standby list is preferable because it uses the extra RAM as a disk cache. Available memory is the sum of the Free, Zero, and Standby page lists, so a high amount of available memory with a low amount of Zero and Free is preferred because the system is using the extra RAM as disk cache.]]>
+
+
+
+This analysis provides statistics only. Threads running on a processor will be in either user mode measured using % User Time or in privileged/kernel mode measured using % Privileged Time. High % User Time indicates a high amount of application code is being executed. This is desirable versus too much time in privileged mode. See the Processor % Privileged Time analysis for more information.]]>
+
+The lazy writer writes 20% of the dirty pages every second, but increases the number of lazy write flushes if it is unable to keep up with the rate of dirty pages.
+
+This analysis checks for more than 100 lazy write flushes/second which might indicate that the lazy writer is falling behind in writing to the disk.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+The \Memory\Long-Term Average Standby Cache Lifetime (s) performance counter measures the average lifetime in seconds of pages in the standby list cache over a long period of time. A low life expectancy could indicate that the pages on the standby list are frequently used i.e. the system has to replenish the standby list with pages from the modified list (pages that must be written to disk first) and the modified list is replenished by the working sets of processes.
+
+The Standby page list is a list of physical pages that are no longer in use (they are available to be reused), but contain data that already exists on disk. If the data is needed again, then it can be served from the Standby list in physical memory instead of going to disk to get it. Therefore, it is part of the system available memory and it acts as disk cache - the larger the disk cache, the less demand on the disk. ]]>
+
+
+
+This analysis checks for % Interrupt Time greater than 30%. If this occurs, then consider updating devices drivers for hardware that correlates to this alert.
+
+References:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx]]>
+
+
+High privileged mode CPU indicates that computer is spending too much time in system I/O versus real (user mode) work. % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
+
+This analysis throws a warning alert if privileged mode CPU is consuming more than 20% of total CPU and a critical alert if consuming more than 30% of total CPU.
+
+
+Next steps
+The CPU consumption might be caused by another busy resource such as network, memory, or disk I/O. High privileged mode CPU can also be caused by high amounts of Context Switches/second. See the High Context Switches/second analysis. The KernRate (KrView) tool can be used to profile the kernel to see what component is consuming the most kernel resources. To see more information about how KernRate can be used to analyze high privileged mode CPU problems, see Mark Russinovich's blog entry in the references section below.
+
+References:
+]]>
+
+
+This analysis creates a Warning alert for utilization greater than 50% on any processor and creates a critical alert for utilization greater than 80%.
+
+
+If average processor utilization is high based on the thresholds within this analysis, then check if it is high user mode CPU or high privileged mode. If high privileged mode CPU is suspected, then see the Privileged Mode CPU Analysis. If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment article in the references section for more information.
+
+References:
+
+
+
]]>
+
+
+
+
+
+
+This analysis gets the instances of \Paging File(*)\% Usage to identify the logical disks that are hosting paging files. Next, it checks each disk for an Avg. Disk Queue Length of greater than 1 when the Avg. Disk sec/Transfer (otherwise known as "response times") is greater than 15 ms. If true, then this counter returns the value 1 (warning). If the response times are greater than 25 ms and if the IO sizes are 64 KB or smaller, then it returns a 2 (critical). If the IO sizes (Avg. Disk Bytes/Transfer) are greater than 64 KB, then it returns a value of 2 (critical) only if the Avg. Disk sec/Transfer is greater than 35 ms. If none of the criteria is met, then it returns 0 (OK).
+]]>
+
+
+
+
+
-
-
-
+
+
+
-
-
-
+
+
+
-
-
-
-
-
-Description: % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
-
-Privileged or kernel mode is the processing mode that allows code to have direct access to all hardware and memory in the system. I/O operations and other system services run in privileged (kernel) mode; user applications run in user mode. Unless the processes are graphics-intensive or I/O-intensive such as file and print services, most applications should not be processing much work in kernel mode.
-Privileged mode corresponds to the percentage of time the processor spends on execution of Microsoft Windows kernel commands, such as processing of SQL Server I/O requests. If this counter is consistently high when the Physical Disk counters are high, consider installing a faster or more efficient disk subsystem.
-
-
-Note: Different disk controllers and drivers use different amounts of kernel processing time. Efficient controllers and drivers use less privileged time, leaving more processing time available for user applications, increasing overall throughput.
-
-
-Threshold:
-
-Yellow: SQL Server is using more than 20% Privileged (kernel) mode CPU usage
-
Red: SQL Server is using more than 30% Privileged (kernel) mode CPU usage
-
Next Steps:
-The key piece to diagnosing high processor conditions is to determine the ratio of privileged mode to user mode CPU.
-The counter '\Processor\% Processor Time' is the sum of '\Processor\% Privileged Time' and '\Processor\% User Time'. If Privileged Time is pushing the %Processor Time higher then it is due to processes executing in kernel mode. If '% User Time' is causing the % Processor Time to be higher then it is likely a user mode process that is causing the pressure.
-If %Privileged Time is consistently high or shows high under load, it could be several issues. The most common reason for high %Privileged Time is disk pressure which can be measured by correlating this counter with Physical Disk reads / sec and Physical Disk writes / sec. If these are also high you may also see a high number of Page Latch Waits for SQL Server which can be measured by examining the sys.dm_os_wait_stats dynamic management view and the perfmon SQL Server:Wait Statistics perfmon counters.
-If SQL Server Memory Manager: Page Life Expectancy is also low try to address by reducing the number of queries that are performing a high number of logical reads by adding indexes, ensuring that statistics are up to date, and potentially rewriting the query.
-You could add more physical RAM to help raise Page Life Expectancy if it is low (lower than your baseline, or critical when under 300) although we only recommend adding memory as an absolute last resort. We first recommended addressing design and addressing poor indexing first. Adding physical RAM only masks the real issue.
-The other potential reasons for high privileged mode are related to out of date drivers, BIOS being out of date, failing components, processes that run in kernel mode such as anti-virus, and other potential issues.
-
-Reference:
-
-Monitoring CPU Usage
-http://msdn.microsoft.com/en-us/library/ms178072.aspx
-
Ask the Performance Team
-http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
-
Clint Huffman's Windows Troubleshooting in the Field Blog
-http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx
-]]>
-
-
-
-
Description: Rows with varchar columns on tables without a clustered index can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
-Forwarded records occur when a data record in a heap increases in size and the records current page does not have the space to store the size increase. The record is moved to a new location, becoming a forwarded record, and the forwarding record is left in the original location to point to the real location of the record. The forwarded record points back to the forwarding record in case its location ever needs to change again.
-Access Methods Forwarded Records/sec measures the number of records fetched through forwarded record pointers which are due to tables without a clustered index. A forwarded record is basically a pointer. For instance, if you start with a short row, and update the row creating a wider row, the row might not fit on the data page. A pointer is put in its location and the row is forwarded to another page. This is done as a performance optimization so that all the non-clustered indexes on the heap do not have to be altered with the new location of the heap record.
-If a table has lots of forwarded records, scanning the table can be very inefficient.
-Also, rows with varchar columns can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
-Forwarded Records only occurs on heaps which are tables without clustered indexes.
-Threshold: (Yellow) - This value should not be greater than 10% of the number of Batch Requests/Sec
-
Next Steps:
-
Look at code to determine where the short row is inserted followed by an update. Forwarded records can be avoided by:
-
Reference:
-
SQL Server Storage Engine
-http://blogs.msdn.com/sqlserverstorageengine/archive/2006/09/19/761437.aspx
-
Forwarding and forwarded records, and the back-pointer size
-http://www.sqlskills.com/BLOGS/PAUL/post/Forwarding-and-forwarded-records-and-the-back-pointer-size.aspx
-
sys.dm_db_index_physical_stats (Transact-SQL)
-
http://msdn.microsoft.com/en-us/library/ms188917.aspx
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
Description:
-
This counter represents inserts into a table with no physical ordering of the rows. A table with no ordering, without a clustered index, is known as a heap table. Inserts into heaps will require SQL Server to perform freespace scans to identify pages with free space to insert rows. A heap table also requires an additional, internal column called an uniquifier to be generated for each row inserted.
-Extra processing is required to define and store a heap table since SQL Server normally uses the clustered index as a storage mechanism for the table data. Freespace scans have an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when there are many connections inserting.
-It is usually recommended that you physically order the table rows by using a clustered index on the table.
-FreeSpace Scans/sec represents inserts into a table with no physical ordering of its rows which is called a heap. A heap table requires an additional column called an uniquifier to be generated for each row inserted. It is recommended that you physically order the table rows by using a clustered on the table for most tables.
-FreeSpace Scans /sec measures the number of scans per second that were initiated to search for free space within pages already allocated to an allocation unit to insert or modify record fragments. Each scan may find multiple pages. FreeSpace Scans are due to inserts into heaps that require SQL Server to perform freespace scans to identify pages with free space to insert rows. Freespace scans are an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when many spids are inserting. The solution is often to evaluate clustered index for base tables.
-One or more of the following symptoms may accompany poor performance during inserts to a large table on SQL Server:
-
-
Threshold:
-
Yellow: A ratio (10%) of more than 1 freespace scan for every 10 Batch Requests/Sec
-
Next Steps:
-Microsoft recommends that you add a clustered index to the table and test the effect of the clustered index on performance.
-Reference:
-
PRB: Poor Performance on a Heap
-
http://support.microsoft.com/kb/297861
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or low Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can ignore this counter.
-A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
-This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
-
Threshold:
-
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
-
Formula:
-(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
-
Next Steps:
-The main causes of high Full Scans/sec are:
-
Reference:
-
SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
SQL Server 2005 Waits and Queues
-http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
-
Wait Types and Correlation to Other Performance Info
-http://www.sqlmag.com/Files/09/40925/Webtable_01.doc
-
-
-]]>
-
-
-
-Description: The number of page splits per second that occurs as the result of overflowing index pages. When a record is inserted into an index, it must be inserted in order. If the data page is full, the page splits in order to maintain the appropriate order. A high value for this counter may warrant the consideration of a lower fill factor.
-This value should be as low as possible. Heavily fragmented indexes may be the result of high page splits/sec.
-
Threshold:
-
Yellow: A ratio of more than 1 page split for every 20 batch requests
-Next Steps:
-If the number of page splits is high, consider increasing the fillfactor of your indexes. An increased fillfactor helps to reduce page splits by increasing the amount of free space on each page.
-Note: This counter also includes new page allocations, and does not mean there is an issue.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426(v=SQL.105).aspx
-]]>
-
-
-Description: Scan Point Revalidations occurs during range scans. When a range scan occurs there is an optimization process that occurs where the pages are marked as satisfied with the WHERE predicate that does the range scan.
-Instead of scanning through each and every row in the page, it does not keep an exclusive lock on those pages; instead it just keeps a mark on it and continues with rest of the scan. If one or more rows in the page are modified by update or a delete operation, the update or delete process will notify the scan to recheck the page to see if the page is still valid for the range scan. This recheck is called a Scan Point Revalidation.
-Scan Point Revalidations shows the contention between range scans and modifications to the same pages. This counter also pinpoints hotspots within the cluster table competing between reads and writes.
-Scan Point Revalidations are the number of times per second that the scan point had to be revalidated before the scan could be continued. If a page latch has to be released due to contention, the scan point must be revalidated when the scan resumes.
-
Threshold:
-Yellow: Greater than 10 per second
-Next Steps: It is important to correlate the Scan Count Revalidations/sec with the Range Scans/sec counter and Page Latch related counters. The higher the number of range scans on the same pages, the higher the number of scan point revalidations.
-High number of Scan Point Revalidations/sec indicates hot spots in the data, probably due to a poor choice of clustered index putting the most active rows on the same page.
-Consider reducing the number of range scans, isolating reporting and application use, and most importantly ensuring that the clustered index choice is the right one. Clustered indexes should be on columns that are sorted on, grouped on, used in joins, used in between queries, and in other operations where the order of the returned data is critical.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
Description:
-
-Number of Workfiles created in the last second. Workfiles in TempDB are used in processing hash operations when the amount of data being processed is too big to fit into the available memory.
-
-Workfiles Created/Sec is the number of work files created per second. Work files are similar to work tables but are created strictly by hashing operations. Work files are used to store temporary results for hash joins and hash aggregates when the amount of data being processed is too big to fit into the available SQL Server memory.
-
Threshold:
-
Yellow: A ratio of more than 1 workfile created for every 20 batch requests
-
Next Steps: Make queries more efficient by adding/changing indexes, adding additional memory, etc. Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for expensive queries and consider rewriting them.
-
-
Reference:
-
SQL Server, Access Methods Object
-
http://technet.microsoft.com/en-us/library/ms177426.aspx
-
Working with tempdb in SQL Server 2005
-
http://msdn.microsoft.com/en-us/library/cc966545.aspx
-
Troubleshooting Performance Problems in SQL Server 2008
-
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx
-]]>
-
-
-
Description: Number of worktables created in the last second. The number of work tables created per second. Work tables are temporary objects and are used to store results for query spool, LOB variables, and cursors. Typically, this number is less than 200.
-
-
Threshold:
-
Yellow: This number should be less than 200. This will need to be baselined for accuracy.
-
Next Steps:
Look for expensive statements with high CPU, duration, and statements that run in parallel and tune them by adding indexes, reducing the volume of data being returned, and adding indexes where appropriate.
-Ensure that TempDB is not a bottleneck and is following best practices.
-If you determine that the throughput of your application has degraded because of contention in allocation structures, you can use the following techniques to minimize it.
-Evaluate your application and the query plans to see if you can minimize the creation of work tables and temporary tables. Monitor the perfmon counters as described in Monitoring contention caused by DML operations. Then, use SQL Profiler to correlate the values of these counters with the currently running queries. This helps you identify the queries that are causing the contention in allocation structures.
-Divide tempdb into multiple data files of equal size. These multiple files don’t necessarily need to be on different disks/spindles unless you are also encountering I/O bottlenecks. The general recommendation is to have one file per CPU because only one thread is active per CPU at one time. SQL Server allocates pages for tempdb objects in a round-robin fashion (also referred to as proportional fill) so that the latches on PFS and SGAM pages are distributed among multiple files. This is supported both in SQL Server 2000 and SQL Server 2005. There are improvements to the proportional fill algorithm in SQL Server 2005.
-Use TF-1118. Under this trace flag SQL Server allocates full extents to each tempdb object, thereby eliminating the contention on SGAM page. This is done at the expense of some waste of disk space in tempdb. This trace flag has been available since SQL Server 2000. With improvements in tempdb object caching in SQL Server 2005, there should be significantly less contention in allocation structures. If you see contention in SGAM pages, you may want to use this trace flag. Cached tempdb objects may not always be available. For example, cached tempdb objects are destroyed when the query plan with which they are associated is recompiled or removed from the procedure cache.
-
-
Reference:
-
SQL Server, Access Methods Object
-
http://technet.microsoft.com/en-us/library/ms177426.aspx
-
Working with tempdb in SQL Server 2005
-
http://msdn.microsoft.com/en-us/library/cc966545.aspx
-
Troubleshooting Performance Problems in SQL Server 2008
-
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx
-]]>
-
-
Description: The Buffer Cache Hit Ratio measures the percentage of pages that were found in the buffer pool without having to incur a read from disk. This counter indicates how often SQL Server goes to the buffer, not the hard disk, to get data. The higher this ratio, the better. A high ratio, close to 100% indicates that SQL Server did not have to go to the hard disk often to fetch data, and performance overall is boosted. If the Buffer Cache Hit Ratio was 100% that would suggest that all of the pages are being accessed from cache and does not require trips to disk, because of the optimistic read ahead mechanism, this is not exactly the case.
-When a user session wants to read data from the database, it will read directly from the SQL Server buffer cache (a logical read), or, if the buffer cache does not have the data that is requested, the data will be read into the buffer cache from disk (a physical read) and then from the buffer cache. If the requested data is in the buffer cache, then it is called a 'buffer hit'. If the data is not in the buffer cache it is called a 'buffer miss'. The ratio of buffer hits to total buffer requests is called the buffer cache hit ratio as can be seen from the following:
-
Cache Hit Ratio = (Logical Reads - Physical Reads)/Logical Reads
-
A read from memory takes approximately 100 nanoseconds, while a read from disk takes about 8 milliseconds or more.
-1 millisecond = 1,000,000 nanoseconds
-The important point about SQL Server read operations is that when selecting data from the database, the user will wait on the complete read operation including all of the physical reads. The time it takes to select from the database depends on how much data will be read and how long it takes for those reads to occur. Even with cache reads, the time it takes to read a large amount of data can be significant. With physical reads, the time will be even longer.
-There are a few considerations to be aware of regarding the Buffer Cache Hit Ratio counter. First, unlike many of the other counters available for monitoring SQL Server, this counter averages the Buffer Cache Hit Ratio from the time the instance of SQL Server was started. In other words, this counter is not a real-time measurement, but an average. Secondly, the buffer cache hit ratio may be skewed by the read ahead mechanism. Read Ahead Reads are pages that were read into cache while the query was processed. Because of the read ahead mechanism, you should not infer from a high buffer cache hit ratio that SQL Server is not suffering from memory pressure or at least could not benefit from additional memory.
-
-
Threshold:
-
Yellow: Less than 97 percent buffer cache hit ratio
-
Next Steps:
-
Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for queries with a high number of logical reads and consider tuning and potentially rewriting them.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-Description: Total number of pages on all free lists. The more free pages that are available then the less often the lazy writer will have to fire keeping pages in the buffer pool longer.
-
Threshold:
-
Yellow: Less than 640 Free Pages
-
Next Steps:
-Compare the Buffer Manager\Free pages counter to the following:
-
The higher the Buffer Manager\Free pages then the higher the Buffer Manager\Page Life Expectancy should be. If Buffer Manager\Free pages is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-
Reference:
-
SQL Server, Access Methods Object
-
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
-
-
Description:
-The Lazy Writes/sec counter records the number of buffers written each second by the buffer manager's lazy write process. This counter tracks how many times a second that the Lazy Writer process is moving dirty pages from the buffer to disk in order to free up buffer space. This process is where the dirty, aged buffers are removed from the buffer by a system process that frees the memory up for other uses. A dirty, aged buffer is one that has changes and needs to be written to the disk. High value on this counter possibly indicates I/O issues or even SQL Server memory problems. The Lazy writes / sec values should consistently be less than 20 for the average system.
-Generally speaking, this should not be a high value, say more than 20 per second or so. Ideally, it should be close to zero. If it is zero, this indicates that your SQL Server's buffer cache is plenty big and SQL Server doesn't have to free up dirty pages, instead waiting for this to occur during regular checkpoints. If this value is high, then a need for more memory is indicated.
-
Threshold:
-
Red: Greater than 20 Lazy Writes per second
-
Next Steps:
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 starts to flush pages out of the buffer pool cache under memory pressure.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
Description:
-Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. If this value gets below 300 seconds, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool or potentially that your SQL Server could use more memory in order to boost performance. Anything below 300 is a critical level.
-The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
-
Threshold:
-
Red: Page life expectancy is less than 5 minutes (300 seconds)
-
Next Steps:
-If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 starts to flush pages out of the buffer pool cache under memory pressure.
-Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
-
Reference:
-
SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
Description:
-Number of requests to find a page in the buffer pool. When the ratio of page lookups to batch requests is greater than 100, this is an indication that while query plans are looking up data in the buffer pool, these plans are inefficient.
-
Threshold:
-Ratio of Page Lookups/sec to Batch Requests/sec < 100 to 1.
-
Warning: Page life expectancy is less than 5 minutes (300 seconds)
-
Next Steps:
-Page Lookups/sec is the number of requests to find a page in the buffer pool made per second. If this number is high as compared to the number of batch requests, this indicates a degree of inefficiency and a potential opportunity for tuning.
-Identify queries with the highest amount of logical I/O's and tune them.
-
Note: You can track the Page Lookups/sec and other counters through the sys.dm_os_performance_counters DMV which contains all the SQL Server instance object-related counters that you can find in perfmon.
-
Reference:
-
SQL Server, Buffer Manager Object
-http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description: Number of physical database page reads issued per second. Number of physical database page reads issued. 80 – 90 per second is normal, anything that is above indicates indexing or memory constraint.
-
Threshold:
-
Informational: Page Reads/sec > 90
-
Next Steps:
-Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
-If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
-Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
-Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
-
Reference:
-
SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description: Number of physical database page writes issued per second. 80 – 90 per second is normal, anything above, check the lazy writer/sec and Checkpoint pages/sec counter, if these counters are relatively high then, this indicates a memory constraint.
-
Threshold:
-
Informational: Page Writes/sec > 90
-
Next Steps:
-Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
-If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
-Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
-Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
-
Reference:
-
SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Description: This counter measures the number of index searches per second. Index searches are used to start range scans, single index record fetches, and to reposition within an index. Index searches are preferable to index and table scans. For OLTP applications, optimize for more index searches and less scans preferably, 1 full scan for every 1000 index searches. Index and table scans are expensive I/O operations.
-The Access Methods Index Searches/sec is captured to compare to the Access Methods Full Scans/sec. Full Scans will lead to high logical reads which will deplete the Buffer Pool. A depleted Buffer Pool will lead to disk impact as SQL Server will not have memory available for queries.
-Additionally sudden decreases in Index Searches/Sec value may indicate an index is no longer being used.
-
Threshold:
-
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
-Formula:
-(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
-Next Steps:
-The main causes of high Full Scans/sec compare to Index Searches are:
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
SQL Server 2005 Waits and Queues
-
http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
-
Wait Types and Correlation to Other Performance Info
-http://www.sqlmag.com/Files/09/40925/Webtable_01.doc
-]]>
+
+
+
+
+
+Description: % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
+
+Privileged or kernel mode is the processing mode that allows code to have direct access to all hardware and memory in the system. I/O operations and other system services run in privileged (kernel) mode; user applications run in user mode. Unless the processes are graphics-intensive or I/O-intensive such as file and print services, most applications should not be processing much work in kernel mode.
+Privileged mode corresponds to the percentage of time the processor spends on execution of Microsoft Windows kernel commands, such as processing of SQL Server I/O requests. If this counter is consistently high when the Physical Disk counters are high, consider installing a faster or more efficient disk subsystem.
+
+
+Note: Different disk controllers and drivers use different amounts of kernel processing time. Efficient controllers and drivers use less privileged time, leaving more processing time available for user applications, increasing overall throughput.
+
+
+Threshold:
+
+Yellow: SQL Server is using more than 20% Privileged (kernel) mode CPU usage
+
Red: SQL Server is using more than 30% Privileged (kernel) mode CPU usage
+
Next Steps:
+The key piece to diagnosing high processor conditions is to determine the ratio of privileged mode to user mode CPU.
+The counter '\Processor\% Processor Time' is the sum of '\Processor\% Privileged Time' and '\Processor\% User Time'. If Privileged Time is pushing the %Processor Time higher then it is due to processes executing in kernel mode. If '% User Time' is causing the % Processor Time to be higher then it is likely a user mode process that is causing the pressure.
+If %Privileged Time is consistently high or shows high under load, it could be several issues. The most common reason for high %Privileged Time is disk pressure which can be measured by correlating this counter with Physical Disk reads / sec and Physical Disk writes / sec. If these are also high you may also see a high number of Page Latch Waits for SQL Server which can be measured by examining the sys.dm_os_wait_stats dynamic management view and the perfmon SQL Server:Wait Statistics perfmon counters.
+If SQL Server Memory Manager: Page Life Expectancy is also low try to address by reducing the number of queries that are performing a high number of logical reads by adding indexes, ensuring that statistics are up to date, and potentially rewriting the query.
+You could add more physical RAM to help raise Page Life Expectancy if it is low (lower than your baseline, or critical when under 300) although we only recommend adding memory as an absolute last resort. We recommend addressing design and poor indexing first. Adding physical RAM only masks the real issue.
+The other potential reasons for high privileged mode are related to out of date drivers, BIOS being out of date, failing components, processes that run in kernel mode such as anti-virus, and other potential issues.
+
+Reference:
+
+Monitoring CPU Usage
+http://msdn.microsoft.com/en-us/library/ms178072.aspx
+
Ask the Performance Team
+http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
+
Clint Huffman's Windows Troubleshooting in the Field Blog
+http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx
+]]>
+
+
+
+
Description: Rows with varchar columns on tables without a clustered index can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
+Forwarded records occur when a data record in a heap increases in size and the record's current page does not have the space to store the size increase. The record is moved to a new location, becoming a forwarded record, and the forwarding record is left in the original location to point to the real location of the record. The forwarded record points back to the forwarding record in case its location ever needs to change again.
+Access Methods Forwarded Records/sec measures the number of records fetched through forwarded record pointers which are due to tables without a clustered index. A forwarded record is basically a pointer. For instance, if you start with a short row, and update the row creating a wider row, the row might not fit on the data page. A pointer is put in its location and the row is forwarded to another page. This is done as a performance optimization so that all the non-clustered indexes on the heap do not have to be altered with the new location of the heap record.
+If a table has lots of forwarded records, scanning the table can be very inefficient.
+Also, rows with varchar columns can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
+Forwarded Records only occurs on heaps which are tables without clustered indexes.
+Threshold: (Yellow) - This value should not be greater than 10% of the number of Batch Requests/Sec
+
Next Steps:
+
Look at code to determine where the short row is inserted followed by an update. Forwarded records can be avoided by:
+
Reference:
+
SQL Server Storage Engine
+http://blogs.msdn.com/sqlserverstorageengine/archive/2006/09/19/761437.aspx
+
Forwarding and forwarded records, and the back-pointer size
+http://www.sqlskills.com/BLOGS/PAUL/post/Forwarding-and-forwarded-records-and-the-back-pointer-size.aspx
+
sys.dm_db_index_physical_stats (Transact-SQL)
+
http://msdn.microsoft.com/en-us/library/ms188917.aspx
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
Description:
+
This counter represents inserts into a table with no physical ordering of the rows. A table with no ordering, without a clustered index, is known as a heap table. Inserts into heaps will require SQL Server to perform freespace scans to identify pages with free space to insert rows. A heap table also requires an additional, internal column called a uniquifier to be generated for each row inserted.
+Extra processing is required to define and store a heap table since SQL Server normally uses the clustered index as a storage mechanism for the table data. Freespace scans have an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when there are many connections inserting.
+It is usually recommended that you physically order the table rows by using a clustered index on the table.
+FreeSpace Scans/sec represents inserts into a table with no physical ordering of its rows, which is called a heap. A heap table requires an additional column called a uniquifier to be generated for each row inserted. It is recommended that you physically order the table rows by using a clustered index on the table for most tables.
+FreeSpace Scans /sec measures the number of scans per second that were initiated to search for free space within pages already allocated to an allocation unit to insert or modify record fragments. Each scan may find multiple pages. FreeSpace Scans are due to inserts into heaps that require SQL Server to perform freespace scans to identify pages with free space to insert rows. Freespace scans are an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when many spids are inserting. The solution is often to evaluate clustered index for base tables.
+One or more of the following symptoms may accompany poor performance during inserts to a large table on SQL Server:
+
+
Threshold:
+
Yellow: A ratio (10%) of more than 1 freespace scan for every 10 Batch Requests/Sec
+
Next Steps:
+Microsoft recommends that you add a clustered index to the table and test the effect of the clustered index on performance.
+Reference:
+
PRB: Poor Performance on a Heap
+
http://support.microsoft.com/kb/297861
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or low Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can ignore this counter.
+A few of the main causes of high Full Scans/sec are missing indexes and too many rows requested; queries with missing indexes or that request too many rows will have a large number of logical reads and increased CPU time.
+This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
+
Threshold:
+
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
+
Formula:
+(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
+
Next Steps:
+The main causes of high Full Scans/sec are:
+
Reference:
+
SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
SQL Server 2005 Waits and Queues
+http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
+
Wait Types and Correlation to Other Performance Info
+http://www.sqlmag.com/Files/09/40925/Webtable_01.doc
+
+
+]]>
+
+
+
+Description: The number of page splits per second that occurs as the result of overflowing index pages. When a record is inserted into an index, it must be inserted in order. If the data page is full, the page splits in order to maintain the appropriate order. A high value for this counter may warrant the consideration of a lower fill factor.
+This value should be as low as possible. Heavily fragmented indexes may be the result of high page splits/sec.
+
Threshold:
+
Yellow: A ratio of more than 1 page split for every 20 batch requests
+Next Steps:
+If the number of page splits is high, consider increasing the fillfactor of your indexes. An increased fillfactor helps to reduce page splits by increasing the amount of free space on each page.
+Note: This counter also includes new page allocations, and does not mean there is an issue.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426(v=SQL.105).aspx
+]]>
+
+
+Description: Scan Point Revalidations occurs during range scans. When a range scan occurs there is an optimization process that occurs where the pages are marked as satisfied with the WHERE predicate that does the range scan.
+Instead of scanning through each and every row in the page, it does not keep an exclusive lock on those pages; instead it just keeps a mark on it and continues with rest of the scan. If one or more rows in the page are modified by update or a delete operation, the update or delete process will notify the scan to recheck the page to see if the page is still valid for the range scan. This recheck is called a Scan Point Revalidation.
+Scan Point Revalidations shows the contention between range scans and modifications to the same pages. This counter also pinpoints hotspots within the cluster table competing between reads and writes.
+Scan Point Revalidations are the number of times per second that the scan point had to be revalidated before the scan could be continued. If a page latch has to be released due to contention, the scan point must be revalidated when the scan resumes.
+
Threshold:
+Yellow: Greater than 10 per second
+Next Steps: It is important to correlate the Scan Count Revalidations/sec with the Range Scans/sec counter and Page Latch related counters. The higher the number of range scans on the same pages, the higher the number of scan point revalidations.
+High number of Scan Point Revalidations/sec indicates hot spots in the data, probably due to a poor choice of clustered index putting the most active rows on the same page.
+Consider reducing the number of range scans, isolating reporting and application use, and most importantly ensuring that the clustered index choice is the right one. Clustered indexes should be on columns that are sorted on, grouped on, used in joins, used in between queries, and in other operations where the order of the returned data is critical.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
Description:
+
+Number of Workfiles created in the last second. Workfiles in TempDB are used in processing hash operations when the amount of data being processed is too big to fit into the available memory.
+
+Workfiles Created/Sec is the number of work files created per second. Work files are similar to work tables but are created strictly by hashing operations. Work files are used to store temporary results for hash joins and hash aggregates when the amount of data being processed is too big to fit into the available SQL Server memory.
+
Threshold:
+
Yellow: A ratio of more than 1 workfile created for every 20 batch requests
+
Next Steps: Make queries more efficient by adding/changing indexes, adding additional memory, etc. Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for expensive queries and consider rewriting them.
+
+
Reference:
+
SQL Server, Access Methods Object
+
http://technet.microsoft.com/en-us/library/ms177426.aspx
+
Working with tempdb in SQL Server 2005
+
http://msdn.microsoft.com/en-us/library/cc966545.aspx
+
Troubleshooting Performance Problems in SQL Server 2008
+
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx
+]]>
+
+
+
Description: Number of worktables created in the last second. The number of work tables created per second. Work tables are temporary objects and are used to store results for query spool, LOB variables, and cursors. Typically, this number is less than 200.
+
+
Threshold:
+
Yellow: This number should be less than 200. This will need to be baselined for accuracy.
+
Next Steps:
Look for expensive statements with high CPU, duration, and statements that run in parallel and tune them by adding indexes, reducing the volume of data being returned, and adding indexes where appropriate.
+Ensure that TempDB is not a bottleneck and is following best practices.
+If you determine that the throughput of your application has degraded because of contention in allocation structures, you can use the following techniques to minimize it.
+Evaluate your application and the query plans to see if you can minimize the creation of work tables and temporary tables. Monitor the perfmon counters as described in Monitoring contention caused by DML operations. Then, use SQL Profiler to correlate the values of these counters with the currently running queries. This helps you identify the queries that are causing the contention in allocation structures.
+Divide tempdb into multiple data files of equal size. These multiple files don’t necessarily need to be on different disks/spindles unless you are also encountering I/O bottlenecks as well. The general recommendation is to have one file per CPU because only one thread is active per CPU at one time. SQL Server allocates pages for tempdb objects in a round-robin fashion (also referred to as proportional fill) so that the latches on PFS and SGAM pages are distributed among multiple files. This is supported both in SQL Server 2000 and SQL Server 2005. There are improvements to the proportional fill algorithm in SQL Server 2005.
+Use TF-1118. Under this trace flag SQL Server allocates full extents to each tempdb object, thereby eliminating the contention on SGAM page. This is done at the expense of some waste of disk space in tempdb. This trace flag has been available since SQL Server 2000. With improvements in tempdb object caching in SQL Server 2005, there should be significantly less contention in allocation structures. If you see contention in SGAM pages, you may want to use this trace flag. Cached tempdb objects may not always be available. For example, cached tempdb objects are destroyed when the query plan with which they are associated is recompiled or removed from the procedure cache.
+
+
Reference:
+
SQL Server, Access Methods Object
+
http://technet.microsoft.com/en-us/library/ms177426.aspx
+
Working with tempdb in SQL Server 2005
+
http://msdn.microsoft.com/en-us/library/cc966545.aspx
+
Troubleshooting Performance Problems in SQL Server 2008
+
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx
+]]>
+
+
Description: The Buffer Cache Hit Ratio measures the percentage of pages that were found in the buffer pool without having to incur a read from disk. This counter indicates how often SQL Server goes to the buffer, not the hard disk, to get data. The higher this ratio, the better. A high ratio, close to 100% indicates that SQL Server did not have to go to the hard disk often to fetch data, and performance overall is boosted. If the Buffer Cache Hit Ratio was 100% that would suggest that all of the pages are being accessed from cache and does not require trips to disk, because of the optimistic read ahead mechanism, this is not exactly the case.
+When a user session wants to read data from the database, it will read directly from the SQL Server buffer cache (a logical read), or, if the buffer cache does not have the data that is requested, the data will be read into the buffer cache from disk (a physical read) and then from the buffer cache. If the requested data is in the buffer cache, then it is called a 'buffer hit'. If the data is not in the buffer cache it is called a 'buffer miss'. The ratio of buffer hits to total buffer requests is called the buffer cache hit ratio as can be seen from the following:
+
Cache Hit Ratio = (Logical Reads - Physical Reads)/Logical Reads
+
A read from memory takes approximately 100 nanoseconds, while a read from disk takes about 8 milliseconds or more.
+1 millisecond = 1,000,000 nanoseconds
+The important point about SQL Server read operations is that when selecting data from the database, the user will wait on the complete read operation including all of the physical reads. The time it takes to select from the database depends on how much data will be read and how long it takes for those reads to occur. Even with cache reads, the time it takes to read a large amount of data can be significant. With physical reads, the time will be even longer.
+There are a few considerations to be aware of regarding the Buffer Cache Hit Ratio counter. First, unlike many of the other counters available for monitoring SQL Server, this counter averages the Buffer Cache Hit Ratio from the time the instance of SQL Server was started. In other words, this counter is not a real-time measurement, but an average. Secondly, the buffer cache hit ratio may be skewed by the read ahead mechanism. Read Ahead Reads are pages that were read into cache while the query was processed. Because of the read ahead mechanism, you should not infer from a high buffer cache hit ratio that SQL Server is not suffering from memory pressure or at least could not benefit from additional memory.
+
+
Threshold:
+
Yellow: Less than 97 percent buffer cache hit ratio
+
Next Steps:
+
Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for queries with a high number of logical reads and consider tuning and potentially rewriting them.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+Description: Total number of pages on all free lists. The more free pages that are available then the less often the lazy writer will have to fire keeping pages in the buffer pool longer.
+
Threshold:
+
Yellow: Less than 640 Free Pages
+
Next Steps:
+Compare the Buffer Manager\Free pages counter to the following:
+
The higher the Buffer Manager\Free pages then the higher the Buffer Manager\Page Life Expectancy should be. If Buffer Manager\Free pages is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+
Reference:
+
SQL Server, Access Methods Object
+
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
+
+
Description:
+The Lazy Writes/sec counter records the number of buffers written each second by the buffer manager's lazy write process. This counter tracks how many times a second that the Lazy Writer process is moving dirty pages from the buffer to disk in order to free up buffer space. This process is where the dirty, aged buffers are removed from the buffer by a system process that frees the memory up for other uses. A dirty, aged buffer is one that has changes and needs to be written to the disk. High value on this counter possibly indicates I/O issues or even SQL Server memory problems. The Lazy writes / sec values should consistently be less than 20 for the average system.
+Generally speaking, this should not be a high value, say more than 20 per second or so. Ideally, it should be close to zero. If it is zero, this indicates that your SQL Server's buffer cache is plenty big and SQL Server doesn't have to free up dirty pages, instead waiting for this to occur during regular checkpoints. If this value is high, then a need for more memory is indicated.
+
Threshold:
+
Red: Greater than 20 Lazy Writes per second
+
Next Steps:
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 starts to flush pages out of the buffer pool cache under memory pressure.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
Description:
+Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. If this value gets below 300 seconds, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool or potentially that your SQL Server could use more memory in order to boost performance. Anything below 300 is a critical level.
+The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
+
Threshold:
+
Red: Page life expectancy is less than 5 minutes (300 seconds)
+
Next Steps:
+If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 starts to flush pages out of the buffer pool cache under memory pressure.
+Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
+
Reference:
+
SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
Description:
+Number of requests to find a page in the buffer pool. When the ratio of page lookups to batch requests is greater than 100, this is an indication that while query plans are looking up data in the buffer pool, these plans are inefficient.
+
Threshold:
+Ratio of Page Lookups/sec to Batch Requests/sec < 100 to 1.
+
Warning: Page life expectancy is less than 5 minutes (300 seconds)
+
Next Steps:
+Page Lookups/sec is the number of requests to find a page in the buffer pool made per second. If this number is high as compared to the number of batch requests, this indicates a degree of inefficiency and a potential opportunity for tuning.
+Identify queries with the highest amount of logical I/O's and tune them.
+
Note: You can track the Page Lookups/sec and other counters through the sys.dm_os_performance_counters DMV which contains all the SQL Server instance object-related counters that you can find in perfmon.
+
Reference:
+
SQL Server, Buffer Manager Object
+http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description: Number of physical database page reads issued per second. Number of physical database page reads issued. 80 – 90 per second is normal, anything that is above indicates indexing or memory constraint.
+
Threshold:
+
Informational: Page Reads/sec > 90
+
Next Steps:
+Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
+If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
+Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
+Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
+
Reference:
+
SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description: Number of physical database page writes issued per second. 80 – 90 per second is normal, anything above, check the lazy writer/sec and Checkpoint pages/sec counter, if these counters are relatively high then, this indicates a memory constraint.
+
Threshold:
+
Informational: Page Writes/sec > 90
+
Next Steps:
+Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
+If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
+Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
+Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
+
Reference:
+
SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Description: This counter measures the number of index searches per second. Index searches are used to start range scans, single index record fetches, and to reposition within an index. Index searches are preferable to index and table scans. For OLTP applications, optimize for more index searches and less scans preferably, 1 full scan for every 1000 index searches. Index and table scans are expensive I/O operations.
+The Access Methods Index Searches/sec is captured to compare to the Access Methods Full Scans/sec. Full Scans will lead to high logical reads which will deplete the Buffer Pool. A depleted Buffer Pool will lead to disk impact as SQL Server will not have memory available for queries.
+Additionally sudden decreases in Index Searches/Sec value may indicate an index is no longer being used.
+
Threshold:
+
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
+Formula:
+(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
+Next Steps:
+The main causes of high Full Scans/sec compare to Index Searches are:
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
SQL Server 2005 Waits and Queues
+
http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
+
Wait Types and Correlation to Other Performance Info
+http://www.sqlmag.com/Files/09/40925/Webtable_01.doc
+]]>
-
-
-
-
-
-Description: % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
-
-Privileged or kernel mode is the processing mode that allows code to have direct access to all hardware and memory in the system. I/O operations and other system services run in privileged (kernel) mode; user applications run in user mode. Unless the processes are graphics-intensive or I/O-intensive such as file and print services, most applications should not be processing much work in kernel mode.
-Privileged mode corresponds to the percentage of time the processor spends on execution of Microsoft Windows kernel commands, such as processing of SQL Server I/O requests. If this counter is consistently high when the Physical Disk counters are high, consider focusing on improving the disk subsystem.
-
-It is recommended to look for comparative trends with other processes, work loads, error counts, and other behaviors to find what is driving Privileged Time.
-
-Note: Different disk controllers and drivers use different amounts of kernel processing time. Efficient controllers and drivers use less privileged time, leaving more processing time available for user applications, increasing overall throughput.
-
-
-Threshold:
-
-Yellow: SQL Server is using more than 20% Privileged (kernel) mode CPU usage
-
Red: SQL Server is using more than 30% Privileged (kernel) mode CPU usage
-
Next Steps:
-The key piece to diagnosing high processor conditions is to determine the ratio of privileged mode to user mode CPU.
-The counter '\Processor\% Processor Time' is the sum of '\Processor\% Privileged Time' and '\Processor\% User Time'. If Privileged Time is pushing the %Processor Time higher then it is due to processes executing in kernel mode. If '% User Time' is causing the % Processor Time to be higher then it is likely a user mode process that is causing the pressure.
-If %Privileged Time is consistently high or shows high under load, it could be several issues. The most common reason for high %Privileged Time is disk pressure which can be measured by correlating this counter with Physical Disk reads / sec and Physical Disk writes / sec. If these are also high you may also see a high number of Page Latch Waits for SQL Server which can be measured by examining the sys.dm_os_wait_stats dynamic management view and the perfmon SQL Server:Wait Statistics perfmon counters.
-
-If SQL Server Memory Manager: Page Life Expectancy is also low try to address by reducing the number of queries that are performing a high number of logical reads by adding indexes, ensuring that statistics are up to date, and potentially rewriting the query.
-
-You could add more physical RAM to help raise Page Life Expectancy if it is low (lower than your baseline, or critical when under 300) although we only recommend adding memory as an absolute last resort. We first recommended addressing design and addressing poor indexing first. Adding physical RAM only masks the real issue.
-
-The other potential reasons for high privileged mode are related to out of date drivers, BIOS being out of date, failing components, processes that run in kernel mode such as anti-virus, and other potential issues.
-
-Reference:
-
-Monitoring CPU Usage
-http://msdn.microsoft.com/en-us/library/ms178072.aspx
-
Ask the Performance Team
-http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
-
Clint Huffman's Windows Troubleshooting in the Field Blog
-http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
-
-
-
-
Description: Rows with varchar columns on tables without a clustered index can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
-Forwarded records occur when a data record in a heap increases in size and the record's current page does not have the space to store the size increase. The record is moved to a new location, becoming a forwarded record, and the forwarding record is left in the original location to point to the real location of the record. The forwarded record points back to the forwarding record in case its location ever needs to change again.
-Access Methods Forwarded Records/sec measures the number of records accessed through forwarded record pointers which are due to tables without a clustered index. A forwarded record is basically a pointer. For instance, if you start with a short row, and update the row creating a wider row, the row might not fit on the data page. A pointer is put in its location and the row is forwarded to another page. Forwarding Records are used as a performance optimization so that all the non-clustered indexes on the heap do not have to be altered with the new location of the heap record.
-If a table has lots of forwarded records, scanning the table can be very inefficient.
-Also, rows with varchar columns can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
-Forwarded Records only occurs on heaps which are tables without clustered indexes.
-
Threshold: (Yellow) - This value should not be greater than 10% of the number of Batch Requests/Sec
-
Next Steps:
-
Look at code to determine where the short row is inserted followed by an update.
Forwarded records can be avoided by:
-
Reference:
-
SQL Server Storage Engine
-http://blogs.msdn.com/sqlserverstorageengine/archive/2006/09/19/761437.aspx
-
Forwarding and forwarded records, and the back-pointer size
-http://www.sqlskills.com/BLOGS/PAUL/post/Forwarding-and-forwarded-records-and-the-back-pointer-size.aspx
-
sys.dm_db_index_physical_stats (Transact-SQL)
-
http://msdn.microsoft.com/en-us/library/ms188917.aspx
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
Description:
-
-This counter represents inserts into a table with no physical ordering of the rows. A table with no ordering, without a clustered index, is known as a heap table. Inserts into heaps will require SQL Server to perform freespace scans to identify pages with free space to insert rows. A heap table also requires an additional, internal column called a uniquifier to be generated for each row inserted.
-Extra processing is required to define and store a heap table since SQL Server normally uses the clustered index as a storage mechanism for the table data. Freespace scans have an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when there are many connections inserting.
-It is usually recommended that you physically order the table rows by using a clustered index on the table.
-FreeSpace Scans/sec represents inserts into a table with no physical ordering of its rows which is called a heap. A heap table requires an additional column called a uniquifier to be generated for each row inserted. It is recommended that you physically order the table rows by using a clustered index on the table for most tables.
-FreeSpace Scans/sec measures the number of scans per second that were initiated to search for free space within pages already allocated to an allocation unit to insert or modify record fragments. Each scan may find multiple pages. FreeSpace Scans are due to inserts into heaps that require SQL Server to perform freespace scans to identify pages with free space to insert rows. Freespace scans are an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when many spids are inserting. The solution is often to add a clustered index for base tables.
-One or more of the following symptoms may accompany poor performance during inserts to a large table on SQL Server:
-
-
Threshold:
-
Yellow: A ratio (10%) or more than 1 freespace scan for every 10 Batch Requests/Sec
-
Next Steps:
-Microsoft recommends that you add a clustered index to the table and test the effect of the clustered index on performance.
-Reference:
-
PRB: Poor Performance on a Heap
-
http://support.microsoft.com/kb/297861
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
-]]>
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes and too many rows requested; queries with missing indexes or too many rows requested will have a large number of logical reads and an increased CPU time.
-
-This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
-
-Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also poor disk performance, and / or, high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
-
Next Steps:
-The main causes of high Full Scans/sec are:
-
Note: Identify disk bottlenecks by using Performance Counters, Profiler, sys.dm_io_virtual_file_stats and SHOWPLAN output.
-Also refer to the sys.dm_io_virtual_file_stats dynamic management view (DMV) to track io_stalls to help identify IO bottlenecks.
-To back up and support this information, compare the counters to sys.dm_os_wait_stats output. If you see high values in perfmon, you may also see high waits for the following:
-
Reference:
-
SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
SQL Server 2005 Waits and Queues
-http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
-
Wait Types and Correlation to Other Performance Info
-http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
-
-
-
-Description: The number of page splits per second that occurs as the result of overflowing index pages and new page allocations. When a record is inserted into an index, it must be inserted in order. If the data page is full, the page splits in order to maintain the appropriate order. A high value for this counter may warrant the consideration of a lower fill factor and pad_index to leave more empty space per page.
-This value should be as low as possible. Heavily fragmented indexes may be the result of high page splits/sec.
-
Note: A high value for this counter is not bad in situations where many new pages are being created, since it includes all new page allocations as well as splits when a data page splits.
-
Threshold:
-
Yellow: A ratio of more than 1 page split for every 20 batch requests
-
Next Steps:
-If the number of page splits is high, consider increasing the fillfactor of your indexes. An increased fillfactor helps to reduce page splits by increasing the amount of free space on each page.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426(v=SQL.105).aspx
-
-To track page splits more accurately see the following SQLSkills blog article from Jonathan Kehayias:
-http://www.sqlskills.com/blogs/jonathan/post/Tracking-Problematic-Pages-Splits-in-SQL-Server-2012-Extended-Events-e28093-No-Really-This-Time!.aspx
-]]>
-Description: Scan Point Revalidations occurs during range scans. When a range scan occurs there is an optimization process that occurs where the pages are marked as satisfied with the WHERE predicate that does the range scan.
-Instead of scanning through each and every row in the page, it does not keep an exclusive lock on those pages; instead it just keeps a mark on it and continues with rest of the scan. If one or more rows in the page are modified by update or a delete operation, the update or delete process will notify the scan to recheck the page to see if the page is still valid for the range scan. This recheck is called a Scan Point Revalidation.
-Scan Point Revalidations shows the contention between range scans and modifications to the same pages. This counter also pinpoints hotspots within the cluster table competing between reads and writes.
-Scan Point Revalidations are the number of times per second that the scan point had to be revalidated before the scan could be continued. If a page latch has to be released due to contention, the scan point must be revalidated when the scan resumes.
-
-Note: This is an informative counter. It is not a critical counter that should be used for baselines or alerting.
-
Next Steps: You can correlate the Scan Count Revalidations/sec with the Range Scans/sec counter and Page Latch related counters. The higher the number of range scans on the same pages, the higher the number of scan point revalidations.
-High number of Scan Point Revalidations/sec potentially indicate hot spots in the data, probably due to a poor choice of clustered index putting the most active rows on the same page.
-Consider reducing the number of range scans, isolating reporting and application use, and most importantly ensuring that the clustered index choice is the right one. Clustered indexes should be on columns that are sorted on, grouped on, used in joins, used in between queries, and in other operations where the order of the returned data is critical.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
-
-
-Description:
-Number of Workfiles created in the last second. Workfiles in TempDB are used in processing hash operations when the amount of data being processed is too big to fit into the available memory. The Work files are similar to work tables but are created strictly by hashing operations. Workfiles are used to store temporary results for hash joins and hash aggregates.
-Hash joins can require large amounts of memory for execution. As part of executing a hash join, the memory required for the hash can become too large and require a spill to disk. The disk storage to backup the hash operation is called a workfile. Workfiles are collections of extents and pages that are managed strictly by the workfile code.
-
-Threshold:
-
Yellow: Greater than 20 Workfiles created per second
-
-Next Steps:
-Make queries more efficient by adding/changing indexes. Run expensive queries through the Database Tuning Advisor (DTA), look for expensive queries and consider rewriting them, and add as last resort consider adding additional memory.
-
Reference:
-
SQL Server, Access Methods Object
-
http://technet.microsoft.com/en-us/library/ms177426.aspx
-
-Working with tempdb in SQL Server 2005
-
http://msdn.microsoft.com/en-us/library/cc966545.aspx
-
-Troubleshooting Performance Problems in SQL Server 2008
-
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx]]>
-
-
-
Description: Number of worktables created in the last second. The number of work tables created per second. Work tables are temporary objects and are used to store results for query spool, LOB variables, and cursors.
-
-
Threshold:
-
Yellow: Greater than 20 Worktables created per second. This will need to be baselined for accuracy.
-
Next Steps:
Look for expensive statements with high CPU, duration, and statements that run in parallel and tune them by adding indexes, reducing the volume of data being returned, and adding indexes where appropriate.
-Ensure that TempDB is not a bottleneck and is following best practices.
-If you determine that the throughput of your application has degraded because of contention in allocation structures, you can use the following techniques to minimize it.
-Evaluate your application and the query plans to see if you can minimize the creation of work tables and temporary tables. Monitor the perfmon counters as described in Monitoring contention caused by DML operations. Then, use SQL Profiler to correlate the values of these counters with the currently running queries. This helps you identify the queries that are causing the contention in allocation structures.
-Divide TempDB into multiple data files of equal size. These multiple files don't necessarily need to be on different disks/spindles unless you are also encountering I/O bottlenecks as well. The general recommendation is to have one file per CPU because only one thread is active per CPU at one time. SQL Server allocates pages for TempDB objects in a round-robin fashion (also referred to as proportional fill) so that the latches on PFS and SGAM pages are distributed among multiple files. This is supported both in SQL Server 2000 and SQL Server 2005. There are improvements to the proportional fill algorithm in SQL Server 2005.
-Use TF-1118. Under this trace flag SQL Server allocates full extents to each TempDB object, thereby eliminating the contention on SGAM page. This is done at the expense of some waste of disk space in TempDB. This trace flag has been available since SQL Server 2000. With improvements in TempDB object caching in SQL Server 2005, there should be significantly less contention in allocation structures. If you see contention in SGAM pages, you may want to use this trace flag. Cached TempDB objects may not always be available. For example, cached TempDB objects are destroyed when the query plan with which they are associated is recompiled or removed from the procedure cache.
-
-
Reference:
-
SQL Server, Access Methods Object
-
http://technet.microsoft.com/en-us/library/ms177426.aspx
-
Working with TempDB in SQL Server 2005
-
http://msdn.microsoft.com/en-us/library/cc966545.aspx
-
Troubleshooting Performance Problems in SQL Server 2008
-
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx ]]>
-
-
-
Description: The Buffer Cache Hit Ratio measures the percentage of pages that were found in the buffer pool without having to incur a read from disk. This counter indicates how often SQL Server goes to the buffer, not the hard disk, to get data. The higher this ratio, the better. A high ratio, close to 100% indicates that SQL Server did not have to go to the hard disk often to fetch data, and performance overall is boosted. If the Buffer Cache Hit Ratio was 100% that would suggest that all of the pages are being accessed from cache and does not require trips to disk, because of the optimistic read ahead mechanism, this is not exactly the case.
-When a user session wants to read data from the database, it will read directly from the SQL Server buffer cache (a logical read), or, if the buffer cache does not have the data that is requested, the data will be read into the buffer cache from disk (a physical read) and then from the buffer cache. If the requested data is in the buffer cache, then it is called a 'buffer hit'. If the data is not in the buffer cache it is called a 'buffer miss'. The ratio of buffer hits to total buffer requests is called the buffer cache hit ratio as can be seen from the following:
-
Cache Hit Ratio = (Logical Reads - Physical Reads)/Logical Reads
-
A read from memory takes approximately 100 nanoseconds, while a read from disk takes about 8 milliseconds or more.
-1 millisecond = 1,000,000 nanoseconds
-The important point about SQL Server read operations is that when selecting data from the database, the user will wait on the complete read operation including all of the physical reads. The time is takes to select from the database depends on how much data will be read and how long it takes for those reads to occur. Even with cache reads, the time it takes to read a large amount of data can be significant. With physical reads, the time will be even longer.
-There are a few considerations to be aware of regarding the Buffer Cache Hit Ratio counter. First, unlike many of the other counters available for monitoring SQL Server, this counter averages the Buffer Cache Hit Ratio from the time the instance of SQL Server was started. In other words, this counter is not a real-time measurement, but an average. Secondly, the buffer cache hit ratio may be skewed by the read ahead mechanism. Read Ahead Reads are pages that were read into cache while the query was processed. Read aheads are an optimistic form of physical reads. Because of the read ahead mechanism, you should not infer from a high buffer cache hit ratio that SQL Server is not suffering from memory pressure or at least could not benefit from additional memory.
-
-
Threshold:
-
Yellow: Less than 97 percent buffer cache hit ratio
-
Red: Less than 90 percent buffer cache hit ratio
-
Next Steps:
-
Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for queries with a high number of logical reads and consider tuning and potentially rewriting them.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
-
-
-Description: Total number of pages on all free lists. The more free pages that are available then the less often the lazy writer will have to fire keeping pages in the buffer pool longer.
-
-
-A value less than 640 (or 5 MB) may indicate physical memory pressure.
-
-
Threshold:
-
Yellow: Less than 640 Free Pages
-
Next Steps:
-Compare the Buffer Manager\Free pages counter to the following:
-
The higher the Buffer Manager\Free pages then the higher the Buffer Manager\Page Life Expectancy should be. If Buffer Manager\Free pages is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-
Reference:
-
SQL Server, Access Methods Object
-
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
-
-
Description:
-The Lazy Writes/sec counter records the number of buffers written each second by the buffer manager's lazy write process. This counter tracks how many times a second that the Lazy Writer process is moving dirty pages from the buffer to disk in order to free up buffer space. This process is where the dirty, aged buffers are removed from the buffer by a system process that frees the memory up for other uses. A dirty, aged buffer is one that has changes and needs to be written to the disk. High value on this counter possibly indicates I/O issues or even SQL Server memory problems. The Lazy writes / sec values should consistently be less than 20 for the average system.
-Generally speaking, this should not be a high value, say more than 20 per second or so. Ideally, it should be close to zero. If it is zero, this indicates that your SQL Server's buffer cache is plenty big and SQL Server doesn't have to free up dirty pages, instead waiting for this to occur during regular checkpoints. If this value is high, then a need for more memory is indicated.
-
-
-Note: NUMA will increase the number of lazy writer threads per NUMA node and influence the behavior of the lazy writer by increasing its execution at this view. If the server is a NUMA environment other signs of memory pressure should be used and you should analyze the Buffer Node counters for Page Life Expectancy per node. There is not a lazy writer counter in Buffer Nodes.
-
-Threshold:
-
Red: Greater than 20 Lazy Writes per second
-
-
-
-
-Next Steps:
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 starts to flush pages out of the buffer pool cache under memory pressure.
-
-
-Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
-
-Configure SQL Server to Use Soft-NUMA
-http://msdn.microsoft.com/en-us/library/ms345357.aspx]]>
-
-
-
-
-
-Description:
-Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
-
-When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
-
-The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
-
-Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed, A NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
-
Threshold:
-
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
-
Red: Page life expectancy is less than 5 minutes (300 seconds)
-
-
Next Steps:
-If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pull to honor requests.
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 starts to flush pages out of the buffer pool cache under memory pressure.
-Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options to not address the issue.
-
-
Reference:
-
-
SQL Server, Access Methods Object
-
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
Description:
-Number of requests to find a page in the buffer pool. When the ratio of page lookups to batch requests is greater than 100, this is an indication that while query plans are looking up data in the buffer pool, these plans are inefficient or there was a large number of ad-hoc queries.
-
-
-Threshold:
-Ratio of Page Lookups/sec to Batch Requests/sec < 100 to 1.
-
Warning: Page life expectancy is less than 5 minutes (300 seconds)
-
Next Steps:
-Page Lookups/sec is the number of requests to find a page in the buffer pool made per second. If this number is high as compared to the number of batch requests, this indicates a degree of inefficiency and a potential opportunity for tuning.
-Identify queries with the highest amount of logical I/O's and tune them.
-
Note: You can track the Page Lookups/sec and other counters through the sys.dm_os_performance_counters DMV which contains all the SQL Server instance object-related counters that you can find in perfmon.
-
Reference:
-
SQL Server, Buffer Manager Object
-http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description: Number of physical database page reads issued per second. Number of physical database page reads issued. 80 to 90 per second is normal, anything that is above indicates indexing or memory constraint.
-
Threshold:
-
Yellow: Page Reads/sec > 90
-
Next Steps:
-Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
-If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
-Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
-Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
-
Reference:
-
SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description: Number of physical database page writes issued per second. 80 to 90 per second is normal. Anything above 90, it is recommended to check the lazy writer/sec and Checkpoint pages/sec counter, if these counters are also relatively high then, this indicates a memory constraint.
-
-Threshold:
-
Yellow: Page Writes/sec > 90
-
Next Steps:
-Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
-If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
-Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
-Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
-
Reference:
-
SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
-
-
-Total number of logouts started per second. Greater than 2 per second indicates that the application is not correctly using connection pooling.]]>
-
-Number of users connected to the system. The number of users currently connected to the SQL Server. This should correlate with the Batch Requests per second counter.]]>
-
-Number of latch requests that could not be granted immediately and had to wait before being granted. These are the amount of latches that had to wait.]]>
-
-
-Current number of processes waiting for a workspace memory grant. Memory Grants Pending records the number of connections that are waiting for memory before they can begin processing a memory intensive query such as a sort or hash operation. Connections that wait in this state for a long enough time will eventually receive an 8645 error (A time out occurred while waiting for memory resources to execute the query. Rerun the query). A spid waiting in this state will have a waittype of 0x0040 (RESOURCE_SEMAPHORE) in sysprocesses. If this counter remains above zero for any significant amount of time then you will need to track down what queries are doing sorts/hashes and run them through Database Tuning Advisor (DTA) to see if they can get a more efficient plan.
-
-
-Threshold:
-
Red: Numbers higher than 0 indicate a lack of memory.]]>
-
-
-
-Total amount of dynamic memory the server is willing to consume]]>
-
-
-Description Number of SQL batch requests received by server. This counter measures the number of batch requests that SQL Server receives per second, and generally follows in step to how busy your server's CPUs are. Generally speaking, over 1000 batch requests per second indicates a very busy SQL Server, and could mean that if you are not already experiencing a CPU bottleneck, that you may very well soon. Of course, this is a relative number, and the bigger your hardware, the more batch requests per second SQL Server can handle. From a network bottleneck approach, a typical 100Mbs network card is only able to handle about 3000 batch requests per second. If you have a server that is this busy, you may need to have two or more network cards, or go to a 1Gbs network card.
-
-Note: Sometimes low batch requests/sec can be misleading. If there were a SQL statements/sec counter, this would be a more accurate measure of the amount of SQL Server activity. For example, an application may call only a few stored procedures yet each stored procedure does lot of work. In that case, we will see a low number for batch requests/sec but each stored procedure (one batch) will execute many SQL statements that drive CPU and other resources. As a result, many counter thresholds based on the number of batch requests/sec will seem to identify issues because the batch requests on such a server are unusually low for the level of activity on the server.
-
-We cannot conclude that a SQL Server is not active simply by looking at only batch requests/sec. Rather, you have to do more investigation before deciding there is no load on the server. If the average number of batch requests/sec is below 5 and other counters (such as SQL Server processor utilization) confirm the absence of significant activity, then there is not enough of a load to make any recommendations or identify issues regarding scalability.
-
-Note: Batch requests / sec is a great counter to use for baselining and to use as a measurement of how many batches the system could handle before a sympton was evident or a particular condition occured. This counter will greatly depend on SQL Server code and the hardware being used. It is often used as a gauge of saying that a particular system was able to handle x number of batch requests per second and then to examine system and SQL Server counters to determine what resource is the bottlneck at that particular workload.]]>
-
-
-
-Description: Number of SQL compilations that occured per second that includes recompiles. A high value subtracting recompiles can be an indication of a large number of ad hoc queries that can also be cross referenced with the number of ad hoc plans in the plan cache counter.
-
-Be aware of the following:
-
-Reference
-SQL Server, Plan Cache Object
-http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
-
-
-
-]]>
-
-
-
-Description: Number of SQL re-compiles per second that measures the number of times that a statement executed, but had to be compiled again before the statement completed. There are a variety of reasons that a recompile occured such as statistics being out of date, an column was added to a table a store procedure depends on, statement was run with a recompile option, etc. This counter needs to be as close to 0 as possible. A recompile can cause deadlocks and compile locks that are not compatible with any locking type.
-
-SQL Server Trace / Profiler provides an execellent way to find out exactly why recompiles are occuring in your environment.
-
-Troubleshooting stored procedure recompilation
http://support.microsoft.com/kb/243586
-How to identify the cause of recompilation in an SP:Recompile event
-http://support.microsoft.com/kb/308737]]>
-
-
-
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
-
-This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
-
-Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
-
Threshold:
-
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
-
Formula:
-(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
-
Next Steps:
-The main causes of high Full Scans/sec are:
-
Reference:
-
SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
SQL Server 2005 Waits and Queues
-http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
-
Wait Types and Correlation to Other Performance Info
-http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
-
-Description: Number of new locks and lock conversions requested from the lock manager. This value should tie close to the number of Batch Requests per second. Values greaters than 1000 may indicate queries are pulling large volumes of data thereby accessing large numbers of rows
-
-Threshold
-
-Yellow Greater than > 1000 Lock Requests / sec]]>
-
Description: Number of lock requests that could not be satisfied immediately and required the caller to wait before being granted the lock. This is a sign that there is some blocking occuring and would be a good baseline measurement of lock waits for load testing.
-
Note: Lock waits are not recorded by until after the lock event completes. For examining active blocking it is recommended to query sys.dm_os_waiting_tasks.
-
-Threshold
-Yellow Values greater than 0]]>
-
-Description: Number of lock requests that timed out. This does not include requests for NOWAIT locks. A value greater than zero might indicate that user queries are not completing.
-
-Threshold
-Yellow Greater than >1
-
-]]>
-
-Description:
-Number of lock requests, per second, which resulted in a deadlock. Deadlocks are always an issue that should be resolved. A deadlock transaction that is killed must be rerun. It is recommended to use the SQL Trace deadlock graph, trace flag 1222, and the extended events deadlock capture to help identify and solve all of the deadlocks in your environment.
-
-Threshold
-Red Any Deadlocks greater than 0
-
-Resources
-
Bart Duncan Deadlock Resources
-Getting historical deadlock info using extended events
-http://www.sqlskills.com/BLOGS/PAUL/post/Getting-historical-deadlock-info-using-extended-events.aspx]]>
-Total latch wait time (milliseconds) for latch requests that had to wait in the last second.]]>
-
-
-Description:
-% Processor Time is the percentage of elapsed time that all of process threads used the processor to execution instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this counter.
-
-This counter measures the percentage of total processor time spent (user mode and kernel mode) on SQL Server process threads. If this counter stays at 80% for sustained periods of time, then you may also wish to investigate other Process (sqlservr) such as Private Bytes, Virtual Bytes, and Working Set to get a better understanding of how SQL Server allocates certain segments of memory.
-
-
-Threshold:
-
Red: SQL Server is using more than 30% user mode CPU usage
-
-
-Reference:
-
-Monitoring CPU Usage
-http://msdn.microsoft.com/en-us/library/ms178072.aspx
-
Ask the Performance Team
-http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
-
Clint Huffman's Windows Troubleshooting in the Field Blog
-http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
-Total number of processes per second that have successfully acquired a workspace memory grant. This counter should be used as a baseline for comparisons under load.]]>
-Total amount of memory granted to executing processes. This memory is used for hash, sort and create index operations.]]>
-Total amount of memory granted to executing processes. This memory is used primarily for hash, sort and create index operations.]]>
-
-
-
-
-
-Number of auto-parameterization attempts.]]>
-Number of failed auto-parameterizations.]]>
-Number of unsafe auto-parameterizations.]]>
-
-Description: Number of requests that had to wait for a free page.
-
-Free list stalls/sec is the frequency with which requests for available database pages are suspended because no buffers are available. Free list stall rates of greater than 2 per second indicate too little SQL memory available.
-
-
Reference
-Threshold
-Yellow - Free list stalls/sec > 2
-SQL Server, Buffer Manager Object
-
-http://technet.microsoft.com/en-us/library/ms189628.aspx
-]]>
-Description
-
-Number of pages, per second, flushed by checkpoint or other operations that require all dirty pages to be flushed. The checkpoint frequency can be due to low memory conditions as well as the recovery interval set by sp_configure.
-
-Reference
-
-SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-
-A SQL Server DBA myth a day: (15/30) checkpoint only writes pages from committed transactions
-
http://www.sqlskills.com/BLOGS/PAUL/category/Checkpoint.aspx
-
-Database Checkpoints (SQL Server)
-
-http://technet.microsoft.com/en-us/library/ms189573(v=sql.110).aspx]]>
-
Description: Number of pages read, in second, in anticipation of use which is an optimistic physical read. This number should not exceed greater than 20% of total page reads.
-Threshold:
-
Yellow:< 20% of Page Reads/sec
-
]]>
Description: Number of pages used for miscellaneous server purposes (including procedure cache). This counter shows how many pages were taken from the buffer pool to accomodate non-buffer pool needs such as plan cache, procedure cache, the optimizer, workspace memory, etc. This counter should be baselined and can be analyzed by comparing this counter to the amount of buffer pool space and large requests that are hitting the SQL Server instance.
-
-
Note: DBCC MEMORYSTATUS can also be leveraged to examine the impact of stolen memory to the buffer pool.
-
Note: The lazywriter process is not permitted to flush Stolen buffers out of the buffer pool.
-
-
Reference:
-SQL Server, Buffer Manager Object
-
-http://technet.microsoft.com/en-us/library/ms189628(v=sql.105).aspx
-INF: Using DBCC MEMORYSTATUS to Monitor SQL Server Memory Usage
-
-http://support.microsoft.com/kb/271624]]>
-
-Feature usage since last SQL Server startup
-
-You can also examine performance counters through the sys.dm_os_performance_counters DMV. By using the perfmon counters for deprecation and the DMVs, you can help your application prepare and avoid issue when migrating to the future versions of SQL Server.
-
-SELECT * FROM sys.dm_os_performance_counters
-WHERE object_name LIKE '%Deprecated Features%'AND cntr_value > 0
-ORDER BY cntr_value DESC
-]]>
-
-Number of attentions per second. Attentions are the number of user cancels and query timeout that occured per second. A high number of attentions may indicate slow query performance as users are cancelling queries.]]>
-Description Ratio between cache hits and lookups
-
-The Plan Cacheobject provides counters to monitor how SQL Server uses memory to store objects such as stored procedures, ad hoc and prepared Transact-SQL statements, and triggers. Multiple instances of the Plan Cache object can be monitored at the same time, with each instance representing a different type of plan to monitor.
-
Compiled Plan Stubs & Plan Cache Perf Counters:
-
-In SQL Server 2008 R2, there are three options that can help in dealing with plan cache pollution issues.
-
-Contributor(s):
-
-Reference:
-SQL Server, Plan Cache Object
-http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
-
-
-
-
-]]>
-The average amount of wait time (milliseconds) for each lock request that resulted in a wait. This wait could indicate excessive blocking that can be verified by querying sys.dm_os_waiting_tasks. Compare this counter to "Lock Waits/sec" and look for trends.
-
-Threshold
-Yellow Greater than >500 Average Wait Time.
]]>
-
-
Description: Percentage of work tables created where the initial two pages of the work table were not allocated but were immediately available from the work table cache.
-
-In SQL Server 2005 worktable caching was improved. When a query execution plan is cached, the work tables needed by the plan are not dropped across multiple executions of the plan but merely truncated. In addition, the first nine pages for the work table are kept. In SQL Server 2000, the work tables used during query plan execution are dropped. Because the work table is cached, the next execution of the query is faster. When the system is low on memory, the execution plan may be removed from the cache and the associated work tables dropped as well. Both SQL Server 2000 and SQL Server 2005 use a small global pool of pre-allocated pages and extents that make the initial creation of work tables faster.
-
-Note: When a work table is dropped, two pages may remain allocated and they are returned to the work table cache. A value less than 90% may indicate insufficient memory, since execution plans are being dropped, or may indicate, on 32-bit systems, the need for an upgrade to a 64-bit system.
-
-
Threshold:
-
Yellow: Less than 90% Worktables from Cache Ratio. This will need to be baselined for accuracy.
-
-
Reference:
-SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426(v=sql.105).aspx]]>
-
Description:
-The number of times locks on a table were escalated from page- or row-level to table-level. Frequent or even occasional spiking in this value may indicate poorly coded transactions.
-
-
-Lock Escalation Thresholds
-
-Lock escalation is triggered when lock escalation is not disabled on the table by using the ALTER TABLE SET LOCK_ESCALATION option, and when either of the following conditions exists:
-
-
-If locks cannot be escalated because of lock conflicts, the Database Engine periodically triggers lock escalation at every 1,250 new locks acquired.
-
-Next Steps
-
Reducing Locking and Escalation
-In most cases, the Database Engine delivers the best performance when operating with its default settings for locking and lock escalation. If an instance of the Database Engine generates a lot of locks and is seeing frequent lock escalations, consider reducing the amount of locking by:
-
Using an isolation level that does not generate shared locks for read operations.
-
-
-Note: Changing the isolation level affects all tables on the instance of the Database Engine.
-
-
-You can also use trace flags 1211 and 1224 to disable all or some lock escalations. For more information, see Trace Flags (Transact-SQL). Also, monitor lock escalation by using the SQL Server Profiler Lock:Escalation event; and see Using SQL Server Profiler.
-
-Reference:
-Lock Escalation (Database Engine) -
http://msdn.microsoft.com/en-us/library/ms184286(SQL.105).aspx
-
-]]>
-The free space in tempdb in KB.]]>
-The longest running time of any transaction in seconds. This counter could indicate a long running statement pulling large amounts of data that normally takes a long time to execute or potentially a blocking condition.]]>
-The total number of active non-snapshot transactions that generate version records. These are all of the non-snapshot isolation versions such as triggers and online indexing.
-
-Note: The sum of Update Snapshot Transactions and NonSnapshot Version Transactions represents the total number of transactions that participate in version generation. The difference of Snapshot Transactions and Update Snapshot Transactions reports the number of read-only snapshot transactions.
-
-Reference:
-http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
-http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
-Managing TempDB in SQL Server: TempDB Basics (Version Store: Why do we need it?)
-http://blogs.msdn.com/b/sqlserverstorageengine/archive/2008/12/22/managing-tempdb-in-sql-server-tempdb-basics-verison-store.aspx
-]]>
-The total number of active snapshot transactions.]]>
-Description
-
-The version cleanup rate in KB per seconds.
-
-Monitors the version cleanup rate in KBps in all version stores. If the version cleanup rate is lower than the version generation rate, the version store will use more and more space in tempdb. However, if the version cleanup rate is 0 but the version generation rate is not, there is probably a long-running transaction that is preventing the version store cleanup.
-
-Row versions are shared across sessions. The creator of the row version has no control over when the row version can be reclaimed. You will need to find and then possibly stop the longest-running transaction that is preventing the row version cleanup.
-
The following query returns the top two longest-running transactions that depend on the versions in the version store:
-
-select top 2
- transaction_id,
- transaction_sequence_num,
- elapsed_time_seconds
-from sys.dm_tran_active_snapshot_database_transactions
-order by elapsed_time_seconds DESC
-
-Reference
-
-Row Versioning Resource Usage
-http://msdn.microsoft.com/en-us/library/ms175492.aspx]]>
-Description: The version generation rate in KB per seconds.
-
-You can use the Version Generation Rate and Version Cleanup Rate counters to measure version store impact on TempDB. The Version Generation Rate should not outpace the Cleanup Rate. Additionally, if your Version Cleanup Rate is 0, a long-running transaction could be preventing the version store cleanup. Incidentally, before generating an out-of-tempdb-space error, SQL Server 2008 makes a last-ditch attempt by forcing the version stores to shrink. During the shrink process, the longest-running transactions that have not yet generated any row versions are marked as victims. This frees up the version space used by them. Message 3967 is generated in the error log for each such victim transaction. If a transaction is marked as a victim, it can no longer read the row versions in the version store or create new ones. Message 3966 is generated and the transaction is rolled back when the victim transaction attempts to read row versions. If the shrink of the version store succeeds, more space is available in tempdb. Otherwise, tempdb runs out of space.
-
-If TempDB fills and runs out of space, writes will continue, but versions will not and reads will fail.
-
-Reference
-SQL Server, Transactions Object
-http://technet.microsoft.com/en-us/library/ms189038.aspx]]>
-
-
-
-Plan re-use is desirable for OLTP workloads because re-creating the same plan (for similar or identical transactions) is a waste of CPU resources.
-
To compute the plan re-use rate, compare SQL Server SQL Statistics: batch requests/sec to SQL compilations/sec.
-
Special exception to the plan re-use rule is that zero (or trivial) cost plans will not be cached (not re-used) in SQL 2005 SP2 and above.
-
Applications that use zero cost plans will have a lower plan re-use but this is not a performance issue, because it is cheaper to generate a new plan every time than to cache.
-
Reference:
-
Execution Plan Caching and Reuse
-
http://msdn.microsoft.com/en-us/library/ms181055.aspx
-
Top SQL Server 2005 Performance Issues for OLTP Applications
-
http://technet.microsoft.com/en-us/library/cc966401.aspx
-]]>
-A number from 1-5 indicating the current memory state of the server.
-
-
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
-Number of shrink notifications the server issued in the last second. Indicates how often the server believes it is under memory pressure.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
-Number of bytes the server requested to shrink.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
-
-
-
-Number of seconds a page will stay in the buffer pool without references.
-
-When we are using NUMA architecture which is becoming more common you will see memory nodes. We have one memory node per NUMA node and this is used to allocate memory in a particular node. This is visible in the SQL Server Buffer Node perfmon group. If you want to make sure you are performing local memory access versus foreign memory access we need to pay attention to where the memory is being allocated which can be tracked via sys.dm_os_memory_nodes.
-
-If we do not have enough memory in a particular NUMA node, we will perform a foreign access if we have to, but SQL Server tries to avoid this.
-
-Note: See "SQLServer_Buffer Manager Page Life Expectancy" for details.
-]]>
-
-Number of pages which are not from NUMA-local memory.
-
-When we are using NUMA architecture which is becoming more common you will see memory nodes. We have one memory node per NUMA node and this is used to allocate memory in a particular node. This is visible in the SQL Server Buffer Node perfmon group. If you want to make sure you are performing local memory access versus foreign memory access we need to pay attention to where the memory is being allocated which can be tracked via sys.dm_os_memory_nodes.
-
-If we do not have enough memory in a particular NUMA node, we will perform a foreign access if we have to, but SQL Server tries to avoid this.
-
-Reference:
-http://msdn.microsoft.com/en-us/library/ms345597(v=sql.105).aspx]]>
-The amount of memory that is in use. This may include both physical and virtual memory. When this number is larger than the amount of physical memory, the Buffers Spooled count rises as an indication that memory swapping is increasing. Increased memory swapping slows performance of the data flow engine.
-]]>
-
-The number of buffers spooled to disk.
-
-"Buffers spooled" has an initial value of 0. When it goes above 0, it indicates that the engine has started memory swapping.
-
-In this case, it is recommended to set Data Flow Task properties BLOBTempStoragePath and BufferTempStoragePath appropriately for maximal I/O bandwidth.
-
-
-
-Reference:
-
http://msdn.microsoft.com/en-us/library/ms137622.aspx
-
http://msdn.microsoft.com/en-us/library/dd795224(v=SQL.100).aspx
-]]>
The number of flat memory buffers in use throughout the pipeline.
-
-"Buffers in use", "Flat buffers in use" and "Private buffers in use" are useful to discover leaks. During package execution time, you will see these counters fluctuating. But once the package finishes execution, their values should return to the same value as what they were before the execution. Otherwise, buffers are leaked. In situations like this, it is recommended to contact Microsoft PSS.
-
-"Rows read" and "Rows written" show how many rows the entire Data Flow has processed. They give you an overall idea about the execution progress.
-]]>
-The number of pipeline buffers in use throughout the pipeline.
-
-"Buffers in use", "Flat buffers in use" and "Private buffers in use" are useful to discover leaks. During package execution time, you will see these counters fluctuating. But once the package finishes execution, their values should return to the same value as what they were before the execution. Otherwise, buffers are leaked. In situations like this, it is recommended to contact Microsoft PSS.
-
-"Rows read" and "Rows written" show how many rows the entire Data Flow has processed. They give you an overall idea about the execution progress.
-]]>
-The number of private transformation buffers in use throughout the pipeline.
-
-"Buffers in use", "Flat buffers in use" and "Private buffers in use" are useful to discover leaks. During package execution time, you will see these counters fluctuating. But once the package finishes execution, their values should return to the same value as what they were before the execution. Otherwise, buffers are leaked. In situations like this, it is recommended to contact Microsoft PSS.
-
-"Rows read" and "Rows written" show how many rows the entire Data Flow has processed. They give you an overall idea about the execution progress.
-]]>
-Number of reports that are currently active and being handled by the report server. Use this counter to evaluate caching strategy. There might be significantly more requests than reports generated.
-
-Performance Counters for the MSRS 2008 R2 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.105).aspx]]>
Number of requests per second that failed to return a report from cache. Use this counter to find out whether the resources used for caching (disk or memory) are sufficient.
-
-Performance Counters for the MSRS 2008 R2 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.105).aspx]]>
-Total number of cache misses against the in-memory cache after the service started. This counter resets when the application domain recycles.
-
-Performance Counters for the MSRS 2008 R2 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.105).aspx]]>
-Total number of reports that ran successfully after the service started. This counter resets when the application domain recycles.
-
-Performance Counters for the MSRS 2008 R2 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.105).aspx]]>
-The total number of errors that occur during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
-The total number of errors that occur per second during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
-The number of Jobs that have failed to complete successfully for any reason since the last SQL Server Agent restart.]]>
-Number of running jobs. This counter can be used to find out if the current load on the system is potentially being driven from SQL Server Agent execution.]]>
-Percentage of successful jobs from the total number of executed jobs.]]>
-The number of Jobs that have successfully completed since the last SQL Server Agent restart.]]>
-Number of active steps.]]>
-The total number of times any Job Step execution is retried since the last SQL Server restart.]]>
+
+
+
+
+
+Description: % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
+
+Privileged or kernel mode is the processing mode that allows code to have direct access to all hardware and memory in the system. I/O operations and other system services run in privileged (kernel) mode; user applications run in user mode. Unless the processes are graphics-intensive or I/O-intensive such as file and print services, most applications should not be processing much work in kernel mode.
+Privileged mode corresponds to the percentage of time the processor spends on execution of Microsoft Windows kernel commands, such as processing of SQL Server I/O requests. If this counter is consistently high when the Physical Disk counters are high, consider focusing on improving the disk subsystem.
+
+It is recommended to look for comparative trends with other processes, workloads, error counts, and other behaviors to find what is driving Privileged Time.
+
+Note: Different disk controllers and drivers use different amounts of kernel processing time. Efficient controllers and drivers use less privileged time, leaving more processing time available for user applications, increasing overall throughput.
+
+
+Threshold:
+
+Yellow: SQL Server is using more than 20% Privileged (kernel) mode CPU usage
+
Red: SQL Server is using more than 30% Privileged (kernel) mode CPU usage
+
Next Steps:
+The key piece to diagnosing high processor conditions is to determine the ratio of privileged mode to user mode CPU.
+The counter '\Processor\% Processor Time' is the sum of '\Processor\% Privileged Time' and '\Processor\% User Time'. If Privileged Time is pushing the %Processor Time higher then it is due to processes executing in kernel mode. If '% User Time' is causing the % Processor Time to be higher then it is likely a user mode process that is causing the pressure.
+If %Privileged Time is consistently high or shows high under load, it could be several issues. The most common reason for high %Privileged Time is disk pressure which can be measured by correlating this counter with Physical Disk reads / sec and Physical Disk writes / sec. If these are also high you may also see a high number of Page Latch Waits for SQL Server which can be measured by examining the sys.dm_os_wait_stats dynamic management view and the perfmon SQL Server:Wait Statistics perfmon counters.
+
+If SQL Server Memory Manager: Page Life Expectancy is also low try to address by reducing the number of queries that are performing a high number of logical reads by adding indexes, ensuring that statistics are up to date, and potentially rewriting the query.
+
+You could add more physical RAM to help raise Page Life Expectancy if it is low (lower than your baseline, or critical when under 300) although we only recommend adding memory as an absolute last resort. We first recommended addressing design and addressing poor indexing first. Adding physical RAM only masks the real issue.
+
+The other potential reasons for high privileged mode are related to out of date drivers, BIOS being out of date, failing components, processes that run in kernel mode such as anti-virus, and other potential issues.
+
+Reference:
+
+Monitoring CPU Usage
+http://msdn.microsoft.com/en-us/library/ms178072.aspx
+
Ask the Performance Team
+http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
+
Clint Huffman's Windows Troubleshooting in the Field Blog
+http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
+
+
+
+
Description: Rows with varchar columns on tables without a clustered index can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
+Forwarded records occur when a data record in a heap increases in size and the record's current page does not have the space to store the size increase. The record is moved to a new location, becoming a forwarded record, and the forwarding record is left in the original location to point to the real location of the record. The forwarded record points back to the forwarding record in case its location ever needs to change again.
+Access Methods Forwarded Records/sec measures the number of records accessed through forwarded record pointers which are due to tables without a clustered index. A forwarded record is basically a pointer. For instance, if you start with a short row, and update the row creating a wider row, the row might not fit on the data page. A pointer is put in its location and the row is forwarded to another page. Forwarding Records are used as a performance optimization so that all the non-clustered indexes on the heap do not have to be altered with the new location of the heap record.
+If a table has lots of forwarded records, scanning the table can be very inefficient.
+Also, rows with varchar columns can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
+Forwarded Records only occurs on heaps which are tables without clustered indexes.
+
Threshold: (Yellow) - This value should not be greater than 10% of the number of Batch Requests/Sec
+
Next Steps:
+
Look at code to determine where the short row is inserted followed by an update.
Forwarded records can be avoided by:
+
Reference:
+
SQL Server Storage Engine
+http://blogs.msdn.com/sqlserverstorageengine/archive/2006/09/19/761437.aspx
+
Forwarding and forwarded records, and the back-pointer size
+http://www.sqlskills.com/BLOGS/PAUL/post/Forwarding-and-forwarded-records-and-the-back-pointer-size.aspx
+
sys.dm_db_index_physical_stats (Transact-SQL)
+
http://msdn.microsoft.com/en-us/library/ms188917.aspx
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
Description:
+
+This counter represents inserts into a table with no physical ordering of the rows. A table with no ordering, without a clustered index, is known as a heap table. Inserts into heaps will require SQL Server to perform freespace scans to identify pages with free space to insert rows. A heap table also requires an additional, internal column called a uniquifier to be generated for each row inserted.
+Extra processing is required to define and store a heap table since SQL Server normally uses the clustered index as a storage mechanism for the table data. Freespace scans have an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when there are many connections inserting.
+It is usually recommended that you physically order the table rows by using a clustered index on the table.
+FreeSpace Scans/sec represents inserts into a table with no physical ordering of its rows which is called a heap. A heap table requires an additional column called a uniquifier to be generated for each row inserted. It is recommended that you physically order the table rows by using a clustered index on the table for most tables.
+FreeSpace Scans/sec measures the number of scans per second that were initiated to search for free space within pages already allocated to an allocation unit to insert or modify record fragments. Each scan may find multiple pages. FreeSpace Scans are due to inserts into heaps that require SQL Server to perform freespace scans to identify pages with free space to insert rows. Freespace scans are an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when many spids are inserting. The solution is often to add a clustered index for base tables.
+One or more of the following symptoms may accompany poor performance during inserts to a large table on SQL Server:
+
+
Threshold:
+
Yellow: A ratio (10%) or more than 1 freespace scan for every 10 Batch Requests/Sec
+
Next Steps:
+Microsoft recommends that you add a clustered index to the table and test the effect of the clustered index on performance.
+Reference:
+
PRB: Poor Performance on a Heap
+
http://support.microsoft.com/kb/297861
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
+]]>
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes and too many rows requested; queries with missing indexes or requesting too many rows will have a large number of logical reads and an increased CPU time.
+
+This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
+
+Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also poor disk performance, and / or, high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
+
Next Steps:
+The main causes of high Full Scans/sec are:
+
Note: Identify disk bottlenecks by using Performance Counters, Profiler, sys.dm_io_virtual_file_stats and SHOWPLAN output.
+Also refer to the sys.dm_io_virtual_file_stats dynamic management view (DMV) to track io_stalls to help identify IO bottlenecks.
+To back up and support this information, compare the counters to sys.dm_os_wait_stats output. If you see high values in perfmon, you may also see high waits for the following:
+
Reference:
+
SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
SQL Server 2005 Waits and Queues
+http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
+
Wait Types and Correlation to Other Performance Info
+http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
+
+
+
+Description: The number of page splits per second that occurs as the result of overflowing index pages and new page allocations. When a record is inserted into an index, it must be inserted in order. If the data page is full, the page splits in order to maintain the appropriate order. A high value for this counter may warrant the consideration of a lower fill factor and pad_index to leave more empty space per page.
+This value should be as low as possible. Heavily fragmented indexes may be the result of high page splits/sec.
+
Note: A high value for this counter is not bad in situations where many new pages are being created, since it includes all new page allocations as well as splits when a data page splits.
+
Threshold:
+
Yellow: A ratio of more than 1 page split for every 20 batch requests
+
Next Steps:
+If the number of page splits is high, consider increasing the fillfactor of your indexes. An increased fillfactor helps to reduce page splits by increasing the amount of free space on each page.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426(v=SQL.105).aspx
+
+To track page splits more accurately see the following SQLSkills blog article from Jonathan Kehayias:
+http://www.sqlskills.com/blogs/jonathan/post/Tracking-Problematic-Pages-Splits-in-SQL-Server-2012-Extended-Events-e28093-No-Really-This-Time!.aspx
+]]>
+Description: Scan Point Revalidations occurs during range scans. When a range scan occurs there is an optimization process that occurs where the pages are marked as satisfied with the WHERE predicate that does the range scan.
+Instead of scanning through each and every row in the page, it does not keep an exclusive lock on those pages; instead it just keeps a mark on it and continues with rest of the scan. If one or more rows in the page are modified by update or a delete operation, the update or delete process will notify the scan to recheck the page to see if the page is still valid for the range scan. This recheck is called a Scan Point Revalidation.
+Scan Point Revalidations shows the contention between range scans and modifications to the same pages. This counter also pinpoints hotspots within the cluster table competing between reads and writes.
+Scan Point Revalidations are the number of times per second that the scan point had to be revalidated before the scan could be continued. If a page latch has to be released due to contention, the scan point must be revalidated when the scan resumes.
+
+Note: This is an informative counter. It is not a critical counter that should be used for baselines or alerting.
+
Next Steps: You can correlate the Scan Count Revalidations/sec with the Range Scans/sec counter and Page Latch related counters. The higher the number of range scans on the same pages, the higher the number of scan point revalidations.
+High number of Scan Point Revalidations/sec potentially indicate hot spots in the data, probably due to a poor choice of clustered index putting the most active rows on the same page.
+Consider reducing the number of range scans, isolating reporting and application use, and most importantly ensuring that the clustered index choice is the right one. Clustered indexes should be on columns that are sorted on, grouped on, used in joins, used in between queries, and in other operations where the order of the returned data is critical.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
+
+
+Description:
+Number of Workfiles created in the last second. Workfiles in TempDB are used in processing hash operations when the amount of data being processed is too big to fit into the available memory. The Work files are similar to work tables but are created strictly by hashing operations. Workfiles are used to store temporary results for hash joins and hash aggregates.
+Hash joins can require large amounts of memory for execution. As part of executing a hash join, the memory required for the hash can become too large and require a spill to disk. The disk storage to backup the hash operation is called a workfile. Workfiles are collections of extents and pages that are managed strictly by the workfile code.
+
+Threshold:
+
Yellow: Greater than 20 Workfiles created per second
+
+Next Steps:
+Make queries more efficient by adding/changing indexes. Run expensive queries through the Database Tuning Advisor (DTA), look for expensive queries and consider rewriting them, and add as last resort consider adding additional memory.
+
Reference:
+
SQL Server, Access Methods Object
+
http://technet.microsoft.com/en-us/library/ms177426.aspx
+
+Working with tempdb in SQL Server 2005
+
http://msdn.microsoft.com/en-us/library/cc966545.aspx
+
+Troubleshooting Performance Problems in SQL Server 2008
+
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx]]>
+
+
+
Description: Number of worktables created in the last second. The number of work tables created per second. Work tables are temporary objects and are used to store results for query spool, LOB variables, and cursors.
+
+
Threshold:
+
Yellow: Greater than 20 Worktables created per second. This will need to be baselined for accuracy.
+
Next Steps:
Look for expensive statements with high CPU, duration, and statements that run in parallel and tune them by adding indexes, reducing the volume of data being returned, and adding indexes where appropriate.
+Ensure that TempDB is not a bottleneck and is following best practices.
+If you determine that the throughput of your application has degraded because of contention in allocation structures, you can use the following techniques to minimize it.
+Evaluate your application and the query plans to see if you can minimize the creation of work tables and temporary tables. Monitor the perfmon counters as described in Monitoring contention caused by DML operations. Then, use SQL Profiler to correlate the values of these counters with the currently running queries. This helps you identify the queries that are causing the contention in allocation structures.
+Divide TempDB into multiple data files of equal size. These multiple files don't necessarily need to be on different disks/spindles unless you are also encountering I/O bottlenecks as well. The general recommendation is to have one file per CPU because only one thread is active per CPU at one time. SQL Server allocates pages for TempDB objects in a round-robin fashion (also referred to as proportional fill) so that the latches on PFS and SGAM pages are distributed among multiple files. This is supported both in SQL Server 2000 and SQL Server 2005. There are improvements to the proportional fill algorithm in SQL Server 2005.
+Use TF-1118. Under this trace flag SQL Server allocates full extents to each TempDB object, thereby eliminating the contention on SGAM page. This is done at the expense of some waste of disk space in TempDB. This trace flag has been available since SQL Server 2000. With improvements in TempDB object caching in SQL Server 2005, there should be significantly less contention in allocation structures. If you see contention in SGAM pages, you may want to use this trace flag. Cached TempDB objects may not always be available. For example, cached TempDB objects are destroyed when the query plan with which they are associated is recompiled or removed from the procedure cache.
+
+
Reference:
+
SQL Server, Access Methods Object
+
http://technet.microsoft.com/en-us/library/ms177426.aspx
+
Working with TempDB in SQL Server 2005
+
http://msdn.microsoft.com/en-us/library/cc966545.aspx
+
Troubleshooting Performance Problems in SQL Server 2008
+
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx ]]>
+
+
+
Description: The Buffer Cache Hit Ratio measures the percentage of pages that were found in the buffer pool without having to incur a read from disk. This counter indicates how often SQL Server goes to the buffer, not the hard disk, to get data. The higher this ratio, the better. A high ratio, close to 100% indicates that SQL Server did not have to go to the hard disk often to fetch data, and performance overall is boosted. A Buffer Cache Hit Ratio of 100% would suggest that all of the pages are being accessed from cache and do not require trips to disk; however, because of the optimistic read ahead mechanism, this is not exactly the case.
+When a user session wants to read data from the database, it will read directly from the SQL Server buffer cache (a logical read), or, if the buffer cache does not have the data that is requested, the data will be read into the buffer cache from disk (a physical read) and then from the buffer cache. If the requested data is in the buffer cache, then it is called a 'buffer hit'. If the data is not in the buffer cache it is called a 'buffer miss'. The ratio of buffer hits to total buffer requests is called the buffer cache hit ratio as can be seen from the following:
+
Cache Hit Ratio = (Logical Reads - Physical Reads)/Logical Reads
+
A read from memory takes approximately 100 nanoseconds, while a read from disk takes about 8 milliseconds or more.
+1 millisecond = 1,000,000 nanoseconds
+The important point about SQL Server read operations is that when selecting data from the database, the user will wait on the complete read operation including all of the physical reads. The time it takes to select from the database depends on how much data will be read and how long it takes for those reads to occur. Even with cache reads, the time it takes to read a large amount of data can be significant. With physical reads, the time will be even longer.
+There are a few considerations to be aware of regarding the Buffer Cache Hit Ratio counter. First, unlike many of the other counters available for monitoring SQL Server, this counter averages the Buffer Cache Hit Ratio from the time the instance of SQL Server was started. In other words, this counter is not a real-time measurement, but an average. Secondly, the buffer cache hit ratio may be skewed by the read ahead mechanism. Read Ahead Reads are pages that were read into cache while the query was processed. Read aheads are an optimistic form of physical reads. Because of the read ahead mechanism, you should not infer from a high buffer cache hit ratio that SQL Server is not suffering from memory pressure or at least could not benefit from additional memory.
+
+
Threshold:
+
Yellow: Less than 97 percent buffer cache hit ratio
+
Red: Less than 90 percent buffer cache hit ratio
+
Next Steps:
+
Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for queries with a high number of logical reads and consider tuning and potentially rewriting them.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
+
+
+Description: Total number of pages on all free lists. The more free pages that are available then the less often the lazy writer will have to fire keeping pages in the buffer pool longer.
+
+
+A value less than 640 (or 5 MB) may indicate physical memory pressure.
+
+
Threshold:
+
Yellow: Less than 640 Free Pages
+
Next Steps:
+Compare the Buffer Manager\Free pages counter to the following:
+
The higher the Buffer Manager\Free pages then the higher the Buffer Manager\Page Life Expectancy should be. If Buffer Manager\Free pages is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+
Reference:
+
SQL Server, Access Methods Object
+
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
+
+
Description:
+The Lazy Writes/sec counter records the number of buffers written each second by the buffer manager's lazy write process. This counter tracks how many times a second that the Lazy Writer process is moving dirty pages from the buffer to disk in order to free up buffer space. This process is where the dirty, aged buffers are removed from the buffer by a system process that frees the memory up for other uses. A dirty, aged buffer is one that has changes and needs to be written to the disk. High value on this counter possibly indicates I/O issues or even SQL Server memory problems. The Lazy writes / sec values should consistently be less than 20 for the average system.
+Generally speaking, this should not be a high value, say more than 20 per second or so. Ideally, it should be close to zero. If it is zero, this indicates that your SQL Server's buffer cache is plenty big and SQL Server doesn't have to free up dirty pages, instead waiting for this to occur during regular checkpoints. If this value is high, then a need for more memory is indicated.
+
+
+Note: NUMA will increase the number of lazy writer threads per NUMA node and influence the behavior of the lazy writer by increasing its execution at this view. If the server is a NUMA environment other signs of memory pressure should be used and you should analyze the Buffer Node counters for Page Life Expectancy per node. There is not a lazy writer counter in Buffer Nodes.
+
+Threshold:
+
Red: Greater than 20 Lazy Writes per second
+
+
+
+
+Next Steps:
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 starts to flush pages out of the buffer pool cache under memory pressure.
+
+
+Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
+
+Configure SQL Server to Use Soft-NUMA
+http://msdn.microsoft.com/en-us/library/ms345357.aspx]]>
+
+
+
+
+
+Description:
+Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
+
+When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
+
+The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
+
+Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed, A NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
+
Threshold:
+
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
+
Red: Page life expectancy is less than 5 minutes (300 seconds)
+
+
Next Steps:
+If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 starts to flush pages out of the buffer pool cache under memory pressure.
+Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
+
+
Reference:
+
+
SQL Server, Access Methods Object
+
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
Description:
+Number of requests to find a page in the buffer pool. When the ratio of page lookups to batch requests is greater than 100, this is an indication that while query plans are looking up data in the buffer pool, these plans are inefficient or there was a large number of ad-hoc queries.
+
+
+Threshold:
+Ratio of Page Lookups/sec to Batch Requests/sec < 100 to 1.
+
Warning: Page life expectancy is less than 5 minutes (300 seconds)
+
Next Steps:
+Page Lookups/sec is the number of requests to find a page in the buffer pool made per second. If this number is high as compared to the number of batch requests, this indicates a degree of inefficiency and a potential opportunity for tuning.
+Identify queries with the highest amount of logical I/O's and tune them.
+
Note: You can track the Page Lookups/sec and other counters through the sys.dm_os_performance_counters DMV which contains all the SQL Server instance object-related counters that you can find in perfmon.
+
Reference:
+
SQL Server, Buffer Manager Object
+http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description: Number of physical database page reads issued per second. Number of physical database page reads issued. 80 to 90 per second is normal, anything that is above indicates indexing or memory constraint.
+
Threshold:
+
Yellow: Page Reads/sec > 90
+
Next Steps:
+Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
+If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
+Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
+Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
+
Reference:
+
SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description: Number of physical database page writes issued per second. 80 to 90 per second is normal. Anything above 90, it is recommended to check the lazy writer/sec and Checkpoint pages/sec counter, if these counters are also relatively high then, this indicates a memory constraint.
+
+Threshold:
+
Yellow: Page Writes/sec > 90
+
Next Steps:
+Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
+If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
+Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
+Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
+
Reference:
+
SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
+
+
+Total number of logouts started per second. Greater than 2 per second indicates that the application is not correctly using connection pooling.]]>
+
+Number of users connected to the system. The number of users currently connected to the SQL Server. This should correlate with the Batch Requests per second counter.]]>
+
+Number of latch requests that could not be granted immediately and had to wait before being granted. These are the amount of latches that had to wait.]]>
+
+
+Current number of processes waiting for a workspace memory grant. Memory Grants Pending records the number of connections that are waiting for memory before they can begin processing a memory intensive query such as a sort or hash operation. Connections that wait in this state for a long enough time will eventually receive an 8645 error (A time out occurred while waiting for memory resources to execute the query. Rerun the query). A spid waiting in this state will have a waittype of 0x0040 (RESOURCE_SEMAPHORE) in sysprocesses. If this counter remains above zero for any significant amount of time then you will need to track down what queries are doing sorts/hashes and run them through Database Tuning Advisor (DTA) to see if they can get a more efficient plan.
+
+
+Threshold:
+
Red: Numbers higher than 0 indicate a lack of memory.]]>
+
+
+
+Total amount of dynamic memory the server is willing to consume]]>
+
+
+Description Number of SQL batch requests received by server. This counter measures the number of batch requests that SQL Server receives per second, and generally follows in step to how busy your server's CPUs are. Generally speaking, over 1000 batch requests per second indicates a very busy SQL Server, and could mean that if you are not already experiencing a CPU bottleneck, that you may very well soon. Of course, this is a relative number, and the bigger your hardware, the more batch requests per second SQL Server can handle. From a network bottleneck approach, a typical 100Mbs network card is only able to handle about 3000 batch requests per second. If you have a server that is this busy, you may need to have two or more network cards, or go to a 1Gbs network card.
+
+Note: Sometimes low batch requests/sec can be misleading. If there were a SQL statements/sec counter, this would be a more accurate measure of the amount of SQL Server activity. For example, an application may call only a few stored procedures yet each stored procedure does lot of work. In that case, we will see a low number for batch requests/sec but each stored procedure (one batch) will execute many SQL statements that drive CPU and other resources. As a result, many counter thresholds based on the number of batch requests/sec will seem to identify issues because the batch requests on such a server are unusually low for the level of activity on the server.
+
+We cannot conclude that a SQL Server is not active simply by looking at only batch requests/sec. Rather, you have to do more investigation before deciding there is no load on the server. If the average number of batch requests/sec is below 5 and other counters (such as SQL Server processor utilization) confirm the absence of significant activity, then there is not enough of a load to make any recommendations or identify issues regarding scalability.
+
+Note: Batch requests / sec is a great counter to use for baselining and to use as a measurement of how many batches the system could handle before a symptom was evident or a particular condition occurred. This counter will greatly depend on SQL Server code and the hardware being used. It is often used as a gauge of saying that a particular system was able to handle x number of batch requests per second and then to examine system and SQL Server counters to determine what resource is the bottleneck at that particular workload.]]>
+
+
+
+Description: Number of SQL compilations that occurred per second that includes recompiles. A high value subtracting recompiles can be an indication of a large number of ad hoc queries that can also be cross referenced with the number of ad hoc plans in the plan cache counter.
+
+Be aware of the following:
+
+Reference
+SQL Server, Plan Cache Object
+http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
+
+
+
+]]>
+
+
+
+Description: Number of SQL re-compiles per second that measures the number of times that a statement executed, but had to be compiled again before the statement completed. There are a variety of reasons that a recompile occurred such as statistics being out of date, a column was added to a table a stored procedure depends on, statement was run with a recompile option, etc. This counter needs to be as close to 0 as possible. A recompile can cause deadlocks and compile locks that are not compatible with any locking type.
+
+SQL Server Trace / Profiler provides an excellent way to find out exactly why recompiles are occurring in your environment.
+
+Troubleshooting stored procedure recompilation
http://support.microsoft.com/kb/243586
+How to identify the cause of recompilation in an SP:Recompile event
+http://support.microsoft.com/kb/308737]]>
+
+
+
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
+
+This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
+
+Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
+
Threshold:
+
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
+
Formula:
+(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
+
Next Steps:
+The main causes of high Full Scans/sec are:
+
Reference:
+
SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
SQL Server 2005 Waits and Queues
+http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
+
Wait Types and Correlation to Other Performance Info
+http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
+
+Description: Number of new locks and lock conversions requested from the lock manager. This value should tie close to the number of Batch Requests per second. Values greater than 1000 may indicate queries are pulling large volumes of data thereby accessing large numbers of rows
+
+Threshold
+
+Yellow Greater than > 1000 Lock Requests / sec]]>
+
Description: Number of lock requests that could not be satisfied immediately and required the caller to wait before being granted the lock. This is a sign that there is some blocking occurring and would be a good baseline measurement of lock waits for load testing.
+
Note: Lock waits are not recorded until after the lock event completes. For examining active blocking it is recommended to query sys.dm_os_waiting_tasks.
+
+Threshold
+Yellow Values greater than 0]]>
+
+Description: Number of lock requests that timed out. This does not include requests for NOWAIT locks. A value greater than zero might indicate that user queries are not completing.
+
+Threshold
+Yellow Greater than >1
+
+]]>
+
+Description:
+Number of lock requests, per second, which resulted in a deadlock. Deadlocks are always an issue that should be resolved. A deadlock transaction that is killed must be rerun. It is recommended to use the SQL Trace deadlock graph, trace flag 1222, and the extended events deadlock capture to help identify and solve all of the deadlocks in your environment.
+
+Threshold
+Red Any Deadlocks greater than 0
+
+Resources
+
Bart Duncan Deadlock Resources
+Getting historical deadlock info using extended events
+http://www.sqlskills.com/BLOGS/PAUL/post/Getting-historical-deadlock-info-using-extended-events.aspx]]>
+Total latch wait time (milliseconds) for latch requests that had to wait in the last second.]]>
+
+
+Description:
+% Processor Time is the percentage of elapsed time that all of the process threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this counter.
+
+This counter measures the percentage of total processor time spent (user mode and kernel mode) on SQL Server process threads. If this counter stays at 80% for sustained periods of time, then you may also wish to investigate other Process (sqlservr) such as Private Bytes, Virtual Bytes, and Working Set to get a better understanding of how SQL Server allocates certain segments of memory.
+
+
+Threshold:
+
Red: SQL Server is using more than 30% user mode CPU usage
+
+
+Reference:
+
+Monitoring CPU Usage
+http://msdn.microsoft.com/en-us/library/ms178072.aspx
+
Ask the Performance Team
+http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
+
Clint Huffman's Windows Troubleshooting in the Field Blog
+http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
+Total number of processes per second that have successfully acquired a workspace memory grant. This counter should be used as a baseline for comparisons under load.]]>
+Total amount of memory granted to executing processes. This memory is used for hash, sort and create index operations.]]>
+Total amount of memory granted to executing processes. This memory is used primarily for hash, sort and create index operations.]]>
+
+
+
+
+
+Number of auto-parameterization attempts.]]>
+Number of failed auto-parameterizations.]]>
+Number of unsafe auto-parameterizations.]]>
+
+Description: Number of requests that had to wait for a free page.
+
+Free list stalls/sec is the frequency with which requests for available database pages are suspended because no buffers are available. Free list stall rates of greater than 2 per second indicate too little SQL memory available.
+
+
Reference
+Threshold
+Yellow - Free list stalls/sec > 2
+SQL Server, Buffer Manager Object
+
+http://technet.microsoft.com/en-us/library/ms189628.aspx
+]]>
+Description
+
+Number of pages, per second, flushed by checkpoint or other operations that require all dirty pages to be flushed. The checkpoint frequency can be due to low memory conditions as well as the recovery interval set by sp_configure.
+
+Reference
+
+SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+
+A SQL Server DBA myth a day: (15/30) checkpoint only writes pages from committed transactions
+
http://www.sqlskills.com/BLOGS/PAUL/category/Checkpoint.aspx
+
+Database Checkpoints (SQL Server)
+
+http://technet.microsoft.com/en-us/library/ms189573(v=sql.110).aspx]]>
+
Description: Number of pages read, in second, in anticipation of use which is an optimistic physical read. This number should not exceed greater than 20% of total page reads.
+Threshold:
+
Yellow:< 20% of Page Reads/sec
+
]]>
Description: Number of pages used for miscellaneous server purposes (including procedure cache). This counter shows how many pages were taken from the buffer pool to accommodate non-buffer pool needs such as plan cache, procedure cache, the optimizer, workspace memory, etc. This counter should be baselined and can be analyzed by comparing this counter to the amount of buffer pool space and large requests that are hitting the SQL Server instance.
+
+
Note: DBCC MEMORYSTATUS can also be leveraged to examine the impact of stolen memory to the buffer pool.
+
Note: The lazywriter process is not permitted to flush Stolen buffers out of the buffer pool.
+
+
Reference:
+SQL Server, Buffer Manager Object
+
+http://technet.microsoft.com/en-us/library/ms189628(v=sql.105).aspx
+INF: Using DBCC MEMORYSTATUS to Monitor SQL Server Memory Usage
+
+http://support.microsoft.com/kb/271624]]>
+
+Feature usage since last SQL Server startup
+
+You can also examine performance counters through the sys.dm_os_performance_counters DMV. By using the perfmon counters for deprecation and the DMVs, you can help your application prepare and avoid issue when migrating to the future versions of SQL Server.
+
+SELECT * FROM sys.dm_os_performance_counters
+WHERE object_name LIKE '%Deprecated Features%'AND cntr_value > 0
+ORDER BY cntr_value DESC
+]]>
+
+Number of attentions per second. Attentions are the number of user cancels and query timeouts that occurred per second. A high number of attentions may indicate slow query performance as users are cancelling queries.]]>
+Description Ratio between cache hits and lookups
+
+The Plan Cache object provides counters to monitor how SQL Server uses memory to store objects such as stored procedures, ad hoc and prepared Transact-SQL statements, and triggers. Multiple instances of the Plan Cache object can be monitored at the same time, with each instance representing a different type of plan to monitor.
+
Compiled Plan Stubs & Plan Cache Perf Counters:
+
+In SQL Server 2008 R2, there are three options that can help in dealing with plan cache pollution issues.
+
+Contributor(s):
+
+Reference:
+SQL Server, Plan Cache Object
+http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
+
+
+
+
+]]>
+The average amount of wait time (milliseconds) for each lock request that resulted in a wait. This wait could indicate excessive blocking that can be verified by querying sys.dm_os_waiting_tasks. Compare this counter to "Lock Waits/sec" and look for trends.
+
+Threshold
+Yellow Greater than >500 Average Wait Time.
]]>
+
+
Description: Percentage of work tables created where the initial two pages of the work table were not allocated but were immediately available from the work table cache.
+
+In SQL Server 2005 worktable caching was improved. When a query execution plan is cached, the work tables needed by the plan are not dropped across multiple executions of the plan but merely truncated. In addition, the first nine pages for the work table are kept. In SQL Server 2000, the work tables used during query plan execution are dropped. Because the work table is cached, the next execution of the query is faster. When the system is low on memory, the execution plan may be removed from the cache and the associated work tables dropped as well. Both SQL Server 2000 and SQL Server 2005 use a small global pool of pre-allocated pages and extents that make the initial creation of work tables faster.
+
+Note: When a work table is dropped, two pages may remain allocated and they are returned to the work table cache. A value less than 90% may indicate insufficient memory, since execution plans are being dropped, or may indicate, on 32-bit systems, the need for an upgrade to a 64-bit system.
+
+
Threshold:
+
Yellow: Less than 90% Worktables from Cache Ratio. This will need to be baselined for accuracy.
+
+
Reference:
+SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426(v=sql.105).aspx]]>
+
Description:
+The number of times locks on a table were escalated from page- or row-level to table-level. Frequent or even occasional spiking in this value may indicate poorly coded transactions.
+
+
+Lock Escalation Thresholds
+
+Lock escalation is triggered when lock escalation is not disabled on the table by using the ALTER TABLE SET LOCK_ESCALATION option, and when either of the following conditions exists:
+
+
+If locks cannot be escalated because of lock conflicts, the Database Engine periodically triggers lock escalation at every 1,250 new locks acquired.
+
+Next Steps
+
Reducing Locking and Escalation
+In most cases, the Database Engine delivers the best performance when operating with its default settings for locking and lock escalation. If an instance of the Database Engine generates a lot of locks and is seeing frequent lock escalations, consider reducing the amount of locking by:
+
Using an isolation level that does not generate shared locks for read operations.
+
+
+Note: Changing the isolation level affects all tables on the instance of the Database Engine.
+
+
+You can also use trace flags 1211 and 1224 to disable all or some lock escalations. For more information, see Trace Flags (Transact-SQL). Also, monitor lock escalation by using the SQL Server Profiler Lock:Escalation event; and see Using SQL Server Profiler.
+
+Reference:
+Lock Escalation (Database Engine) -
http://msdn.microsoft.com/en-us/library/ms184286(SQL.105).aspx
+
+]]>
+The free space in tempdb in KB.]]>
+The longest running time of any transaction in seconds. This counter could indicate a long running statement pulling large amounts of data that normally takes a long time to execute or potentially a blocking condition.]]>
+The total number of active non-snapshot transactions that generate version records. These are all of the non-snapshot isolation versions such as triggers and online indexing.
+
+Note: The sum of Update Snapshot Transactions and NonSnapshot Version Transactions represents the total number of transactions that participate in version generation. The difference of Snapshot Transactions and Update Snapshot Transactions reports the number of read-only snapshot transactions.
+
+Reference:
+http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
+http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
+Managing TempDB in SQL Server: TempDB Basics (Version Store: Why do we need it?)
+http://blogs.msdn.com/b/sqlserverstorageengine/archive/2008/12/22/managing-tempdb-in-sql-server-tempdb-basics-verison-store.aspx
+]]>
+The total number of active snapshot transactions.]]>
+Description
+
+The version cleanup rate in KB per seconds.
+
+Monitors the version cleanup rate in KBps in all version stores. If the version cleanup rate is lower than the version generation rate, the version store will use more and more space in tempdb. However, if the version cleanup rate is 0 but the version generation rate is not, there is probably a long-running transaction that is preventing the version store cleanup.
+
+Row versions are shared across sessions. The creator of the row version has no control over when the row version can be reclaimed. You will need to find and then possibly stop the longest-running transaction that is preventing the row version cleanup.
+
The following query returns the top two longest-running transactions that depend on the versions in the version store:
+
+select top 2
+ transaction_id,
+ transaction_sequence_num,
+ elapsed_time_seconds
+from sys.dm_tran_active_snapshot_database_transactions
+order by elapsed_time_seconds DESC
+
+Reference
+
+Row Versioning Resource Usage
+http://msdn.microsoft.com/en-us/library/ms175492.aspx]]>
+Description: The version generation rate in KB per seconds.
+
+You can use the Version Generation Rate and Version Cleanup Rate counters to measure version store impact on TempDB. The Version Generation Rate should not outpace the Cleanup Rate. Additionally, if your Version Cleanup Rate is 0, a long-running transaction could be preventing the version store cleanup. Incidentally, before generating an out-of-tempdb-space error, SQL Server 2008 makes a last-ditch attempt by forcing the version stores to shrink. During the shrink process, the longest-running transactions that have not yet generated any row versions are marked as victims. This frees up the version space used by them. Message 3967 is generated in the error log for each such victim transaction. If a transaction is marked as a victim, it can no longer read the row versions in the version store or create new ones. Message 3966 is generated and the transaction is rolled back when the victim transaction attempts to read row versions. If the shrink of the version store succeeds, more space is available in tempdb. Otherwise, tempdb runs out of space.
+
+If TempDB fills and runs out of space, writes will continue, but versions will not and reads will fail.
+
+Reference
+SQL Server, Transactions Object
+http://technet.microsoft.com/en-us/library/ms189038.aspx]]>
+
+
+
+Plan re-use is desirable for OLTP workloads because re-creating the same plan (for similar or identical transactions) is a waste of CPU resources.
+
To compute the plan re-use rate, compare SQL Server SQL Statistics: batch requests/sec to SQL compilations/sec.
+
Special exception to the plan re-use rule is that zero (or trivial) cost plans will not be cached (not re-used) in SQL 2005 SP2 and above.
+
Applications that use zero cost plans will have a lower plan re-use but this is not a performance issue, because it is cheaper to generate a new plan every time than to cache.
+
Reference:
+
Execution Plan Caching and Reuse
+
http://msdn.microsoft.com/en-us/library/ms181055.aspx
+
Top SQL Server 2005 Performance Issues for OLTP Applications
+
http://technet.microsoft.com/en-us/library/cc966401.aspx
+]]>
+A number from 1-5 indicating the current memory state of the server.
+
+
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
+Number of shrink notifications the server issued in the last second. Indicates how often the server believes it is under memory pressure.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
+Number of bytes the server requested to shrink.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
+
+
+
+Number of seconds a page will stay in the buffer pool without references.
+
+When we are using NUMA architecture, which is becoming more common, you will see memory nodes. We have one memory node per NUMA node, and this is used to allocate memory in a particular node. This is visible in the SQL Server Buffer Node perfmon group. If you want to make sure you are performing local memory access rather than foreign memory access, you need to pay attention to where the memory is being allocated, which can be tracked via sys.dm_os_memory_nodes.
+
+If we do not have enough memory in a particular NUMA node, we will perform a foreign access if we have to, but SQL Server tries to avoid this.
+
+Note: See "SQLServer_Buffer Manager Page Life Expectancy" for details.
+]]>
+
+Number of pages which are not from NUMA-local memory.
+
+When we are using NUMA architecture, which is becoming more common, you will see memory nodes. We have one memory node per NUMA node, and this is used to allocate memory in a particular node. This is visible in the SQL Server Buffer Node perfmon group. If you want to make sure you are performing local memory access rather than foreign memory access, you need to pay attention to where the memory is being allocated, which can be tracked via sys.dm_os_memory_nodes.
+
+If we do not have enough memory in a particular NUMA node, we will perform a foreign access if we have to, but SQL Server tries to avoid this.
+
+Reference:
+http://msdn.microsoft.com/en-us/library/ms345597(v=sql.105).aspx]]>
+The amount of memory that is in use. This may include both physical and virtual memory. When this number is larger than the amount of physical memory, the Buffers Spooled count rises as an indication that memory swapping is increasing. Increased memory swapping slows performance of the data flow engine.
+]]>
+
+The number of buffers spooled to disk.
+
+“Buffers spooled†has an initial value of 0. When it goes above 0, it indicates that the engine has started memory swapping.
+
+In this case, it is recommended to set Data Flow Task properties BLOBTempStoragePath and BufferTempStoragePath appropriately for maximal I/O bandwidth.
+
+
+
+Reference:
+
http://msdn.microsoft.com/en-us/library/ms137622.aspx
+
http://msdn.microsoft.com/en-us/library/dd795224(v=SQL.100).aspx
+]]>
The number of flat memory buffers in use throughout the pipeline.
+
+"Buffers in use", "Flat buffers in use" and "Private buffers in use" are useful to discover leaks. During package execution, you will see these counters fluctuate. But once the package finishes execution, their values should return to the same value as what they were before the execution. Otherwise, buffers are leaked. In cases like that, it is recommended to contact Microsoft PSS.
+
+“Rows read†and “Rows written†show how many rows the entire Data Flow has processed. They give you an overall idea about the execution progress.
+]]>
+The number of pipeline buffers in use throughout the pipeline.
+
+"Buffers in use", "Flat buffers in use" and "Private buffers in use" are useful to discover leaks. During package execution, you will see these counters fluctuate. But once the package finishes execution, their values should return to the same value as what they were before the execution. Otherwise, buffers are leaked. In cases like that, it is recommended to contact Microsoft PSS.
+
+“Rows read†and “Rows written†show how many rows the entire Data Flow has processed. They give you an overall idea about the execution progress.
+]]>
+The number of private transformation buffers in use throughout the pipeline.
+
+"Buffers in use", "Flat buffers in use" and "Private buffers in use" are useful to discover leaks. During package execution, you will see these counters fluctuate. But once the package finishes execution, their values should return to the same value as what they were before the execution. Otherwise, buffers are leaked. In cases like that, it is recommended to contact Microsoft PSS.
+
+“Rows read†and “Rows written†show how many rows the entire Data Flow has processed. They give you an overall idea about the execution progress.
+]]>
+Number of reports that are currently active and being handled by the report server. Use this counter to evaluate caching strategy. There might be significantly more requests than reports generated.
+
+Performance Counters for the MSRS 2008 R2 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.105).aspx]]>
Number of requests per second that failed to return a report from cache. Use this counter to find out whether the resources used for caching (disk or memory) are sufficient.
+
+Performance Counters for the MSRS 2008 R2 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.105).aspx]]>
+Total number of cache misses against the in-memory cache after the service started. This counter resets when the application domain recycles.
+
+Performance Counters for the MSRS 2008 R2 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.105).aspx]]>
+Total number of reports that ran successfully after the service started. This counter resets when the application domain recycles.
+
+Performance Counters for the MSRS 2008 R2 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.105).aspx]]>
+The total number of errors that occur during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
+The total number of errors that occur per second during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.105).aspx]]>
+The number of Jobs that have failed to complete successfully for any reason since the last SQL Server Agent restart.]]>
+Number of running jobs. This counter can be used to find out if the current load on the system is potentially being driven from SQL Server Agent execution.]]>
+Percentage of successful jobs from the total number of executed jobs.]]>
+The number of Jobs that have successfully completed since the last SQL Server Agent restart.]]>
+Number of active steps.]]>
+The total number of times any Job Step execution is retried since the last SQL Server restart.]]>
-
-
-
-
-
-Description: % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
-
-Privileged or kernel mode is the processing mode that allows code to have direct access to all hardware and memory in the system. I/O operations and other system services run in privileged (kernel) mode; user applications run in user mode. Unless the processes are graphics-intensive or I/O-intensive such as file and print services, most applications should not be processing much work in kernel mode.
-Privileged mode corresponds to the percentage of time the processor spends on execution of Microsoft Windows kernel commands, such as processing of SQL Server I/O requests. If this counter is consistently high when the Physical Disk counters are high, consider focusing on improving the disk subsystem.
-
-It is recommended to look for comparitive trends with other processes, work loads, error counts, and other behaviors to find what is driving Privileged Time.
-
-Note: Different disk controllers and drivers use different amounts of kernel processing time. Efficient controllers and drivers use less privileged time, leaving more processing time available for user applications, increasing overall throughput.
-
-
-Threshold:
-
-Yellow: SQL Server is using more than 20% Privileged (kernel) mode CPU usage
-
Red: SQL Server is using more than 30% Privileged (kernel) mode CPU usage
-
Next Steps:
-The key piece to diagnosing high processor conditions is to determine the ratio of privileged mode to user mode CPU.
-The counter '\Processor\% Processor Time' is the sum of '\Processor\% Privileged Time' and '\Processor\% User Time'. If Privileged Time is pushing the %Processor Time higher then it is due to processes executing in kernel mode. If '% User Time' is causing the % Processor Time to be higher then it is likely a user mode process that is causing the pressure.
-If %Privileged Time is consistently high or shows high under load, it could be several issues. The most common reason for high %Privileged Time is disk pressure which can be measured by correlating this counter with Physical Disk reads / sec and Physical Disk writes / sec. If these are also high you may also see a high number of Page Latch Waits for SQL Server which can be measured by examining the sys.dm_os_wait_stats dynamic management view and the perfmon SQL Server:Wait Statistics perfmon counters.
-
-If SQL Server Memory Manager: Page Life Expectancy is also low try to address by reducing the number of queries that are performing a high number of logical reads by adding indexes, ensuring that statistics are up to date, and potentially rewriting the query.
-
-You could add more physical RAM to help raise Page Life Expectancy if it is low (lower than your baseline, or critical when under 300) although we only recommend adding memory as an absolute last resort. We first recommended addressing design and addressing poor indexing first. Adding physical RAM only masks the real issue.
-
-The other potential reasons for high privileged mode are related to out of date drivers, BIOS being out of date, failing components, processes that run in kernel mode such as anti-virus, and other potential issues.
-
-Reference:
-
-Monitoring CPU Usage
-http://msdn.microsoft.com/en-us/library/ms178072.aspx
-
Ask the Performance Team
-http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
-
Clint Huffman's Windows Troubleshooting in the Field Blog
-http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
-
-
-
-
Description: Rows with varchar columns on tables without a clustered index can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
-Forwarded records occur when a data record in a heap increases in size and the record's current page does not have the space to store the size increase. The record is moved to a new location, becoming a forwarded record, and the forwarding record is left in the original location to point to the real location of the record. The forwarded record points back to the forwarding record in case its location ever needs to change again.
-Access Methods Forwarded Records/sec measures the number of records accessed through forwarded record pointers which are due to tables without a clustered index. A forwarded record is basically a pointer. For instance, if you start with a short row, and update the row creating a wider row, the row might not fit on the data page. A pointer is put in its location and the row is forwarded to another page. Forwarding Records are used as a performance optimization so that all the non-clustered indexes on the heap do not have to be altered with the new location of the heap record.
-If a table has lots of forwarded records, scanning the table can be very inefficient.
-Also, rows with varchar columns can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
-Forwarded Records only occurs on heaps which are tables without clustered indexes.
-
Threshold: (Yellow) - This value should not be greater than 10% of the number of Batch Requests/Sec
-
Next Steps:
-
Look at code to determine where the short row is inserted followed by an update.
Forwarded records can be avoided by:
-
Reference:
-
SQL Server Storage Engine
-
http://blogs.msdn.com/sqlserverstorageengine/archive/2006/09/19/761437.aspx
-
Forwarding and forwarded records, and the back-pointer size
-
http://www.sqlskills.com/BLOGS/PAUL/post/Forwarding-and-forwarded-records-and-the-back-pointer-size.aspx
-
sys.dm_db_index_physical_stats (Transact-SQL)
-
http://msdn.microsoft.com/en-us/library/ms188917.aspx
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
-
Description:
-
This counter represents inserts into a table with no physical ordering of the rows. A table with no ordering, without a clustered index, is known as a heap table. Inserts into heaps will require SQL Server to perform freespace scans to identify pages with free space to insert rows. A heap table also requires an additional, internal column called an uniquifier to be generated for each row inserted.
-Extra processing is required to define and store a heap table since SQL Server normally uses the clustered index as a storage mechanism for the table data. Freespace scans have an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when there are many connections inserting.
-It is usually recommended that you physically order the table rows by using a clustered index on the table.
-FreeSpace Scans/sec represents inserts into a table with no physical ordering of its rows which is called a heap. A heap table requires an additional column called an uniquifier to be generated for each row inserted. It is recommended that you physically order the table rows by using a clustered on the table for most tables.
-
-***Also, a heap table requires an additional column called an uniquifier to be generated for each row inserted. It is usually recommended that you physically order the table rows by using a clustered on the table for most tables.
-
-FreeSpace Scans/sec measures the number of scans per second that were initiated to search for free space within pages already allocated to an allocation unit to insert or modify record fragments. Each scan may find multiple pages. FreeSpace Scans are due to inserts into heaps that require SQL Server to perform freespace scans to identify pages with free space to insert rows. Freespace scans are an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when many spids are inserting. The solution is often to add a clustered index for base tables.
-One or more of the following symptoms may accompany poor performance during inserts to a large table on SQL Server:
-
-
Threshold:
-
Yellow: A ratio (10%) or more than 1 freespace scan for every 10 Batch Requests/Sec
-
Next Steps:
-Microsoft recommends that you add a clustered index to the table and test the effect of the clustered index on performance.
-Reference:
-
PRB: Poor Performance on a Heap
-
http://support.microsoft.com/kb/297861
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
-]]>
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
-
-This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
-
-Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also poor disk performance, and / or, high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
-
Next Steps:
-The main causes of high Full Scans/sec are:
-
Note: Identify disk bottlenecks by using Performance Counters, Profiler, sys.dm_io_virtual_file_stats and SHOWPLAN output.
-Also refer to the sys.dm_io_virtual_file_stats dynamic management view (DMV) to track io_stalls to help identify IO bottlenecks.
-To back up and support this information, compare the counters to sys.dm_os_wait_stats output. If you see high values in perfmon, you may also see high waits for the following:
-
Reference:
-
SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
SQL Server 2005 Waits and Queues
-http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
-
Wait Types and Correlation to Other Performance Info
-http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
-
-
-
-
-Description: The number of page splits per second that occurs as the result of overflowing index pages and new page allocations. When a record is inserted into an index, it must be inserted in order. If the data page is full, the page splits in order to maintain the appropriate order. A high value for this counter may warrant the consideration of a lower fill factor and pad_index to leave more empty space per page.
-This value should be as low as possible. Heavily fragmented indexes may be the result of high page splits/sec.
-
Note: A high value for this counter is not bad in situations where many new pages are being created, since it includes all new page allocations as well as splits when a data page spilts.
-
Threshold:
-
Yellow: A ratio of more than 1 page split for every 20 batch requests
-
Next Steps:
-If the number of page splits is high, consider increasing the fillfactor of your indexes. An increased fillfactor helps to reduce page splits by increasing the amount of free space on each page.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426(v=SQL.105).aspx
-
-To track page splits more accurately see the following SQLSkills blog article from Jonathan Kehayias:
-http://www.sqlskills.com/blogs/jonathan/post/Tracking-Problematic-Pages-Splits-in-SQL-Server-2012-Extended-Events-e28093-No-Really-This-Time!.aspx
-]]>
-Description: Scan Point Revalidations occurs during range scans. When a range scan occurs there is an optimization process that occurs where the pages are marked as satisfied with the WHERE predicate that does the range scan.
-Instead of scanning through each and every row in the page, it does not keep an exclusive lock on those pages; instead it just keeps a mark on it and continues with rest of the scan. If one or more rows in the page are modified by update or a delete operation, the update or delete process will notify the scan to recheck the page to see if the page is still valid for the range scan. This recheck is called a Scan Point Revalidation.
-Scan Point Revalidations shows the contention between range scans and modifications to the same pages. This counter also pinpoints hotspots within the cluster table competing between reads and writes.
-Scan Point Revalidations are the number of times per second that the scan point had to be revalidated before the scan could be continued. If a page latch has to be released due to contention, the scan point must be revalidated when the scan resumes.
-
-Note: This is an informative counter. It is not a critical counter that should be used for baselines or alerting.
-
Next Steps: You can correlate the Scan Count Revalidations/sec with the Range Scans/sec counter and Page Latch related counters. The higher the number of range scans on the same pages, the higher the number of scan point revalidations.
-High number of Scan Point Revalidations/sec potentially indicate hot spots in the data, probably due to a poor choice of clustered index putting the most active rows on the same page.
-Consider reducing the number of range scans, isolating reporting and application use, and most importantly ensuring that the clustered index choice is the right one. Clustered indexes should be on columns that are sorted on, grouped on, used in joins, used in between queries, and in other operations where the order of the returned data is critical.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
-
-
-
-Description:
-Number of Workfiles created in the last second. Workfiles in TempDB are used in processing hash operations when the amount of data being processed is too big to fit into the available memory. The Work files are similar to work tables but are created strictly by hashing operations. Workfiles are used to store temporary results for hash joins and hash aggregates.
-Hash joins can require large amounts of memory for execution. As part of executing a hash join, the memory required for the hash can become too large and require a spill to disk. The disk storage to backup the hash operation is called a workfile. Workfiles are collections of extents and pages that are managed strictly by the workfile code.
-
-Threshold:
-
Yellow: Greater than 20 Workfiles created per second
-
-Next Steps:
-Make queries more efficient by adding/changing indexes. Run expensive queries through the Database Tuning Advisor (DTA), look for expensive queries and consider rewriting them, and add as last resort consider adding additional memory.
-
Reference:
-
SQL Server, Access Methods Object
-
http://technet.microsoft.com/en-us/library/ms177426.aspx
-
-Working with tempdb in SQL Server 2005
-
http://msdn.microsoft.com/en-us/library/cc966545.aspx
-
-Troubleshooting Performance Problems in SQL Server 2008
-
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx]]>
-
-
-
Description: Number of worktables created in the last second. The number of work tables created per second. Work tables are temporary objects and are used to store results for query spool, LOB variables, and cursors.
-
-
Threshold:
-
Yellow: Greater than 20 Worktables created per second. This will need to be baselined for accuracy.
-
Next Steps:
Look for expensive statements with high CPU, duration, and statements that run in parallel and tune them by adding indexes, reducing the volume of data being returned, and adding indexes where appropriate.
-Ensure that TempDB is not a bottleneck and is following best practices.
-If you determine that the throughput of your application has degraded because of contention in allocation structures, you can use the following techniques to minimize it.
-Evaluate your application and the query plans to see if you can minimize the creation of work tables and temporary tables. Monitor the perfmon counters as described in Monitoring contention caused by DML operations. Then, use SQL Profiler to correlate the values of these counters with the currently running queries. This helps you identify the queries that are causing the contention in allocation structures.
-Divide TempDB into multiple data files of equal size. These multiple files don't necessarily need to be on different disks/spindles unless you are also encountering I/O bottlenecks as well. The general recommendation is to have one file per CPU because only one thread is active per CPU at one time. SQL Server allocates pages for TempDB objects in a round-robin fashion (also referred to as proportional fill) so that the latches on PFS and SGAM pages are distributed among multiple files. This is supported both in SQL Server 2000 and SQL Server 2005. There are improvements to the proportional fill algorithm in SQL Server 2005.
-Use TF-1118. Under this trace flag SQL Server allocates full extents to each TempDB object, thereby eliminating the contention on SGAM page. This is done at the expense of some waste of disk space in TempDB. This trace flag has been available since SQL Server 2000. With improvements in TempDB object caching since SQL Server 2005, there should be significantly less contention in allocation structures. If you see contention in SGAM pages, you may want to use this trace flag. Cached TempDB objects may not always be available. For example, cached TempDB objects are destroyed when the query plan with which they are associated is recompiled or removed from the procedure cache.
-
-Note:For each release of SQL Server, TempDB has more potential uses such as with SNAPSHOT ISOLATION level, temporary statistics use for read-only databases in SQL Server 2012 and more. It is recommended to keep a close watch on the usage of TempDB and leverage the TF1118 if the data file and sizing best practices do not address allocation bottlenecks.
-
-Additionally consider putting TempDB on local SSD disks in order to maximize disk performance.
-
Reference:
-
SQL Server, Access Methods Object
-
http://technet.microsoft.com/en-us/library/ms177426.aspx
-
Working with TempDB in SQL Server 2005
-
http://msdn.microsoft.com/en-us/library/cc966545.aspx
-
Troubleshooting Performance Problems in SQL Server 2008
-
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx ]]>
-
-
-
Description: The Buffer Cache Hit Ratio measures the percentage of pages that were found in the buffer pool without having to incur a read from disk. This counter indicates how often SQL Server goes to the buffer, not the hard disk, to get data. The higher this ratio, the better. A high ratio, close to 100% indicates that SQL Server did not have to go to the hard disk often to fetch data, and performance overall is boosted. If the Buffer Cache Hit Ratio was 100% that would suggest that all of the pages are being accessed from cache and does not require trips to disk, because of the optimistic read ahead mechanism, this is not exactly the case.
-When a user session wants to read data from the database, it will read directly from the SQL Server buffer cache (a logical read), or, if the buffer cache does not have the data that is requested, the data will be read into the buffer cache from disk (a physical read) and then from the buffer cache. If the requested data is in the buffer cache, then it is called a 'buffer hit'. If the data is not in the buffer cache it is called a 'buffer miss'. The ratio of buffer hits to total buffer requests is called the buffer cache hit ratio as can be seen from the following:
-
Cache Hit Ratio = (Logical Reads - Physical Reads)/Logical Reads
-
A read from memory takes approximately 100 nanoseconds, while a read from disk takes about 8 milliseconds or more.
-1 millisecond = 1,000,000 nanoseconds
-The important point about SQL Server read operations is that when selecting data from the database, the user will wait on the complete read operation including all of the physical reads. The time is takes to select from the database depends on how much data will be read and how long it takes for those reads to occur. Even with cache reads, the time it takes to read a large amount of data can be significant. With physical reads, the time will be even longer.
-There are a few considerations to be aware of regarding the Buffer Cache Hit Ratio counter. First, unlike many of the other counters available for monitoring SQL Server, this counter averages the Buffer Cache Hit Ratio from the time the instance of SQL Server was started. In other words, this counter is not a real-time measurement, but an average. Secondly, the buffer cache hit ratio may be skewed by the read ahead mechanism. Read Ahead Reads are pages that were read into cache while the query was processed. Read aheads are an optimistic form of physical reads. Because of the read ahead mechanism, you should not infer from a high buffer cache hit ratio that SQL Server is not suffering from memory pressure or at least could not benefit from additional memory.
-
-
Threshold:
-
Yellow: Less than 97 percent buffer cache hit ratio
-
Red: Less than 90 percent buffer cache hit ratio
-
Next Steps:
-
Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for queries with a high number of logical reads and consider tuning and potentially rewriting them.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
-
-
-Description: Total number of pages on all free lists. The more free pages that are available then the less often the lazy writer will have to fire keeping pages in the buffer pool longer.
-
-
-A value less than 640 (or 5 MB) may indicate physical memory pressure.
-
-
Threshold:
-
Yellow: Less than 640 Free Pages
-
Next Steps:
-Compare the Buffer Manager\Free pages counter to the following:
-
The higher the Buffer Manager\Free pages then the higher the Buffer Manager\Page Life Expectancy should be. If Buffer Manager\Free pages is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-
Reference:
-
SQL Server, Access Methods Object
-
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
-
-
Description:
-The Lazy Writes/sec counter records the number of buffers written each second by the buffer manager's lazy write process. This counter tracks how many times a second that the Lazy Writer process is moving dirty pages from the buffer to disk in order to free up buffer space. This process is where the dirty, aged buffers are removed from the buffer by a system process that frees the memory up for other uses. A dirty, aged buffer is one that has changes and needs to be written to the disk. High value on this counter possibly indicates I/O issues or even SQL Server memory problems. The Lazy writes / sec values should consistently be less than 20 for the average system.
-Generally speaking, this should not be a high value, say more than 20 per second or so. Ideally, it should be close to zero. If it is zero, this indicates that your SQL Server's buffer cache is plenty big and SQL Server doesn't have to free up dirty pages, instead waiting for this to occur during regular checkpoints. If this value is high, then a need for more memory is indicated.
-
-
-Note: NUMA will increase the number of lazy writer threads per NUMA node and influence the behavior of the lazy writer by increasing its execution at this view. If the server is a NUMA environment other signs of memory pressure should be used and you should analyze the Buffer Node counters for Page Life Expectancy per node. There is not a lazy writer counter in Buffer Nodes.
-
-Threshold:
-
Red: Greater than 20 Lazy Writes per second
-
-
-
-
-Next Steps:
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters, because SQL Server 2005 and later flush pages out of the buffer pool cache under memory pressure.
-
-
-Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
-
-Configure SQL Server to Use Soft-NUMA
-http://msdn.microsoft.com/en-us/library/ms345357.aspx]]>
-
-
-
-
-
-Description:
-Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
-
-When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
-
-The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
-
-Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed; a NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
-
Threshold:
-
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
-
Red: Page life expectancy is less than 5 minutes (300 seconds)
-
-
Next Steps:
-If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters, because SQL Server 2005 / 2008 and later flush pages out of the buffer pool cache under memory pressure.
-Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
-
-
Reference:
-
-
SQL Server, Access Methods Object
-
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
Description:
-Number of requests to find a page in the buffer pool. When the ratio of page lookups to batch requests is greater than 100, this is an indication that while query plans are looking up data in the buffer pool, these plans are inefficient or there was a large number of ad-hoc queries.
-
-
-Threshold:
-Ratio of Page Lookups/sec to Batch Requests/sec < 100 to 1.
-
Warning: Page life expectancy is less than 5 minutes (300 seconds)
-
Next Steps:
-Page Lookups/sec is the number of requests to find a page in the buffer pool made per second. If this number is high as compared to the number of batch requests, this indicates a degree of inefficiency and a potential opportunity for tuning.
-Identify queries with the highest amount of logical I/O's and tune them.
-
Note: You can track the Page Lookups/sec and other counters through the sys.dm_os_performance_counters DMV which contains all the SQL Server instance object-related counters that you can find in perfmon.
-
Reference:
-
SQL Server, Buffer Manager Object
-http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description: Number of physical database page reads issued per second. Number of physical database page reads issued. 80 to 90 per second is normal, anything that is above indicates indexing or memory constraint.
-
Threshold:
-
Yellow: Page Reads/sec > 90
-
Next Steps:
-Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
-If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
-Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
-Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
-
Reference:
-
SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description: Number of physical database page writes issued per second. 80 to 90 per second is normal. Anything above 90, it is recommended to check the lazy writer/sec and Checkpoint pages/sec counter, if these counters are also relatively high then, this indicates a memory constraint.
-
-Threshold:
-
Yellow: Page Writes/sec > 90
-
Next Steps:
-Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
-If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
-Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
-Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
-
Reference:
-
SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description:
-
Login and logout rates should be approximately the same. A login rate higher than the logout rate suggests that the server is not in a steady state, or that applications are not correctly using connection pooling. This could result in an increased load on the server.
-
Next Steps:
-
Verify if the .NET connection string is using the pooling=true and connection reset=true parameters.
-If so, a profiler trace with the Audit login and Audit logout Events would reveal the usage of sp_reset_connection stored procedure, which is used by SQL Server to support remote stored procedure calls in a transaction.
-This stored procedure also causes Audit Login and Audit Logout events to fire when a connection is reused from a connection pool.
-Also, the EventSubClass column in the trace will show if the connections are being pooled or not.
-Therefore focus the comparison only on the rate of non-pooled Logins and Logouts, as pooled connections will be reflected in the Logins/sec counter, but not on the Logouts/sec counter.
-
Reference:
-
SQL Server 2012 Books Online: SQL Server: General Statistics Object
-
http://technet.microsoft.com/en-us/library/ms190697(v=sql.110).aspx
-
SQL Server Connection Pooling
-
http://msdn.microsoft.com/en-us/library/8xx3tyca.aspx
-
SQL Server 2012 Books Online: Audit Login Event Class
-
http://msdn.microsoft.com/en-us/library/ms190260(v=sql.110).aspx
-]]>
-
-
-Total number of logouts started per second. Greater than 2 per second indicates that the application is not correctly using connection pooling.]]>
-
-Number of users connected to the system. The number of users currently connected to the SQL Server. This should correlate with the Batch Requests per second counter.]]>
-
-Number of latch requests that could not be granted immediately and had to wait before being granted. These are the amount of latches that had to wait.]]>
-
-
-Current number of processes waiting for a workspace memory grant. Memory Grants Pending records the number of connections that are waiting for memory before they can begin processing a memory intensive query such as a sort or hash operation. Connections that wait in this state for a long enough time will eventually receive an 8645 error (A time out occurred while waiting for memory resources to execute the query. Rerun the query). A spid waiting in this state will have a waittype of 0x0040 (RESOURCE_SEMAPHORE) in sysprocesses. If this counter remains above zero for any significant amount of time then you will need to track down what queries are doing sorts/hashes and run them through Database Tuning Advisor (DTA) to see if they can get a more efficient plan.
-
-
-Threshold:
-
Red: Numbers higher than 0 indicate a lack of memory.]]>
-
-
-
-Total amount of dynamic memory the server is willing to consume]]>
-
-
-Description: Number of SQL batch requests received by server. This counter measures the number of batch requests that SQL Server receives per second, and generally follows in step to how busy your server's CPUs are. Generally speaking, over 1000 batch requests per second indicates a very busy SQL Server, and could mean that if you are not already experiencing a CPU bottleneck, that you may very well soon. Of course, this is a relative number, and the bigger your hardware, the more batch requests per second SQL Server can handle. From a network bottleneck approach, a typical 100Mbs network card is only able to handle about 3000 batch requests per second. If you have a server that is this busy, you may need to have two or more network cards, or go to a 1Gbs network card.
-
-Note: Sometimes low batch requests/sec can be misleading. If there were a SQL statements/sec counter, this would be a more accurate measure of the amount of SQL Server activity. For example, an application may call only a few stored procedures yet each stored procedure does lot of work. In that case, we will see a low number for batch requests/sec but each stored procedure (one batch) will execute many SQL statements that drive CPU and other resources. As a result, many counter thresholds based on the number of batch requests/sec will seem to identify issues because the batch requests on such a server are unusually low for the level of activity on the server.
-
-We cannot conclude that a SQL Server is not active simply by looking at only batch requests/sec. Rather, you have to do more investigation before deciding there is no load on the server. If the average number of batch requests/sec is below 5 and other counters (such as SQL Server processor utilization) confirm the absence of significant activity, then there is not enough of a load to make any recommendations or identify issues regarding scalability.
-
-Note: Batch requests / sec is a great counter to use for baselining and to use as a measurement of how many batches the system could handle before a symptom was evident or a particular condition occurred. This counter will greatly depend on SQL Server code and the hardware being used. It is often used as a gauge of saying that a particular system was able to handle x number of batch requests per second and then to examine system and SQL Server counters to determine what resource is the bottleneck at that particular workload.]]>
-
-
-
-Description: Number of SQL compilations that occurred per second that includes recompiles. A high value subtracting recompiles can be an indication of a large number of ad hoc queries that can also be cross referenced with the number of ad hoc plans in the plan cache counter.
-
-Be aware of the following:
-
-Reference
-SQL Server, Plan Cache Object
-http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
-
SQL Server Compilation Bottlenecks
-
http://blogs.msdn.com/grahamk/archive/2009/02/03/compilation-bottlenecks-error-8628-severity-17-state-0-part-1.aspx
-
-
-]]>
-
-
-
-Description: Number of SQL re-compiles per second that measures the number of times that a statement executed, but had to be compiled again before the statement completed. There are a variety of reasons that a recompile occurred such as statistics being out of date, a column was added to a table that a stored procedure depends on, statement was run with a recompile option, etc. This counter needs to be as close to 0 as possible. A recompile can cause deadlocks and compile locks that are not compatible with any locking type.
-
-SQL Server Trace / Profiler provides an excellent way to find out exactly why recompiles are occurring in your environment.
-
-Troubleshooting stored procedure recompilation
http://support.microsoft.com/kb/243586
-How to identify the cause of recompilation in an SP:Recompile event
-http://support.microsoft.com/kb/308737]]>
-
-
-
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
-
-This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
-
-Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
-
Threshold:
-
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
-
Formula:
-(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
-
Next Steps:
-The main causes of high Full Scans/sec are:
-
Reference:
-
SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
SQL Server 2005 Waits and Queues
-http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
-
Wait Types and Correlation to Other Performance Info
-http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
-
Description: Lock Requests/sec reports the number of new locks and lock conversions requested from the lock manager per second. A Lock Requests/sec greater than 500 when compared to Batch Request/sec indicates that batches are acquiring a large number of locks.
-This suggests inefficient queries and there is a risk is that blocking may occur.
-Threshold: (Yellow) - This value should not be greater than 50% of the number of Batch Requests/Sec
-
Next Steps:
-
Review high-read queries. In addition, examine the code to determine where to reduce the number of reads by either tuning your application or the database.
-
Reference:
-
SQL Server, Locks Object
-
http://msdn.microsoft.com/en-us/library/ms190216.aspx
-]]>
-
-
-
-
-Description: Number of new locks and lock conversions requested from the lock manager. This value should tie close to the number of Batch Requests per second. Values greater than 1000 may indicate queries are pulling large volumes of data thereby accessing large numbers of rows
-
Reference:
-
SQL Server, Locks Object
-
http://msdn.microsoft.com/en-us/library/ms190216.aspx
-
-Threshold
-
-Yellow: Greater than 1000 Lock Requests / sec]]>
-
Description: Number of lock requests that could not be satisfied immediately and required the caller to wait before being granted the lock. This is a sign that there is some blocking occurring and would be a good baseline measurement of lock waits for load testing.
-
Note: Lock waits are not recorded until after the lock event completes. For examining active blocking it is recommended to query sys.dm_os_waiting_tasks.
-
-Threshold
-Yellow Values greater than 0]]>
Recommendation: Look for peaks that approach or exceed 60 seconds.
-
Even though this counter counts how many total milliseconds SQL Server is waiting on locks over the last second, the counter actually records the lock wait time for a particular lock wait at the end of the locking event.
-
The following methods can be used to reduce lock contention and increase overall throughput:
-
-
-
-
-
-
-Description: Number of lock requests that timed out. This does not include requests for NOWAIT locks. A value greater than zero might indicate that user queries are not completing.
-
-Threshold
-Yellow Greater than 1
-]]>
-
-Description:
-Number of lock requests, per second, which resulted in a deadlock. Deadlocks are always an issue that should be resolved. A deadlock transaction that is killed must be rerun. It is recommended to use the SQL Trace deadlock graph, trace flag 1222, and the extended events deadlock capture to help identify and solve all of the deadlocks in your environment.
-
-Threshold
-Red Any Deadlocks greater than 0
-
-Resources
-
Bart Duncan Deadlock Resources
-Getting historical deadlock info using extended events
-http://www.sqlskills.com/BLOGS/PAUL/post/Getting-historical-deadlock-info-using-extended-events.aspx]]>
Recommendation: Review the wait statistics on the server to find the top resources that the SQL Server is waiting on.
-
Reference:
-
Performance Tuning Waits and Queues
-
http://www.microsoft.com/technet/prodtechnol/sql/bestpractice/performance_tuning_waits_queues.mspx
-]]>
-
-
-
-
Recommendation: Review the wait statistics on the server to find the top resources that the SQL Server is waiting on.
-
Reference:
-
Performance Tuning Waits and Queues
-
http://www.microsoft.com/technet/prodtechnol/sql/bestpractice/performance_tuning_waits_queues.mspx
-]]>
-
-
-
-
-
-Description:
-% Processor Time is the percentage of elapsed time that all of process threads used the processor to execution instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this counter.
-
-This counter measures the percentage of total processor time spent (user mode and kernel mode) on SQL Server process threads. If this counter stays at 80% for sustained periods of time, then you may also wish to investigate other Process (sqlservr) such as Private Bytes, Virtual Bytes, and Working Set to get a better understanding of how SQL Server allocates certain segments of memory.
-
-
-Threshold:
-
Red: SQL Server is using more than 30% user mode CPU usage
-
-
-Reference:
-
-Monitoring CPU Usage
-http://msdn.microsoft.com/en-us/library/ms178072.aspx
-
Ask the Performance Team
-http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
-
Clint Huffman's Windows Troubleshooting in the Field Blog
-http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
-
Description: Total number of processes per second that have successfully acquired a workspace memory grant. This counter should be used as a baseline for comparisons under load.]]>
-
Description: Total amount of memory granted to executing processes. This memory is used for hash, sort and create index operations.]]>
-
Description: Total amount of memory granted to executing processes. This memory is used primarily for hash, sort and create index operations.]]>
-
Description: The cumulative size of all the data files in the database.]]>
-
Description: Total number of log bytes flushed.]]>
-
Description: The cumulative size of all the log files in the database.]]>
-
Description: The cumulative used size of all the log files in the database.]]>
-
-
Description: Total wait time (milliseconds).]]>
-
-
Description: Number of commits waiting on log flush.]]>
-
Description: Number of log flushes.]]>
-
-
Description: Total number of log growths for this database.]]>
-
Description: Total number of log truncations for this database.]]>
-
-
Description: Total number of log shrinks for this database.]]>
-
-
Description: The percent of space in the log that is in use.]]>
-
Description: Number of auto-parameterization attempts.]]>
-
Description: Number of failed auto-parameterizations.]]>
-
Description: Number of safe auto-parameterizations.]]>
-
Description: Number of unsafe auto-parameterizations.]]>
-
Description: System CPU usage by all requests in the specified instance of the performance object.]]>
-
Description: Number of completed requests per second in the workload group.]]>
-
Description: Number of requests waiting in the queue due to resource governor limits in the workload group.]]>
-
Description: System CPU usage by all requests in the specified instance of the performance object.]]>
-
Description: Target amount of memory in kilobytes the resource pool is trying to attain based on the settings and server state.]]>
-Used amount of memory in kilobytes in the resource pool.]]>
-
-Description: Number of requests that had to wait for a free page.
-
-Free list stalls/sec is the frequency with which requests for available database pages are suspended because no buffers are available. Free list stall rates of greater than 2 per second indicate too little SQL memory available.
-
-
Reference
-Threshold
-Yellow - Free list stalls/sec > 2
-SQL Server, Buffer Manager Object
-
-http://technet.microsoft.com/en-us/library/ms189628.aspx
-]]>
-Description
-
-Number of pages, per second, flushed by checkpoint or other operations that require all dirty pages to be flushed. The checkpoint frequency can be due to low memory conditions as well as the recovery interval set by sp_configure.
-
-Reference
-
-SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-
-A SQL Server DBA myth a day: (15/30) checkpoint only writes pages from committed transactions
-
http://www.sqlskills.com/BLOGS/PAUL/category/Checkpoint.aspx
-
-Database Checkpoints (SQL Server)
-
-http://technet.microsoft.com/en-us/library/ms189573(v=sql.110).aspx]]>
-
-
-
Description: Number of pages read, per second, in anticipation of use, which is an optimistic physical read. This number should not exceed 20% of total page reads.
-Threshold:
-
Yellow: Greater than 20% of Page Reads/sec
-
-http://technet.microsoft.com/en-us/library/ms189628.aspx]]>
-
-Feature usage since last SQL Server startup
-
-You can also examine performance counters through the sys.dm_os_performance_counters DMV. By using the perfmon counters for deprecation and the DMVs, you can help your application prepare and avoid issue when migrating to the future versions of SQL Server.
-
-SELECT * FROM sys.dm_os_performance_counters
-WHERE object_name LIKE '%Deprecated Features%'
AND cntr_value > 0
-ORDER BY cntr_value DESC
-
-SQL Server, Deprecated Features Object
-
-http://technet.microsoft.com/en-us/library/bb510662.aspx]]>
-
-Number of attentions per second. Attentions are the number of user cancels and query timeouts that occurred per second. A high number of attentions may indicate slow query performance as users are cancelling queries.]]>
-
-Number of errors/sec]]>
-Description: Ratio between cache hits and lookups
-
-The Plan Cache object provides counters to monitor how SQL Server uses memory to store objects such as stored procedures, ad hoc and prepared Transact-SQL statements, and triggers. Multiple instances of the Plan Cache object can be monitored at the same time, with each instance representing a different type of plan to monitor.
-
Compiled Plan Stubs & Plan Cache Perf Counters:
-
-In SQL Server 2008 R2, there are three options that can help in dealing with plan cache pollution issues.
-
-Contributor(s):
-
-Reference:
-SQL Server, Plan Cache Object
-http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
-]]>
-The average amount of wait time (milliseconds) for each lock request that resulted in a wait. This wait could indicate excessive blocking that can be verified by querying sys.dm_os_waiting_tasks. Compare this counter to "Lock Waits/sec" and look for trends.
-
-Threshold
-Yellow: Greater than 500 Average Wait Time.
]]>
-
-
Description: Percentage of work tables created where the initial two pages of the work table were not allocated but were immediately available from the work table cache.
-
-Since SQL Server 2005 worktable caching was improved. When a query execution plan is cached, the work tables needed by the plan are not dropped across multiple executions of the plan but merely truncated. In addition, the first nine pages for the work table are kept. In SQL Server 2000, the work tables used during query plan execution are dropped. Because the work table is cached, the next execution of the query is faster. When the system is low on memory, the execution plan may be removed from the cache and the associated work tables dropped as well. Both SQL Server 2000 and SQL Server 2005 use a small global pool of pre-allocated pages and extents that make the initial creation of work tables faster.
-
-Note: When a work table is dropped, two pages may remain allocated and they are returned to the work table cache. A value less than 90% may indicate insufficient memory, since execution plans are being dropped, or may indicate, on 32-bit systems, the need for an upgrade to a 64-bit system.
-
-
Threshold:
-
Yellow: Less than 90% Worktables from Cache Ratio. This will need to be baselined for accuracy.
-
-
Reference:
-SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426(v=sql.110).aspx]]>
-
Description:
-The number of times locks on a table were escalated from page- or row-level to table-level. Frequent or even occasional spiking in this value may indicate poorly coded transactions.
-
-
-Lock Escalation Thresholds
-
-Lock escalation is triggered when lock escalation is not disabled on the table by using the ALTER TABLE SET LOCK_ESCALATION option, and when either of the following conditions exists:
-
-
-If locks cannot be escalated because of lock conflicts, the Database Engine periodically triggers lock escalation at every 1,250 new locks acquired.
-
-Next Steps
-
Reducing Locking and Escalation
-In most cases, the Database Engine delivers the best performance when operating with its default settings for locking and lock escalation. If an instance of the Database Engine generates a lot of locks and is seeing frequent lock escalations, consider reducing the amount of locking by:
-
Using an isolation level that does not generate shared locks for read operations.
-
-
-Note: Changing the isolation level affects all tables on the instance of the Database Engine.
-
-
-You can also use trace flags 1211 and 1224 to disable all or some lock escalations. For more information, see Trace Flags (Transact-SQL). Also, monitor lock escalation by using the SQL Server Profiler Lock:Escalation event; and see Using SQL Server Profiler.
-
-Reference:
-Lock Escalation (Database Engine) -
http://msdn.microsoft.com/en-us/library/ms184286(SQL.105).aspx
-
-]]>
-The free space in tempdb in KB.]]>
-The longest running time of any transaction in seconds. This counter could indicate a long running statement pulling large amounts of data that normally takes a long time to execute or potentially a blocking condition.]]>
-The total number of active non-snapshot transactions that generate version records. These are all of the non-snapshot isolation versions such as triggers and online indexing.
-
-Note: The sum of Update Snapshot Transactions and NonSnapshot Version Transactions represents the total number of transactions that participate in version generation. The difference of Snapshot Transactions and Update Snapshot Transactions reports the number of read-only snapshot transactions.
-
-Reference:
-http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
-http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
-Managing TempDB in SQL Server: TempDB Basics (Version Store: Why do we need it?)
-http://blogs.msdn.com/b/sqlserverstorageengine/archive/2008/12/22/managing-tempdb-in-sql-server-tempdb-basics-verison-store.aspx
-]]>
-The total number of active snapshot transactions.]]>
-Description
-
-The version cleanup rate in KB per seconds.
-
-Monitors the version cleanup rate in KBps in all version stores. If the version cleanup rate is lower than the version generation rate, the version store will use more and more space in tempdb. However, if the version cleanup rate is 0 but the version generation rate is not, there is probably a long-running transaction that is preventing the version store cleanup.
-
-Row versions are shared across sessions. The creator of the row version has no control over when the row version can be reclaimed. You will need to find and then possibly stop the longest-running transaction that is preventing the row version cleanup.
-
The following query returns the top two longest-running transactions that depend on the versions in the version store:
-
-select top 2
- transaction_id,
- transaction_sequence_num,
- elapsed_time_seconds
-from sys.dm_tran_active_snapshot_database_transactions
-order by elapsed_time_seconds DESC
-
-Reference
-
-Row Versioning Resource Usage
-http://msdn.microsoft.com/en-us/library/ms175492.aspx]]>
-Description: The version generation rate in KB per seconds.
-
-You can use the Version Generation Rate and Version Cleanup Rate counters to measure version store impact on TempDB. The Version Generation Rate should not outpace the Cleanup Rate. Additionally, if your Version Cleanup Rate is 0, a long-running transaction could be preventing the version store cleanup. Incidentally, before generating an out-of-tempdb-space error, SQL Server 2008 makes a last-ditch attempt by forcing the version stores to shrink. During the shrink process, the longest-running transactions that have not yet generated any row versions are marked as victims. This frees up the version space used by them. Message 3967 is generated in the error log for each such victim transaction. If a transaction is marked as a victim, it can no longer read the row versions in the version store or create new ones. Message 3966 is generated and the transaction is rolled back when the victim transaction attempts to read row versions. If the shrink of the version store succeeds, more space is available in tempdb. Otherwise, tempdb runs out of space.
-
-If TempDB fills and runs out of space, writes will continue, but versions will not, and reads will fail.
-
-Reference
-SQL Server, Transactions Object
-http://technet.microsoft.com/en-us/library/ms189038.aspx]]>
-
-
-
-Plan re-use is desirable for OLTP workloads because re-creating the same plan (for similar or identical transactions) is a waste of CPU resources.
-
To compute the plan re-use rate, compare SQL Server SQL Statistics: batch requests/sec to SQL compilations/sec.
-
Special exception to the plan re-use rule is that zero (or trivial) cost plans will not be cached (not re-used) in SQL 2005 SP2 and above.
-
Applications that use zero cost plans will have a lower plan re-use but this is not a performance issue, because it is cheaper to generate a new plan every time than to cache.
-
Reference:
-
Execution Plan Caching and Reuse
-
http://msdn.microsoft.com/en-us/library/ms181055.aspx
-
Top SQL Server 2005 Performance Issues for OLTP Applications
-
http://technet.microsoft.com/en-us/library/cc966401.aspx
-]]>
-
-Number of pages which are not from NUMA-local memory.
-
-When we are using NUMA architecture which is becoming more common you will see memory nodes. We have one memory node per NUMA node and this is used to allocate memory in a particular node. This is visible in the SQL Server Buffer Node perfmon group. If you want to make sure you are performing local memory access versus foreign memory access we need to pay attention to where the memory is being allocated which can be tracked via sys.dm_os_memory_nodes.
-
-If we do not have enough memory in a particular NUMA node, we will perform a foreign access if we have to, but SQL Server tries to avoid this.
-
-Reference:
-http://msdn.microsoft.com/en-us/library/ms345597(v=sql.110).aspx]]>
-Maximum amount of memory in kilobytes the resource pool can have based on the settings and server state.]]>
Number of requests per second that failed to return a report from cache. Use this counter to find out whether the resources used for caching (disk or memory) are sufficient.
-
-Performance Counters for the MSRS 2011 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
-Number of reports that are currently active and being handled by the report server. Use this counter to evaluate caching strategy. There might be significantly more requests than reports generated.
-
-Performance Counters for the MSRS 2011 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
-Total number of cache misses against the in-memory cache after the service started. This counter resets when the application domain recycles.
-
-Performance Counters for the MSRS 2011 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
-Total number of reports that ran successfully after the service started. This counter resets when the application domain recycles.
-
-Performance Counters for the MSRS 2011 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
-Number of running jobs. This counter can be used to find out if the current load on the system is potentially being driven from SQL Server Agent execution.]]>
-The number of Jobs that have failed to complete successfully for any reason since the last SQL Server Agent restart.]]>
-Percentage of successful jobs from the total number of executed jobs.
-
-]]>
-The number of Jobs that have successfully completed since the last SQL Server Agent restart.]]>
-Number of active steps.
-]]>
-The total number of times any Job Step execution is retried since the last SQL Server restart.]]>
-Number of AlwaysOn messages sent to this availability replica per second
-]]>
-Number of AlwaysOn messages resent in the last second
-
-
-]]>
-Number of AlwaysOn messages received from the replica per second
-]]>
-Bytes Received from Replica/sec: Number of bytes received from the availability replica per second
-]]>
-Number of bytes sent to the remote availability replica per second
-]]>
-Note: These counters are not defined by default and would be 0 unless configured through SQL Server through the sp_user_counter# stored procedures.]]>
-Number of milliseconds transaction termination waited for acknowledgement per second.
-
-The Replica:Transaction Delay counter measures the primary replica’s wait for acknowledgement that the transaction has committed at the secondary replica database in order to commit its own transaction. Since Asynchronous Commit Mode does not require acknowledgment to commit the transaction, this counter reports 0 when measured against a database in asynchronous commit mode.
-
-When there are multiple secondaries, this is a measure of the total time all transactions waited on the secondary acknowledgement.
-Note: This counter should be viewed on the Primary replica
-]]>
-The amount of log in kilobytes that need to be undone.
-
-Note: This counter should be viewed on the Secondary replica]]>
-Amount of log records redone on the secondary database in the last second.
-
-This counter can be compared to Log Bytes Received/Sec. If Log Bytes Received/Sec trends greater than Redone Bytes/Sec for sustained periods of time, then redo latency is building up between the primary and secondary replicas, which suggests that counter Redo Bytes Remaining and Recovery Queue is growing. This could indicate Redo is the bottleneck.
-
-To measure Recovery Time, divide Recovery Queue by Redone Bytes / Sec.
-
-Note: This counter should be viewed on the Secondary replica]]>
-The amount of log in kilobytes remaining to be redone to finish the reverting phase. If Redo Bytes Remaining counter is trending up, The redo process could be a bottleneck.
-
-Note: This counter should be viewed on the Secondary replica
-
]]>
-Number of times redo gets blocked in the last second]]>
-Amount of log records in the log files of the secondary replica that has not yet been redone.
-
-The Recovery Queue monitors the progress of the redo of flushed pages. If Recovery Queue is trending up, the redo process could be a bottleneck. For AlwaysON, the redo process is single threaded to ensure a consistent read for readable secondaries.
-
-Note: This counter should be viewed on the Secondary replica
-
]]>
-Amount of logs received by the availability replica for the database
-
-Note: This counter should be viewed on the Secondary replica
-]]>
-The amount of log in kilobytes remaining to finish the undo phase.
-
-Note: This counter should be viewed on the Secondary replica
-]]>
Amount of log records in the log files of the primary database, in kilobytes, that has not yet been sent to the secondary availability replica. This value is sent to the secondary availability replica from the primary availability replica.
-
Note: Queue size does not include FileStream files that are sent to a secondary.
-
The log send queue size at any point will give an indication approximately how much log has not been sent in KB. This is the amount of log secondary does not have at the time of failover and the amount of data loss that could be experienced. The log send queue size is also reported in DMV sys.dm_hadr_database_replica_states.log_send_queue_size column in KB.
-
-Note: This counter should be viewed on the Secondary replica
-
-Reference:
-
http://technet.microsoft.com/en-us/library/ff877972.aspx
-
http://www.sqlskills.com/blogs/joe/answering-questions-with-the-alwayson-dashboard/
-
http://support.microsoft.com/kb/2857849]]>
Number of transactions that wrote to the mirrored database and waited for the log to be sent to the mirror in order to commit, in the last second.
-
-This counter is a measure of transactions that are waiting to be hardened to the primary because of Synchronous Availability Mode requiring that they harden at secondary also. When using Asynchronous availability mode this counter is 0.
-
Note: This counter should be viewed on the Primary replica
-
]]>
-Total amount of dynamic memory the server is using for query optimization]]>
-Amount of memory the server is currently using for the purposes other than the database pages.
-
-\SQLServer:Buffer Manager Stolen pages
-
-
Description: Number of pages used for miscellaneous server purposes (including procedure cache). This counter shows how many pages were taken from the buffer pool to accommodate non-buffer pool needs such as plan cache, procedure cache, the optimizer, workspace memory, etc. This counter should be baselined and can be analyzed by comparing this counter to the amount of buffer pool space and large requests that are hitting the SQL Server instance.
-
-
Note: DBCC MEMORYSTATUS can also be leveraged to examine the impact of stolen memory to the buffer pool.
-
Note: The lazywriter process is not permitted to flush Stolen buffers out of the buffer pool.
-
-
Reference:
-SQL Server, Buffer Manager Object
-
-http://technet.microsoft.com/en-us/library/ms189628(v=sql.105).aspx
-INF: Using DBCC MEMORYSTATUS to Monitor SQL Server Memory Usage
-
-http://support.microsoft.com/kb/271624]]>
-Ideal amount of memory the server is willing to consume]]>
-Total amount of dynamic memory the server is currently consuming]]>
-Amount of memory the server is using on this node for database pages.]]>
-Non NUMA-local amount of memory on this node.]]>
-Amount of memory the server is using on this node for the purposes other than database pages.]]>
-Ideal amount of memory for this node.]]>
-Total amount of memory the server has committed on this node.]]>
-Number of lookup requests from this node, which were satisfied from other nodes.
-
-Note: It is recommended to set the maximum degree of parallelism MAXDOP to the number of processors per NUMA node to encourage queries to leverage memory on the local NUMA node though memory can always be used from other NUMA nodes if it is needed. Dynamic Management Views (DMVs) and performance monitor (perfmon) counters can be used to find out the degree local memory and foreign memory is being used.
-Additionally, it is recommended to leverage the SysInternals tool 'CoreInfo' to find out specifically the processors being used, hyperthreading, and the overall NUMA cost between NUMA nodes. Furthermore, it is recommended to configure MAXDOP correctly and monitor foreign memory use, install the latest hotfixes that would affect NUMA configurations, and ensure the latest firmware is installed for the hardware being used in your environment.
-
-References
-CoreInfo
-
http://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
-
Recommendations and guidelines for the "max degree of parallelism" configuration option in SQL Server
-
http://support.microsoft.com/kb/2806535
-]]>
-Number of lookup requests from this node, which were satisfied from this node.
-Note: It is recommended to set the maximum degree of parallelism MAXDOP to the number of processors per NUMA node to encourage queries to leverage memory on the local NUMA node though memory can always be used from other NUMA nodes if it is needed. Dynamic Management Views (DMVs) and performance monitor (perfmon) counters can be used to find out the degree local memory and foreign memory is being used.
-Additionally, it is recommended to leverage the SysInternals tool 'CoreInfo' to find out specifically the processors being used, hyperthreading, and the overall NUMA cost between NUMA nodes. Furthermore, it is recommended to configure MAXDOP correctly and monitor foreign memory use, install the latest hotfixes that would affect NUMA configurations, and ensure the latest firmware is installed for the hardware being used in your environment.
-
-References
-CoreInfo
-
http://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
-Recommendations and guidelines for the "max degree of parallelism" configuration option in SQL Server
-
http://support.microsoft.com/kb/2806535
-]]>
-Database pages on node.
-
-SQL Server:Buffer Node
-
- http://technet.microsoft.com/en-us/library/ms345597.aspx
-]]>
-Number of pages flushed to enforce the recovery interval settings.
-
-When Indirect Checkpoints is enabled at the database level, you will notice a new background thread in sys.dm_exec_requests with the command token "RECOVERY WRITER". There is a single background writer for the SQL Server instance. The background writer performs aggressive flushing of dirty pages based on LSN order and reduces the redo phase recovery time.
-
-The catalog view sys.databases contains a column named target_recovery_time_in_seconds that indicates whether a specific database is using the new Indirect checkpoint algorithm. There is a new performance monitor counter called 'Background writer pages/sec' that exposes the amount of dirty pages processed by the background writer.
-
-SQL Server Books Online contains a discussion about Indirect Checkpoints and how it interacts with the recovery interval setting:
-
-
-Database Checkpoints (SQL Server)
-
-http://msdn.microsoft.com/en-us/library/ms189573(v=sql.110).aspx ]]>
-The ideal number of pages in the Buffer Pool according to the maximum memory granted to SQL Server.]]>
-
-
-
-
-
-Description:
-Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
-
-When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
-
-The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
-
-Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed, A NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
-
Threshold:
-
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
-
Red: Page life expectancy is less than 5 minutes (300 seconds)
-
-
Next Steps:
-If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 starts to flush pages out of the buffer pool cache under memory pressure.
-Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
-
-
Reference:
-
-
SQL Server, Access Methods Object
-
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-Number of active update transactions for the database.]]>
-Read/write throughput for backup/restore of a database.]]>
-KiloBytes bulk copied.]]>
-Number of temporary tables/table variables in use]]>
-Number of temporary tables/table variables created/sec]]>
-Number of temporary tables/table variables waiting to be destroyed by the cleanup system thread]]>
-Number of suboptimal query plans generated per second in the workload group.]]>
-Number of threads used by parallel queries in the workload group. Serial queries and the main thread of parallel queries are not included in this number.]]>
-Number of queries per second getting less than ideal amount of memory in the workload group.]]>
-Number of currently running requests in the workload group.]]>
-The total number of active transactions.]]>
-The size of the version store in KB.]]>
-The total number of errors that occur during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx
-]]>
-The total number of errors that occur per second during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
-A number from 1-5 indicating the current memory state of the server.
-
-
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx
-]]>
-Number of bytes the server requested to shrink.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
-Number of shrink notifications the server issued in the last second. Indicates how often the server believes it is under memory pressure.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
-Actual number of bytes sent per second over the network to the remote availability replica
-]]>
+
+
+
+
+
+Description: % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
+
+Privileged or kernel mode is the processing mode that allows code to have direct access to all hardware and memory in the system. I/O operations and other system services run in privileged (kernel) mode; user applications run in user mode. Unless the processes are graphics-intensive or I/O-intensive such as file and print services, most applications should not be processing much work in kernel mode.
+Privileged mode corresponds to the percentage of time the processor spends on execution of Microsoft Windows kernel commands, such as processing of SQL Server I/O requests. If this counter is consistently high when the Physical Disk counters are high, consider focusing on improving the disk subsystem.
+
+It is recommended to look for comparative trends with other processes, work loads, error counts, and other behaviors to find what is driving Privileged Time.
+
+Note: Different disk controllers and drivers use different amounts of kernel processing time. Efficient controllers and drivers use less privileged time, leaving more processing time available for user applications, increasing overall throughput.
+
+
+Threshold:
+
+Yellow: SQL Server is using more than 20% Privileged (kernel) mode CPU usage
+
Red: SQL Server is using more than 30% Privileged (kernel) mode CPU usage
+
Next Steps:
+The key piece to diagnosing high processor conditions is to determine the ratio of privileged mode to user mode CPU.
+The counter '\Processor\% Processor Time' is the sum of '\Processor\% Privileged Time' and '\Processor\% User Time'. If Privileged Time is pushing the %Processor Time higher then it is due to processes executing in kernel mode. If '% User Time' is causing the % Processor Time to be higher then it is likely a user mode process that is causing the pressure.
+If %Privileged Time is consistently high or shows high under load, it could be several issues. The most common reason for high %Privileged Time is disk pressure which can be measured by correlating this counter with Physical Disk reads / sec and Physical Disk writes / sec. If these are also high you may also see a high number of Page Latch Waits for SQL Server which can be measured by examining the sys.dm_os_wait_stats dynamic management view and the perfmon SQL Server:Wait Statistics perfmon counters.
+
+If SQL Server Memory Manager: Page Life Expectancy is also low try to address by reducing the number of queries that are performing a high number of logical reads by adding indexes, ensuring that statistics are up to date, and potentially rewriting the query.
+
+You could add more physical RAM to help raise Page Life Expectancy if it is low (lower than your baseline, or critical when under 300) although we only recommend adding memory as an absolute last resort. We first recommended addressing design and addressing poor indexing first. Adding physical RAM only masks the real issue.
+
+The other potential reasons for high privileged mode are related to out of date drivers, BIOS being out of date, failing components, processes that run in kernel mode such as anti-virus, and other potential issues.
+
+Reference:
+
+Monitoring CPU Usage
+http://msdn.microsoft.com/en-us/library/ms178072.aspx
+
Ask the Performance Team
+http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
+
Clint Huffman's Windows Troubleshooting in the Field Blog
+http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
+
+
+
+
Description: Rows with varchar columns on tables without a clustered index can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
+Forwarded records occur when a data record in a heap increases in size and the record's current page does not have the space to store the size increase. The record is moved to a new location, becoming a forwarded record, and the forwarding record is left in the original location to point to the real location of the record. The forwarded record points back to the forwarding record in case its location ever needs to change again.
+Access Methods Forwarded Records/sec measures the number of records accessed through forwarded record pointers which are due to tables without a clustered index. A forwarded record is basically a pointer. For instance, if you start with a short row, and update the row creating a wider row, the row might not fit on the data page. A pointer is put in its location and the row is forwarded to another page. Forwarding Records are used as a performance optimization so that all the non-clustered indexes on the heap do not have to be altered with the new location of the heap record.
+If a table has lots of forwarded records, scanning the table can be very inefficient.
+Also, rows with varchar columns can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
+Forwarded Records only occurs on heaps which are tables without clustered indexes.
+
Threshold: (Yellow) - This value should not be greater than 10% of the number of Batch Requests/Sec
+
Next Steps:
+
Look at code to determine where the short row is inserted followed by an update.
Forwarded records can be avoided by:
+
Reference:
+
SQL Server Storage Engine
+
http://blogs.msdn.com/sqlserverstorageengine/archive/2006/09/19/761437.aspx
+
Forwarding and forwarded records, and the back-pointer size
+
http://www.sqlskills.com/BLOGS/PAUL/post/Forwarding-and-forwarded-records-and-the-back-pointer-size.aspx
+
sys.dm_db_index_physical_stats (Transact-SQL)
+
http://msdn.microsoft.com/en-us/library/ms188917.aspx
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
+
Description:
+
+This counter represents inserts into a table with no physical ordering of the rows. A table with no ordering, without a clustered index, is known as a heap table. Inserts into heaps will require SQL Server to perform freespace scans to identify pages with free space to insert rows. A heap table also requires an additional, internal column called a uniquifier to be generated for each row inserted.
+Extra processing is required to define and store a heap table since SQL Server normally uses the clustered index as a storage mechanism for the table data. Freespace scans have an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when there are many connections inserting.
+It is usually recommended that you physically order the table rows by using a clustered index on the table.
+FreeSpace Scans/sec represents inserts into a table with no physical ordering of its rows which is called a heap. A heap table requires an additional column called a uniquifier to be generated for each row inserted. It is recommended that you physically order the table rows by using a clustered index on the table for most tables.
+
+***Also, a heap table requires an additional column called a uniquifier to be generated for each row inserted. It is usually recommended that you physically order the table rows by using a clustered index on the table for most tables.
+
+FreeSpace Scans/sec measures the number of scans per second that were initiated to search for free space within pages already allocated to an allocation unit to insert or modify record fragments. Each scan may find multiple pages. FreeSpace Scans are due to inserts into heaps that require SQL Server to perform freespace scans to identify pages with free space to insert rows. Freespace scans are an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when many spids are inserting. The solution is often to add a clustered index for base tables.
+One or more of the following symptoms may accompany poor performance during inserts to a large table on SQL Server:
+
+
Threshold:
+
Yellow: A ratio (10%) or more than 1 freespace scan for every 10 Batch Requests/Sec
+
Next Steps:
+Microsoft recommends that you add a clustered index to the table and test the effect of the clustered index on performance.
+Reference:
+
PRB: Poor Performance on a Heap
+
http://support.microsoft.com/kb/297861
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
+]]>
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
+
+This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
+
+Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also poor disk performance, and / or, high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
+
Next Steps:
+The main causes of high Full Scans/sec are:
+
Note: Identify disk bottlenecks by using Performance Counters, Profiler, sys.dm_io_virtual_file_stats and SHOWPLAN output.
+Also refer to the sys.dm_io_virtual_file_stats dynamic management view (DMV) to track io_stalls to help identify IO bottlenecks.
+To back up and support this information, compare the counters to sys.dm_os_wait_stats output. If you see high values in perfmon, you may also see high waits for the following:
+
Reference:
+
SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
SQL Server 2005 Waits and Queues
+http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
+
Wait Types and Correlation to Other Performance Info
+http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
+
+
+
+
+Description: The number of page splits per second that occurs as the result of overflowing index pages and new page allocations. When a record is inserted into an index, it must be inserted in order. If the data page is full, the page splits in order to maintain the appropriate order. A high value for this counter may warrant the consideration of a lower fill factor and pad_index to leave more empty space per page.
+This value should be as low as possible. Heavily fragmented indexes may be the result of high page splits/sec.
+
Note: A high value for this counter is not bad in situations where many new pages are being created, since it includes all new page allocations as well as splits when a data page splits.
+
Threshold:
+
Yellow: A ratio of more than 1 page split for every 20 batch requests
+
Next Steps:
+If the number of page splits is high, consider increasing the fillfactor of your indexes. An increased fillfactor helps to reduce page splits by increasing the amount of free space on each page.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426(v=SQL.105).aspx
+
+To track page splits more accurately see the following SQLSkills blog article from Jonathan Kehayias:
+http://www.sqlskills.com/blogs/jonathan/post/Tracking-Problematic-Pages-Splits-in-SQL-Server-2012-Extended-Events-e28093-No-Really-This-Time!.aspx
+]]>
+Description: Scan Point Revalidations occurs during range scans. When a range scan occurs there is an optimization process that occurs where the pages are marked as satisfied with the WHERE predicate that does the range scan.
+Instead of scanning through each and every row in the page, it does not keep an exclusive lock on those pages; instead it just keeps a mark on it and continues with rest of the scan. If one or more rows in the page are modified by update or a delete operation, the update or delete process will notify the scan to recheck the page to see if the page is still valid for the range scan. This recheck is called a Scan Point Revalidation.
+Scan Point Revalidations shows the contention between range scans and modifications to the same pages. This counter also pinpoints hotspots within the cluster table competing between reads and writes.
+Scan Point Revalidations are the number of times per second that the scan point had to be revalidated before the scan could be continued. If a page latch has to be released due to contention, the scan point must be revalidated when the scan resumes.
+
+Note: This is an informative counter. It is not a critical counter that should be used for baselines or alerting.
+
Next Steps: You can correlate the Scan Count Revalidations/sec with the Range Scans/sec counter and Page Latch related counters. The higher the number of range scans on the same pages, the higher the number of scan point revalidations.
+A high number of Scan Point Revalidations/sec potentially indicates hot spots in the data, probably due to a poor choice of clustered index putting the most active rows on the same page.
+Consider reducing the number of range scans, isolating reporting and application use, and most importantly ensuring that the clustered index choice is the right one. Clustered indexes should be on columns that are sorted on, grouped on, used in joins, used in between queries, and in other operations where the order of the returned data is critical.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
+
+
+
+Description:
+Number of Workfiles created in the last second. Workfiles in TempDB are used in processing hash operations when the amount of data being processed is too big to fit into the available memory. The Work files are similar to work tables but are created strictly by hashing operations. Workfiles are used to store temporary results for hash joins and hash aggregates.
+Hash joins can require large amounts of memory for execution. As part of executing a hash join, the memory required for the hash can become too large and require a spill to disk. The disk storage to backup the hash operation is called a workfile. Workfiles are collections of extents and pages that are managed strictly by the workfile code.
+
+Threshold:
+
Yellow: Greater than 20 Workfiles created per second
+
+Next Steps:
+Make queries more efficient by adding/changing indexes. Run expensive queries through the Database Tuning Advisor (DTA), look for expensive queries and consider rewriting them, and add as last resort consider adding additional memory.
+
Reference:
+
SQL Server, Access Methods Object
+
http://technet.microsoft.com/en-us/library/ms177426.aspx
+
+Working with tempdb in SQL Server 2005
+
http://msdn.microsoft.com/en-us/library/cc966545.aspx
+
+Troubleshooting Performance Problems in SQL Server 2008
+
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx]]>
+
+
+
Description: Number of worktables created in the last second. The number of work tables created per second. Work tables are temporary objects and are used to store results for query spool, LOB variables, and cursors.
+
+
Threshold:
+
Yellow: Greater than 20 Worktables created per second. This will need to be baselined for accuracy.
+
Next Steps:
Look for expensive statements with high CPU, duration, and statements that run in parallel and tune them by adding indexes, reducing the volume of data being returned, and adding indexes where appropriate.
+Ensure that TempDB is not a bottleneck and is following best practices.
+If you determine that the throughput of your application has degraded because of contention in allocation structures, you can use the following techniques to minimize it.
+Evaluate your application and the query plans to see if you can minimize the creation of work tables and temporary tables. Monitor the perfmon counters as described in Monitoring contention caused by DML operations. Then, use SQL Profiler to correlate the values of these counters with the currently running queries. This helps you identify the queries that are causing the contention in allocation structures.
+Divide TempDB into multiple data files of equal size. These multiple files don't necessarily need to be on different disks/spindles unless you are also encountering I/O bottlenecks as well. The general recommendation is to have one file per CPU because only one thread is active per CPU at one time. SQL Server allocates pages for TempDB objects in a round-robin fashion (also referred to as proportional fill) so that the latches on PFS and SGAM pages are distributed among multiple files. This is supported both in SQL Server 2000 and SQL Server 2005. There are improvements to the proportional fill algorithm in SQL Server 2005.
+Use TF-1118. Under this trace flag SQL Server allocates full extents to each TempDB object, thereby eliminating the contention on SGAM page. This is done at the expense of some waste of disk space in TempDB. This trace flag has been available since SQL Server 2000. With improvements in TempDB object caching since SQL Server 2005, there should be significantly less contention in allocation structures. If you see contention in SGAM pages, you may want to use this trace flag. Cached TempDB objects may not always be available. For example, cached TempDB objects are destroyed when the query plan with which they are associated is recompiled or removed from the procedure cache.
+
+Note:For each release of SQL Server, TempDB has more potential uses such as with SNAPSHOT ISOLATION level, temporary statistics use for read-only databases in SQL Server 2012 and more. It is recommended to keep a close watch on the usage of TempDB and leverage the TF1118 if the data file and sizing best practices do not address allocation bottlenecks.
+
+Additionally consider putting TempDB on local SSD disks in order to maximize disk performance.
+
Reference:
+
SQL Server, Access Methods Object
+
http://technet.microsoft.com/en-us/library/ms177426.aspx
+
Working with TempDB in SQL Server 2005
+
http://msdn.microsoft.com/en-us/library/cc966545.aspx
+
Troubleshooting Performance Problems in SQL Server 2008
+
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx ]]>
+
+
+
Description: The Buffer Cache Hit Ratio measures the percentage of pages that were found in the buffer pool without having to incur a read from disk. This counter indicates how often SQL Server goes to the buffer, not the hard disk, to get data. The higher this ratio, the better. A high ratio, close to 100% indicates that SQL Server did not have to go to the hard disk often to fetch data, and performance overall is boosted. If the Buffer Cache Hit Ratio was 100% that would suggest that all of the pages are being accessed from cache and does not require trips to disk, because of the optimistic read ahead mechanism, this is not exactly the case.
+When a user session wants to read data from the database, it will read directly from the SQL Server buffer cache (a logical read), or, if the buffer cache does not have the data that is requested, the data will be read into the buffer cache from disk (a physical read) and then from the buffer cache. If the requested data is in the buffer cache, then it is called a 'buffer hit'. If the data is not in the buffer cache it is called a 'buffer miss'. The ratio of buffer hits to total buffer requests is called the buffer cache hit ratio as can be seen from the following:
+
Cache Hit Ratio = (Logical Reads - Physical Reads)/Logical Reads
+
A read from memory takes approximately 100 nanoseconds, while a read from disk takes about 8 milliseconds or more.
+1 millisecond = 1,000,000 nanoseconds
+The important point about SQL Server read operations is that when selecting data from the database, the user will wait on the complete read operation including all of the physical reads. The time it takes to select from the database depends on how much data will be read and how long it takes for those reads to occur. Even with cache reads, the time it takes to read a large amount of data can be significant. With physical reads, the time will be even longer.
+There are a few considerations to be aware of regarding the Buffer Cache Hit Ratio counter. First, unlike many of the other counters available for monitoring SQL Server, this counter averages the Buffer Cache Hit Ratio from the time the instance of SQL Server was started. In other words, this counter is not a real-time measurement, but an average. Secondly, the buffer cache hit ratio may be skewed by the read ahead mechanism. Read Ahead Reads are pages that were read into cache while the query was processed. Read aheads are an optimistic form of physical reads. Because of the read ahead mechanism, you should not infer from a high buffer cache hit ratio that SQL Server is not suffering from memory pressure or at least could not benefit from additional memory.
+
+
Threshold:
+
Yellow: Less than 97 percent buffer cache hit ratio
+
Red: Less than 90 percent buffer cache hit ratio
+
Next Steps:
+
Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for queries with a high number of logical reads and consider tuning and potentially rewriting them.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
+
+
+Description: Total number of pages on all free lists. The more free pages that are available then the less often the lazy writer will have to fire keeping pages in the buffer pool longer.
+
+
+A value less than 640 (or 5 MB) may indicate physical memory pressure.
+
+
Threshold:
+
Yellow: Less than 640 Free Pages
+
Next Steps:
+Compare the Buffer Manager\Free pages counter to the following:
+
The higher the Buffer Manager\Free pages then the higher the Buffer Manager\Page Life Expectancy should be. If Buffer Manager\Free pages is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+
Reference:
+
SQL Server, Access Methods Object
+
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
+
+
Description:
+The Lazy Writes/sec counter records the number of buffers written each second by the buffer manager's lazy write process. This counter tracks how many times a second that the Lazy Writer process is moving dirty pages from the buffer to disk in order to free up buffer space. This process is where the dirty, aged buffers are removed from the buffer by a system process that frees the memory up for other uses. A dirty, aged buffer is one that has changes and needs to be written to the disk. High value on this counter possibly indicates I/O issues or even SQL Server memory problems. The Lazy writes / sec values should consistently be less than 20 for the average system.
+Generally speaking, this should not be a high value, say more than 20 per second or so. Ideally, it should be close to zero. If it is zero, this indicates that your SQL Server's buffer cache is plenty big and SQL Server doesn't have to free up dirty pages, instead waiting for this to occur during regular checkpoints. If this value is high, then a need for more memory is indicated.
+
+
+Note: NUMA will increase the number of lazy writer threads per NUMA node and influence the behavior of the lazy writer by increasing its execution at this view. If the server is a NUMA environment other signs of memory pressure should be used and you should analyze the Buffer Node counters for Page Life Expectancy per node. There is not a lazy writer counter in Buffer Nodes.
+
+Threshold:
+
Red: Greater than 20 Lazy Writes per second
+
+
+
+
+Next Steps:
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters, because SQL Server 2005 and later start to flush pages out of the buffer pool cache under memory pressure.
+
+
+Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
+
+Configure SQL Server to Use Soft-NUMA
+http://msdn.microsoft.com/en-us/library/ms345357.aspx]]>
+
+
+
+
+
+Description:
+Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
+
+When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
+
+The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
+
+Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed, A NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
+
Threshold:
+
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
+
Red: Page life expectancy is less than 5 minutes (300 seconds)
+
+
Next Steps:
+If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters, because SQL Server 2005 / 2008 and later start to flush pages out of the buffer pool cache under memory pressure.
+Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
+
+
Reference:
+
+
SQL Server, Access Methods Object
+
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
Description:
+Number of requests to find a page in the buffer pool. When the ratio of page lookups to batch requests is greater than 100, this is an indication that while query plans are looking up data in the buffer pool, these plans are inefficient or there was a large number of ad-hoc queries.
+
+
+Threshold:
+Ratio of Page Lookups/sec to Batch Requests/sec < 100 to 1.
+
Warning: Page life expectancy is less than 5 minutes (300 seconds)
+
Next Steps:
+Page Lookups/sec is the number of requests to find a page in the buffer pool made per second. If this number is high as compared to the number of batch requests, this indicates a degree of inefficiency and a potential opportunity for tuning.
+Identify queries with the highest amount of logical I/O's and tune them.
+
Note: You can track the Page Lookups/sec and other counters through the sys.dm_os_performance_counters DMV which contains all the SQL Server instance object-related counters that you can find in perfmon.
+
Reference:
+
SQL Server, Buffer Manager Object
+http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description: Number of physical database page reads issued per second. 80 to 90 per second is normal; anything above that indicates an indexing or memory constraint.
+
Threshold:
+
Yellow: Page Reads/sec > 90
+
Next Steps:
+Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
+If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
+Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
+Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
+
Reference:
+
SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description: Number of physical database page writes issued per second. 80 to 90 per second is normal. Anything above 90, it is recommended to check the lazy writer/sec and Checkpoint pages/sec counter, if these counters are also relatively high then, this indicates a memory constraint.
+
+Threshold:
+
Yellow: Page Writes/sec > 90
+
Next Steps:
+Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
+If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
+Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
+Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
+
Reference:
+
SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description:
+
Login and logout rates should be approximately the same. A login rate higher than the logout rate suggests that the server is not in a steady state, or that applications are not correctly using connection pooling. This could result in an increased load on the server.
+
Next Steps:
+
Verify if the .NET connection string is using the pooling=true and connection reset=true parameters.
+If so, a profiler trace with the Audit login and Audit logout Events would reveal the usage of sp_reset_connection stored procedure, which is used by SQL Server to support remote stored procedure calls in a transaction.
+This stored procedure also causes Audit Login and Audit Logout events to fire when a connection is reused from a connection pool.
+Also, the EventSubClass column in the trace will show if the connections are being pooled or not.
+Therefore focus the comparison only on the rate of non-pooled Logins and Logouts, as pooled connections will be reflected in the Logins/sec counter, but not on the Logouts/sec counter.
+
Reference:
+
SQL Server 2012 Books Online: SQL Server: General Statistics Object
+
http://technet.microsoft.com/en-us/library/ms190697(v=sql.110).aspx
+
SQL Server Connection Pooling
+
http://msdn.microsoft.com/en-us/library/8xx3tyca.aspx
+
SQL Server 2012 Books Online: Audit Login Event Class
+
http://msdn.microsoft.com/en-us/library/ms190260(v=sql.110).aspx
+]]>
+
+
+Total number of logouts started per second. Greater than 2 per second indicates that the application is not correctly using connection pooling.]]>
+
+Number of users connected to the system. The number of users currently connected to the SQL Server. This should correlate with the Batch Requests per second counter.]]>
+
+Number of latch requests that could not be granted immediately and had to wait before being granted. These are the amount of latches that had to wait.]]>
+
+
+Current number of processes waiting for a workspace memory grant. Memory Grants Pending records the number of connections that are waiting for memory before they can begin processing a memory intensive query such as a sort or hash operation. Connections that wait in this state for a long enough time will eventually receive an 8645 error (A time out occurred while waiting for memory resources to execute the query. Rerun the query). A spid waiting in this state will have a waittype of 0x0040 (RESOURCE_SEMAPHORE) in sysprocesses. If this counter remains above zero for any significant amount of time then you will need to track down what queries are doing sorts/hashes and run them through Database Tuning Advisor (DTA) to see if they can get a more efficient plan.
+
+
+Threshold:
+
Red: Numbers higher than 0 indicate a lack of memory.]]>
+
+
+
+Total amount of dynamic memory the server is willing to consume]]>
+
+
+Description Number of SQL batch requests received by server. This counter measures the number of batch requests that SQL Server receives per second, and generally follows in step to how busy your server's CPUs are. Generally speaking, over 1000 batch requests per second indicates a very busy SQL Server, and could mean that if you are not already experiencing a CPU bottleneck, that you may very well soon. Of course, this is a relative number, and the bigger your hardware, the more batch requests per second SQL Server can handle. From a network bottleneck approach, a typical 100Mbs network card is only able to handle about 3000 batch requests per second. If you have a server that is this busy, you may need to have two or more network cards, or go to a 1Gbs network card.
+
+Note: Sometimes low batch requests/sec can be misleading. If there were a SQL statements/sec counter, this would be a more accurate measure of the amount of SQL Server activity. For example, an application may call only a few stored procedures yet each stored procedure does lot of work. In that case, we will see a low number for batch requests/sec but each stored procedure (one batch) will execute many SQL statements that drive CPU and other resources. As a result, many counter thresholds based on the number of batch requests/sec will seem to identify issues because the batch requests on such a server are unusually low for the level of activity on the server.
+
+We cannot conclude that a SQL Server is not active simply by looking at only batch requests/sec. Rather, you have to do more investigation before deciding there is no load on the server. If the average number of batch requests/sec is below 5 and other counters (such as SQL Server processor utilization) confirm the absence of significant activity, then there is not enough of a load to make any recommendations or identify issues regarding scalability.
+
+Note: Batch requests / sec is a great counter to use for baselining and to use as a measurement of how many batches the system could handle before a symptom was evident or a particular condition occurred. This counter will greatly depend on SQL Server code and the hardware being used. It is often used as a gauge of saying that a particular system was able to handle x number of batch requests per second and then to examine system and SQL Server counters to determine what resource is the bottleneck at that particular workload.]]>
+
+
+
+Description: Number of SQL compilations that occurred per second, including recompiles. A high value after subtracting recompiles can be an indication of a large number of ad hoc queries, which can also be cross referenced with the number of ad hoc plans in the plan cache counter.
+
+Be aware of the following:
+
+Reference
+SQL Server, Plan Cache Object
+http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
+
SQL Server Compilation Bottlenecks
+
http://blogs.msdn.com/grahamk/archive/2009/02/03/compilation-bottlenecks-error-8628-severity-17-state-0-part-1.aspx
+
+
+]]>
+
+
+
+Description: Number of SQL re-compiles per second that measures the number of times that a statement executed, but had to be compiled again before the statement completed. There are a variety of reasons that a recompile occurred, such as statistics being out of date, a column was added to a table a stored procedure depends on, a statement was run with a recompile option, etc. This counter needs to be as close to 0 as possible. A recompile can cause deadlocks and compile locks that are not compatible with any locking type.
+
+SQL Server Trace / Profiler provides an excellent way to find out exactly why recompiles are occurring in your environment.
+
+Troubleshooting stored procedure recompilation
http://support.microsoft.com/kb/243586
+How to identify the cause of recompilation in an SP:Recompile event
+http://support.microsoft.com/kb/308737]]>
+
+
+
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
+
+This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
+
+Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
+
Threshold:
+
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
+
Formula:
+(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
+
Next Steps:
+The main causes of high Full Scans/sec are:
+
Reference:
+
SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
SQL Server 2005 Waits and Queues
+http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
+
Wait Types and Correlation to Other Performance Info
+http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
+
Description: Lock Requests/sec reports the number of new locks and lock conversions requested from the lock manager per second. A Lock Requests/sec greater than 500 when compared to Batch Request/sec indicates that batches are acquiring a large number of locks.
+This suggests inefficient queries and there is a risk that blocking may occur.
+Threshold: (Yellow) - This value should not be greater than 50% of the number of Batch Requests/Sec
+
Next Steps:
+
Review high-read queries. In addition, examine the code to determine where to reduce the number of reads by either tuning your application or the database.
+
Reference:
+
SQL Server, Locks Object
+
http://msdn.microsoft.com/en-us/library/ms190216.aspx
+]]>
+
+
+
+
+Description: Number of new locks and lock conversions requested from the lock manager. This value should tie close to the number of Batch Requests per second. Values greater than 1000 may indicate queries are pulling large volumes of data thereby accessing large numbers of rows
+
Reference:
+
SQL Server, Locks Object
+
http://msdn.microsoft.com/en-us/library/ms190216.aspx
+
+Threshold
+
+Yellow Greater than > 1000 Lock Requests / sec]]>
+
Description: Number of lock requests that could not be satisfied immediately and required the caller to wait before being granted the lock. This is a sign that there is some blocking occurring and would be a good baseline measurement of lock waits for load testing.
+
Note: Lock waits are not recorded until after the lock event completes. For examining active blocking it is recommended to query sys.dm_os_waiting_tasks.
+
+Threshold
+Yellow Values greater than 0]]>
Recommendation: Look for peaks that approach or exceed 60 seconds.
+
Even though this counter counts how many total milliseconds SQL Server is waiting on locks over the last second, the counter actually records the lock wait time for a particular lock wait at the end of the locking event.
+
The following methods can be used to reduce lock contention and increase overall throughput:
+
+
+
+
+
+
+Description: Number of lock requests that timed out. This does not include requests for NOWAIT locks. A value greater than zero might indicate that user queries are not completing.
+
+Threshold
+Yellow Greater than 1
+]]>
+
+Description:
+Number of lock requests, per second, which resulted in a deadlock. Deadlocks are always an issue that should be resolved. A deadlock transaction that is killed must be rerun. It is recommended to use the SQL Trace deadlock graph, trace flag 1222, and the extended events deadlock capture to help identify and solve all of the deadlocks in your environment.
+
+Threshold
+Red Any Deadlocks greater than 0
+
+Resources
+
Bart Duncan Deadlock Resources
+Getting historical deadlock info using extended events
+http://www.sqlskills.com/BLOGS/PAUL/post/Getting-historical-deadlock-info-using-extended-events.aspx]]>
Recommendation: Review the wait statistics on the server to find the top resources that the SQL Server is waiting on.
+
Reference:
+
Performance Tuning Waits and Queues
+
http://www.microsoft.com/technet/prodtechnol/sql/bestpractice/performance_tuning_waits_queues.mspx
+]]>
+
+
+
+
Recommendation: Review the wait statistics on the server to find the top resources that the SQL Server is waiting on.
+
Reference:
+
Performance Tuning Waits and Queues
+
http://www.microsoft.com/technet/prodtechnol/sql/bestpractice/performance_tuning_waits_queues.mspx
+]]>
+
+
+
+
+
+Description:
+% Processor Time is the percentage of elapsed time that all of the process threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions is included in this counter.
+
+This counter measures the percentage of total processor time spent (user mode and kernel mode) on SQL Server process threads. If this counter stays at 80% for sustained periods of time, then you may also wish to investigate other Process (sqlservr) such as Private Bytes, Virtual Bytes, and Working Set to get a better understanding of how SQL Server allocates certain segments of memory.
+
+
+Threshold:
+
Red: SQL Server is using more than 30% user mode CPU usage
+
+
+Reference:
+
+Monitoring CPU Usage
+http://msdn.microsoft.com/en-us/library/ms178072.aspx
+
Ask the Performance Team
+http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
+
Clint Huffman's Windows Troubleshooting in the Field Blog
+http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
+
Description: Total number of processes per second that have successfully acquired a workspace memory grant. This counter should be used as a baseline for comparisons under load.]]>
+
Description: Total amount of memory granted to executing processes. This memory is used for hash, sort and create index operations.]]>
+
Description: Total amount of memory granted to executing processes. This memory is used primarily for hash, sort and create index operations.]]>
+
Description: The cumulative size of all the data files in the database.]]>
+
Description: Total number of log bytes flushed.]]>
+
Description: The cumulative size of all the log files in the database.]]>
+
Description: The cumulative used size of all the log files in the database.]]>
+
+
Description: Total wait time (milliseconds).]]>
+
+
Description: Number of commits waiting on log flush.]]>
+
Description: Number of log flushes.]]>
+
+
Description: Total number of log growths for this database.]]>
+
Description: Total number of log truncations for this database.]]>
+
+
Description: Total number of log shrinks for this database.]]>
+
+
Description: The percent of space in the log that is in use.]]>
+
Description: Number of auto-parameterization attempts.]]>
+
Description: Number of failed auto-parameterizations.]]>
+
Description: Number of safe auto-parameterizations.]]>
+
Description: Number of unsafe auto-parameterizations.]]>
+
Description: System CPU usage by all requests in the specified instance of the performance object.]]>
+
Description: Number of completed requests per second in the workload group.]]>
+
Description: Number of requests waiting in the queue due to resource governor limits in the workload group.]]>
+
Description: System CPU usage by all requests in the specified instance of the performance object.]]>
+
Description: Target amount of memory in kilobytes the resource pool is trying to attain based on the settings and server state.]]>
+Used amount of memory in kilobytes in the resource pool.]]>
+
+Description: Number of requests that had to wait for a free page.
+
+Free list stalls/sec is the frequency with which requests for available database pages are suspended because no buffers are available. Free list stall rates of greater than 2 per second indicate too little SQL memory available.
+
+
Reference
+Threshold
+Yellow - Free list stalls/sec > 2
+SQL Server, Buffer Manager Object
+
+http://technet.microsoft.com/en-us/library/ms189628.aspx
+]]>
+Description
+
+Number of pages, per second, flushed by checkpoint or other operations that require all dirty pages to be flushed. The checkpoint frequency can be due to low memory conditions as well as the recovery interval set by sp_configure.
+
+Reference
+
+SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+
+A SQL Server DBA myth a day: (15/30) checkpoint only writes pages from committed transactions
+
http://www.sqlskills.com/BLOGS/PAUL/category/Checkpoint.aspx
+
+Database Checkpoints (SQL Server)
+
+http://technet.microsoft.com/en-us/library/ms189573(v=sql.110).aspx]]>
+
+
+
Description: Number of pages read, per second, in anticipation of use which is an optimistic physical read. This number should not exceed 20% of total page reads.
+Threshold:
+
Yellow: Greater than 20% of Page Reads/sec
+
+http://technet.microsoft.com/en-us/library/ms189628.aspx]]>
+
+Feature usage since last SQL Server startup
+
+You can also examine performance counters through the sys.dm_os_performance_counters DMV. By using the perfmon counters for deprecation and the DMVs, you can help your application prepare and avoid issues when migrating to future versions of SQL Server.
+
+SELECT * FROM sys.dm_os_performance_counters
+WHERE object_name LIKE '%Deprecated Features%'
AND cntr_value > 0
+ORDER BY cntr_value DESC
+
+SQL Server, Deprecated Features Object
+
+http://technet.microsoft.com/en-us/library/bb510662.aspx]]>
+
+Number of attentions per second. Attentions are the number of user cancels and query timeouts that occurred per second. A high number of attentions may indicate slow query performance as users are cancelling queries.]]>
+
+Number of errors/sec]]>
+Description: Ratio between cache hits and lookups
+
+The Plan Cache object provides counters to monitor how SQL Server uses memory to store objects such as stored procedures, ad hoc and prepared Transact-SQL statements, and triggers. Multiple instances of the Plan Cache object can be monitored at the same time, with each instance representing a different type of plan to monitor.
+
Compiled Plan Stubs & Plan Cache Perf Counters:
+
+In SQL Server 2008 R2, there are three options that can help in dealing with plan cache pollution issues.
+
+Contributor(s):
+
+Reference:
+SQL Server, Plan Cache Object
+http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
+]]>
+The average amount of wait time (milliseconds) for each lock request that resulted in a wait. This wait could indicate excessive blocking that can be verified by querying sys.dm_os_waiting_tasks. Compare this counter to "Lock Waits/sec" and look for trends.
+
+Threshold
+Yellow: Greater than 500 ms average wait time.
]]>
+
+
Description: Percentage of work tables created where the initial two pages of the work table were not allocated but were immediately available from the work table cache.
+
+Since SQL Server 2005, worktable caching was improved. When a query execution plan is cached, the work tables needed by the plan are not dropped across multiple executions of the plan but merely truncated. In addition, the first nine pages for the work table are kept. In SQL Server 2000, the work tables used during query plan execution are dropped. Because the work table is cached, the next execution of the query is faster. When the system is low on memory, the execution plan may be removed from the cache and the associated work tables dropped as well. Both SQL Server 2000 and SQL Server 2005 use a small global pool of pre-allocated pages and extents that make the initial creation of work tables faster.
+
+Note: When a work table is dropped, two pages may remain allocated and they are returned to the work table cache. A value less than 90% may indicate insufficient memory, since execution plans are being dropped, or may indicate, on 32-bit systems, the need for an upgrade to a 64-bit system.
+
+
Threshold:
+
Yellow: Less than 90% Worktables from Cache Ratio. This will need to be baselined for accuracy.
+
+
Reference:
+SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426(v=sql.110).aspx]]>
+
Description:
+The number of times locks on a table were escalated from page- or row-level to table-level. Frequent or even occasional spiking in this value may indicate poorly coded transactions.
+
+
+Lock Escalation Thresholds
+
+Lock escalation is triggered when lock escalation is not disabled on the table by using the ALTER TABLE SET LOCK_ESCALATION option, and when either of the following conditions exists:
+
+
+If locks cannot be escalated because of lock conflicts, the Database Engine periodically triggers lock escalation at every 1,250 new locks acquired.
+
+Next Steps
+
Reducing Locking and Escalation
+In most cases, the Database Engine delivers the best performance when operating with its default settings for locking and lock escalation. If an instance of the Database Engine generates a lot of locks and is seeing frequent lock escalations, consider reducing the amount of locking by:
+
Using an isolation level that does not generate shared locks for read operations.
+
+
+Note: Changing the isolation level affects all tables on the instance of the Database Engine.
+
+
+You can also use trace flags 1211 and 1224 to disable all or some lock escalations. For more information, see Trace Flags (Transact-SQL). Also, monitor lock escalation by using the SQL Server Profiler Lock:Escalation event; and see Using SQL Server Profiler.
+
+Reference:
+Lock Escalation (Database Engine) -
http://msdn.microsoft.com/en-us/library/ms184286(SQL.105).aspx
+
+]]>
+The free space in tempdb in KB.]]>
+The longest running time of any transaction in seconds. This counter could indicate a long running statement pulling large amounts of data that normally takes a long time to execute or potentially a blocking condition.]]>
+The total number of active non-snapshot transactions that generate version records. These are all of the non-snapshot isolation versions such as triggers and online indexing.
+
+Note: The sum of Update Snapshot Transactions and NonSnapshot Version Transactions represents the total number of transactions that participate in version generation. The difference of Snapshot Transactions and Update Snapshot Transactions reports the number of read-only snapshot transactions.
+
+Reference:
+http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
+http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
+Managing TempDB in SQL Server: TempDB Basics (Version Store: Why do we need it?)
+http://blogs.msdn.com/b/sqlserverstorageengine/archive/2008/12/22/managing-tempdb-in-sql-server-tempdb-basics-verison-store.aspx
+]]>
+The total number of active snapshot transactions.]]>
+Description
+
+The version cleanup rate in KB per seconds.
+
+Monitors the version cleanup rate in KBps in all version stores. If the version cleanup rate is lower than the version generation rate, the version store will use more and more space in tempdb. However, if the version cleanup rate is 0 but the version generation rate is not, there is probably a long-running transaction that is preventing the version store cleanup.
+
+Row versions are shared across sessions. The creator of the row version has no control over when the row version can be reclaimed. You will need to find and then possibly stop the longest-running transaction that is preventing the row version cleanup.
+
The following query returns the top two longest-running transactions that depend on the versions in the version store:
+
+select top 2
+ transaction_id,
+ transaction_sequence_num,
+ elapsed_time_seconds
+from sys.dm_tran_active_snapshot_database_transactions
+order by elapsed_time_seconds DESC
+
+Reference
+
+Row Versioning Resource Usage
+http://msdn.microsoft.com/en-us/library/ms175492.aspx]]>
+Description: The version generation rate in KB per seconds.
+
+You can use the Version Generation Rate and Version Cleanup Rate counters to measure version store impact on TempDB. The Version Generation Rate should not outpace the Cleanup Rate. Additionally, if your Version Cleanup Rate is 0, a long-running transaction could be preventing the version store cleanup. Incidentally, before generating an out-of-tempdb-space error, SQL Server 2008 makes a last-ditch attempt by forcing the version stores to shrink. During the shrink process, the longest-running transactions that have not yet generated any row versions are marked as victims. This frees up the version space used by them. Message 3967 is generated in the error log for each such victim transaction. If a transaction is marked as a victim, it can no longer read the row versions in the version store or create new ones. Message 3966 is generated and the transaction is rolled back when the victim transaction attempts to read row versions. If the shrink of the version store succeeds, more space is available in tempdb. Otherwise, tempdb runs out of space.
+
+If TempDB fills and runs out of space, writes will continue, but versions will not and reads will fail.
+
+Reference
+SQL Server, Transactions Object
+http://technet.microsoft.com/en-us/library/ms189038.aspx]]>
+
+
+
+Plan re-use is desirable for OLTP workloads because re-creating the same plan (for similar or identical transactions) is a waste of CPU resources.
+
To compute the plan re-use rate, compare SQL Server SQL Statistics: batch requests/sec to SQL compilations/sec.
+
Special exception to the plan re-use rule is that zero (or trivial) cost plans will not be cached (not re-used) in SQL 2005 SP2 and above.
+
Applications that use zero cost plans will have a lower plan re-use but this is not a performance issue, because it is cheaper to generate a new plan every time than to cache.
+
Reference:
+
Execution Plan Caching and Reuse
+
http://msdn.microsoft.com/en-us/library/ms181055.aspx
+
Top SQL Server 2005 Performance Issues for OLTP Applications
+
http://technet.microsoft.com/en-us/library/cc966401.aspx
+]]>
+
+Number of pages which are not from NUMA-local memory.
+
+When we are using NUMA architecture which is becoming more common you will see memory nodes. We have one memory node per NUMA node and this is used to allocate memory in a particular node. This is visible in the SQL Server Buffer Node perfmon group. If you want to make sure you are performing local memory access versus foreign memory access we need to pay attention to where the memory is being allocated which can be tracked via sys.dm_os_memory_nodes.
+
+If we do not have enough memory in a particular NUMA node, we will perform a foreign access if we have to, but SQL Server tries to avoid this.
+
+Reference:
+http://msdn.microsoft.com/en-us/library/ms345597(v=sql.110).aspx]]>
+Maximum amount of memory in kilobytes the resource pool can have based on the settings and server state.]]>
Number of requests per second that failed to return a report from cache. Use this counter to find out whether the resources used for caching (disk or memory) are sufficient.
+
+Performance Counters for the MSRS 2011 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
+Number of reports that are currently active and being handled by the report server. Use this counter to evaluate caching strategy. There might be significantly more requests than reports generated.
+
+Performance Counters for the MSRS 2011 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
+Total number of cache misses against the in-memory cache after the service started. This counter resets when the application domain recycles.
+
+Performance Counters for the MSRS 2011 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
+Total number of reports that ran successfully after the service started. This counter resets when the application domain recycles.
+
+Performance Counters for the MSRS 2011 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
+Number of running jobs. This counter can be used to find out if the current load on the system is potentially being driven from SQL Server Agent execution.]]>
+The number of Jobs that have failed to complete successfully for any reason since the last SQL Server Agent restart.]]>
+Percentage of successful jobs from the total number of executed jobs.
+
+]]>
+The number of Jobs that have successfully completed since the last SQL Server Agent restart.]]>
+Number of active steps.
+]]>
+The total number of times any Job Step execution is retried since the last SQL Server restart.]]>
+Number of AlwaysOn messages sent to this availability replica per second
+]]>
+Number of AlwaysOn messages resent in the last second
+
+
+]]>
+Number of AlwaysOn messages received from the replica per second
+]]>
+Bytes Received from Replica/sec: Number of bytes received from the availability replica per second
+]]>
+Number of bytes sent to the remote availability replica per second
+]]>
+Note: These counters are not defined by default and would be 0 unless configured through SQL Server through the sp_user_counter# stored procedures.]]>
+Number of milliseconds transaction termination waited for acknowledgement per second.
+
+The Replica:Transaction Delay counter measures the primary replica’s wait for acknowledgement that the transaction has committed at the secondary replica database in order to commit its own transaction. Since Asynchronous Commit Mode does not require acknowledgment to commit the transaction, this counter reports 0 when measured against a database in asynchronous commit mode.
+
+When there are multiple secondaries, this is a measure of the total time all transactions waited on the secondary acknowledgement.
+Note: This counter should be viewed on the Primary replica
+]]>
+The amount of log in kilobytes that need to be undone.
+
+Note: This counter should be viewed on the Secondary replica]]>
+Amount of log records redone on the secondary database in the last second.
+
+This counter can be compared to Log Bytes Received/Sec. If Log Bytes Received/Sec trends greater than Redone Bytes/Sec for sustained periods of time, then redo latency is building up between the primary and secondary replicas, which suggests that counter Redo Bytes Remaining and Recovery Queue is growing. This could indicate Redo is the bottleneck.
+
+To measure Recovery Time, divide Recovery Queue by Redone Bytes / Sec.
+
+Note: This counter should be viewed on the Secondary replica]]>
+The amount of log in kilobytes remaining to be redone to finish the reverting phase. If Redo Bytes Remaining counter is trending up, The redo process could be a bottleneck.
+
+Note: This counter should be viewed on the Secondary replica
+
]]>
+Number of times redo gets blocked in the last second]]>
+Amount of log records in the log files of the secondary replica that has not yet been redone.
+
+The Recovery Queue monitors the progress of the redo of flushed pages. If Recovery Queue is trending up, the redo process could be a bottleneck. For AlwaysON, the redo process is single threaded to ensure a consistent read for readable secondaries.
+
+Note: This counter should be viewed on the Secondary replica
+
]]>
+Amount of logs received by the availability replica for the database
+
+Note: This counter should be viewed on the Secondary replica
+]]>
+The amount of log in kilobytes remaining to finish the undo phase.
+
+Note: This counter should be viewed on the Secondary replica
+]]>
Amount of log records in the log files of the primary database, in kilobytes, that has not yet been sent to the secondary availability replica. This value is sent to the secondary availability replica from the primary availability replica.
+
Note: Queue size does not include FileStream files that are sent to a secondary.
+
The log send queue size at any point will give an indication approximately how much log has not been sent in KB. This is the amount of log the secondary does not have at the time of failover and the amount of data loss that could be experienced. The log send queue size is also reported in the DMV sys.dm_hadr_database_replica_states.log_send_queue_size column in KB.
+
+Note: This counter should be viewed on the Secondary replica
+
+Reference:
+
http://technet.microsoft.com/en-us/library/ff877972.aspx
+
http://www.sqlskills.com/blogs/joe/answering-questions-with-the-alwayson-dashboard/
+
http://support.microsoft.com/kb/2857849]]>
Number of transactions that wrote to the mirrored database and waited for the log to be sent to the mirror in order to commit, in the last second.
+
+This counter is a measure of transactions that are waiting to be hardened to the primary because of Synchronous Availability Mode requiring that they harden at secondary also. When using Asynchronous availability mode this counter is 0.
+
Note: This counter should be viewed on the Primary replica
+
]]>
+Total amount of dynamic memory the server is using for query optimization]]>
+Amount of memory the server is currently using for the purposes other than the database pages.
+
+\SQLServer:Buffer Manager Stolen pages
+
+
Description: Number of pages used for miscellaneous server purposes (including procedure cache). This counter shows how many pages were taken from the buffer pool to accommodate non-buffer pool needs such as plan cache, procedure cache, the optimizer, workspace memory, etc. This counter should be baselined and can be analyzed by comparing this counter to the amount of buffer pool space and large requests that are hitting the SQL Server instance.
+
+
Note: DBCC MEMORYSTATUS can also be leveraged to examine the impact of stolen memory to the buffer pool.
+
Note: The lazywriter process is not permitted to flush Stolen buffers out of the buffer pool.
+
+
Reference:
+SQL Server, Buffer Manager Object
+
+http://technet.microsoft.com/en-us/library/ms189628(v=sql.105).aspx
+INF: Using DBCC MEMORYSTATUS to Monitor SQL Server Memory Usage
+
+http://support.microsoft.com/kb/271624]]>
+Ideal amount of memory the server is willing to consume]]>
+Total amount of dynamic memory the server is currently consuming]]>
+Amount of memory the server is using on this node for database pages.]]>
+Non NUMA-local amount of memory on this node.]]>
+Amount of memory the server is using on this node for the purposes other than database pages.]]>
+Ideal amount of memory for this node.]]>
+Total amount of memory the server has committed on this node.]]>
+Number of lookup requests from this node, which were satisfied from other nodes.
+
+Note: It is recommended to set the maximum degree of parallelism MAXDOP to the number of processors per NUMA node to encourage queries to leverage memory on the local NUMA node, though memory can always be used from other NUMA nodes if it is needed. Dynamic Management Views (DMVs) and performance monitor (perfmon) counters can be used to find out the degree to which local memory and foreign memory are being used.
+Additionally, it is recommended to leverage the SysInternals tool 'CoreInfo' to find out specifically the processors being used, hyperthreading, and the overall NUMA cost between NUMA nodes. Furthermore, it is recommended to configure MAXDOP correctly and monitor foreign memory use, install the latest hotfixes that would affect NUMA configurations, and ensure the latest firmware is installed for the hardware being used in your environment.
+
+References
+CoreInfo
+
http://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
+
Recommendations and guidelines for the "max degree of parallelism" configuration option in SQL Server
+
http://support.microsoft.com/kb/2806535
+]]>
+Number of lookup requests from this node, which were satisfied from this node.
+Note: It is recommended to set the maximum degree of parallelism MAXDOP to the number of processors per NUMA node to encourage queries to leverage memory on the local NUMA node, though memory can always be used from other NUMA nodes if it is needed. Dynamic Management Views (DMVs) and performance monitor (perfmon) counters can be used to find out the degree to which local memory and foreign memory are being used.
+Additionally, it is recommended to leverage the SysInternals tool 'CoreInfo' to find out specifically the processors being used, hyperthreading, and the overall NUMA cost between NUMA nodes. Furthermore, it is recommended to configure MAXDOP correctly and monitor foreign memory use, install the latest hotfixes that would affect NUMA configurations, and ensure the latest firmware is installed for the hardware being used in your environment.
+
+References
+CoreInfo
+
http://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
+Recommendations and guidelines for the "max degree of parallelism" configuration option in SQL Server
+
http://support.microsoft.com/kb/2806535
+]]>
+Database pages on node.
+
+SQL Server:Buffer Node
+
+ http://technet.microsoft.com/en-us/library/ms345597.aspx
+]]>
+Number of pages flushed to enforce the recovery interval settings.
+
+When Indirect Checkpoints is enabled at the database level, you will notice a new background thread in sys.dm_exec_requests with the command token "RECOVERY WRITER". There is a single background writer for the SQL Server instance. The background writer performs aggressive flushing of dirty pages based on LSN order and reduces the redo phase recovery time.
+
+The catalog view sys.databases contains a column named target_recovery_time_in_seconds that indicates whether a specific database is using the new Indirect checkpoint algorithm. There is a new performance monitor counter called 'Background writer pages/sec' that exposes the amount of dirty pages processed by the background writer.
+
+SQL Server Books Online contains a discussion about Indirect Checkpoints and how it interacts with the recovery interval setting:
+
+
+Database Checkpoints (SQL Server)
+
+http://msdn.microsoft.com/en-us/library/ms189573(v=sql.110).aspx ]]>
+The ideal number of pages in the Buffer Pool according to the maximum memory granted to SQL Server.]]>
+
+
+
+
+
+Description:
+Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
+
+When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
+
+The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
+
+Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed, A NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
+
Threshold:
+
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
+
Red: Page life expectancy is less than 5 minutes (300 seconds)
+
+
Next Steps:
+If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 starts to flush pages out of the buffer pool cache under memory pressure.
+Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
+
+
Reference:
+
+
SQL Server, Access Methods Object
+
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+Number of active update transactions for the database.]]>
+Read/write throughput for backup/restore of a database.]]>
+KiloBytes bulk copied.]]>
+Number of temporary tables/table variables in use]]>
+Number of temporary tables/table variables created/sec]]>
+Number of temporary tables/table variables waiting to be destroyed by the cleanup system thread]]>
+Number of suboptimal query plans generated per second in the workload group.]]>
+Number of threads used by parallel queries in the workload group. Serial queries and the main thread of parallel queries are not included in this number.]]>
+Number of queries per second getting less than ideal amount of memory in the workload group.]]>
+Number of currently running requests in the workload group.]]>
+The total number of active transactions.]]>
+The size of the version store in KB.]]>
+The total number of errors that occur during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx
+]]>
+The total number of errors that occur per second during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
+A number from 1-5 indicating the current memory state of the server.
+
+
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx
+]]>
+Number of bytes the server requested to shrink.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
+Number of shrink notifications the server issued in the last second. Indicates how often the server believes it is under memory pressure.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
+Actual number of bytes sent per second over the network to the remote availability replica
+]]>
-
-
-
-
-
-Description: % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
-
-Privileged or kernel mode is the processing mode that allows code to have direct access to all hardware and memory in the system. I/O operations and other system services run in privileged (kernel) mode; user applications run in user mode. Unless the processes are graphics-intensive or I/O-intensive such as file and print services, most applications should not be processing much work in kernel mode.
-Privileged mode corresponds to the percentage of time the processor spends on execution of Microsoft Windows kernel commands, such as processing of SQL Server I/O requests. If this counter is consistently high when the Physical Disk counters are high, consider focusing on improving the disk subsystem.
-
-It is recommended to look for comparitive trends with other processes, work loads, error counts, and other behaviors to find what is driving Privileged Time.
-
-Note: Different disk controllers and drivers use different amounts of kernel processing time. Efficient controllers and drivers use less privileged time, leaving more processing time available for user applications, increasing overall throughput.
-
-
-Threshold:
-
-Yellow: SQL Server is using more than 20% Privileged (kernel) mode CPU usage
-
Red: SQL Server is using more than 30% Privileged (kernel) mode CPU usage
-
Next Steps:
-The key piece to diagnosing high processor conditions is to determine the ratio of privileged mode to user mode CPU.
-The counter '\Processor\% Processor Time' is the sum of '\Processor\% Privileged Time' and '\Processor\% User Time'. If Privileged Time is pushing the %Processor Time higher then it is due to processes executing in kernel mode. If '% User Time' is causing the % Processor Time to be higher then it is likely a user mode process that is causing the pressure.
-If %Privileged Time is consistently high or shows high under load, it could be several issues. The most common reason for high %Privileged Time is disk pressure which can be measured by correlating this counter with Physical Disk reads / sec and Physical Disk writes / sec. If these are also high you may also see a high number of Page Latch Waits for SQL Server which can be measured by examining the sys.dm_os_wait_stats dynamic management view and the perfmon SQL Server:Wait Statistics perfmon counters.
-
-If SQL Server Memory Manager: Page Life Expectancy is also low try to address by reducing the number of queries that are performing a high number of logical reads by adding indexes, ensuring that statistics are up to date, and potentially rewriting the query.
-
-You could add more physical RAM to help raise Page Life Expectancy if it is low (lower than your baseline, or critical when under 300) although we only recommend adding memory as an absolute last resort. We first recommended addressing design and addressing poor indexing first. Adding physical RAM only masks the real issue.
-
-The other potential reasons for high privileged mode are related to out of date drivers, BIOS being out of date, failing components, processes that run in kernel mode such as anti-virus, and other potential issues.
-
-Reference:
-
-Monitoring CPU Usage
-http://msdn.microsoft.com/en-us/library/ms178072.aspx
-
Ask the Performance Team
-http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
-
Clint Huffman's Windows Troubleshooting in the Field Blog
-http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx
-]]>
-
-
-
-
Description: Rows with varchar columns on tables without a clustered index can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
-Forwarded records occur when a data record in a heap increases in size and the record's current page does not have the space to store the size increase. The record is moved to a new location, becoming a forwarded record, and the forwarding record is left in the original location to point to the real location of the record. The forwarded record points back to the forwarding record in case its location ever needs to change again.
-Access Methods Forwarded Records/sec measures the number of records accessed through forwarded record pointers which are due to tables without a clustered index. A forwarded record is basically a pointer. For instance, if you start with a short row, and update the row creating a wider row, the row might not fit on the data page. A pointer is put in its location and the row is forwarded to another page. Forwarding Records are used as a performance optimization so that all the non-clustered indexes on the heap do not have to be altered with the new location of the heap record.
-If a table has lots of forwarded records, scanning the table can be very inefficient.
-Also, rows with varchar columns can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
-Forwarded Records only occurs on heaps which are tables without clustered indexes.
-
Threshold: (Yellow) - This value should not be greater than 10% of the number of Batch Requests/Sec
-
Next Steps:
-
Look at code to determine where the short row is inserted followed by an update.
Forwarded records can be avoided by:
-
Reference:
-
SQL Server Storage Engine
-
http://blogs.msdn.com/sqlserverstorageengine/archive/2006/09/19/761437.aspx
-
Forwarding and forwarded records, and the back-pointer size
-
http://www.sqlskills.com/BLOGS/PAUL/post/Forwarding-and-forwarded-records-and-the-back-pointer-size.aspx
-
sys.dm_db_index_physical_stats (Transact-SQL)
-
http://msdn.microsoft.com/en-us/library/ms188917.aspx
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
-
Description:
-
This counter represents inserts into a table with no physical ordering of the rows. A table with no ordering, without a clustered index, is known as a heap table. Inserts into heaps will require SQL Server to perform freespace scans to identify pages with free space to insert rows. A heap table also requires an additional, internal column called a uniquifier to be generated for each row inserted.
-Extra processing is required to define and store a heap table since SQL Server normally uses the clustered index as a storage mechanism for the table data. Freespace scans have an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when there are many connections inserting.
-It is usually recommended that you physically order the table rows by using a clustered index on the table.
-FreeSpace Scans/sec represents inserts into a table with no physical ordering of its rows which is called a heap. A heap table requires an additional column called a uniquifier to be generated for each row inserted. It is recommended that you physically order the table rows by using a clustered index on the table for most tables.
-
-***Also, a heap table requires an additional column called a uniquifier to be generated for each row inserted. It is usually recommended that you physically order the table rows by using a clustered index on the table for most tables.
-
-FreeSpace Scans/sec measures the number of scans per second that were initiated to search for free space within pages already allocated to an allocation unit to insert or modify record fragments. Each scan may find multiple pages. FreeSpace Scans are due to inserts into heaps that require SQL Server to perform freespace scans to identify pages with free space to insert rows. Freespace scans are an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when many spids are inserting. The solution is often to add a clustered index for base tables.
-One or more of the following symptoms may accompany poor performance during inserts to a large table on SQL Server:
-
-
Threshold:
-
Yellow: A ratio (10%) or more than 1 freespace scan for every 10 Batch Requests/Sec
-
Next Steps:
-Microsoft recommends that you add a clustered index to the table and test the effect of the clustered index on performance.
-Reference:
-
PRB: Poor Performance on a Heap
-
http://support.microsoft.com/kb/297861
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
-]]>
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes and too many rows requested; queries with missing indexes or too many rows requested will have a large number of logical reads and an increased CPU time.
-
-This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
-
-Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also poor disk performance, and / or, high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
-
Next Steps:
-The main causes of high Full Scans/sec are:
-
Note: Identify disk bottlenecks by using Performance Counters, Profiler, sys.dm_io_virtual_file_stats and SHOWPLAN output.
-Also refer to the sys.dm_io_virtual_file_stats dynamic management view (DMV) to track io_stalls to help identify IO bottlenecks.
-To back up and support this information, compare the counters to sys.dm_os_wait_stats output. If you see high values in perfmon, you may also see high waits for the following:
-
Reference:
-
SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
SQL Server 2005 Waits and Queues
-http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
-
Wait Types and Correlation to Other Performance Info
-http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
-
-
-
-
-Description: The number of page splits per second that occurs as the result of overflowing index pages and new page allocations. When a record is inserted into an index, it must be inserted in order. If the data page is full, the page splits in order to maintain the appropriate order. A high value for this counter may warrant the consideration of a lower fill factor and pad_index to leave more empty space per page.
-This value should be as low as possible. Heavily fragmented indexes may be the result of high page splits/sec.
-
Note: A high value for this counter is not bad in situations where many new pages are being created, since it includes all new page allocations as well as splits when a data page splits.
-
Threshold:
-
Yellow: A ratio of more than 1 page split for every 20 batch requests
-
Next Steps:
-If the number of page splits is high, consider increasing the fillfactor of your indexes. An increased fillfactor helps to reduce page splits by increasing the amount of free space on each page.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426(v=SQL.105).aspx
-
-To track page splits more accurately see the following SQLSkills blog article from Jonathan Kehayias:
-http://www.sqlskills.com/blogs/jonathan/post/Tracking-Problematic-Pages-Splits-in-SQL-Server-2012-Extended-Events-e28093-No-Really-This-Time!.aspx
-]]>
-Description: Scan Point Revalidations occur during range scans. When a range scan occurs there is an optimization process that occurs where the pages are marked as satisfied with the WHERE predicate that does the range scan.
-Instead of scanning through each and every row in the page, it does not keep an exclusive lock on those pages; instead it just keeps a mark on it and continues with rest of the scan. If one or more rows in the page are modified by update or a delete operation, the update or delete process will notify the scan to recheck the page to see if the page is still valid for the range scan. This recheck is called a Scan Point Revalidation.
-Scan Point Revalidations shows the contention between range scans and modifications to the same pages. This counter also pinpoints hotspots within the cluster table competing between reads and writes.
-Scan Point Revalidations are the number of times per second that the scan point had to be revalidated before the scan could be continued. If a page latch has to be released due to contention, the scan point must be revalidated when the scan resumes.
-
-Note: This is an informative counter. It is not a critical counter that should be used for baselines or alerting.
-
Next Steps: You can correlate the Scan Point Revalidations/sec with the Range Scans/sec counter and Page Latch related counters. The higher the number of range scans on the same pages, the higher the number of scan point revalidations.
-A high number of Scan Point Revalidations/sec potentially indicates hot spots in the data, probably due to a poor choice of clustered index putting the most active rows on the same page.
-Consider reducing the number of range scans, isolating reporting and application use, and most importantly ensuring that the clustered index choice is the right one. Clustered indexes should be on columns that are sorted on, grouped on, used in joins, used in between queries, and in other operations where the order of the returned data is critical.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
-
-
-
-Description:
-Number of Workfiles created in the last second. Workfiles in TempDB are used in processing hash operations when the amount of data being processed is too big to fit into the available memory. The Work files are similar to work tables but are created strictly by hashing operations. Workfiles are used to store temporary results for hash joins and hash aggregates.
-Hash joins can require large amounts of memory for execution. As part of executing a hash join, the memory required for the hash can become too large and require a spill to disk. The disk storage to backup the hash operation is called a workfile. Workfiles are collections of extents and pages that are managed strictly by the workfile code.
-
-Threshold:
-
Yellow: Greater than 20 Workfiles created per second
-
-Next Steps:
-Make queries more efficient by adding/changing indexes. Run expensive queries through the Database Tuning Advisor (DTA), look for expensive queries and consider rewriting them, and add as last resort consider adding additional memory.
-
Reference:
-
SQL Server, Access Methods Object
-
http://technet.microsoft.com/en-us/library/ms177426.aspx
-
-Working with tempdb in SQL Server 2005
-
http://msdn.microsoft.com/en-us/library/cc966545.aspx
-
-Troubleshooting Performance Problems in SQL Server 2008
-
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx]]>
-
-
-
Description: Number of worktables created in the last second. The number of work tables created per second. Work tables are temporary objects and are used to store results for query spool, LOB variables, and cursors.
-
-
Threshold:
-
Yellow: Greater than 20 Worktables created per second. This will need to be baselined for accuracy.
-
Next Steps:
Look for expensive statements with high CPU, duration, and statements that run in parallel and tune them by adding indexes, reducing the volume of data being returned, and adding indexes where appropriate.
-Ensure that TempDB is not a bottleneck and is following best practices.
-If you determine that the throughput of your application has degraded because of contention in allocation structures, you can use the following techniques to minimize it.
-Evaluate your application and the query plans to see if you can minimize the creation of work tables and temporary tables. Monitor the perfmon counters as described in Monitoring contention caused by DML operations. Then, use SQL Profiler to correlate the values of these counters with the currently running queries. This helps you identify the queries that are causing the contention in allocation structures.
-Divide TempDB into multiple data files of equal size. These multiple files don't necessarily need to be on different disks/spindles unless you are also encountering I/O bottlenecks as well. The general recommendation is to have one file per CPU because only one thread is active per CPU at one time. SQL Server allocates pages for TempDB objects in a round-robin fashion (also referred to as proportional fill) so that the latches on PFS and SGAM pages are distributed among multiple files. This is supported both in SQL Server 2000 and SQL Server 2005. There are improvements to the proportional fill algorithm in SQL Server 2005.
-Use TF-1118. Under this trace flag SQL Server allocates full extents to each TempDB object, thereby eliminating the contention on SGAM page. This is done at the expense of some waste of disk space in TempDB. This trace flag has been available since SQL Server 2000. With improvements in TempDB object caching since SQL Server 2005, there should be significantly less contention in allocation structures. If you see contention in SGAM pages, you may want to use this trace flag. Cached TempDB objects may not always be available. For example, cached TempDB objects are destroyed when the query plan with which they are associated is recompiled or removed from the procedure cache.
-
-Note: For each release of SQL Server, TempDB has more potential uses such as with SNAPSHOT ISOLATION level, temporary statistics use for read-only databases in SQL Server 2012 and more. It is recommended to keep a close watch on the usage of TempDB and leverage the TF1118 if the data file and sizing best practices do not address allocation bottlenecks.
-
-Additionally consider putting TempDB on local SSD disks in order to maximize disk performance.
-
Reference:
-
SQL Server, Access Methods Object
-
http://technet.microsoft.com/en-us/library/ms177426.aspx
-
Working with TempDB in SQL Server 2005
-
http://msdn.microsoft.com/en-us/library/cc966545.aspx
-
Troubleshooting Performance Problems in SQL Server 2008
-
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx ]]>
-
-
-
Description: The Buffer Cache Hit Ratio measures the percentage of pages that were found in the buffer pool without having to incur a read from disk. This counter indicates how often SQL Server goes to the buffer, not the hard disk, to get data. The higher this ratio, the better. A high ratio, close to 100% indicates that SQL Server did not have to go to the hard disk often to fetch data, and performance overall is boosted. If the Buffer Cache Hit Ratio was 100% that would suggest that all of the pages are being accessed from cache and does not require trips to disk, because of the optimistic read ahead mechanism, this is not exactly the case.
-When a user session wants to read data from the database, it will read directly from the SQL Server buffer cache (a logical read), or, if the buffer cache does not have the data that is requested, the data will be read into the buffer cache from disk (a physical read) and then from the buffer cache. If the requested data is in the buffer cache, then it is called a 'buffer hit'. If the data is not in the buffer cache it is called a 'buffer miss'. The ratio of buffer hits to total buffer requests is called the buffer cache hit ratio as can be seen from the following:
-
Cache Hit Ratio = (Logical Reads - Physical Reads)/Logical Reads
-
A read from memory takes approximately 100 nanoseconds, while a read from disk takes about 8 milliseconds or more.
-1 millisecond = 1,000,000 nanoseconds
-The important point about SQL Server read operations is that when selecting data from the database, the user will wait on the complete read operation including all of the physical reads. The time it takes to select from the database depends on how much data will be read and how long it takes for those reads to occur. Even with cache reads, the time it takes to read a large amount of data can be significant. With physical reads, the time will be even longer.
-There are a few considerations to be aware of regarding the Buffer Cache Hit Ratio counter. First, unlike many of the other counters available for monitoring SQL Server, this counter averages the Buffer Cache Hit Ratio from the time the instance of SQL Server was started. In other words, this counter is not a real-time measurement, but an average. Secondly, the buffer cache hit ratio may be skewed by the read ahead mechanism. Read Ahead Reads are pages that were read into cache while the query was processed. Read aheads are an optimistic form of physical reads. Because of the read ahead mechanism, you should not infer from a high buffer cache hit ratio that SQL Server is not suffering from memory pressure or at least could not benefit from additional memory.
-
-
Threshold:
-
Yellow: Less than 97 percent buffer cache hit ratio
-
Red: Less than 90 percent buffer cache hit ratio
-
Next Steps:
-
Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for queries with a high number of logical reads and consider tuning and potentially rewriting them.
-
Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
-
-
-Description: Total number of pages on all free lists. The more free pages that are available then the less often the lazy writer will have to fire keeping pages in the buffer pool longer.
-
-
-A value less than 640 (or 5 MB) may indicate physical memory pressure.
-
-
Threshold:
-
Yellow: Less than 640 Free Pages
-
Next Steps:
-Compare the Buffer Manager\Free pages counter to the following:
-
The higher the Buffer Manager\Free pages then the higher the Buffer Manager\Page Life Expectancy should be. If Buffer Manager\Free pages is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-
Reference:
-
SQL Server, Access Methods Object
-
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
-
-
Description:
-The Lazy Writes/sec counter records the number of buffers written each second by the buffer manager's lazy write process. This counter tracks how many times a second that the Lazy Writer process is moving dirty pages from the buffer to disk in order to free up buffer space. This process is where the dirty, aged buffers are removed from the buffer by a system process that frees the memory up for other uses. A dirty, aged buffer is one that has changes and needs to be written to the disk. High value on this counter possibly indicates I/O issues or even SQL Server memory problems. The Lazy writes / sec values should consistently be less than 20 for the average system.
-Generally speaking, this should not be a high value, say more than 20 per second or so. Ideally, it should be close to zero. If it is zero, this indicates that your SQL Server's buffer cache is plenty big and SQL Server doesn't have to free up dirty pages, instead waiting for this to occur during regular checkpoints. If this value is high, then a need for more memory is indicated.
-
-
-Note: NUMA will increase the number of lazy writer threads per NUMA node and influence the behavior of the lazy writer by increasing its execution at this view. If the server is a NUMA environment other signs of memory pressure should be used and you should analyze the Buffer Node counters for Page Life Expectancy per node. There is not a lazy writer counter in Buffer Nodes.
-
-Threshold:
-
Red: Greater than 20 Lazy Writes per second
-
-
-
-
-Next Steps:
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 and later flushes pages out of the buffer pool cache under memory pressure.
-
-
-Reference:
-
SQL Server, Access Methods Object
-
http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
-
-Configure SQL Server to Use Soft-NUMA
-http://msdn.microsoft.com/en-us/library/ms345357.aspx]]>
-
-
-
-
-
-Description:
-Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
-
-When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
-
-The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
-
-Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed; a NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
-
Threshold:
-
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
-
Red: Page life expectancy is less than 5 minutes (300 seconds)
-
-
Next Steps:
-If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 and later flush pages out of the buffer pool cache under memory pressure.
-Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options to not address the issue.
-
-
Reference:
-
-
SQL Server, Access Methods Object
-
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-
-
-
Description:
-Number of requests to find a page in the buffer pool. When the ratio of page lookups to batch requests is greater than 100, this is an indication that while query plans are looking up data in the buffer pool, these plans are inefficient or there was a large number of ad-hoc queries.
-
-
-Threshold:
-Ratio of Page Lookups/sec to Batch Requests/sec < 100 to 1.
-
Warning: Page life expectancy is less than 5 minutes (300 seconds)
-
Next Steps:
-Page Lookups/sec is the number of requests to find a page in the buffer pool made per second. If this number is high as compared to the number of batch requests, this indicates a degree of inefficiency and a potential opportunity for tuning.
-Identify queries with the highest amount of logical I/O's and tune them.
-
Note: You can track the Page Lookups/sec and other counters through the sys.dm_os_performance_counters DMV which contains all the SQL Server instance object-related counters that you can find in perfmon.
-
Reference:
-
SQL Server, Buffer Manager Object
-http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description: Number of physical database page reads issued per second. 80 to 90 per second is normal; anything above that indicates an indexing or memory constraint.
-
Threshold:
-
Yellow: Page Reads/sec > 90
-
Next Steps:
-Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
-If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
-Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
-Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
-
Reference:
-
SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description: Number of physical database page writes issued per second. 80 to 90 per second is normal. If the value is above 90, it is recommended to check the Lazy Writes/sec and Checkpoint Pages/sec counters; if these counters are also relatively high, this indicates a memory constraint.
-
-Threshold:
-
Yellow: Page Writes/sec > 90
-
Next Steps:
-Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
-If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
-Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
-Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
-
Reference:
-
SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-]]>
-
-
Description:
-
Login and logout rates should be approximately the same. A login rate higher than the logout rate suggests that the server is not in a steady state, or that applications are not correctly using connection pooling. This could result in an increased load on the server.
-
Next Steps:
-
Verify if the .NET connection string is using the pooling=true and connection reset=true parameters.
-If so, a profiler trace with the Audit login and Audit logout Events would reveal the usage of sp_reset_connection stored procedure, which is used by SQL Server to support remote stored procedure calls in a transaction.
-This stored procedure also causes Audit Login and Audit Logout events to fire when a connection is reused from a connection pool.
-Also, the EventSubClass column in the trace will show if the connections are being pooled or not.
-Therefore focus the comparison only on the rate of non-pooled Logins and Logouts, as pooled connections will be reflected in the Logins/sec counter, but not on the Logouts/sec counter.
-
Reference:
-
SQL Server 2012 Books Online: SQL Server: General Statistics Object
-
http://technet.microsoft.com/en-us/library/ms190697(v=sql.110).aspx
-
SQL Server Connection Pooling
-
http://msdn.microsoft.com/en-us/library/8xx3tyca.aspx
-
SQL Server 2012 Books Online: Audit Login Event Class
-
http://msdn.microsoft.com/en-us/library/ms190260(v=sql.110).aspx
-]]>
-
-
-
Description: Total number of logouts started per second. Greater than 2 per second indicates that the application is not correctly using connection pooling.]]>
-
-
Description: Number of users connected to the system. The number of users currently connected to the SQL Server. This should correlate with the Batch Requests per second counter.]]>
-
-
-
Description: Current number of processes waiting for a workspace memory grant. Memory Grants Pending records the number of connections that are waiting for memory before they can begin processing a memory intensive query such as a sort or hash operation. Connections that wait in this state for a long enough time will eventually receive an 8645 error (A time out occurred while waiting for memory resources to execute the query. Rerun the query). A spid waiting in this state will have a waittype of 0x0040 (RESOURCE_SEMAPHORE) in sysprocesses. If this counter remains above zero for any significant amount of time then you will need to track down what queries are doing sorts/hashes and run them through Database Tuning Advisor (DTA) to see if they can get a more efficient plan.
-
-
-Threshold:
-
Red: Numbers higher than 0 indicate a lack of memory.]]>
-
-
-
-Total amount of dynamic memory the server is willing to consume]]>
-
-
-Description: Number of SQL batch requests received by server. This counter measures the number of batch requests that SQL Server receives per second, and generally follows in step to how busy your server's CPUs are. Generally speaking, over 1000 batch requests per second indicates a very busy SQL Server, and could mean that if you are not already experiencing a CPU bottleneck, you may very well be soon. Of course, this is a relative number, and the bigger your hardware, the more batch requests per second SQL Server can handle. From a network bottleneck approach, a typical 100Mbs network card is only able to handle about 3000 batch requests per second. If you have a server that is this busy, you may need to have two or more network cards, or go to a 1Gbs network card.
-
-Note: Sometimes low batch requests/sec can be misleading. If there were a SQL statements/sec counter, this would be a more accurate measure of the amount of SQL Server activity. For example, an application may call only a few stored procedures yet each stored procedure does lot of work. In that case, we will see a low number for batch requests/sec but each stored procedure (one batch) will execute many SQL statements that drive CPU and other resources. As a result, many counter thresholds based on the number of batch requests/sec will seem to identify issues because the batch requests on such a server are unusually low for the level of activity on the server.
-
-We cannot conclude that a SQL Server is not active simply by looking at only batch requests/sec. Rather, you have to do more investigation before deciding there is no load on the server. If the average number of batch requests/sec is below 5 and other counters (such as SQL Server processor utilization) confirm the absence of significant activity, then there is not enough of a load to make any recommendations or identify issues regarding scalability.
-
-Note: Batch requests / sec is a great counter to use for baselining and to use as a measurement of how many batches the system could handle before a symptom was evident or a particular condition occurred. This counter will greatly depend on SQL Server code and the hardware being used. It is often used as a gauge of saying that a particular system was able to handle x number of batch requests per second and then to examine system and SQL Server counters to determine what resource is the bottleneck at that particular workload.]]>
-
-
-
-Description: Number of SQL compilations that occurred per second, including recompiles. A high value after subtracting recompiles can be an indication of a large number of ad hoc queries, which can also be cross referenced with the number of ad hoc plans in the plan cache counter.
-
-Be aware of the following:
-
-Reference
-SQL Server, Plan Cache Object
-http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
-
SQL Server Compilation Bottlenecks
-
http://blogs.msdn.com/grahamk/archive/2009/02/03/compilation-bottlenecks-error-8628-severity-17-state-0-part-1.aspx
-
-
-]]>
-
-
-
-Description: Number of SQL re-compiles per second that measures the number of times that a statement executed, but had to be compiled again before the statement completed. There are a variety of reasons that a recompile occurred, such as statistics being out of date, a column was added to a table a stored procedure depends on, the statement was run with a recompile option, etc. This counter needs to be as close to 0 as possible. A recompile can cause deadlocks and compile locks that are not compatible with any locking type.
-
-SQL Server Trace / Profiler provides an excellent way to find out exactly why recompiles are occurring in your environment.
-
-Troubleshooting stored procedure recompilation
http://support.microsoft.com/kb/243586
-How to identify the cause of recompilation in an SP:Recompile event
-http://support.microsoft.com/kb/308737]]>
-
-
-
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
-
-This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
-
-Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
-
Threshold:
-
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
-
Formula:
-(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
-
Next Steps:
-The main causes of high Full Scans/sec are:
-
Reference:
-
SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-
SQL Server 2005 Waits and Queues
-http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
-
Wait Types and Correlation to Other Performance Info
-http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
-
Description: Lock Requests/sec reports the number of new locks and lock conversions requested from the lock manager per second. A Lock Requests/sec greater than 500 when compared to Batch Request/sec indicates that batches are acquiring a large number of locks.
-This suggests inefficient queries and there is a risk is that blocking may occur.
-Threshold: (Yellow) - This value should not be greater than 50% of the number of Batch Requests/Sec
-
Next Steps:
-
Review high-read queries. In addition, examine the code to determine where to reduce the number of reads by either tuning your application or the database.
-
Reference:
-
SQL Server, Locks Object
-
http://msdn.microsoft.com/en-us/library/ms190216.aspx
-]]>
-
-
-
-
-Description: Number of new locks and lock conversions requested from the lock manager. This value should tie closely to the number of Batch Requests per second. Values greater than 1000 may indicate queries are pulling large volumes of data, thereby accessing large numbers of rows
-
Reference:
-
SQL Server, Locks Object
-
http://msdn.microsoft.com/en-us/library/ms190216.aspx
-
-Threshold
-
-Yellow Greater than > 1000 Lock Requests / sec]]>
-
Description: Number of lock requests that could not be satisfied immediately and required the caller to wait before being granted the lock. This is a sign that there is some blocking occurring and would be a good baseline measurement of lock waits for load testing.
-
Note: Lock waits are not recorded until after the lock event completes. For examining active blocking it is recommended to query sys.dm_os_waiting_tasks.
-
-Threshold
-Yellow Values greater than 0]]>
Recommendation: Look for peaks that approach or exceed 60 seconds.
-
Even though this counter counts how many total milliseconds SQL Server is waiting on locks over the last second, the counter actually records the lock wait time for a particular lock wait at the end of the locking event.
-
The following methods can be used to reduce lock contention and increase overall throughput:
-
-
-
-
-
-
-Description: Number of lock requests that timed out. This does not include requests for NOWAIT locks. A value greater than zero might indicate that user queries are not completing.
-
-Threshold
-Yellow Greater than 1
-]]>
-
-Description:
-Number of lock requests, per second, which resulted in a deadlock. Deadlocks are always an issue that should be resolved. A deadlock transaction that is killed must be rerun. It is recommended to use the SQL Trace deadlock graph, trace flag 1222, and the extended events deadlock capture to help identify and solve all of the deadlocks in your environment.
-
-Threshold
-Red Any Deadlocks greater than 0
-
-Resources
-
Bart Duncan Deadlock Resources
-Getting historical deadlock info using extended events
-http://www.sqlskills.com/BLOGS/PAUL/post/Getting-historical-deadlock-info-using-extended-events.aspx]]>
Recommendation: Review the wait statistics on the server to find the top resources that the SQL Server is waiting on.
-
Reference:
-
Performance Tuning Waits and Queues
-
http://www.microsoft.com/technet/prodtechnol/sql/bestpractice/performance_tuning_waits_queues.mspx
-]]>
-
-
-
-
Recommendation: Review the wait statistics on the server to find the top resources that the SQL Server is waiting on.
-
Reference:
-
Performance Tuning Waits and Queues
-
http://www.microsoft.com/technet/prodtechnol/sql/bestpractice/performance_tuning_waits_queues.mspx
-]]>
-
-
-
-
-
-Description:
-% Processor Time is the percentage of elapsed time that all of the process's threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions is included in this counter.
-
-This counter measures the percentage of total processor time spent (user mode and kernel mode) on SQL Server process threads. If this counter stays at 80% for sustained periods of time, then you may also wish to investigate other Process (sqlservr) such as Private Bytes, Virtual Bytes, and Working Set to get a better understanding of how SQL Server allocates certain segments of memory.
-
-
-Threshold:
-
Red: SQL Server is using more than 30% user mode CPU usage
-
-
-Reference:
-
-Monitoring CPU Usage
-http://msdn.microsoft.com/en-us/library/ms178072.aspx
-
Ask the Performance Team
-http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
-
Clint Huffman's Windows Troubleshooting in the Field Blog
-http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
-
Description: Total number of processes per second that have successfully acquired a workspace memory grant. This counter should be used as a baseline for comparisons under load.]]>
-
Description: Total amount of memory granted to executing processes. This memory is used for hash, sort and create index operations.]]>
-
Description: Total amount of memory granted to executing processes. This memory is used primarily for hash, sort and create index operations.]]>
-
Description: The cumulative size of all the data files in the database.]]>
-
Description: Total number of log bytes flushed.]]>
-
Description: The cumulative size of all the log files in the database.]]>
-
Description: The cumulative used size of all the log files in the database.]]>
-
-
Description: Total wait time (milliseconds).]]>
-
-
Description: Number of commits waiting on log flush.]]>
-
Description: Number of log flushes.]]>
-
-
Description: Total number of log growths for this database.]]>
-
Description: Total number of log truncations for this database.]]>
-
-
Description: Total number of log shrinks for this database.]]>
-
-
Description: The percent of space in the log that is in use.]]>
-
Description: Number of auto-parameterization attempts.]]>
-
Description: Number of failed auto-parameterizations.]]>
-
Description: Number of safe auto-parameterizations.]]>
-
Description: Number of unsafe auto-parameterizations.]]>
-
Description: System CPU usage by all requests in the specified instance of the performance object.]]>
-
Description: Number of completed requests per second in the workload group.]]>
-
Description: Number of requests waiting in the queue due to resource governor limits in the workload group.]]>
-
Description: System CPU usage by all requests in the specified instance of the performance object.]]>
-
Description: Target amount of memory in kilobytes the resource pool is trying to attain based on the settings and server state.]]>
-
-
-
-
Description: Used amount of memory in kilobytes in the resource pool.
-
-
Threshold:
-
Yellow: A High ratio of Used to Target Resource Group memory
-The following table describes memory available to In-Memory OLTP database in a resource pool (named or default) before an OOM error is raised.
-
-Target Committed Memory Percent available for in-memory tables
-<= 8 GB 70%
-<= 16 GB 75%
-<= 32 GB 80%
-<= 96 GB 85%
->96 GB 90%
-]]>
-
-Description: Number of requests that had to wait for a free page.
-
-Free list stalls/sec is the frequency with which requests for available database pages are suspended because no buffers are available. Free list stall rates of greater than 2 per second indicate too little SQL memory available.
-
-
Reference
-Threshold
-Yellow - Free list stalls/sec > 2
-SQL Server, Buffer Manager Object
-
-http://technet.microsoft.com/en-us/library/ms189628.aspx
-]]>
-Description
-
-Number of pages, per second, flushed by checkpoint or other operations that require all dirty pages to be flushed. The checkpoint frequency can be due to low memory conditions as well as the recovery interval set by sp_configure.
-
-Reference
-
-SQL Server, Buffer Manager Object
-
http://msdn.microsoft.com/en-us/library/ms189628.aspx
-
-A SQL Server DBA myth a day: (15/30) checkpoint only writes pages from committed transactions
-
http://www.sqlskills.com/BLOGS/PAUL/category/Checkpoint.aspx
-
-Database Checkpoints (SQL Server)
-
-http://technet.microsoft.com/en-us/library/ms189573(v=sql.110).aspx]]>
-
-
-
Description: Number of pages read, per second, in anticipation of use, which is an optimistic physical read. This number should not exceed 20% of total page reads.
-Threshold:
-
Yellow:Greater than 20% of Page Reads/sec
-
-http://technet.microsoft.com/en-us/library/ms189628.aspx]]>
-
-Feature usage since last SQL Server startup
-
-You can also examine performance counters through the sys.dm_os_performance_counters DMV. By using the perfmon counters for deprecation and the DMVs, you can help your application prepare and avoid issue when migrating to the future versions of SQL Server.
-
-SELECT * FROM sys.dm_os_performance_counters
-WHERE object_name LIKE '%Deprecated Features%'
AND cntr_value > 0
-ORDER BY cntr_value DESC
-
-SQL Server, Deprecated Features Object
-
-http://technet.microsoft.com/en-us/library/bb510662.aspx]]>
-
-Number of attentions per second. Attentions are the number of user cancels and query timeout that occured per second. A high number of attentions may indicate slow query performance as users are cancelling queries.]]>
-
-Number of errors/sec]]>
-Description Ratio between cache hits and lookups
-
-The Plan Cache object provides counters to monitor how SQL Server uses memory to store objects such as stored procedures, ad hoc and prepared Transact-SQL statements, and triggers. Multiple instances of the Plan Cache object can be monitored at the same time, with each instance representing a different type of plan to monitor.
-
Compiled Plan Stubs & Plan Cache Perf Counters:
-
-In SQL Server 2008 R2, there are three options that can help in dealing with plan cache pollution issues.
-
-Contributor(s):
-
-Reference:
-SQL Server, Plan Cache Object
-http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
-]]>
-The average amount of wait time (milliseconds) for each lock request that resulted in a wait. This wait could indicate excessive blocking that can be verified by querying sys.dm_os_waiting_tasks. Compare this counter to "Lock Waits/sec" and look for trends.
-
-Threshold
-Yellow Greater than >500 Average Wait Time.
]]>
-
-
Description: Percentage of work tables created where the initial two pages of the work table were not allocated but were immediately available from the work table cache.
-
-Since SQL Server 2005 worktable caching was improved. When a query execution plan is cached, the work tables needed by the plan are not dropped across multiple executions of the plan but merely truncated. In addition, the first nine pages for the work table are kept. In SQL Server 2000, the work tables used during query plan execution are dropped. Because the work table is cached, the next execution of the query is faster. When the system is low on memory, the execution plan may be removed from the cache and the associated work tables dropped as well. Both SQL Server 2000 and SQL Server 2005 use a small global pool of pre-allocated pages and extents that make the initial creation of work tables faster.
-
-Note: When a work table is dropped, two pages may remain allocated and they are returned to the work table cache. A value less than 90% may indicate insufficient memory, since execution plans are being dropped, or may indicate, on 32-bit systems, the need for an upgrade to a 64-bit system.
-
-
Threshold:
-
Yellow: Less than 90% Worktables from Cache Ratio. This will need to be baselined for accuracy.
-
-
Reference:
-SQL Server, Access Methods Object
-http://msdn.microsoft.com/en-us/library/ms177426(v=sql.110).aspx]]>
-
Description:
-The number of times locks on a table were escalated from page- or row-level to table-level. Frequent or even occasional spiking in this value may indicate poorly coded transactions.
-
-
-Lock Escalation Thresholds
-
-Lock escalation is triggered when lock escalation is not disabled on the table by using the ALTER TABLE SET LOCK_ESCALATION option, and when either of the following conditions exists:
-
-
-If locks cannot be escalated because of lock conflicts, the Database Engine periodically triggers lock escalation at every 1,250 new locks acquired.
-
-Next Steps
-
Reducing Locking and Escalation
-In most cases, the Database Engine delivers the best performance when operating with its default settings for locking and lock escalation. If an instance of the Database Engine generates a lot of locks and is seeing frequent lock escalations, consider reducing the amount of locking by:
-
Using an isolation level that does not generate shared locks for read operations.
-
-
-Note: Changing the isolation level affects all tables on the instance of the Database Engine.
-
-
-You can also use trace flags 1211 and 1224 to disable all or some lock escalations. For more information, see Trace Flags (Transact-SQL). Also, monitor lock escalation by using the SQL Server Profiler Lock:Escalation event; and see Using SQL Server Profiler.
-
-Reference:
-Lock Escalation (Database Engine) -
http://msdn.microsoft.com/en-us/library/ms184286(SQL.105).aspx
-
-]]>
-The free space in tempdb in KB.]]>
-The longest running time of any transaction in seconds. This counter could indicate a long running statement pulling large amounts of data that normally takes a long time to execute, or potentially a blocking condition.]]>
-The total number of active non-snapshot transactions that generate version records. These are all of the non-snapshot isolation versions such as triggers and online indexing.
-
-Note: The sum of Update Snapshot Transactions and NonSnapshot Version Transactions represents the total number of transactions that participate in version generation. The difference of Snapshot Transactions and Update Snapshot Transactions reports the number of read-only snapshot transactions.
-
-Reference:
-http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
-http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
-Managing TempDB in SQL Server: TempDB Basics (Version Store: Why do we need it?)
-http://blogs.msdn.com/b/sqlserverstorageengine/archive/2008/12/22/managing-tempdb-in-sql-server-tempdb-basics-verison-store.aspx
-]]>
-The total number of active snapshot transactions.]]>
-Description
-
-The version cleanup rate in KB per seconds.
-
-Monitors the version cleanup rate in KBps in all version stores. If the version cleanup rate is lower than the version generation rate, the version store will use more and more space in tempdb. However, if the version cleanup rate is 0 but the version generation rate is not, there is probably a long-running transaction that is preventing the version store cleanup.
-
-Row versions are shared across sessions. The creator of the row version has no control over when the row version can be reclaimed. You will need to find and then possibly stop the longest-running transaction that is preventing the row version cleanup.
-
The following query returns the top two longest-running transactions that depend on the versions in the version store:
-
-select top 2
- transaction_id,
- transaction_sequence_num,
- elapsed_time_seconds
-from sys.dm_tran_active_snapshot_database_transactions
-order by elapsed_time_seconds DESC
-
-Reference
-
-Row Versioning Resource Usage
-http://msdn.microsoft.com/en-us/library/ms175492.aspx]]>
-Description: The version generation rate in KB per seconds.
-
-You can use the Version Generation Rate and Version Cleanup Rate counters to measure version store impact on TempDB. The Version Generation Rate should not outpace the Cleanup Rate. Additionally, if your Version Cleanup Rate is 0, a long-running transaction could be preventing the version store cleanup. Incidentally, before generating an out-of-tempdb-space error, SQL Server 2008 makes a last-ditch attempt by forcing the version stores to shrink. During the shrink process, the longest-running transactions that have not yet generated any row versions are marked as victims. This frees up the version space used by them. Message 3967 is generated in the error log for each such victim transaction. If a transaction is marked as a victim, it can no longer read the row versions in the version store or create new ones. Message 3966 is generated and the transaction is rolled back when the victim transaction attempts to read row versions. If the shrink of the version store succeeds, more space is available in tempdb. Otherwise, tempdb runs out of space.
-
-If TempDB fills and runs out of space, writes will continue, but versions will not, and reads will fail.
-
-Reference
-SQL Server, Transactions Object
-http://technet.microsoft.com/en-us/library/ms189038.aspx]]>
-
-
-
-Plan re-use is desirable for OLTP workloads because re-creating the same plan (for similar or identical transactions) is a waste of CPU resources.
-
To compute the plan re-use rate, compare SQL Server SQL Statistics: batch requests/sec to SQL compilations/sec.
-
Special exception to the plan re-use rule is that zero (or trivial) cost plans will not be cached (not re-used) in SQL 2005 SP2 and above.
-
Applications that use zero cost plans will have a lower plan re-use but this is not a performance issue, because it is cheaper to generate a new plan every time than to cache.
-
Reference:
-
Execution Plan Caching and Reuse
-
http://msdn.microsoft.com/en-us/library/ms181055.aspx
-
Top SQL Server 2005 Performance Issues for OLTP Applications
-
http://technet.microsoft.com/en-us/library/cc966401.aspx
-]]>
-
-Number of pages which are not from NUMA-local memory.
-
-When we are using NUMA architecture which is becoming more common you will see memory nodes. We have one memory node per NUMA node and this is used to allocate memory in a particular node. This is visible in the SQL Server Buffer Node perfmon group. If you want to make sure you are performing local memory access versus foreign memory access we need to pay attention to where the memory is being allocated which can be tracked via sys.dm_os_memory_nodes.
-
-If we do not have enough memory in a particular NUMA node, we will perform a foreign access if we have to, but SQL Server tries to avoid this.
-
-Reference:
-http://msdn.microsoft.com/en-us/library/ms345597(v=sql.110).aspx]]>
-Maximum amount of memory in kilobytes the resource pool can have based on the settings and server state.]]>
Number of requests per second that failed to return a report from cache. Use this counter to find out whether the resources used for caching (disk or memory) are sufficient.
-
-Performance Counters for the MSRS 2011 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
-Number of reports that are currently active and being handled by the report server. Use this counter to evaluate caching strategy. There might be significantly more requests than reports generated.
-
-Performance Counters for the MSRS 2011 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
-Total number of cache misses against the in-memory cache after the service started. This counter resets when the application domain recycles.
-
-Performance Counters for the MSRS 2011 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
-Total number of reports that ran successfully after the service started. This counter resets when the application domain recycles.
-
-Performance Counters for the MSRS 2011 Windows Service Performance Object
-
-http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
-Number of running jobs. This counter can be used to find out if the current load on the system is potentially being driven from SQL Server Agent execution.]]>
-The number of Jobs that have failed to complete successfully for any reason since the last SQL Server Agent restart.]]>
-Percentage of successful jobs from the total number of executed jobs.
-
-]]>
-The number of Jobs that have successfully completed since the last SQL Server Agent restart.]]>
-Number of active steps.
-]]>
-The total number of times any Job Step execution is retried since the last SQL Server restart.]]>
-Number of AlwaysOn messages sent to this availability replica per second
-]]>
-Number of AlwaysOn messages resent in the last second
-
-
-]]>
-Number of AlwaysOn messages received from the replica per second
-]]>
-Bytes Received from Replica/sec: Number of bytes received from the availability replica per second
-]]>
-Number of bytes sent to the remote availability replica per second
-]]>
-Note: These counters are not defined by default and would be 0 unless configured through SQL Server through the sp_user_counter# stored procedures.]]>
-Number of milliseconds transaction termination waited for acknowledgement per second.
-
-The Replica:Transaction Delay counter measures the primary replica’s wait for acknowledgement that the transaction has committed at the secondary replica database in order to commit its own transaction. Since Asynchronous Commit Mode does not require acknowledgment to commit the transaction, this counter reports 0 when measured against a database in asynchronous commit mode.
-
-When there are multiple secondaries, this is a measure of the total time all transactions waited on the secondary acknowledgement.
-Note: This counter should be viewed on the Primary replica
-]]>
-The amount of log in kilobytes that need to be undone.
-
-Note: This counter should be viewed on the Secondary replica]]>
-Amount of log records redone on the secondary database in the last second.
-
-This counter can be compared to Log Bytes Received/Sec. If Log Bytes Received/Sec trends greater than Redone Bytes/Sec for sustained periods of time, then redo latency is building up between the primary and secondary replicas, which suggests that counter Redo Bytes Remaining and Recovery Queue is growing. This could indicate Redo is the bottleneck.
-
-To measure Recovery Time, divide Recovery Queue by Redone Bytes / Sec.
-
-Note: This counter should be viewed on the Secondary replica]]>
-The amount of log in kilobytes remaining to be redone to finish the reverting phase. If Redo Bytes Remaining counter is trending up, The redo process could be a bottleneck.
-
-Note: This counter should be viewed on the Secondary replica
-
]]>
-Number of times redo gets blocked in the last second]]>
-Amount of log records in the log files of the secondary replica that has not yet been redone.
-
-The Recovery Queue monitors the progress of the redo of flushed pages. If Recovery Queue is trending up, the redo process could be a bottleneck. For AlwaysON, the redo process is single threaded to ensure a consistent read for readable secondaries.
-
-Note: This counter should be viewed on the Secondary replica
-
]]>
-Amount of logs received by the availability replica for the database
-
-Note: This counter should be viewed on the Secondary replica
-]]>
-The amount of log in kilobytes remaining to finish the undo phase.
-
-Note: This counter should be viewed on the Secondary replica
-]]>
Amount of log records in the log files of the primary database, in kilobytes, that has not yet been sent to the secondary availability replica. This value is sent to the secondary availability replica from the primary availability replica.
-
Note: Queue size does not include FileStream files that are sent to a secondary.
-
The log send queue size at any point will give an approximate indication of how much log has not been sent, in KB. This is the amount of log the secondary does not have at the time of failover and the amount of data loss that could be experienced. The log send queue size is also reported in the DMV sys.dm_hadr_database_replica_states.log_send_queue_size column in KB.
-
-Note: This counter should be viewed on the Secondary replica
-
-Reference:
-
http://technet.microsoft.com/en-us/library/ff877972.aspx
-
http://www.sqlskills.com/blogs/joe/answering-questions-with-the-alwayson-dashboard/
-
http://support.microsoft.com/kb/2857849]]>
Number of transactions that wrote to the mirrored database and waited for the log to be sent to the mirror in order to commit, in the last second.
-
-This counter is a measure of transactions that are waiting to be hardened to the primary because of Synchronous Availability Mode requiring that they harden at secondary also. When using Asynchronous availability mode this counter is 0.
-
Note: This counter should be viewed on the Primary replica
-
]]>
-Total amount of dynamic memory the server is using for query optimization]]>
-Amount of memory the server is currently using for the purposes other than the database pages.
-
-\SQLServer:Buffer Manager Stolen pages
-
-
Description: Number of pages used for miscellaneous server purposes (including procedure cache). This counter shows how many pages were taken from the buffer pool to accommodate non-buffer pool needs such as plan cache, procedure cache, the optimizer, workspace memory, etc. This counter should be baselined and can be analyzed by comparing this counter to the amount of buffer pool space and large requests that are hitting the SQL Server instance.
-
-
Note: DBCC MEMORYSTATUS can also be leveraged to examine the impact of stolen memory to the buffer pool.
-
Note: The lazywriter process is not permitted to flush Stolen buffers out of the buffer pool.
-
-
Reference:
-SQL Server, Buffer Manager Object
-
-http://technet.microsoft.com/en-us/library/ms189628(v=sql.105).aspx
-INF: Using DBCC MEMORYSTATUS to Monitor SQL Server Memory Usage
-
-http://support.microsoft.com/kb/271624]]>
-Ideal amount of memory the server is willing to consume]]>
-Total amount of dynamic memory the server is currently consuming]]>
-Amount of memory the server is using on this node for database pages.]]>
-Non NUMA-local amount of memory on this node.]]>
-Amount of memory the server is using on this node for the purposes other than database pages.]]>
-Ideal amount of memory for this node.]]>
-Total amount of memory the server has committed on this node.]]>
-Number of lookup requests from this node, which were satisfied from other nodes.
-
-Note: It is recommended to set the maximum degree of parallelism MAXDOP to the number of processors per NUMA node to encourage queries to leverage memory on the local NUMA node though memory can always be used from other NUMA nodes if it is needed. Dynamic Management Views (DMVs) and performance monitor (perfmon) counters can be used to find out the degree local memory and foreign memory is being used.
-Additionally, it is recommended to leverage the SysInternals tool 'CoreInfo' to find out specifically the processors being used, hyperthreading, and the overall NUMA cost between NUMA nodes. Furthermore, it is recommended to configure MAXDOP correctly and monitor foreign memory use, install the latest hotfixes that would affect NUMA configurations, and ensure the latest firmware is installed for the hardware being used in your environment.
-
-References
-CoreInfo
-
http://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
-
Recommendations and guidelines for the "max degree of parallelism" configuration option in SQL Server
-
http://support.microsoft.com/kb/2806535
-]]>
-Number of lookup requests from this node, which were satisfied from this node.
-Note: It is recommended to set the maximum degree of parallelism MAXDOP to the number of processors per NUMA node to encourage queries to leverage memory on the local NUMA node though memory can always be used from other NUMA nodes if it is needed. Dynamic Management Views (DMVs) and performance monitor (perfmon) counters can be used to find out the degree local memory and foreign memory is being used.
-Additionally, it is recommended to leverage the SysInternals tool 'CoreInfo' to find out specifically the processors being used, hyperthreading, and the overall NUMA cost between NUMA nodes. Furthermore, it is recommended to configure MAXDOP correctly and monitor foreign memory use, install the latest hotfixes that would affect NUMA configurations, and ensure the latest firmware is installed for the hardware being used in your environment.
-
-References
-CoreInfo
-
http://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
-Recommendations and guidelines for the "max degree of parallelism" configuration option in SQL Server
-
http://support.microsoft.com/kb/2806535
-]]>
-Database pages on node.
-
-SQL Server:Buffer Node
-
- http://technet.microsoft.com/en-us/library/ms345597.aspx
-]]>
-Number of pages flushed to enforce the recovery interval settings.
-
-When Indirect Checkpoints is enabled at the database level, you will notice a new background thread in sys.dm_exec_requests with the command token "RECOVERY WRITER". There is a single background writer for the SQL Server instance. The background writer performs aggressive flushing of dirty pages based on LSN order and reduces the redo phase recovery time.
-
-The catalog view sys.databases contains a column named target_recovery_time_in_seconds that indicates whether a specific database is using the new Indirect checkpoint algorithm. There is a new performance monitor counter called 'Background writer pages/sec' that exposes the amount of dirty pages processed by the background writer.
-
-SQL Server Books Online contains a discussion about Indirect Checkpoints and how it interacts with the recovery interval setting:
-
-
-Database Checkpoints (SQL Server)
-
-http://msdn.microsoft.com/en-us/library/ms189573(v=sql.110).aspx ]]>
-The ideal number of pages in the Buffer Pool according the maximum memory granted to SQL Server.]]>
-
-
-
-
-
-Description:
-Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
-
-When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
-
-The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
-
-Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed, A NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
-
Threshold:
-
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
-
Red: Page life expectancy is less than 5 minutes (300 seconds)
-
-
Next Steps:
-If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
-Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pull to honor requests.
-Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 starts to flush pages out of the buffer pool cache under memory pressure.
-Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options to not address the issue.
-
-
Reference:
-
-
SQL Server, Access Methods Object
-
-http://msdn.microsoft.com/en-us/library/ms177426.aspx
-]]>
-Number of active update transactions for the database.]]>
-Read/write throughput for backup/restore of a database.]]>
-KiloBytes bulk copied.]]>
-Number of temporary tables/table variables in use]]>
-Number of temporary tables/table variables created/sec]]>
-Number of temporary tables/table variables waiting to be destroyed by the cleanup system thread]]>
-Number of suboptimal query plans generated per second in the workload group.]]>
-Number of threads used by parallel queries in the workload group. Serial queries and the main thread of parallel queries are not included in this number.]]>
-Number of queries per second getting less than ideal amount of memory in the workload group.]]>
-Number of currently running requests in the workload group.]]>
-The total number of active transactions.]]>
-The size of the version store in KB.]]>
-The total number of errors that occur during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx
-]]>
-The total number of errors that occur per second during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
-A number from 1-5 indicating the current memory state of the server.
-
-
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx
-]]>
-Number of bytes the server requested to shrink.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
-Number of shrink notifications the server issued in the last second. Indicates how often the server believes it is under memory pressure.
-
-Performance Counters for the ReportServer:Service Performance Object
-
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
-Actual number of bytes sent per second over the network to the remote availability replica
-]]>
-
-
-Description: Number of buffer pool extension page reads/writes outstanding. In other words, this is the I/O queue length for the buffer pool extension file.
-
-
-Threshold:
-
Red: Numbers higher than 0 warrants more investigation on the I/O subsystem latencies. Latency on a disk hosting the Buffer Pool Extension file should be below 1ms.
-]]>
-
-
-
-
-Description: Average seconds a page will stay in the buffer pool extension without references.
-]]>
-
-
-Description: Total number of free cache pages in the buffer pool extension file.
-
-
Threshold:
-
Yellow: Less than 5120 Extension free Pages, or 40MB
-]]>
+
+
+
+
+
+Description: % Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.
+
+Privileged or kernel mode is the processing mode that allows code to have direct access to all hardware and memory in the system. I/O operations and other system services run in privileged (kernel) mode; user applications run in user mode. Unless the processes are graphics-intensive or I/O-intensive such as file and print services, most applications should not be processing much work in kernel mode.
+Privileged mode corresponds to the percentage of time the processor spends on execution of Microsoft Windows kernel commands, such as processing of SQL Server I/O requests. If this counter is consistently high when the Physical Disk counters are high, consider focusing on improving the disk subsystem.
+
+It is recommended to look for comparative trends with other processes, workloads, error counts, and other behaviors to find what is driving Privileged Time.
+
+Note: Different disk controllers and drivers use different amounts of kernel processing time. Efficient controllers and drivers use less privileged time, leaving more processing time available for user applications, increasing overall throughput.
+
+
+Threshold:
+
+Yellow: SQL Server is using more than 20% Privileged (kernel) mode CPU usage
+
Red: SQL Server is using more than 30% Privileged (kernel) mode CPU usage
+
Next Steps:
+The key piece to diagnosing high processor conditions is to determine the ratio of privileged mode to user mode CPU.
+The counter '\Processor\% Processor Time' is the sum of '\Processor\% Privileged Time' and '\Processor\% User Time'. If Privileged Time is pushing the %Processor Time higher then it is due to processes executing in kernel mode. If '% User Time' is causing the % Processor Time to be higher then it is likely a user mode process that is causing the pressure.
+If %Privileged Time is consistently high or shows high under load, it could be several issues. The most common reason for high %Privileged Time is disk pressure which can be measured by correlating this counter with Physical Disk reads / sec and Physical Disk writes / sec. If these are also high you may also see a high number of Page Latch Waits for SQL Server which can be measured by examining the sys.dm_os_wait_stats dynamic management view and the perfmon SQL Server:Wait Statistics perfmon counters.
+
+If SQL Server Memory Manager: Page Life Expectancy is also low try to address by reducing the number of queries that are performing a high number of logical reads by adding indexes, ensuring that statistics are up to date, and potentially rewriting the query.
+
+You could add more physical RAM to help raise Page Life Expectancy if it is low (lower than your baseline, or critical when under 300) although we only recommend adding memory as an absolute last resort. We recommend addressing design issues and poor indexing first. Adding physical RAM only masks the real issue.
+
+The other potential reasons for high privileged mode are related to out of date drivers, BIOS being out of date, failing components, processes that run in kernel mode such as anti-virus, and other potential issues.
+
+Reference:
+
+Monitoring CPU Usage
+http://msdn.microsoft.com/en-us/library/ms178072.aspx
+
Ask the Performance Team
+http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
+
Clint Huffman's Windows Troubleshooting in the Field Blog
+http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx
+]]>
+
+
+
+
Description: Rows with varchar columns on tables without a clustered index can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
+Forwarded records occur when a data record in a heap increases in size and the record's current page does not have the space to store the size increase. The record is moved to a new location, becoming a forwarded record, and the forwarding record is left in the original location to point to the real location of the record. The forwarded record points back to the forwarding record in case its location ever needs to change again.
+Access Methods Forwarded Records/sec measures the number of records accessed through forwarded record pointers which are due to tables without a clustered index. A forwarded record is basically a pointer. For instance, if you start with a short row, and update the row creating a wider row, the row might not fit on the data page. A pointer is put in its location and the row is forwarded to another page. Forwarding Records are used as a performance optimization so that all the non-clustered indexes on the heap do not have to be altered with the new location of the heap record.
+If a table has lots of forwarded records, scanning the table can be very inefficient.
+Also, rows with varchar columns can experience expansion when varchar values are updated with a longer string. In the case where the row cannot fit in the existing page, the row migrates and access to the row will traverse a pointer.
+Forwarded Records only occurs on heaps which are tables without clustered indexes.
+
Threshold: (Yellow) - This value should not be greater than 10% of the number of Batch Requests/Sec
+
Next Steps:
+
Look at code to determine where the short row is inserted followed by an update.
Forwarded records can be avoided by:
+
Reference:
+
SQL Server Storage Engine
+
http://blogs.msdn.com/sqlserverstorageengine/archive/2006/09/19/761437.aspx
+
Forwarding and forwarded records, and the back-pointer size
+
http://www.sqlskills.com/BLOGS/PAUL/post/Forwarding-and-forwarded-records-and-the-back-pointer-size.aspx
+
sys.dm_db_index_physical_stats (Transact-SQL)
+
http://msdn.microsoft.com/en-us/library/ms188917.aspx
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
+
Description:
+
This counter represents inserts into a table with no physical ordering of the rows. A table with no ordering, without a clustered index, is known as a heap table. Inserts into heaps will require SQL Server to perform freespace scans to identify pages with free space to insert rows. A heap table also requires an additional, internal column called a uniquifier to be generated for each row inserted.
+Extra processing is required to define and store a heap table since SQL Server normally uses the clustered index as a storage mechanism for the table data. Freespace scans have an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when there are many connections inserting.
+It is usually recommended that you physically order the table rows by using a clustered index on the table.
+FreeSpace Scans/sec represents inserts into a table with no physical ordering of its rows which is called a heap. A heap table requires an additional column called a uniquifier to be generated for each row inserted. It is recommended that you physically order the table rows by using a clustered index on the table for most tables.
+
+***Also, a heap table requires an additional column called a uniquifier to be generated for each row inserted. It is usually recommended that you physically order the table rows by using a clustered index on the table for most tables.
+
+FreeSpace Scans/sec measures the number of scans per second that were initiated to search for free space within pages already allocated to an allocation unit to insert or modify record fragments. Each scan may find multiple pages. FreeSpace Scans are due to inserts into heaps that require SQL Server to perform freespace scans to identify pages with free space to insert rows. Freespace scans are an additional I/O expense for inserts and can possibly cause contention on the GAM, SGAM, and PFS pages when many spids are inserting. The solution is often to add a clustered index for base tables.
+One or more of the following symptoms may accompany poor performance during inserts to a large table on SQL Server:
+
+
Threshold:
+
Yellow: A ratio (10%) or more than 1 freespace scan for every 10 Batch Requests/Sec
+
Next Steps:
+Microsoft recommends that you add a clustered index to the table and test the effect of the clustered index on performance.
+Reference:
+
PRB: Poor Performance on a Heap
+
http://support.microsoft.com/kb/297861
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
+]]>
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes and too many rows requested; queries with missing indexes or too many rows requested will incur a large number of logical reads and increased CPU time.
+
+This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
+
+Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also poor disk performance, and / or, high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
+
Next Steps:
+The main causes of high Full Scans/sec are:
+
Note: Identify disk bottlenecks by using Performance Counters, Profiler, sys.dm_io_virtual_file_stats and SHOWPLAN output.
+Also refer to the sys.dm_io_virtual_file_stats dynamic management view (DMV) to track io_stalls to help identify IO bottlenecks.
+To back up and support this information, compare the counters to sys.dm_os_wait_stats output. If you see high values in perfmon, you may also see high waits for the following:
+
Reference:
+
SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
SQL Server 2005 Waits and Queues
+http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
+
Wait Types and Correlation to Other Performance Info
+http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
+
+
+
+
+Description: The number of page splits per second that occurs as the result of overflowing index pages and new page allocations. When a record is inserted into an index, it must be inserted in order. If the data page is full, the page splits in order to maintain the appropriate order. A high value for this counter may warrant the consideration of a lower fill factor and pad_index to leave more empty space per page.
+This value should be as low as possible. Heavily fragmented indexes may be the result of high page splits/sec.
+
Note: A high value for this counter is not bad in situations where many new pages are being created, since it includes all new page allocations as well as splits when a data page splits.
+
Threshold:
+
Yellow: A ratio of more than 1 page split for every 20 batch requests
+
Next Steps:
+If the number of page splits is high, consider increasing the fillfactor of your indexes. An increased fillfactor helps to reduce page splits by increasing the amount of free space on each page.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426(v=SQL.105).aspx
+
+To track page splits more accurately see the following SQLSkills blog article from Jonathan Kehayias:
+http://www.sqlskills.com/blogs/jonathan/post/Tracking-Problematic-Pages-Splits-in-SQL-Server-2012-Extended-Events-e28093-No-Really-This-Time!.aspx
+]]>
+Description: Scan Point Revalidations occurs during range scans. When a range scan occurs there is an optimization process that occurs where the pages are marked as satisfied with the WHERE predicate that does the range scan.
+Instead of scanning through each and every row in the page, it does not keep an exclusive lock on those pages; instead it just keeps a mark on it and continues with rest of the scan. If one or more rows in the page are modified by update or a delete operation, the update or delete process will notify the scan to recheck the page to see if the page is still valid for the range scan. This recheck is called a Scan Point Revalidation.
+Scan Point Revalidations shows the contention between range scans and modifications to the same pages. This counter also pinpoints hotspots within the cluster table competing between reads and writes.
+Scan Point Revalidations are the number of times per second that the scan point had to be revalidated before the scan could be continued. If a page latch has to be released due to contention, the scan point must be revalidated when the scan resumes.
+
+Note: This is an informative counter. It is not a critical counter that should be used for baselines or alerting.
+
Next Steps: You can correlate the Scan Count Revalidations/sec with the Range Scans/sec counter and Page Latch related counters. The higher the number of range scans on the same pages, the higher the number of scan point revalidations.
+High number of Scan Point Revalidations/sec potentially indicate hot spots in the data, probably due to a poor choice of clustered index putting the most active rows on the same page.
+Consider reducing the number of range scans, isolating reporting and application use, and most importantly ensuring that the clustered index choice is the right one. Clustered indexes should be on columns that are sorted on, grouped on, used in joins, used in between queries, and in other operations where the order of the returned data is critical.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
+
+
+
+Description:
+Number of Workfiles created in the last second. Workfiles in TempDB are used in processing hash operations when the amount of data being processed is too big to fit into the available memory. The Work files are similar to work tables but are created strictly by hashing operations. Workfiles are used to store temporary results for hash joins and hash aggregates.
+Hash joins can require large amounts of memory for execution. As part of executing a hash join, the memory required for the hash can become too large and require a spill to disk. The disk storage to backup the hash operation is called a workfile. Workfiles are collections of extents and pages that are managed strictly by the workfile code.
+
+Threshold:
+
Yellow: Greater than 20 Workfiles created per second
+
+Next Steps:
+Make queries more efficient by adding/changing indexes. Run expensive queries through the Database Tuning Advisor (DTA), look for expensive queries and consider rewriting them, and as a last resort consider adding additional memory.
+
Reference:
+
SQL Server, Access Methods Object
+
http://technet.microsoft.com/en-us/library/ms177426.aspx
+
+Working with tempdb in SQL Server 2005
+
http://msdn.microsoft.com/en-us/library/cc966545.aspx
+
+Troubleshooting Performance Problems in SQL Server 2008
+
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx]]>
+
+
+
Description: Number of worktables created in the last second. The number of work tables created per second. Work tables are temporary objects and are used to store results for query spool, LOB variables, and cursors.
+
+
Threshold:
+
Yellow: Greater than 20 Worktables created per second. This will need to be baselined for accuracy.
+
Next Steps:
Look for expensive statements with high CPU, duration, and statements that run in parallel and tune them by adding indexes, reducing the volume of data being returned, and adding indexes where appropriate.
+Ensure that TempDB is not a bottleneck and is following best practices.
+If you determine that the throughput of your application has degraded because of contention in allocation structures, you can use the following techniques to minimize it.
+Evaluate your application and the query plans to see if you can minimize the creation of work tables and temporary tables. Monitor the perfmon counters as described in Monitoring contention caused by DML operations. Then, use SQL Profiler to correlate the values of these counters with the currently running queries. This helps you identify the queries that are causing the contention in allocation structures.
+Divide TempDB into multiple data files of equal size. These multiple files don't necessarily need to be on different disks/spindles unless you are also encountering I/O bottlenecks as well. The general recommendation is to have one file per CPU because only one thread is active per CPU at one time. SQL Server allocates pages for TempDB objects in a round-robin fashion (also referred to as proportional fill) so that the latches on PFS and SGAM pages are distributed among multiple files. This is supported both in SQL Server 2000 and SQL Server 2005. There are improvements to the proportional fill algorithm in SQL Server 2005.
+Use TF-1118. Under this trace flag SQL Server allocates full extents to each TempDB object, thereby eliminating the contention on SGAM page. This is done at the expense of some waste of disk space in TempDB. This trace flag has been available since SQL Server 2000. With improvements in TempDB object caching since SQL Server 2005, there should be significantly less contention in allocation structures. If you see contention in SGAM pages, you may want to use this trace flag. Cached TempDB objects may not always be available. For example, cached TempDB objects are destroyed when the query plan with which they are associated is recompiled or removed from the procedure cache.
+
+Note: For each release of SQL Server, TempDB has more potential uses such as with SNAPSHOT ISOLATION level, temporary statistics use for read-only databases in SQL Server 2012 and more. It is recommended to keep a close watch on the usage of TempDB and leverage the TF1118 if the data file and sizing best practices do not address allocation bottlenecks.
+
+Additionally consider putting TempDB on local SSD disks in order to maximize disk performance.
+
Reference:
+
SQL Server, Access Methods Object
+
http://technet.microsoft.com/en-us/library/ms177426.aspx
+
Working with TempDB in SQL Server 2005
+
http://msdn.microsoft.com/en-us/library/cc966545.aspx
+
Troubleshooting Performance Problems in SQL Server 2008
+
http://download.microsoft.com/download/D/B/D/DBDE7972-1EB9-470A-BA18-58849DB3EB3B/TShootPerfProbs2008.docx ]]>
+
+
+
Description: The Buffer Cache Hit Ratio measures the percentage of pages that were found in the buffer pool without having to incur a read from disk. This counter indicates how often SQL Server goes to the buffer, not the hard disk, to get data. The higher this ratio, the better. A high ratio, close to 100% indicates that SQL Server did not have to go to the hard disk often to fetch data, and performance overall is boosted. If the Buffer Cache Hit Ratio was 100% that would suggest that all of the pages are being accessed from cache and does not require trips to disk, because of the optimistic read ahead mechanism, this is not exactly the case.
+When a user session wants to read data from the database, it will read directly from the SQL Server buffer cache (a logical read), or, if the buffer cache does not have the data that is requested, the data will be read into the buffer cache from disk (a physical read) and then from the buffer cache. If the requested data is in the buffer cache, then it is called a 'buffer hit'. If the data is not in the buffer cache it is called a 'buffer miss'. The ratio of buffer hits to total buffer requests is called the buffer cache hit ratio as can be seen from the following:
+
Cache Hit Ratio = (Logical Reads - Physical Reads)/Logical Reads
+
A read from memory takes approximately 100 nanoseconds, while a read from disk takes about 8 milliseconds or more.
+1 millisecond = 1,000,000 nanoseconds
+The important point about SQL Server read operations is that when selecting data from the database, the user will wait on the complete read operation including all of the physical reads. The time it takes to select from the database depends on how much data will be read and how long it takes for those reads to occur. Even with cache reads, the time it takes to read a large amount of data can be significant. With physical reads, the time will be even longer.
+There are a few considerations to be aware of regarding the Buffer Cache Hit Ratio counter. First, unlike many of the other counters available for monitoring SQL Server, this counter averages the Buffer Cache Hit Ratio from the time the instance of SQL Server was started. In other words, this counter is not a real-time measurement, but an average. Secondly, the buffer cache hit ratio may be skewed by the read ahead mechanism. Read Ahead Reads are pages that were read into cache while the query was processed. Read aheads are an optimistic form of physical reads. Because of the read ahead mechanism, you should not infer from a high buffer cache hit ratio that SQL Server is not suffering from memory pressure or at least could not benefit from additional memory.
+
+
Threshold:
+
Yellow: Less than 97 percent buffer cache hit ratio
+
Red: Less than 90 percent buffer cache hit ratio
+
Next Steps:
+
Run expensive queries through the Database Tuning Advisor (DTA), add additional memory, and look for queries with a high number of logical reads and consider tuning and potentially rewriting them.
+
Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
+
+
+Description: Total number of pages on all free lists. The more free pages that are available then the less often the lazy writer will have to fire keeping pages in the buffer pool longer.
+
+
+A value less than 640 (or 5 MB) may indicate physical memory pressure.
+
+
Threshold:
+
Yellow: Less than 640 Free Pages
+
Next Steps:
+Compare the Buffer Manager\Free pages counter to the following:
+
The higher the Buffer Manager\Free pages then the higher the Buffer Manager\Page Life Expectancy should be. If Buffer Manager\Free pages is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+
Reference:
+
SQL Server, Access Methods Object
+
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx]]>
+
+
Description:
+The Lazy Writes/sec counter records the number of buffers written each second by the buffer manager's lazy write process. This counter tracks how many times a second that the Lazy Writer process is moving dirty pages from the buffer to disk in order to free up buffer space. This process is where the dirty, aged buffers are removed from the buffer by a system process that frees the memory up for other uses. A dirty, aged buffer is one that has changes and needs to be written to the disk. High value on this counter possibly indicates I/O issues or even SQL Server memory problems. The Lazy writes / sec values should consistently be less than 20 for the average system.
+Generally speaking, this should not be a high value, say more than 20 per second or so. Ideally, it should be close to zero. If it is zero, this indicates that your SQL Server's buffer cache is plenty big and SQL Server doesn't have to free up dirty pages, instead waiting for this to occur during regular checkpoints. If this value is high, then a need for more memory is indicated.
+
+
+Note: NUMA will increase the number of lazy writer threads per NUMA node and influence the behavior of the lazy writer by increasing its execution at this view. If the server is a NUMA environment other signs of memory pressure should be used and you should analyze the Buffer Node counters for Page Life Expectancy per node. There is not a lazy writer counter in Buffer Nodes.
+
+Threshold:
+
Red: Greater than 20 Lazy Writes per second
+
+
+
+
+Next Steps:
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 and later starts to flush pages out of the buffer pool cache under memory pressure.
+
+
+Reference:
+
SQL Server, Access Methods Object
+
http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
+
+Configure SQL Server to Use Soft-NUMA
+http://msdn.microsoft.com/en-us/library/ms345357.aspx]]>
+
+
+
+
+
+Description:
+Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
+
+When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
+
+The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
+
+Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed, A NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
+
Threshold:
+
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
+
Red: Page life expectancy is less than 5 minutes (300 seconds)
+
+
Next Steps:
+If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 and later starts to flush pages out of the buffer pool cache under memory pressure.
+Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
+
+
Reference:
+
+
SQL Server, Access Methods Object
+
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+
+
+
Description:
+Number of requests to find a page in the buffer pool. When the ratio of page lookups to batch requests is greater than 100, this is an indication that while query plans are looking up data in the buffer pool, these plans are inefficient or there was a large number of ad-hoc queries.
+
+
+Threshold:
+Ratio of Page Lookups/sec to Batch Requests/sec < 100 to 1.
+
Warning: Page life expectancy is less than 5 minutes (300 seconds)
+
Next Steps:
+Page Lookups/sec is the number of requests to find a page in the buffer pool made per second. If this number is high as compared to the number of batch requests, this indicates a degree of inefficiency and a potential opportunity for tuning.
+Identify queries with the highest amount of logical I/O's and tune them.
+
Note: You can track the Page Lookups/sec and other counters through the sys.dm_os_performance_counters DMV which contains all the SQL Server instance object-related counters that you can find in perfmon.
+
Reference:
+
SQL Server, Buffer Manager Object
+http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description: Number of physical database page reads issued per second. Number of physical database page reads issued. 80 to 90 per second is normal, anything that is above indicates indexing or memory constraint.
+
Threshold:
+
Yellow: Page Reads/sec > 90
+
Next Steps:
+Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
+If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
+Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
+Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
+
Reference:
+
SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description: Number of physical database page writes issued per second. 80 to 90 per second is normal. Anything above 90, it is recommended to check the lazy writer/sec and Checkpoint pages/sec counter, if these counters are also relatively high then, this indicates a memory constraint.
+
+Threshold:
+
Yellow: Page Writes/sec > 90
+
Next Steps:
+Attempt to tune the application so that fewer I/O operations are required. For example, perhaps I/O would be reduced if there were appropriate indexes or if the database design were denormalized.
+If the applications cannot be tuned, you will need to acquire disk devices with more capacity.
+Compare to the Memory: Pages/sec counter to see if there is paging while the SQL Server:Buffer Manager\Page reads/sec is high.
+Note: Before adjusting the fill factor, at a database level compare the SQL Server:Buffer Manager\Page reads/sec counter to the SQL Server:Buffer Manager\Page writes/sec counter, and use the fill factor option only if writes are a substantial fraction of reads (greater than 30 percent).
+
Reference:
+
SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+]]>
+
+
Description:
+
Login and logout rates should be approximately the same. A login rate higher than the logout rate suggests that the server is not in a steady state, or that applications are not correctly using connection pooling. This could result in an increased load on the server.
+
Next Steps:
+
Verify if the .NET connection string is using the pooling=true and connection reset=true parameters.
+If so, a profiler trace with the Audit login and Audit logout Events would reveal the usage of sp_reset_connection stored procedure, which is used by SQL Server to support remote stored procedure calls in a transaction.
+This stored procedure also causes Audit Login and Audit Logout events to fire when a connection is reused from a connection pool.
+Also, the EventSubClass column in the trace will show if the connections are being pooled or not.
+Therefore focus the comparison only on the rate of non-pooled Logins and Logouts, as pooled connections will be reflected in the Logins/sec counter, but not on the Logouts/sec counter.
+
Reference:
+
SQL Server 2012 Books Online: SQL Server: General Statistics Object
+
http://technet.microsoft.com/en-us/library/ms190697(v=sql.110).aspx
+
SQL Server Connection Pooling
+
http://msdn.microsoft.com/en-us/library/8xx3tyca.aspx
+
SQL Server 2012 Books Online: Audit Login Event Class
+
http://msdn.microsoft.com/en-us/library/ms190260(v=sql.110).aspx
+]]>
+
+
+
Description: Total number of logouts started per second. Greater than 2 per second indicates that the application is not correctly using connection pooling.]]>
+
+
Description: Number of users connected to the system. The number of users currently connected to the SQL Server. This should correlate with the Batch Requests per second counter.]]>
+
+
+
Description: Current number of processes waiting for a workspace memory grant. Memory Grants Pending records the number of connections that are waiting for memory before they can begin processing a memory intensive query such as a sort or hash operation. Connections that wait in this state for a long enough time will eventually receive an 8645 error (A time out occurred while waiting for memory resources to execute the query. Rerun the query). A spid waiting in this state will have a waittype of 0x0040 (RESOURCE_SEMAPHORE) in sysprocesses. If this counter remains above zero for any significant amount of time then you will need to track down what queries are doing sorts/hashes and run them through Database Tuning Advisor (DTA) to see if they can get a more efficient plan.
+
+
+Threshold:
+
Red: Numbers higher than 0 indicate a lack of memory.]]>
+
+
+
+Total amount of dynamic memory the server is willing to consume]]>
+
+
+Description: Number of SQL batch requests received by server. This counter measures the number of batch requests that SQL Server receives per second, and generally follows in step to how busy your server's CPUs are. Generally speaking, over 1000 batch requests per second indicates a very busy SQL Server, and could mean that if you are not already experiencing a CPU bottleneck, that you may very well soon. Of course, this is a relative number, and the bigger your hardware, the more batch requests per second SQL Server can handle. From a network bottleneck approach, a typical 100Mbs network card is only able to handle about 3000 batch requests per second. If you have a server that is this busy, you may need to have two or more network cards, or go to a 1Gbs network card.
+
+Note: Sometimes low batch requests/sec can be misleading. If there were a SQL statements/sec counter, this would be a more accurate measure of the amount of SQL Server activity. For example, an application may call only a few stored procedures yet each stored procedure does lot of work. In that case, we will see a low number for batch requests/sec but each stored procedure (one batch) will execute many SQL statements that drive CPU and other resources. As a result, many counter thresholds based on the number of batch requests/sec will seem to identify issues because the batch requests on such a server are unusually low for the level of activity on the server.
+
+We cannot conclude that a SQL Server is not active simply by looking at only batch requests/sec. Rather, you have to do more investigation before deciding there is no load on the server. If the average number of batch requests/sec is below 5 and other counters (such as SQL Server processor utilization) confirm the absence of significant activity, then there is not enough of a load to make any recommendations or identify issues regarding scalability.
+
+Note: Batch requests / sec is a great counter to use for baselining and to use as a measurement of how many batches the system could handle before a symptom was evident or a particular condition occurred. This counter will greatly depend on SQL Server code and the hardware being used. It is often used as a gauge of saying that a particular system was able to handle x number of batch requests per second and then to examine system and SQL Server counters to determine what resource is the bottleneck at that particular workload.]]>
+
+
+
+Description: Number of SQL compilations that occurred per second that includes recompiles. A high value subtracting recompiles can be an indication of a large number of ad hoc queries that can also be cross referenced with the number of ad hoc plans in the plan cache counter.
+
+Be aware of the following:
+
+Reference
+SQL Server, Plan Cache Object
+http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
+
SQL Server Compilation Bottlenecks
+
http://blogs.msdn.com/grahamk/archive/2009/02/03/compilation-bottlenecks-error-8628-severity-17-state-0-part-1.aspx
+
+
+]]>
+
+
+
+Description: Number of SQL re-compiles per second that measures the number of times that a statement executed, but had to be compiled again before the statement completed. There are a variety of reasons that a recompile occurred such as statistics being out of date, a column was added to a table a stored procedure depends on, statement was run with a recompile option, etc. This counter needs to be as close to 0 as possible. A recompile can cause deadlocks and compile locks that are not compatible with any locking type.
+
+SQL Server Trace / Profiler provides an excellent way to find out exactly why recompiles are occurring in your environment.
+
+Troubleshooting stored procedure recompilation
http://support.microsoft.com/kb/243586
+How to identify the cause of recompilation in an SP:Recompile event
+http://support.microsoft.com/kb/308737]]>
+
+
+
Description: This counter monitors the number of full scans on base tables or indexes. High values indicate that we may be having performance issues due to table / index page scans. If we see high CPU and / or drops in Page Life Expectancy (PLE) then we need to investigate this counter; however, if full scans are on small tables we can safely ignore this counter as this counter tracks all full table scans, not just those on large tables. A few of the main causes of high Full Scans/sec are missing indexes, too many rows requested, queries with missing indexes, or too many rows requested will have a large number of logical reads and an increased CPU time.
+
+This analysis throws a Warning alert if the ratio of Index Searches/sec to Full Scans/sec is less than 1000 to 1 and if there are more than 1000 Index Searches/sec.
+
+Note: This counter monitors the number of full scans on tables or indexes. This counter can be ignored unless there is also high CPU use along with high scan rates. High scan rates may be caused by missing indexes, very small tables, or requests for too many records.
+
Threshold:
+
Yellow: A ratio of more than 1 full scan for every 1000 index searches. The value of Index Searches/sec and Full Scans/sec should be greater than 1000.
+
Formula:
+(AvgSQLServerAccessMethodsIndexSearchessecAll / AvgSQLServerAccessMethods_FullScanssec) < 1000
+
Next Steps:
+The main causes of high Full Scans/sec are:
+
Reference:
+
SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+
SQL Server 2005 Waits and Queues
+http://download.microsoft.com/download/4/7/a/47a548b9-249e-484c-abd7-29f31282b04d/Performance_Tuning_Waits_Queues.doc
+
Wait Types and Correlation to Other Performance Info
+http://www.sqlmag.com/Files/09/40925/Webtable_01.doc]]>
+
Description: Lock Requests/sec reports the number of new locks and lock conversions requested from the lock manager per second. A Lock Requests/sec greater than 500 when compared to Batch Request/sec indicates that batches are acquiring a large number of locks.
+This suggests inefficient queries and there is a risk is that blocking may occur.
+Threshold: (Yellow) - This value should not be greater than 50% of the number of Batch Requests/Sec
+
Next Steps:
+
Review high-read queries. In addition, examine the code to determine where to reduce the number of reads by either tuning your application or the database.
+
Reference:
+
SQL Server, Locks Object
+
http://msdn.microsoft.com/en-us/library/ms190216.aspx
+]]>
+
+
+
+
+Description: Number of new locks and lock conversions requested from the lock manager. This value should tie close to the number of Batch Requests per second. Values greater than 1000 may indicate queries are pulling large volumes of data thereby accessing large numbers of rows.
+
Reference:
+
SQL Server, Locks Object
+
http://msdn.microsoft.com/en-us/library/ms190216.aspx
+
+Threshold
+
+Yellow: Greater than 1000 Lock Requests / sec]]>
+
Description: Number of lock requests that could not be satisfied immediately and required the caller to wait before being granted the lock. This is a sign that there is some blocking occurring and would be a good baseline measurement of lock waits for load testing.
+
Note: Lock waits are not recorded until after the lock event completes. For examining active blocking it is recommended to query sys.dm_os_waiting_tasks.
+
+Threshold
+Yellow Values greater than 0]]>
Recommendation: Look for peaks that approach or exceed 60 seconds.
+
Even though this counter counts how many total milliseconds SQL Server is waiting on locks over the last second, the counter actually records the lock wait time for a particular lock wait at the end of the locking event.
+
The following methods can be used to reduce lock contention and increase overall throughput:
+
+
+
+
+
+
+Description: Number of lock requests that timed out. This does not include requests for NOWAIT locks. A value greater than zero might indicate that user queries are not completing.
+
+Threshold
+Yellow Greater than 1
+]]>
+
+Description:
+Number of lock requests, per second, which resulted in a deadlock. Deadlocks are always an issue that should be resolved. A deadlock transaction that is killed must be rerun. It is recommended to use the SQL Trace deadlock graph, trace flag 1222, and the extended events deadlock capture to help identify and solve all of the deadlocks in your environment.
+
+Threshold
+Red Any Deadlocks greater than 0
+
+Resources
+
Bart Duncan Deadlock Resources
+Getting historical deadlock info using extended events
+http://www.sqlskills.com/BLOGS/PAUL/post/Getting-historical-deadlock-info-using-extended-events.aspx]]>
Recommendation: Review the wait statistics on the server to find the top resources that the SQL Server is waiting on.
+
Reference:
+
Performance Tuning Waits and Queues
+
http://www.microsoft.com/technet/prodtechnol/sql/bestpractice/performance_tuning_waits_queues.mspx
+]]>
+
+
+
+
Recommendation: Review the wait statistics on the server to find the top resources that the SQL Server is waiting on.
+
Reference:
+
Performance Tuning Waits and Queues
+
http://www.microsoft.com/technet/prodtechnol/sql/bestpractice/performance_tuning_waits_queues.mspx
+]]>
+
+
+
+
+
+Description:
+% Processor Time is the percentage of elapsed time that all of process threads used the processor to execution instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this counter.
+
+This counter measures the percentage of total processor time spent (user mode and kernel mode) on SQL Server process threads. If this counter stays at 80% for sustained periods of time, then you may also wish to investigate other Process (sqlservr) such as Private Bytes, Virtual Bytes, and Working Set to get a better understanding of how SQL Server allocates certain segments of memory.
+
+
+Threshold:
+
Red: SQL Server is using more than 30% user mode CPU usage
+
+
+Reference:
+
+Monitoring CPU Usage
+http://msdn.microsoft.com/en-us/library/ms178072.aspx
+
Ask the Performance Team
+http://blogs.technet.com/askperf/archive/2008/01/18/do-you-know-where-your-processor-spends-its-time.aspx
+
Clint Huffman's Windows Troubleshooting in the Field Blog
+http://blogs.technet.com/clinth/archive/2009/10/28/the-case-of-the-2-million-context-switches.aspx]]>
+
Description: Total number of processes per second that have successfully acquired a workspace memory grant. This counter should be used as a baseline for comparisons under load.]]>
+
Description: Total amount of memory granted to executing processes. This memory is used for hash, sort and create index operations.]]>
+
Description: Total amount of memory granted to executing processes. This memory is used primarily for hash, sort and create index operations.]]>
+
Description: The cumulative size of all the data files in the database.]]>
+
Description: Total number of log bytes flushed.]]>
+
Description: The cumulative size of all the log files in the database.]]>
+
Description: The cumulative used size of all the log files in the database.]]>
+
+
Description: Total wait time (milliseconds).]]>
+
+
Description: Number of commits waiting on log flush.]]>
+
Description: Number of log flushes.]]>
+
+
Description: Total number of log growths for this database.]]>
+
Description: Total number of log truncations for this database.]]>
+
+
Description: Total number of log shrinks for this database.]]>
+
+
Description: The percent of space in the log that is in use.]]>
+
Description: Number of auto-parameterization attempts.]]>
+
Description: Number of failed auto-parameterizations.]]>
+
Description: Number of safe auto-parameterizations.]]>
+
Description: Number of unsafe auto-parameterizations.]]>
+
Description: System CPU usage by all requests in the specified instance of the performance object.]]>
+
Description: Number of completed requests per second in the workload group.]]>
+
Description: Number of requests waiting in the queue due to resource governor limits in the workload group.]]>
+
Description: System CPU usage by all requests in the specified instance of the performance object.]]>
+
Description: Target amount of memory in kilobytes the resource pool is trying to attain based on the settings and server state.]]>
+
+
+
+
Description: Used amount of memory in kilobytes in the resource pool.
+
+
Threshold:
+
Yellow: A High ratio of Used to Target Resource Group memory
+The following table describes memory available to In-Memory OLTP database in a resource pool (named or default) before an OOM error is raised.
+
+Target Committed Memory Percent available for in-memory tables
+<= 8 GB 70%
+<= 16 GB 75%
+<= 32 GB 80%
+<= 96 GB 85%
+>96 GB 90%
+]]>
+
+Description: Number of requests that had to wait for a free page.
+
+Free list stalls/sec is the frequency with which requests for available database pages are suspended because no buffers are available. Free list stall rates of greater than 2 per second indicate too little SQL memory available.
+
+
Reference
+Threshold
+Yellow - Free list stalls/sec > 2
+SQL Server, Buffer Manager Object
+
+http://technet.microsoft.com/en-us/library/ms189628.aspx
+]]>
+Description
+
+Number of pages, per second, flushed by checkpoint or other operations that require all dirty pages to be flushed. The checkpoint frequency can be due to low memory conditions as well as the recovery interval set by sp_configure.
+
+Reference
+
+SQL Server, Buffer Manager Object
+
http://msdn.microsoft.com/en-us/library/ms189628.aspx
+
+A SQL Server DBA myth a day: (15/30) checkpoint only writes pages from committed transactions
+
http://www.sqlskills.com/BLOGS/PAUL/category/Checkpoint.aspx
+
+Database Checkpoints (SQL Server)
+
+http://technet.microsoft.com/en-us/library/ms189573(v=sql.110).aspx]]>
+
+
+
Description: Number of pages read, in second, in anticipation of use which is an optimistic physical read. This number should not exceed 20% of total page reads.
+Threshold:
+
Yellow:Greater than 20% of Page Reads/sec
+
+http://technet.microsoft.com/en-us/library/ms189628.aspx]]>
+
+Feature usage since last SQL Server startup
+
+You can also examine performance counters through the sys.dm_os_performance_counters DMV. By using the perfmon counters for deprecation and the DMVs, you can help your application prepare and avoid issue when migrating to the future versions of SQL Server.
+
+SELECT * FROM sys.dm_os_performance_counters
+WHERE object_name LIKE '%Deprecated Features%'
AND cntr_value > 0
+ORDER BY cntr_value DESC
+
+SQL Server, Deprecated Features Object
+
+http://technet.microsoft.com/en-us/library/bb510662.aspx]]>
+
+Number of attentions per second. Attentions are the number of user cancels and query timeouts that occurred per second. A high number of attentions may indicate slow query performance as users are cancelling queries.]]>
+
+Number of errors/sec]]>
+Description: Ratio between cache hits and lookups
+
+The Plan Cache object provides counters to monitor how SQL Server uses memory to store objects such as stored procedures, ad hoc and prepared Transact-SQL statements, and triggers. Multiple instances of the Plan Cache object can be monitored at the same time, with each instance representing a different type of plan to monitor.
+
Compiled Plan Stubs & Plan Cache Perf Counters:
+
+In SQL Server 2008 R2, there are three options that can help in dealing with plan cache pollution issues.
+
+Contributor(s):
+
+Reference:
+SQL Server, Plan Cache Object
+http://msdn.microsoft.com/en-us/library/ms177441(v=sql.105).aspx
+]]>
+The average amount of wait time (milliseconds) for each lock request that resulted in a wait. This wait could indicate excessive blocking that can be verified by querying sys.dm_os_waiting_tasks. Compare this counter to "Lock Waits/sec" and look for trends.
+
+Threshold
+Yellow: Greater than 500 Average Wait Time.
]]>
+
+
Description: Percentage of work tables created where the initial two pages of the work table were not allocated but were immediately available from the work table cache.
+
+Since SQL Server 2005 worktable caching was improved. When a query execution plan is cached, the work tables needed by the plan are not dropped across multiple executions of the plan but merely truncated. In addition, the first nine pages for the work table are kept. In SQL Server 2000, the work tables used during query plan execution are dropped. Because the work table is cached, the next execution of the query is faster. When the system is low on memory, the execution plan may be removed from the cache and the associated work tables dropped as well. Both SQL Server 2000 and SQL Server 2005 use a small global pool of pre-allocated pages and extents that make the initial creation of work tables faster.
+
+Note: When a work table is dropped, two pages may remain allocated and they are returned to the work table cache. A value less than 90% may indicate insufficient memory, since execution plans are being dropped, or may indicate, on 32-bit systems, the need for an upgrade to a 64-bit system.
+
+
Threshold:
+
Yellow: Less than 90% Worktables from Cache Ratio. This will need to be baselined for accuracy.
+
+
Reference:
+SQL Server, Access Methods Object
+http://msdn.microsoft.com/en-us/library/ms177426(v=sql.110).aspx]]>
+
Description:
+The number of times locks on a table were escalated from page- or row-level to table-level. Frequent or even occasional spiking in this value may indicate poorly coded transactions.
+
+
+Lock Escalation Thresholds
+
+Lock escalation is triggered when lock escalation is not disabled on the table by using the ALTER TABLE SET LOCK_ESCALATION option, and when either of the following conditions exists:
+
+
+If locks cannot be escalated because of lock conflicts, the Database Engine periodically triggers lock escalation at every 1,250 new locks acquired.
+
+Next Steps
+
Reducing Locking and Escalation
+In most cases, the Database Engine delivers the best performance when operating with its default settings for locking and lock escalation. If an instance of the Database Engine generates a lot of locks and is seeing frequent lock escalations, consider reducing the amount of locking by:
+
Using an isolation level that does not generate shared locks for read operations.
+
+
+Note: Changing the isolation level affects all tables on the instance of the Database Engine.
+
+
+You can also use trace flags 1211 and 1224 to disable all or some lock escalations. For more information, see Trace Flags (Transact-SQL). Also, monitor lock escalation by using the SQL Server Profiler Lock:Escalation event; and see Using SQL Server Profiler.
+
+Reference:
+Lock Escalation (Database Engine) -
http://msdn.microsoft.com/en-us/library/ms184286(SQL.105).aspx
+
+]]>
+The free space in tempdb in KB.]]>
+The longest running time of any transaction in seconds. This counter could indicate a long running statement pulling large amounts of data that normally takes a long time to execute or potentially a blocking condition.]]>
+The total number of active non-snapshot transactions that generate version records. These are all of the non-snapshot isolation versions such as triggers and online indexing.
+
+Note: The sum of Update Snapshot Transactions and NonSnapshot Version Transactions represents the total number of transactions that participate in version generation. The difference of Snapshot Transactions and Update Snapshot Transactions reports the number of read-only snapshot transactions.
+
+Reference:
+http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
+http://msdn.microsoft.com/en-us/library/ms176029(SQL.90).aspx
+Managing TempDB in SQL Server: TempDB Basics (Version Store: Why do we need it?)
+http://blogs.msdn.com/b/sqlserverstorageengine/archive/2008/12/22/managing-tempdb-in-sql-server-tempdb-basics-verison-store.aspx
+]]>
+The total number of active snapshot transactions.]]>
+Description
+
+The version cleanup rate in KB per seconds.
+
+Monitors the version cleanup rate in KBps in all version stores. If the version cleanup rate is lower than the version generation rate, the version store will use more and more space in tempdb. However, if the version cleanup rate is 0 but the version generation rate is not, there is probably a long-running transaction that is preventing the version store cleanup.
+
+Row versions are shared across sessions. The creator of the row version has no control over when the row version can be reclaimed. You will need to find and then possibly stop the longest-running transaction that is preventing the row version cleanup.
+
The following query returns the top two longest-running transactions that depend on the versions in the version store:
+
+select top 2
+ transaction_id,
+ transaction_sequence_num,
+ elapsed_time_seconds
+from sys.dm_tran_active_snapshot_database_transactions
+order by elapsed_time_seconds DESC
+
+Reference
+
+Row Versioning Resource Usage
+http://msdn.microsoft.com/en-us/library/ms175492.aspx]]>
+Description: The version generation rate in KB per seconds.
+
+You can use the Version Generation Rate and Version Cleanup Rate counters to measure version store impact on TempDB. The Version Generation Rate should not outpace the Cleanup Rate. Additionally, if your Version Cleanup Rate is 0, a long-running transaction could be preventing the version store cleanup. Incidentally, before generating an out-of-tempdb-space error, SQL Server 2008 makes a last-ditch attempt by forcing the version stores to shrink. During the shrink process, the longest-running transactions that have not yet generated any row versions are marked as victims. This frees up the version space used by them. Message 3967 is generated in the error log for each such victim transaction. If a transaction is marked as a victim, it can no longer read the row versions in the version store or create new ones. Message 3966 is generated and the transaction is rolled back when the victim transaction attempts to read row versions. If the shrink of the version store succeeds, more space is available in tempdb. Otherwise, tempdb runs out of space.
+
+If TempDB fills and runs out of space, writes will continue, but versions will not, and reads will fail.
+
+Reference
+SQL Server, Transactions Object
+http://technet.microsoft.com/en-us/library/ms189038.aspx]]>
+
+
+
+Plan re-use is desirable for OLTP workloads because re-creating the same plan (for similar or identical transactions) is a waste of CPU resources.
+
To compute the plan re-use rate, compare SQL Server SQL Statistics: batch requests/sec to SQL compilations/sec.
+
Special exception to the plan re-use rule is that zero (or trivial) cost plans will not be cached (not re-used) in SQL 2005 SP2 and above.
+
Applications that use zero cost plans will have a lower plan re-use but this is not a performance issue, because it is cheaper to generate a new plan every time than to cache.
+
Reference:
+
Execution Plan Caching and Reuse
+
http://msdn.microsoft.com/en-us/library/ms181055.aspx
+
Top SQL Server 2005 Performance Issues for OLTP Applications
+
http://technet.microsoft.com/en-us/library/cc966401.aspx
+]]>
+
+Number of pages which are not from NUMA-local memory.
+
+When we are using NUMA architecture which is becoming more common you will see memory nodes. We have one memory node per NUMA node and this is used to allocate memory in a particular node. This is visible in the SQL Server Buffer Node perfmon group. If you want to make sure you are performing local memory access versus foreign memory access we need to pay attention to where the memory is being allocated which can be tracked via sys.dm_os_memory_nodes.
+
+If we do not have enough memory in a particular NUMA node, we will perform a foreign access if we have to, but SQL Server tries to avoid this.
+
+Reference:
+http://msdn.microsoft.com/en-us/library/ms345597(v=sql.110).aspx]]>
+Maximum amount of memory in kilobytes the resource pool can have based on the settings and server state.]]>
Number of requests per second that failed to return a report from cache. Use this counter to find out whether the resources used for caching (disk or memory) are sufficient.
+
+Performance Counters for the MSRS 2011 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
+Number of reports that are currently active and being handled by the report server. Use this counter to evaluate caching strategy. There might be significantly more requests than reports generated.
+
+Performance Counters for the MSRS 2011 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
+Total number of cache misses against the in-memory cache after the service started. This counter resets when the application domain recycles.
+
+Performance Counters for the MSRS 2011 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
+Total number of reports that ran successfully after the service started. This counter resets when the application domain recycles.
+
+Performance Counters for the MSRS 2011 Windows Service Performance Object
+
+http://technet.microsoft.com/en-US/library/ms157314(v=sql.110).aspx]]>
+Number of running jobs. This counter can be used to find out if the current load on the system is potentially being driven from SQL Server Agent execution.]]>
+The number of Jobs that have failed to complete successfully for any reason since the last SQL Server Agent restart.]]>
+Percentage of successful jobs from the total number of executed jobs.
+
+]]>
+The number of Jobs that have successfully completed since the last SQL Server Agent restart.]]>
+Number of active steps.
+]]>
+The total number of times any Job Step execution is retried since the last SQL Server restart.]]>
+Number of AlwaysOn messages sent to this availability replica per second
+]]>
+Number of AlwaysOn messages resent in the last second
+
+
+]]>
+Number of AlwaysOn messages received from the replica per second
+]]>
+Bytes Received from Replica/sec: Number of bytes received from the availability replica per second
+]]>
+Number of bytes sent to the remote availability replica per second
+]]>
+Note: These counters are not defined by default and would be 0 unless configured through SQL Server through the sp_user_counter# stored procedures.]]>
+Number of milliseconds transaction termination waited for acknowledgement per second.
+
+The Replica:Transaction Delay counter measures the primary replica’s wait for acknowledgement that the transaction has committed at the secondary replica database in order to commit its own transaction. Since Asynchronous Commit Mode does not require acknowledgment to commit the transaction, this counter reports 0 when measured against a database in asynchronous commit mode.
+
+When there are multiple secondaries, this is a measure of the total time all transactions waited on the secondary acknowledgement.
+Note: This counter should be viewed on the Primary replica
+]]>
+The amount of log in kilobytes that need to be undone.
+
+Note: This counter should be viewed on the Secondary replica]]>
+Amount of log records redone on the secondary database in the last second.
+
+This counter can be compared to Log Bytes Received/Sec. If Log Bytes Received/Sec trends greater than Redone Bytes/Sec for sustained periods of time, then redo latency is building up between the primary and secondary replicas, which suggests that counter Redo Bytes Remaining and Recovery Queue is growing. This could indicate Redo is the bottleneck.
+
+To measure Recovery Time, divide Recovery Queue by Redone Bytes / Sec.
+
+Note: This counter should be viewed on the Secondary replica]]>
+The amount of log in kilobytes remaining to be redone to finish the reverting phase. If Redo Bytes Remaining counter is trending up, The redo process could be a bottleneck.
+
+Note: This counter should be viewed on the Secondary replica
+
]]>
+Number of times redo gets blocked in the last second]]>
+Amount of log records in the log files of the secondary replica that has not yet been redone.
+
+The Recovery Queue monitors the progress of the redo of flushed pages. If Recovery Queue is trending up, the redo process could be a bottleneck. For AlwaysON, the redo process is single threaded to ensure a consistent read for readable secondaries.
+
+Note: This counter should be viewed on the Secondary replica
+
]]>
+Amount of logs received by the availability replica for the database
+
+Note: This counter should be viewed on the Secondary replica
+]]>
+The amount of log in kilobytes remaining to finish the undo phase.
+
+Note: This counter should be viewed on the Secondary replica
+]]>
Amount of log records in the log files of the primary database, in kilobytes, that has not yet been sent to the secondary availability replica. This value is sent to the secondary availability replica from the primary availability replica.
+
Note: Queue size does not include FileStream files that are sent to a secondary.
+
The log send queue size at any point will give an indication approximately how much log has not been sent in KB. This is the amount of log secondary does not have at the time of failover and the amount of data loss that could be experienced. The log send queue size is also reported in DMV sys.dm_hadr_database_replica_states.log_send_queue_size column in KB.
+
+Note: This counter should be viewed on the Secondary replica
+
+Reference:
+
http://technet.microsoft.com/en-us/library/ff877972.aspx
+
http://www.sqlskills.com/blogs/joe/answering-questions-with-the-alwayson-dashboard/
+
http://support.microsoft.com/kb/2857849]]>
Number of transactions that wrote to the mirrored database and waited for the log to be sent to the mirror in order to commit, in the last second.
+
+This counter is a measure of transactions that are waiting to be hardened to the primary because of Synchronous Availability Mode requiring that they harden at secondary also. When using Asynchronous availability mode this counter is 0.
+
Note: This counter should be viewed on the Primary replica
+
]]>
+Total amount of dynamic memory the server is using for query optimization]]>
+Amount of memory the server is currently using for the purposes other than the database pages.
+
+\SQLServer:Buffer Manager Stolen pages
+
+
Description: Number of pages used for miscellaneous server purposes (including procedure cache). This counter shows how many pages were taken from the buffer pool to accommodate non-buffer pool needs such as plan cache, procedure cache, the optimizer, workspace memory, etc. This counter should be baselined and can be analyzed by comparing this counter to the amount of buffer pool space and large requests that are hitting the SQL Server instance.
+
+
Note: DBCC MEMORYSTATUS can also be leveraged to examine the impact of stolen memory to the buffer pool.
+
Note: The lazywriter process is not permitted to flush Stolen buffers out of the buffer pool.
+
+
Reference:
+SQL Server, Buffer Manager Object
+
+http://technet.microsoft.com/en-us/library/ms189628(v=sql.105).aspx
+INF: Using DBCC MEMORYSTATUS to Monitor SQL Server Memory Usage
+
+http://support.microsoft.com/kb/271624]]>
+Ideal amount of memory the server is willing to consume]]>
+Total amount of dynamic memory the server is currently consuming]]>
+Amount of memory the server is using on this node for database pages.]]>
+Non NUMA-local amount of memory on this node.]]>
+Amount of memory the server is using on this node for the purposes other than database pages.]]>
+Ideal amount of memory for this node.]]>
+Total amount of memory the server has committed on this node.]]>
+Number of lookup requests from this node, which were satisfied from other nodes.
+
+Note: It is recommended to set the maximum degree of parallelism MAXDOP to the number of processors per NUMA node to encourage queries to leverage memory on the local NUMA node though memory can always be used from other NUMA nodes if it is needed. Dynamic Management Views (DMVs) and performance monitor (perfmon) counters can be used to find out the degree to which local memory and foreign memory are being used.
+Additionally, it is recommended to leverage the SysInternals tool 'CoreInfo' to find out specifically the processors being used, hyperthreading, and the overall NUMA cost between NUMA nodes. Furthermore, it is recommended to configure MAXDOP correctly and monitor foreign memory use, install the latest hotfixes that would affect NUMA configurations, and ensure the latest firmware is installed for the hardware being used in your environment.
+
+References
+CoreInfo
+
http://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
+
Recommendations and guidelines for the "max degree of parallelism" configuration option in SQL Server
+
http://support.microsoft.com/kb/2806535
+]]>
+Number of lookup requests from this node, which were satisfied from this node.
+Note: It is recommended to set the maximum degree of parallelism MAXDOP to the number of processors per NUMA node to encourage queries to leverage memory on the local NUMA node though memory can always be used from other NUMA nodes if it is needed. Dynamic Management Views (DMVs) and performance monitor (perfmon) counters can be used to find out the degree to which local memory and foreign memory are being used.
+Additionally, it is recommended to leverage the SysInternals tool 'CoreInfo' to find out specifically the processors being used, hyperthreading, and the overall NUMA cost between NUMA nodes. Furthermore, it is recommended to configure MAXDOP correctly and monitor foreign memory use, install the latest hotfixes that would affect NUMA configurations, and ensure the latest firmware is installed for the hardware being used in your environment.
+
+References
+CoreInfo
+
http://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
+Recommendations and guidelines for the "max degree of parallelism" configuration option in SQL Server
+
http://support.microsoft.com/kb/2806535
+]]>
+Database pages on node.
+
+SQL Server:Buffer Node
+
+ http://technet.microsoft.com/en-us/library/ms345597.aspx
+]]>
+Number of pages flushed to enforce the recovery interval settings.
+
+When Indirect Checkpoints is enabled at the database level, you will notice a new background thread in sys.dm_exec_requests with the command token "RECOVERY WRITER". There is a single background writer for the SQL Server instance. The background writer performs aggressive flushing of dirty pages based on LSN order and reduces the redo phase recovery time.
+
+The catalog view sys.databases contains a column named target_recovery_time_in_seconds that indicates whether a specific database is using the new Indirect checkpoint algorithm. There is a new performance monitor counter called 'Background writer pages/sec' that exposes the amount of dirty pages processed by the background writer.
+
+SQL Server Books Online contains a discussion about Indirect Checkpoints and how it interacts with the recovery interval setting:
+
+
+Database Checkpoints (SQL Server)
+
+http://msdn.microsoft.com/en-us/library/ms189573(v=sql.110).aspx ]]>
+The ideal number of pages in the Buffer Pool according the maximum memory granted to SQL Server.]]>
+
+
+
+
+
+Description:
+Number of seconds a page will stay in the buffer pool without references. This performance monitor counter tells you, on average, how long data pages are staying in the buffer. Any large drops of 30% or more should be investigated. Below 600 should be monitored and very low values near zero are considered a critical state. For monitoring, we are alerting at a warning level at 600 and a critical state of lower than 300 seconds, though getting a baseline is the best approach.
+
+When page life expectancy gets too low, this is an indication that SQL Server is doing too many logical reads putting pressure on the buffer pool. It is recommended to correlate page life expectancy with lazy writer activity. When page life expectancy becomes low, then SQL Server will respond by sweeping through the buffer pool using the lazy writer, increasing lazy writer activity. Low page life expectancy may cause more physical reads increasing pressure on disk and slowing down SQL Server responsiveness.
+
+The Page life expectancy counter is considered one of the most critical counters for SQL Server. If Page life expectancy becomes low SQL Server will attempt physical reads from disk into the buffer pool to honor requests. Requests from physical disk will take considerably longer causing higher disk costs.
+
+Note: NUMA systems will have a CPU and memory grouping per node. If the server is a NUMA environment you should analyze the Buffer Node counters for Page Life Expectancy per node. You can tell a server is a NUMA system by checking the SQL Server error log or by querying sys.dm_os_memory_nodes. A non-NUMA system will have 2 nodes listed, A NUMA system will have additional nodes for each of the hardware NUMA nodes in the system.
+
Threshold:
+
Yellow: Page life expectancy is less than 10 minutes (600 seconds)
+
Red: Page life expectancy is less than 5 minutes (300 seconds)
+
+
Next Steps:
+If Buffer Manager\Page life expectancy is low then the Buffer Manager\Lazy Writes /sec will be higher as the Lazy Writer will become active attempting to free the buffer cache as SQL Server will be under memory pressure.
+Due to the disk impact of the physical reads incurred, the \Physical Disk \Avg. Disk sec/Read counter may also become a bottleneck as SQL Server is reading from disk instead of the buffer pool to honor requests.
+Look for an increase in SQL Server: Buffer Manager: Checkpoint Pages/sec and SQL Server:Buffer Manager: Lazy Writes/sec performance object counters because SQL Server 2005 / 2008 starts to flush pages out of the buffer pool cache under memory pressure.
+Run expensive queries through the Database Tuning Advisor (DTA), look for queries with a high number of logical reads and consider tuning and potentially rewriting them, and potentially add additional memory if non-hardware options do not address the issue.
+
+
Reference:
+
+
SQL Server, Access Methods Object
+
+http://msdn.microsoft.com/en-us/library/ms177426.aspx
+]]>
+Number of active update transactions for the database.]]>
+Read/write throughput for backup/restore of a database.]]>
+KiloBytes bulk copied.]]>
+Number of temporary tables/table variables in use]]>
+Number of temporary tables/table variables created/sec]]>
+Number of temporary tables/table variables waiting to be destroyed by the cleanup system thread]]>
+Number of suboptimal query plans generated per second in the workload group.]]>
+Number of threads used by parallel queries in the workload group. Serial queries and the main thread of parallel queries are not included in this number.]]>
+Number of queries per second getting less than ideal amount of memory in the workload group.]]>
+Number of currently running requests in the workload group.]]>
+The total number of active transactions.]]>
+The size of the version store in KB.]]>
+The total number of errors that occur during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx
+]]>
+The total number of errors that occur per second during the processing of HTTP requests. These errors include HTTP status codes in the 400s and 500s.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
+A number from 1-5 indicating the current memory state of the server.
+
+
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx
+]]>
+Number of bytes the server requested to shrink.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
+Number of shrink notifications the server issued in the last second. Indicates how often the server believes it is under memory pressure.
+
+Performance Counters for the ReportServer:Service Performance Object
+
http://technet.microsoft.com/en-us/library/cc627471(v=sql.110).aspx]]>
+Actual number of bytes sent per second over the network to the remote availability replica
+]]>
+
+
+Description: Number of buffer pool extension page reads/writes outstanding. In other words, this is the I/O queue length for the buffer pool extension file.
+
+
+Threshold:
+
Red: Numbers higher than 0 warrants more investigation on the I/O subsystem latencies. Latency on a disk hosting the Buffer Pool Extension file should be below 1ms.
+]]>
+
+
+
+
+Description: Average seconds a page will stay in the buffer pool extension without references.
+]]>
+
+
+Description: Total number of free cache pages in the buffer pool extension file.
+
+
Threshold:
+
Yellow: Less than 5120 Extension free Pages, or 40MB
+]]>
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-Use this analysis in correlation with the \Memory\Committed Bytes. If you suspect a memory leak condition, then install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
-
-References:
-
-Debug Diagnostic Tool v1.1 http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
-
-
-
-
-This analysis checks each process to determine if it is leaking more than 100 threads per hour and if it has more than 1000 threads. A process with a large number of threads and/or an aggressive upward trend could indicate a thread leak which typically results in a system committed memory leak and/or high context switching. High context switching will result in high privileged mode CPU usage.]]>
-
-
-Reference:
-
-]]>
-
-
-
-
-If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment article in the references section for more information.
-
-Role Specific
-
-- \Process(MSExchangeMailboxAssistants*)\% Processor Time should be less than 5% of overll CPU
-
-- \Process(msftefd*)\% Processor Time should be less than 10% of what the store process is consuming. Note: If indexing is running and overall CPU is greater than 80%, then msfte should backoff it's CPU usage if that threshold is hit.
-
-References:
-Measuring .NET Application Performance
-http://msdn2.microsoft.com/en-us/library/ms998579.aspx
-
-How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment http://www.codeplex.com/PerfTesting/Wiki/View.aspx?title=How%20To%3a%20Identify%20a%20Disk%20Performance%20Bottleneck%20Using%20SPA&referringTitle=How%20Tos ]]>
-
-
-
-
-Note: This analysis assumes that all processes are 32-bit on 32-bit Windows and Windows Server and assumes that all processes are 64-bit on 64-bit Windows and Windows Server.]]>
-
-
-
-
-
-
-
-
-
+
+
+
+Use this analysis in correlation with the \Memory\Committed Bytes. If you suspect a memory leak condition, then install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
+
+References:
+
+Debug Diagnostic Tool v1.1 http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
+
+
+
+
+This analysis checks each process to determine if it is leaking more than 100 threads per hour and if it has more than 1000 threads. A process with a large number of threads and/or an aggressive upward trend could indicate a thread leak which typically results in a system committed memory leak and/or high context switching. High context switching will result in high privileged mode CPU usage.]]>
+
+
+Reference:
+
+]]>
+
+
+
+
+If a user-mode processor bottleneck is suspected, then consider using a process profiler to analyze the functions causing the high CPU consumption. See How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment article in the references section for more information.
+
+Role Specific
+
+- \Process(MSExchangeMailboxAssistants*)\% Processor Time should be less than 5% of overall CPU
+
+- \Process(msftefd*)\% Processor Time should be less than 10% of what the store process is consuming. Note: If indexing is running and overall CPU is greater than 80%, then msfte should back off its CPU usage if that threshold is hit.
+
+References:
+Measuring .NET Application Performance
+http://msdn2.microsoft.com/en-us/library/ms998579.aspx
+
+How To: Identify Functions causing a High User-mode CPU Bottleneck for Server Applications in a Production Environment http://www.codeplex.com/PerfTesting/Wiki/View.aspx?title=How%20To%3a%20Identify%20a%20Disk%20Performance%20Bottleneck%20Using%20SPA&referringTitle=How%20Tos ]]>
+
+
+
+
+Note: This analysis assumes that all processes are 32-bit on 32-bit Windows and Windows Server and assumes that all processes are 64-bit on 64-bit Windows and Windows Server.]]>
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Location: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\RAT\Stingray\Debug\FWSRV
-Key: FWS_PRIVATE_PERFORMANCE_COUNTERS
-Type: DWORD
-Value: 1
-The restart is not neccesary.]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Location: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\RAT\Stingray\Debug\FWSRV
+Key: FWS_PRIVATE_PERFORMANCE_COUNTERS
+Type: DWORD
+Value: 1
+The restart is not necessary.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-Use this analysis in correlation with the \Memory\Committed Bytes. If you suspect a memory leak condition, then install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
-
-References:
-
-Debug Diagnostic Tool v1.1 http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
-
+
+
+
+Use this analysis in correlation with the \Memory\Committed Bytes. If you suspect a memory leak condition, then install and use the Debug Diag tool. For more information on the Debug Diag Tool, see the references section.
+
+References:
+
+Debug Diagnostic Tool v1.1 http://www.microsoft.com/downloads/details.aspx?FamilyID=28bd5941-c458-46f1-b24d-f60151d875a3&displaylang=en]]>
+
-
-
-
-
+
+
+
+