View Javadoc

1   // ========================================================================
2   // Copyright (c) Webtide LLC
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at 
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses. 
12  // ========================================================================
13  
14  
15  package org.eclipse.jetty.monitor;
16  
17  import java.lang.management.ManagementFactory;
18  import java.lang.management.ThreadMXBean;
19  import java.util.ArrayList;
20  import java.util.Collections;
21  import java.util.Comparator;
22  import java.util.HashMap;
23  import java.util.List;
24  import java.util.Map;
25  
26  import org.eclipse.jetty.monitor.thread.ThreadMonitorException;
27  import org.eclipse.jetty.monitor.thread.ThreadMonitorInfo;
28  import org.eclipse.jetty.util.component.AbstractLifeCycle;
29  import org.eclipse.jetty.util.component.Dumpable;
30  import org.eclipse.jetty.util.log.Log;
31  import org.eclipse.jetty.util.log.Logger;
32  import org.eclipse.jetty.util.log.Logger;
33  
34  
35  /* ------------------------------------------------------------ */
36  public class ThreadMonitor extends AbstractLifeCycle implements Runnable
37  {
38      private static final Logger LOG = Log.getLogger(ThreadMonitor.class);
39  
40      private int _scanInterval;
41      private int _logInterval;
42      private int _busyThreshold;
43      private int _logThreshold;
44      private int _stackDepth;
45      private int _trailLength;
46      
47      private ThreadMXBean _threadBean;
48      
49      private Thread _runner;
50      private Logger _logger;
51      private volatile boolean _done = true;
52      private Dumpable _dumpable;
53  
54      private Map<Long,ThreadMonitorInfo> _monitorInfo;
55      
56      /* ------------------------------------------------------------ */
57      /**
58       * Instantiates a new thread monitor.
59       *
60       * @throws Exception
61       */
62      public ThreadMonitor() throws Exception
63      {
64          this(5000);
65      }
66      
67      /* ------------------------------------------------------------ */
68      /**
69       * Instantiates a new thread monitor.
70       *
71       * @param intervalMs scan interval
72       * @throws Exception
73       */
74      public ThreadMonitor(int intervalMs) throws Exception
75      {
76          this(intervalMs, 95);
77      }
78      
79      /* ------------------------------------------------------------ */
80      /**
81       * Instantiates a new thread monitor.
82       *
83       * @param intervalMs scan interval
84       * @param threshold busy threshold
85       * @throws Exception
86       */
87      public ThreadMonitor(int intervalMs, int threshold) throws Exception
88      {
89          this(intervalMs, threshold, 3);
90      }
91      
92      /* ------------------------------------------------------------ */
93      /**
94       * Instantiates a new thread monitor.
95       *
96       * @param intervalMs scan interval
97       * @param threshold busy threshold
98       * @param depth stack compare depth
99       * @throws Exception
100      */
101     public ThreadMonitor(int intervalMs, int threshold, int depth) throws Exception
102     {
103         this(intervalMs, threshold, depth, 3);
104     }
105     
106     /* ------------------------------------------------------------ */
107     /**
108      * Instantiates a new thread monitor.
109      *
110      * @param intervalMs scan interval
111      * @param threshold busy threshold
112      * @param depth stack compare depth
113      * @param trail length of stack trail
114      * @throws Exception
115      */
116     public ThreadMonitor(int intervalMs, int threshold, int depth, int trail) throws Exception
117     {
118         _scanInterval = intervalMs;
119         _busyThreshold = threshold;
120         _stackDepth = depth;
121         _trailLength = trail;
122         
123         _logger = Log.getLogger(ThreadMonitor.class.getName());
124         _monitorInfo = new HashMap<Long, ThreadMonitorInfo>();
125        
126         init();
127     }
128     
129     /* ------------------------------------------------------------ */
130     /**
131      * Gets the scan interval.
132      *
133      * @return the scan interval
134      */
135     public int getScanInterval()
136     {
137         return _scanInterval;
138     }
139 
140     /* ------------------------------------------------------------ */
141     /**
142      * Sets the scan interval.
143      *
144      * @param ms the new scan interval
145      */
146     public void setScanInterval(int ms)
147     {
148         _scanInterval = ms;
149     }
150 
151     /* ------------------------------------------------------------ */
152     /**
153      * Gets the log interval.
154      *
155      * @return the log interval
156      */
157     public int getLogInterval()
158     {
159         return _logInterval;
160     }
161 
162     /* ------------------------------------------------------------ */
163     /**
164      * Sets the log interval.
165      *
166      * @param ms the new log interval
167      */
168     public void setLogInterval(int ms)
169     {
170         _logInterval = ms;
171     }
172 
173     /* ------------------------------------------------------------ */
174     /**
175      * Gets the busy threshold.
176      *
177      * @return the busy threshold
178      */
179     public int getBusyThreshold()
180     {
181         return _busyThreshold;
182     }
183 
184     /* ------------------------------------------------------------ */
185     /**
186      * Sets the busy threshold.
187      *
188      * @param percent the new busy threshold
189      */
190     public void setBusyThreshold(int percent)
191     {
192         _busyThreshold = percent;
193     }
194 
195     /* ------------------------------------------------------------ */
196     /**
197      * Gets the log threshold.
198      *
199      * @return the log threshold
200      */
201     public int getLogThreshold()
202     {
203         return _logThreshold;
204     }
205 
206     /* ------------------------------------------------------------ */
207     /**
208      * Sets the log threshold.
209      *
210      * @param percent the new log threshold
211      */
212     public void setLogThreshold(int percent)
213     {
214         _logThreshold = percent;
215     }
216 
217     /* ------------------------------------------------------------ */
218     /**
219      * Gets the stack depth.
220      *
221      * @return the stack depth
222      */
223     public int getStackDepth()
224     {
225         return _stackDepth;
226     }
227 
228     /* ------------------------------------------------------------ */
229     /**
230      * Sets the stack depth.
231      *
232      * @param stackDepth the new stack depth
233      */
234     public void setStackDepth(int stackDepth)
235     {
236         _stackDepth = stackDepth;
237     }
238     
239     /* ------------------------------------------------------------ */
240     /**
241      * Sets the stack trace trail length.
242      *
243      * @param trailLength the new trail length
244      */
245     public void setTrailLength(int trailLength)
246     {
247         _trailLength = trailLength;
248     }
249 
250     /* ------------------------------------------------------------ */
251     /**
252      * Gets the stack trace trail length.
253      *
254      * @return the trail length
255      */
256     public int getTrailLength()
257     {
258         return _trailLength;
259     }
260 
261     /* ------------------------------------------------------------ */
262     /**
263      * Enable logging of CPU usage.
264      *
265      * @param frequencyMs the logging frequency 
266      * @param thresholdPercent the logging threshold
267      */
268     public void logCpuUsage(int frequencyMs, int thresholdPercent)
269     {
270         setLogInterval(frequencyMs);
271         setLogThreshold(thresholdPercent);
272     }
273     
274     /* ------------------------------------------------------------ */
275     /**
276      * @return A {@link Dumpable} that is dumped whenever spinning threads are detected
277      */
278     public Dumpable getDumpable()
279     {
280         return _dumpable;
281     }
282 
283     /* ------------------------------------------------------------ */
284     /**
285      * @param dumpable A {@link Dumpable} that is dumped whenever spinning threads are detected
286      */
287     public void setDumpable(Dumpable dumpable)
288     {
289         _dumpable = dumpable;
290     }
291 
292     /* ------------------------------------------------------------ */
293     /**
294      * @see org.eclipse.jetty.util.component.AbstractLifeCycle#doStart()
295      */
296     public void doStart()
297     {
298         _done = false;
299         
300         _runner = new Thread(this);
301         _runner.setDaemon(true);
302         _runner.start();
303 
304         LOG.info("Thread Monitor started successfully");
305     }
306     
307     /* ------------------------------------------------------------ */
308     /**
309      * @see org.eclipse.jetty.util.component.AbstractLifeCycle#doStop()
310      */
311     public void doStop()
312     {
313         if (_runner != null)
314         {
315             _done = true;
316             try
317             {
318                 _runner.join();
319             }
320             catch (InterruptedException ex) {}
321         }
322     }
323 
324     /* ------------------------------------------------------------ */
325     /**
326      * Retrieve all avaliable thread ids
327      *
328      * @return array of thread ids
329      */
330     protected long[] getAllThreadIds()
331     {
332         return _threadBean.getAllThreadIds();
333     }
334 
335     /* ------------------------------------------------------------ */
336     /**
337      * Retrieve the cpu time for specified thread.
338      *
339      * @param id thread id
340      * @return cpu time of the thread
341      */
342     protected long getThreadCpuTime(long id)
343     {
344         return _threadBean.getThreadCpuTime(id);
345     }
346     
347     /* ------------------------------------------------------------ */
348     /**
349      * Initialize JMX objects.
350      */
351     protected void init()
352     {
353         _threadBean = ManagementFactory.getThreadMXBean();
354         if (_threadBean.isThreadCpuTimeSupported())
355         {
356             _threadBean.setThreadCpuTimeEnabled(true);
357         }
358     }
359     
360     /* ------------------------------------------------------------ */
361     /**
362      * @see java.lang.Runnable#run()
363      */
364     public void run()
365     {
366         // Initialize repeat flag
367         boolean repeat = false;
368         boolean scanNow, logNow;
369 
370         // Set next scan time and log time
371         long nextScanTime = System.currentTimeMillis();
372         long nextLogTime = nextScanTime + _logInterval;
373         
374         while (!_done)
375         {
376             long currTime = System.currentTimeMillis();
377             scanNow = (currTime > nextScanTime);
378             logNow = (_logInterval > 0 && currTime > nextLogTime);
379             if (repeat || scanNow || logNow)
380             {
381                 repeat = collectThreadInfo();
382                 logThreadInfo(logNow);
383 
384                 if (scanNow)
385                 {
386                     nextScanTime = System.currentTimeMillis() + _scanInterval;
387                 }
388                 if (logNow)
389                 {
390                     nextLogTime = System.currentTimeMillis() + _logInterval;
391                 }
392             }
393 
394             // Sleep only if not going to repeat scanning immediately
395             if (!repeat)
396             {
397                 try
398                 {
399                     Thread.sleep(100);
400                 }
401                 catch (InterruptedException ex)
402                 {
403                     LOG.ignore(ex);
404                 }
405             }
406         }
407         
408     }
409     
410     /* ------------------------------------------------------------ */
411     /**
412      * Collect thread info.
413      */
414     private boolean collectThreadInfo()
415     {
416         boolean repeat = false;
417         try
418         {
419             // Retrieve stack traces for all threads at once as it 
420             // was proven to be an order of magnitude faster when
421             // retrieving a single thread stack trace.
422             Map<Thread,StackTraceElement[]> all = Thread.getAllStackTraces();
423             
424             for (Map.Entry<Thread,StackTraceElement[]> entry : all.entrySet())
425             {
426                 Thread thread = entry.getKey();
427                 long threadId = thread.getId();
428                 
429                 // Skip our own runner thread
430                 if (threadId == _runner.getId())
431                 {
432                     continue;
433                 }
434 
435                 ThreadMonitorInfo currMonitorInfo = _monitorInfo.get(Long.valueOf(threadId));
436                 if (currMonitorInfo == null)
437                 {
438                     // Create thread info object for a new thread 
439                     currMonitorInfo = new ThreadMonitorInfo(thread);
440                     currMonitorInfo.setStackTrace(entry.getValue());
441                     currMonitorInfo.setCpuTime(getThreadCpuTime(threadId));
442                     currMonitorInfo.setSampleTime(System.nanoTime());
443                     _monitorInfo.put(Long.valueOf(threadId), currMonitorInfo);
444                 }
445                 else
446                 {
447                     // Update the existing thread info object
448                     currMonitorInfo.setStackTrace(entry.getValue());
449                     currMonitorInfo.setCpuTime(getThreadCpuTime(threadId));
450                     currMonitorInfo.setSampleTime(System.nanoTime());
451     
452                     // Stack trace count holds logging state
453                     int count = currMonitorInfo.getTraceCount();
454                     if (count >= 0 && currMonitorInfo.isSpinning())
455                     {
456                         // Thread was spinning and was logged before
457                         if (count < _trailLength) 
458                         {
459                             // Log another stack trace
460                             currMonitorInfo.setTraceCount(count+1);
461                             repeat = true;
462                             continue;
463                         }
464                         
465                         // Reset spin flag and trace count
466                         currMonitorInfo.setSpinning(false);
467                         currMonitorInfo.setTraceCount(-1);
468                     }
469                     if (currMonitorInfo.getCpuUtilization() > _busyThreshold)
470                     {
471                         // Thread is busy
472                         StackTraceElement[] lastStackTrace = currMonitorInfo.getStackTrace();
473     
474                         if (lastStackTrace != null 
475                         && matchStackTraces(lastStackTrace, entry.getValue()))
476                         {
477                             // Thread is spinning
478                             currMonitorInfo.setSpinning(true);
479                             if (count < 0)
480                             {
481                                 // Enable logging of spin status and stack traces 
482                                 // only if the incoming trace count is negative
483                                 // that indicates a new scan for this thread
484                                 currMonitorInfo.setTraceCount(0);
485                                 repeat = (_trailLength > 0);
486                             }
487                         }
488                     }
489                 }
490             }
491         }
492         catch (Exception ex)
493         {
494             LOG.debug(ex);
495         }
496         return repeat;
497     }
498 
499     /* ------------------------------------------------------------ */
500     protected void logThreadInfo(boolean logAll)
501     {
502         if (_monitorInfo.size() > 0)
503         {
504             // Select thread objects for all live threads
505             long[] running = getAllThreadIds();
506             List<ThreadMonitorInfo> all = new ArrayList<ThreadMonitorInfo>();
507             for (int idx=0; idx<running.length; idx++)
508             {
509                 ThreadMonitorInfo info = _monitorInfo.get(running[idx]); 
510                 if (info != null)
511                 {
512                     all.add(info);
513                 }
514             }
515 
516             // Sort selected thread objects by their CPU utilization
517             Collections.sort(all, new Comparator<ThreadMonitorInfo>()
518             {
519                 /* ------------------------------------------------------------ */
520                 public int compare(ThreadMonitorInfo info1, ThreadMonitorInfo info2)
521                 {
522                     return (int)Math.signum(info2.getCpuUtilization()-info1.getCpuUtilization());
523                 }
524             });
525             
526             String format = "Thread '%2$s'[%3$s,id:%1$d,cpu:%4$.2f%%]%5$s";
527             
528             // Log thread information for threads that exceed logging threshold
529             // or log spinning threads if their trace count is zero
530             boolean spinning=false;
531             for (ThreadMonitorInfo info : all)
532             {
533                 if (logAll && info.getCpuUtilization() > _logThreshold 
534                 || info.isSpinning() && info.getTraceCount() == 0)
535                 {
536                     String message = String.format(format, 
537                             info.getThreadId(), info.getThreadName(), 
538                             info.getThreadState(), info.getCpuUtilization(),
539                             info.isSpinning() ? " SPINNING" : "");
540                    _logger.info(message);
541                    spinning=true;
542                 }
543             }
544             
545             // Dump info
546             if (spinning && _dumpable!=null)
547             {
548                 System.err.println(_dumpable.dump());
549             }
550 
551             // Log stack traces for spinning threads with positive trace count
552             for (ThreadMonitorInfo info : all)
553             {
554                 if (info.isSpinning() && info.getTraceCount() >= 0)
555                 {
556                     String message = String.format(format,
557                             info.getThreadId(), info.getThreadName(), 
558                             info.getThreadState(), info.getCpuUtilization(),
559                             " STACK TRACE");
560                     _logger.warn(new ThreadMonitorException(message, info.getStackTrace()));
561                 }
562             }
563         }
564     }
565 
566     /* ------------------------------------------------------------ */
567     /**
568      * Match stack traces.
569      *
570      * @param lastStackTrace last stack trace
571      * @param stackTrace current stack trace
572      * @return true, if successful
573      */
574     private boolean matchStackTraces(StackTraceElement[] lastStackTrace, StackTraceElement[] stackTrace)
575     {
576         boolean match = true;
577         int count = Math.min(_stackDepth, Math.min(lastStackTrace.length, stackTrace.length));
578         
579         for (int idx=0; idx < count; idx++)
580         {
581             if (!stackTrace[idx].equals(lastStackTrace[idx]))
582             {
583                 match = false;
584                 break;
585             }
586         }
587         return match;
588     }
589 }