View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2013 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  
20  package org.eclipse.jetty.monitor;
21  
22  import java.lang.management.ManagementFactory;
23  import java.lang.management.ThreadMXBean;
24  import java.util.ArrayList;
25  import java.util.Collections;
26  import java.util.Comparator;
27  import java.util.HashMap;
28  import java.util.List;
29  import java.util.Map;
30  
31  import org.eclipse.jetty.monitor.thread.ThreadMonitorException;
32  import org.eclipse.jetty.monitor.thread.ThreadMonitorInfo;
33  import org.eclipse.jetty.util.component.AbstractLifeCycle;
34  import org.eclipse.jetty.util.component.Dumpable;
35  import org.eclipse.jetty.util.log.Log;
36  import org.eclipse.jetty.util.log.Logger;
37  import org.eclipse.jetty.util.log.Logger;
38  
39  
40  /* ------------------------------------------------------------ */
41  public class ThreadMonitor extends AbstractLifeCycle implements Runnable
42  {
43      private static final Logger LOG = Log.getLogger(ThreadMonitor.class);
44  
45      private int _scanInterval;
46      private int _logInterval;
47      private int _busyThreshold;
48      private int _logThreshold;
49      private int _stackDepth;
50      private int _trailLength;
51      
52      private ThreadMXBean _threadBean;
53      
54      private Thread _runner;
55      private Logger _logger;
56      private volatile boolean _done = true;
57      private Dumpable _dumpable;
58  
59      private Map<Long,ThreadMonitorInfo> _monitorInfo;
60      
61      /* ------------------------------------------------------------ */
62      /**
63       * Instantiates a new thread monitor.
64       *
65       * @throws Exception
66       */
67      public ThreadMonitor() throws Exception
68      {
69          this(5000);
70      }
71      
72      /* ------------------------------------------------------------ */
73      /**
74       * Instantiates a new thread monitor.
75       *
76       * @param intervalMs scan interval
77       * @throws Exception
78       */
79      public ThreadMonitor(int intervalMs) throws Exception
80      {
81          this(intervalMs, 95);
82      }
83      
84      /* ------------------------------------------------------------ */
85      /**
86       * Instantiates a new thread monitor.
87       *
88       * @param intervalMs scan interval
89       * @param threshold busy threshold
90       * @throws Exception
91       */
92      public ThreadMonitor(int intervalMs, int threshold) throws Exception
93      {
94          this(intervalMs, threshold, 3);
95      }
96      
97      /* ------------------------------------------------------------ */
98      /**
99       * Instantiates a new thread monitor.
100      *
101      * @param intervalMs scan interval
102      * @param threshold busy threshold
103      * @param depth stack compare depth
104      * @throws Exception
105      */
106     public ThreadMonitor(int intervalMs, int threshold, int depth) throws Exception
107     {
108         this(intervalMs, threshold, depth, 3);
109     }
110     
111     /* ------------------------------------------------------------ */
112     /**
113      * Instantiates a new thread monitor.
114      *
115      * @param intervalMs scan interval
116      * @param threshold busy threshold
117      * @param depth stack compare depth
118      * @param trail length of stack trail
119      * @throws Exception
120      */
121     public ThreadMonitor(int intervalMs, int threshold, int depth, int trail) throws Exception
122     {
123         _scanInterval = intervalMs;
124         _busyThreshold = threshold;
125         _stackDepth = depth;
126         _trailLength = trail;
127         
128         _logger = Log.getLogger(ThreadMonitor.class.getName());
129         _monitorInfo = new HashMap<Long, ThreadMonitorInfo>();
130        
131         init();
132     }
133     
134     /* ------------------------------------------------------------ */
135     /**
136      * Gets the scan interval.
137      *
138      * @return the scan interval
139      */
140     public int getScanInterval()
141     {
142         return _scanInterval;
143     }
144 
145     /* ------------------------------------------------------------ */
146     /**
147      * Sets the scan interval.
148      *
149      * @param ms the new scan interval
150      */
151     public void setScanInterval(int ms)
152     {
153         _scanInterval = ms;
154     }
155 
156     /* ------------------------------------------------------------ */
157     /**
158      * Gets the log interval.
159      *
160      * @return the log interval
161      */
162     public int getLogInterval()
163     {
164         return _logInterval;
165     }
166 
167     /* ------------------------------------------------------------ */
168     /**
169      * Sets the log interval.
170      *
171      * @param ms the new log interval
172      */
173     public void setLogInterval(int ms)
174     {
175         _logInterval = ms;
176     }
177 
178     /* ------------------------------------------------------------ */
179     /**
180      * Gets the busy threshold.
181      *
182      * @return the busy threshold
183      */
184     public int getBusyThreshold()
185     {
186         return _busyThreshold;
187     }
188 
189     /* ------------------------------------------------------------ */
190     /**
191      * Sets the busy threshold.
192      *
193      * @param percent the new busy threshold
194      */
195     public void setBusyThreshold(int percent)
196     {
197         _busyThreshold = percent;
198     }
199 
200     /* ------------------------------------------------------------ */
201     /**
202      * Gets the log threshold.
203      *
204      * @return the log threshold
205      */
206     public int getLogThreshold()
207     {
208         return _logThreshold;
209     }
210 
211     /* ------------------------------------------------------------ */
212     /**
213      * Sets the log threshold.
214      *
215      * @param percent the new log threshold
216      */
217     public void setLogThreshold(int percent)
218     {
219         _logThreshold = percent;
220     }
221 
222     /* ------------------------------------------------------------ */
223     /**
224      * Gets the stack depth.
225      *
226      * @return the stack depth
227      */
228     public int getStackDepth()
229     {
230         return _stackDepth;
231     }
232 
233     /* ------------------------------------------------------------ */
234     /**
235      * Sets the stack depth.
236      *
237      * @param stackDepth the new stack depth
238      */
239     public void setStackDepth(int stackDepth)
240     {
241         _stackDepth = stackDepth;
242     }
243     
244     /* ------------------------------------------------------------ */
245     /**
246      * Sets the stack trace trail length.
247      *
248      * @param trailLength the new trail length
249      */
250     public void setTrailLength(int trailLength)
251     {
252         _trailLength = trailLength;
253     }
254 
255     /* ------------------------------------------------------------ */
256     /**
257      * Gets the stack trace trail length.
258      *
259      * @return the trail length
260      */
261     public int getTrailLength()
262     {
263         return _trailLength;
264     }
265 
266     /* ------------------------------------------------------------ */
267     /**
268      * Enable logging of CPU usage.
269      *
270      * @param frequencyMs the logging frequency 
271      * @param thresholdPercent the logging threshold
272      */
273     public void logCpuUsage(int frequencyMs, int thresholdPercent)
274     {
275         setLogInterval(frequencyMs);
276         setLogThreshold(thresholdPercent);
277     }
278     
279     /* ------------------------------------------------------------ */
280     /**
281      * @return A {@link Dumpable} that is dumped whenever spinning threads are detected
282      */
283     public Dumpable getDumpable()
284     {
285         return _dumpable;
286     }
287 
288     /* ------------------------------------------------------------ */
289     /**
290      * @param dumpable A {@link Dumpable} that is dumped whenever spinning threads are detected
291      */
292     public void setDumpable(Dumpable dumpable)
293     {
294         _dumpable = dumpable;
295     }
296 
297     /* ------------------------------------------------------------ */
298     /**
299      * @see org.eclipse.jetty.util.component.AbstractLifeCycle#doStart()
300      */
301     public void doStart()
302     {
303         _done = false;
304         
305         _runner = new Thread(this);
306         _runner.setDaemon(true);
307         _runner.start();
308 
309         LOG.info("Thread Monitor started successfully");
310     }
311     
312     /* ------------------------------------------------------------ */
313     /**
314      * @see org.eclipse.jetty.util.component.AbstractLifeCycle#doStop()
315      */
316     public void doStop()
317     {
318         if (_runner != null)
319         {
320             _done = true;
321             try
322             {
323                 _runner.join();
324             }
325             catch (InterruptedException ex) {}
326         }
327     }
328 
329     /* ------------------------------------------------------------ */
330     /**
331      * Retrieve all avaliable thread ids
332      *
333      * @return array of thread ids
334      */
335     protected long[] getAllThreadIds()
336     {
337         return _threadBean.getAllThreadIds();
338     }
339 
340     /* ------------------------------------------------------------ */
341     /**
342      * Retrieve the cpu time for specified thread.
343      *
344      * @param id thread id
345      * @return cpu time of the thread
346      */
347     protected long getThreadCpuTime(long id)
348     {
349         return _threadBean.getThreadCpuTime(id);
350     }
351     
352     /* ------------------------------------------------------------ */
353     /**
354      * Initialize JMX objects.
355      */
356     protected void init()
357     {
358         _threadBean = ManagementFactory.getThreadMXBean();
359         if (_threadBean.isThreadCpuTimeSupported())
360         {
361             _threadBean.setThreadCpuTimeEnabled(true);
362         }
363     }
364     
365     /* ------------------------------------------------------------ */
366     /**
367      * @see java.lang.Runnable#run()
368      */
369     public void run()
370     {
371         // Initialize repeat flag
372         boolean repeat = false;
373         boolean scanNow, logNow;
374 
375         // Set next scan time and log time
376         long nextScanTime = System.currentTimeMillis();
377         long nextLogTime = nextScanTime + _logInterval;
378         
379         while (!_done)
380         {
381             long currTime = System.currentTimeMillis();
382             scanNow = (currTime > nextScanTime);
383             logNow = (_logInterval > 0 && currTime > nextLogTime);
384             if (repeat || scanNow || logNow)
385             {
386                 repeat = collectThreadInfo();
387                 logThreadInfo(logNow);
388 
389                 if (scanNow)
390                 {
391                     nextScanTime = System.currentTimeMillis() + _scanInterval;
392                 }
393                 if (logNow)
394                 {
395                     nextLogTime = System.currentTimeMillis() + _logInterval;
396                 }
397             }
398 
399             // Sleep only if not going to repeat scanning immediately
400             if (!repeat)
401             {
402                 try
403                 {
404                     Thread.sleep(100);
405                 }
406                 catch (InterruptedException ex)
407                 {
408                     LOG.ignore(ex);
409                 }
410             }
411         }
412         
413     }
414     
415     /* ------------------------------------------------------------ */
416     /**
417      * Collect thread info.
418      */
419     private boolean collectThreadInfo()
420     {
421         boolean repeat = false;
422         try
423         {
424             // Retrieve stack traces for all threads at once as it 
425             // was proven to be an order of magnitude faster when
426             // retrieving a single thread stack trace.
427             Map<Thread,StackTraceElement[]> all = Thread.getAllStackTraces();
428             
429             for (Map.Entry<Thread,StackTraceElement[]> entry : all.entrySet())
430             {
431                 Thread thread = entry.getKey();
432                 long threadId = thread.getId();
433                 
434                 // Skip our own runner thread
435                 if (threadId == _runner.getId())
436                 {
437                     continue;
438                 }
439 
440                 ThreadMonitorInfo currMonitorInfo = _monitorInfo.get(Long.valueOf(threadId));
441                 if (currMonitorInfo == null)
442                 {
443                     // Create thread info object for a new thread 
444                     currMonitorInfo = new ThreadMonitorInfo(thread);
445                     currMonitorInfo.setStackTrace(entry.getValue());
446                     currMonitorInfo.setCpuTime(getThreadCpuTime(threadId));
447                     currMonitorInfo.setSampleTime(System.nanoTime());
448                     _monitorInfo.put(Long.valueOf(threadId), currMonitorInfo);
449                 }
450                 else
451                 {
452                     // Update the existing thread info object
453                     currMonitorInfo.setStackTrace(entry.getValue());
454                     currMonitorInfo.setCpuTime(getThreadCpuTime(threadId));
455                     currMonitorInfo.setSampleTime(System.nanoTime());
456     
457                     // Stack trace count holds logging state
458                     int count = currMonitorInfo.getTraceCount();
459                     if (count >= 0 && currMonitorInfo.isSpinning())
460                     {
461                         // Thread was spinning and was logged before
462                         if (count < _trailLength) 
463                         {
464                             // Log another stack trace
465                             currMonitorInfo.setTraceCount(count+1);
466                             repeat = true;
467                             continue;
468                         }
469                         
470                         // Reset spin flag and trace count
471                         currMonitorInfo.setSpinning(false);
472                         currMonitorInfo.setTraceCount(-1);
473                     }
474                     if (currMonitorInfo.getCpuUtilization() > _busyThreshold)
475                     {
476                         // Thread is busy
477                         StackTraceElement[] lastStackTrace = currMonitorInfo.getStackTrace();
478     
479                         if (lastStackTrace != null 
480                         && matchStackTraces(lastStackTrace, entry.getValue()))
481                         {
482                             // Thread is spinning
483                             currMonitorInfo.setSpinning(true);
484                             if (count < 0)
485                             {
486                                 // Enable logging of spin status and stack traces 
487                                 // only if the incoming trace count is negative
488                                 // that indicates a new scan for this thread
489                                 currMonitorInfo.setTraceCount(0);
490                                 repeat = (_trailLength > 0);
491                             }
492                         }
493                     }
494                 }
495             }
496         }
497         catch (Exception ex)
498         {
499             LOG.debug(ex);
500         }
501         return repeat;
502     }
503 
504     /* ------------------------------------------------------------ */
505     protected void logThreadInfo(boolean logAll)
506     {
507         if (_monitorInfo.size() > 0)
508         {
509             // Select thread objects for all live threads
510             long[] running = getAllThreadIds();
511             List<ThreadMonitorInfo> all = new ArrayList<ThreadMonitorInfo>();
512             for (int idx=0; idx<running.length; idx++)
513             {
514                 ThreadMonitorInfo info = _monitorInfo.get(running[idx]); 
515                 if (info != null)
516                 {
517                     all.add(info);
518                 }
519             }
520 
521             // Sort selected thread objects by their CPU utilization
522             Collections.sort(all, new Comparator<ThreadMonitorInfo>()
523             {
524                 /* ------------------------------------------------------------ */
525                 public int compare(ThreadMonitorInfo info1, ThreadMonitorInfo info2)
526                 {
527                     return (int)Math.signum(info2.getCpuUtilization()-info1.getCpuUtilization());
528                 }
529             });
530             
531             String format = "Thread '%2$s'[%3$s,id:%1$d,cpu:%4$.2f%%]%5$s";
532             
533             // Log thread information for threads that exceed logging threshold
534             // or log spinning threads if their trace count is zero
535             boolean spinning=false;
536             for (ThreadMonitorInfo info : all)
537             {
538                 if (logAll && info.getCpuUtilization() > _logThreshold 
539                 || info.isSpinning() && info.getTraceCount() == 0)
540                 {
541                     String message = String.format(format, 
542                             info.getThreadId(), info.getThreadName(), 
543                             info.getThreadState(), info.getCpuUtilization(),
544                             info.isSpinning() ? " SPINNING" : "");
545                    _logger.info(message);
546                    spinning=true;
547                 }
548             }
549             
550             // Dump info
551             if (spinning && _dumpable!=null)
552             {
553                 System.err.println(_dumpable.dump());
554             }
555 
556             // Log stack traces for spinning threads with positive trace count
557             for (ThreadMonitorInfo info : all)
558             {
559                 if (info.isSpinning() && info.getTraceCount() >= 0)
560                 {
561                     String message = String.format(format,
562                             info.getThreadId(), info.getThreadName(), 
563                             info.getThreadState(), info.getCpuUtilization(),
564                             " STACK TRACE");
565                     _logger.warn(new ThreadMonitorException(message, info.getStackTrace()));
566                 }
567             }
568         }
569     }
570 
571     /* ------------------------------------------------------------ */
572     /**
573      * Match stack traces.
574      *
575      * @param lastStackTrace last stack trace
576      * @param stackTrace current stack trace
577      * @return true, if successful
578      */
579     private boolean matchStackTraces(StackTraceElement[] lastStackTrace, StackTraceElement[] stackTrace)
580     {
581         boolean match = true;
582         int count = Math.min(_stackDepth, Math.min(lastStackTrace.length, stackTrace.length));
583         
584         for (int idx=0; idx < count; idx++)
585         {
586             if (!stackTrace[idx].equals(lastStackTrace[idx]))
587             {
588                 match = false;
589                 break;
590             }
591         }
592         return match;
593     }
594 }