1 // ========================================================================
2 // Copyright (c) Webtide LLC
3 // ------------------------------------------------------------------------
4 // All rights reserved. This program and the accompanying materials
5 // are made available under the terms of the Eclipse Public License v1.0
6 // and Apache License v2.0 which accompanies this distribution.
7 // The Eclipse Public License is available at
8 // http://www.eclipse.org/legal/epl-v10.html
9 // The Apache License v2.0 is available at
10 // http://www.opensource.org/licenses/apache2.0.php
11 // You may elect to redistribute this code under either of these licenses.
12 // ========================================================================
13
14
15 package org.eclipse.jetty.monitor;
16
17 import java.lang.management.ManagementFactory;
18 import java.lang.management.ThreadMXBean;
19 import java.util.ArrayList;
20 import java.util.Collections;
21 import java.util.Comparator;
22 import java.util.HashMap;
23 import java.util.List;
24 import java.util.Map;
25
26 import org.eclipse.jetty.monitor.thread.ThreadMonitorException;
27 import org.eclipse.jetty.monitor.thread.ThreadMonitorInfo;
28 import org.eclipse.jetty.util.component.AbstractLifeCycle;
29 import org.eclipse.jetty.util.component.Dumpable;
30 import org.eclipse.jetty.util.log.Log;
31 import org.eclipse.jetty.util.log.Logger;
32 import org.eclipse.jetty.util.log.Logger;
33
34
35 /* ------------------------------------------------------------ */
36 public class ThreadMonitor extends AbstractLifeCycle implements Runnable
37 {
38 private static final Logger LOG = Log.getLogger(ThreadMonitor.class);
39
40 private int _scanInterval;
41 private int _logInterval;
42 private int _busyThreshold;
43 private int _logThreshold;
44 private int _stackDepth;
45 private int _trailLength;
46
47 private ThreadMXBean _threadBean;
48
49 private Thread _runner;
50 private Logger _logger;
51 private volatile boolean _done = true;
52 private Dumpable _dumpable;
53
54 private Map<Long,ThreadMonitorInfo> _monitorInfo;
55
56 /* ------------------------------------------------------------ */
57 /**
58 * Instantiates a new thread monitor.
59 *
60 * @throws Exception
61 */
62 public ThreadMonitor() throws Exception
63 {
64 this(5000);
65 }
66
67 /* ------------------------------------------------------------ */
68 /**
69 * Instantiates a new thread monitor.
70 *
71 * @param intervalMs scan interval
72 * @throws Exception
73 */
74 public ThreadMonitor(int intervalMs) throws Exception
75 {
76 this(intervalMs, 95);
77 }
78
79 /* ------------------------------------------------------------ */
80 /**
81 * Instantiates a new thread monitor.
82 *
83 * @param intervalMs scan interval
84 * @param threshold busy threshold
85 * @throws Exception
86 */
87 public ThreadMonitor(int intervalMs, int threshold) throws Exception
88 {
89 this(intervalMs, threshold, 3);
90 }
91
92 /* ------------------------------------------------------------ */
93 /**
94 * Instantiates a new thread monitor.
95 *
96 * @param intervalMs scan interval
97 * @param threshold busy threshold
98 * @param depth stack compare depth
99 * @throws Exception
100 */
101 public ThreadMonitor(int intervalMs, int threshold, int depth) throws Exception
102 {
103 this(intervalMs, threshold, depth, 3);
104 }
105
106 /* ------------------------------------------------------------ */
107 /**
108 * Instantiates a new thread monitor.
109 *
110 * @param intervalMs scan interval
111 * @param threshold busy threshold
112 * @param depth stack compare depth
113 * @param trail length of stack trail
114 * @throws Exception
115 */
116 public ThreadMonitor(int intervalMs, int threshold, int depth, int trail) throws Exception
117 {
118 _scanInterval = intervalMs;
119 _busyThreshold = threshold;
120 _stackDepth = depth;
121 _trailLength = trail;
122
123 _logger = Log.getLogger(ThreadMonitor.class.getName());
124 _monitorInfo = new HashMap<Long, ThreadMonitorInfo>();
125
126 init();
127 }
128
129 /* ------------------------------------------------------------ */
130 /**
131 * Gets the scan interval.
132 *
133 * @return the scan interval
134 */
135 public int getScanInterval()
136 {
137 return _scanInterval;
138 }
139
140 /* ------------------------------------------------------------ */
141 /**
142 * Sets the scan interval.
143 *
144 * @param ms the new scan interval
145 */
146 public void setScanInterval(int ms)
147 {
148 _scanInterval = ms;
149 }
150
151 /* ------------------------------------------------------------ */
152 /**
153 * Gets the log interval.
154 *
155 * @return the log interval
156 */
157 public int getLogInterval()
158 {
159 return _logInterval;
160 }
161
162 /* ------------------------------------------------------------ */
163 /**
164 * Sets the log interval.
165 *
166 * @param ms the new log interval
167 */
168 public void setLogInterval(int ms)
169 {
170 _logInterval = ms;
171 }
172
173 /* ------------------------------------------------------------ */
174 /**
175 * Gets the busy threshold.
176 *
177 * @return the busy threshold
178 */
179 public int getBusyThreshold()
180 {
181 return _busyThreshold;
182 }
183
184 /* ------------------------------------------------------------ */
185 /**
186 * Sets the busy threshold.
187 *
188 * @param percent the new busy threshold
189 */
190 public void setBusyThreshold(int percent)
191 {
192 _busyThreshold = percent;
193 }
194
195 /* ------------------------------------------------------------ */
196 /**
197 * Gets the log threshold.
198 *
199 * @return the log threshold
200 */
201 public int getLogThreshold()
202 {
203 return _logThreshold;
204 }
205
206 /* ------------------------------------------------------------ */
207 /**
208 * Sets the log threshold.
209 *
210 * @param percent the new log threshold
211 */
212 public void setLogThreshold(int percent)
213 {
214 _logThreshold = percent;
215 }
216
217 /* ------------------------------------------------------------ */
218 /**
219 * Gets the stack depth.
220 *
221 * @return the stack depth
222 */
223 public int getStackDepth()
224 {
225 return _stackDepth;
226 }
227
228 /* ------------------------------------------------------------ */
229 /**
230 * Sets the stack depth.
231 *
232 * @param stackDepth the new stack depth
233 */
234 public void setStackDepth(int stackDepth)
235 {
236 _stackDepth = stackDepth;
237 }
238
239 /* ------------------------------------------------------------ */
240 /**
241 * Sets the stack trace trail length.
242 *
243 * @param trailLength the new trail length
244 */
245 public void setTrailLength(int trailLength)
246 {
247 _trailLength = trailLength;
248 }
249
250 /* ------------------------------------------------------------ */
251 /**
252 * Gets the stack trace trail length.
253 *
254 * @return the trail length
255 */
256 public int getTrailLength()
257 {
258 return _trailLength;
259 }
260
261 /* ------------------------------------------------------------ */
262 /**
263 * Enable logging of CPU usage.
264 *
265 * @param frequencyMs the logging frequency
266 * @param thresholdPercent the logging threshold
267 */
268 public void logCpuUsage(int frequencyMs, int thresholdPercent)
269 {
270 setLogInterval(frequencyMs);
271 setLogThreshold(thresholdPercent);
272 }
273
274 /* ------------------------------------------------------------ */
275 /**
276 * @return A {@link Dumpable} that is dumped whenever spinning threads are detected
277 */
278 public Dumpable getDumpable()
279 {
280 return _dumpable;
281 }
282
283 /* ------------------------------------------------------------ */
284 /**
285 * @param dumpable A {@link Dumpable} that is dumped whenever spinning threads are detected
286 */
287 public void setDumpable(Dumpable dumpable)
288 {
289 _dumpable = dumpable;
290 }
291
292 /* ------------------------------------------------------------ */
293 /**
294 * @see org.eclipse.jetty.util.component.AbstractLifeCycle#doStart()
295 */
296 public void doStart()
297 {
298 _done = false;
299
300 _runner = new Thread(this);
301 _runner.setDaemon(true);
302 _runner.start();
303
304 LOG.info("Thread Monitor started successfully");
305 }
306
307 /* ------------------------------------------------------------ */
308 /**
309 * @see org.eclipse.jetty.util.component.AbstractLifeCycle#doStop()
310 */
311 public void doStop()
312 {
313 if (_runner != null)
314 {
315 _done = true;
316 try
317 {
318 _runner.join();
319 }
320 catch (InterruptedException ex) {}
321 }
322 }
323
324 /* ------------------------------------------------------------ */
325 /**
326 * Retrieve all avaliable thread ids
327 *
328 * @return array of thread ids
329 */
330 protected long[] getAllThreadIds()
331 {
332 return _threadBean.getAllThreadIds();
333 }
334
335 /* ------------------------------------------------------------ */
336 /**
337 * Retrieve the cpu time for specified thread.
338 *
339 * @param id thread id
340 * @return cpu time of the thread
341 */
342 protected long getThreadCpuTime(long id)
343 {
344 return _threadBean.getThreadCpuTime(id);
345 }
346
347 /* ------------------------------------------------------------ */
348 /**
349 * Initialize JMX objects.
350 */
351 protected void init()
352 {
353 _threadBean = ManagementFactory.getThreadMXBean();
354 if (_threadBean.isThreadCpuTimeSupported())
355 {
356 _threadBean.setThreadCpuTimeEnabled(true);
357 }
358 }
359
360 /* ------------------------------------------------------------ */
361 /**
362 * @see java.lang.Runnable#run()
363 */
364 public void run()
365 {
366 // Initialize repeat flag
367 boolean repeat = false;
368 boolean scanNow, logNow;
369
370 // Set next scan time and log time
371 long nextScanTime = System.currentTimeMillis();
372 long nextLogTime = nextScanTime + _logInterval;
373
374 while (!_done)
375 {
376 long currTime = System.currentTimeMillis();
377 scanNow = (currTime > nextScanTime);
378 logNow = (_logInterval > 0 && currTime > nextLogTime);
379 if (repeat || scanNow || logNow)
380 {
381 repeat = collectThreadInfo();
382 logThreadInfo(logNow);
383
384 if (scanNow)
385 {
386 nextScanTime = System.currentTimeMillis() + _scanInterval;
387 }
388 if (logNow)
389 {
390 nextLogTime = System.currentTimeMillis() + _logInterval;
391 }
392 }
393
394 // Sleep only if not going to repeat scanning immediately
395 if (!repeat)
396 {
397 try
398 {
399 Thread.sleep(100);
400 }
401 catch (InterruptedException ex)
402 {
403 LOG.ignore(ex);
404 }
405 }
406 }
407
408 }
409
410 /* ------------------------------------------------------------ */
411 /**
412 * Collect thread info.
413 */
414 private boolean collectThreadInfo()
415 {
416 boolean repeat = false;
417 try
418 {
419 // Retrieve stack traces for all threads at once as it
420 // was proven to be an order of magnitude faster when
421 // retrieving a single thread stack trace.
422 Map<Thread,StackTraceElement[]> all = Thread.getAllStackTraces();
423
424 for (Map.Entry<Thread,StackTraceElement[]> entry : all.entrySet())
425 {
426 Thread thread = entry.getKey();
427 long threadId = thread.getId();
428
429 // Skip our own runner thread
430 if (threadId == _runner.getId())
431 {
432 continue;
433 }
434
435 ThreadMonitorInfo currMonitorInfo = _monitorInfo.get(Long.valueOf(threadId));
436 if (currMonitorInfo == null)
437 {
438 // Create thread info object for a new thread
439 currMonitorInfo = new ThreadMonitorInfo(thread);
440 currMonitorInfo.setStackTrace(entry.getValue());
441 currMonitorInfo.setCpuTime(getThreadCpuTime(threadId));
442 currMonitorInfo.setSampleTime(System.nanoTime());
443 _monitorInfo.put(Long.valueOf(threadId), currMonitorInfo);
444 }
445 else
446 {
447 // Update the existing thread info object
448 currMonitorInfo.setStackTrace(entry.getValue());
449 currMonitorInfo.setCpuTime(getThreadCpuTime(threadId));
450 currMonitorInfo.setSampleTime(System.nanoTime());
451
452 // Stack trace count holds logging state
453 int count = currMonitorInfo.getTraceCount();
454 if (count >= 0 && currMonitorInfo.isSpinning())
455 {
456 // Thread was spinning and was logged before
457 if (count < _trailLength)
458 {
459 // Log another stack trace
460 currMonitorInfo.setTraceCount(count+1);
461 repeat = true;
462 continue;
463 }
464
465 // Reset spin flag and trace count
466 currMonitorInfo.setSpinning(false);
467 currMonitorInfo.setTraceCount(-1);
468 }
469 if (currMonitorInfo.getCpuUtilization() > _busyThreshold)
470 {
471 // Thread is busy
472 StackTraceElement[] lastStackTrace = currMonitorInfo.getStackTrace();
473
474 if (lastStackTrace != null
475 && matchStackTraces(lastStackTrace, entry.getValue()))
476 {
477 // Thread is spinning
478 currMonitorInfo.setSpinning(true);
479 if (count < 0)
480 {
481 // Enable logging of spin status and stack traces
482 // only if the incoming trace count is negative
483 // that indicates a new scan for this thread
484 currMonitorInfo.setTraceCount(0);
485 repeat = (_trailLength > 0);
486 }
487 }
488 }
489 }
490 }
491 }
492 catch (Exception ex)
493 {
494 LOG.debug(ex);
495 }
496 return repeat;
497 }
498
499 /* ------------------------------------------------------------ */
500 protected void logThreadInfo(boolean logAll)
501 {
502 if (_monitorInfo.size() > 0)
503 {
504 // Select thread objects for all live threads
505 long[] running = getAllThreadIds();
506 List<ThreadMonitorInfo> all = new ArrayList<ThreadMonitorInfo>();
507 for (int idx=0; idx<running.length; idx++)
508 {
509 ThreadMonitorInfo info = _monitorInfo.get(running[idx]);
510 if (info != null)
511 {
512 all.add(info);
513 }
514 }
515
516 // Sort selected thread objects by their CPU utilization
517 Collections.sort(all, new Comparator<ThreadMonitorInfo>()
518 {
519 /* ------------------------------------------------------------ */
520 public int compare(ThreadMonitorInfo info1, ThreadMonitorInfo info2)
521 {
522 return (int)Math.signum(info2.getCpuUtilization()-info1.getCpuUtilization());
523 }
524 });
525
526 String format = "Thread '%2$s'[%3$s,id:%1$d,cpu:%4$.2f%%]%5$s";
527
528 // Log thread information for threads that exceed logging threshold
529 // or log spinning threads if their trace count is zero
530 boolean spinning=false;
531 for (ThreadMonitorInfo info : all)
532 {
533 if (logAll && info.getCpuUtilization() > _logThreshold
534 || info.isSpinning() && info.getTraceCount() == 0)
535 {
536 String message = String.format(format,
537 info.getThreadId(), info.getThreadName(),
538 info.getThreadState(), info.getCpuUtilization(),
539 info.isSpinning() ? " SPINNING" : "");
540 _logger.info(message);
541 spinning=true;
542 }
543 }
544
545 // Dump info
546 if (spinning && _dumpable!=null)
547 {
548 System.err.println(_dumpable.dump());
549 }
550
551 // Log stack traces for spinning threads with positive trace count
552 for (ThreadMonitorInfo info : all)
553 {
554 if (info.isSpinning() && info.getTraceCount() >= 0)
555 {
556 String message = String.format(format,
557 info.getThreadId(), info.getThreadName(),
558 info.getThreadState(), info.getCpuUtilization(),
559 " STACK TRACE");
560 _logger.warn(new ThreadMonitorException(message, info.getStackTrace()));
561 }
562 }
563 }
564 }
565
566 /* ------------------------------------------------------------ */
567 /**
568 * Match stack traces.
569 *
570 * @param lastStackTrace last stack trace
571 * @param stackTrace current stack trace
572 * @return true, if successful
573 */
574 private boolean matchStackTraces(StackTraceElement[] lastStackTrace, StackTraceElement[] stackTrace)
575 {
576 boolean match = true;
577 int count = Math.min(_stackDepth, Math.min(lastStackTrace.length, stackTrace.length));
578
579 for (int idx=0; idx < count; idx++)
580 {
581 if (!stackTrace[idx].equals(lastStackTrace[idx]))
582 {
583 match = false;
584 break;
585 }
586 }
587 return match;
588 }
589 }