Line data Source code
1 : /*******************************************************************\
2 :
3 : Name: msysmon.cxx
4 : Created by: J.T.K.McKenna
5 :
6 : Contents: Front end for monitoring CPU and Memory usage with MIDAS
7 : *
8 : * Parse /proc/stat, /proc/meminfo and /proc/net/dev like htop
9 : *
10 : * Equipment names are assigned by the local hostname, so run an
11 : * instance for each system you want to monitor... eg:
12 : * ssh mydaq msysmon
13 : * ssh myvme msysmon
14 : * ssh mypi msysmon
15 :
16 : \********************************************************************/
17 :
18 : #ifndef PROCSTATFILE
19 : #define PROCSTATFILE "/proc/stat"
20 : #endif
21 :
22 : #ifndef PROCMEMINFOFILE
23 : #define PROCMEMINFOFILE "/proc/meminfo"
24 : #endif
25 :
26 : #ifndef PROCNETSTATFILE
27 : #define PROCNETSTATFILE "/proc/net/dev"
28 : #endif
29 :
30 : #define String_startsWith(s, match) (strstr((s), (match)) == (s))
31 :
32 : #undef NDEBUG // midas required assert() to be always enabled
33 :
34 : #include <stdio.h>
35 : #include <stdlib.h>
36 : #include <unistd.h>
37 : #include <stdint.h>
38 : #include <sys/time.h>
39 : #include <sys/types.h>
40 : #include <sys/stat.h>
41 : #include <fcntl.h>
42 : #include <errno.h>
43 : #include <math.h>
44 : #include <ctype.h>
45 : #include <assert.h>
46 : #include <string.h>
47 : #include <iostream>
48 : #include "midas.h"
49 : #include "mfe.h"
50 : #include "mstrlcpy.h"
51 :
52 : #ifdef HAVE_LM_SENSORS
53 : #include <sensors/sensors.h>
54 : #endif
55 :
56 : /*-- Globals -------------------------------------------------------*/
57 :
58 : /* The frontend name (client name) as seen by other MIDAS clients */
59 : #ifdef HAVE_LM_SENSORS
60 : const char *frontend_name = "msysmon-lmsensors";
61 : #else
62 : const char *frontend_name = "msysmon";
63 : #endif
64 : /* The frontend file name, don't change it */
65 : const char *frontend_file_name = __FILE__;
66 :
67 : /* frontend_loop is called periodically if this variable is TRUE */
68 : BOOL frontend_call_loop = TRUE;
69 :
70 : /* a frontend status page is displayed with this frequency in ms */
71 : //INT display_period = 3000;
72 : INT display_period = 0;
73 :
74 : /* maximum event size produced by this frontend */
75 : INT max_event_size = 4*1024*1024;
76 : INT max_event_size_frag = 4*1024*1024;
77 :
78 : /* buffer size to hold events */
79 : INT event_buffer_size = 10*1024*1024;
80 :
81 : /*-- Function declarations -----------------------------------------*/
82 :
83 : INT frontend_init();
84 : INT frontend_exit();
85 : INT begin_of_run(INT run_number, char *error);
86 : INT end_of_run(INT run_number, char *error);
87 : INT pause_run(INT run_number, char *error);
88 : INT resume_run(INT run_number, char *error);
89 : INT frontend_loop();
90 : INT poll_event(INT source, INT count, BOOL test);
91 : INT interrupt_configure(INT cmd, INT source, PTYPE adr);
92 :
93 : int read_system_load(char *pevent, int off);
94 :
95 : /*-- Equipment list ------------------------------------------------*/
96 :
97 : #define EVID_MONITOR 63
98 :
99 : BOOL equipment_common_overwrite = FALSE;
100 :
101 : EQUIPMENT equipment[] = {
102 :
103 : { "${HOSTNAME}_msysmon", /* equipment name */ {
104 : EVID_MONITOR, 0, /* event ID, trigger mask */
105 : "SYSTEM", /* event buffer */
106 : EQ_PERIODIC, /* equipment type */
107 : 0, /* event source */
108 : "MIDAS", /* format */
109 : TRUE, /* enabled */
110 : RO_ALWAYS | RO_ODB, /* Read when running */
111 : 10000, /* poll every so milliseconds */
112 : 0, /* stop run after this event limit */
113 : 0, /* number of sub events */
114 : 1, /* history period */
115 : "", "", ""
116 : },
117 : read_system_load,/* readout routine */
118 : },
119 : { "" }
120 : };
121 :
122 : // Not all items in struct are logged, but all are calculated
123 : // leaving options to log more if we want to...
// Counters kept for one CPU entry (entry 0 of `cpus` is the total for all
// CPUs, the rest are per-core).
// NOTE(review): presumably the *Time members hold the latest cumulative
// values and the *Period members the change since the previous reading --
// confirm against ReadCPUData(), which fills this structure.
// Member order is significant for layout and must not be changed.
typedef struct CPUData_ {
   // --- cumulative counters ---
   unsigned long long totalTime;
   unsigned long long userTime;
   unsigned long long systemTime;
   unsigned long long systemAllTime;
   unsigned long long idleAllTime;
   unsigned long long idleTime;
   unsigned long long niceTime;
   unsigned long long ioWaitTime;
   unsigned long long irqTime;
   unsigned long long softIrqTime;
   unsigned long long stealTime;
   unsigned long long guestTime;
   // --- per-interval counters ---
   unsigned long long totalPeriod;
   unsigned long long userPeriod;
   unsigned long long systemPeriod;
   unsigned long long systemAllPeriod;
   unsigned long long idleAllPeriod;
   unsigned long long idlePeriod;
   unsigned long long nicePeriod;
   unsigned long long ioWaitPeriod;
   unsigned long long irqPeriod;
   unsigned long long softIrqPeriod;
   unsigned long long stealPeriod;
   unsigned long long guestPeriod;
} CPUData;
150 : int cpuCount;
151 : std::vector<CPUData*> cpus;
152 : void ReadCPUData();
153 : unsigned long long int usertime, nicetime, systemtime, idletime;
154 :
155 :
// Statistics for one direction (receive or transmit) of one network
// interface. frontend_init() creates one NetStat per interface in each of
// NetReceive and NetTransmit and stores the interface name in `face`.
// NOTE(review): presumably the plain members are running totals and the
// *Period members the change over the last interval -- confirm against
// ReadNetData().
struct NetStat
{
   std::string face;                 // text preceding ':' on the /proc/net/dev line

   // --- totals ---
   unsigned long bytes;
   unsigned long packets;
   unsigned long errs;
   unsigned long drop;
   unsigned long fifo;
   unsigned long frame;
   unsigned long compressed;
   unsigned long multicast;

   // --- per-interval values ---
   unsigned long bytesPeriod;
   unsigned long packetsPeriod;
   unsigned long errsPeriod;
   unsigned long dropPeriod;
   unsigned long fifoPeriod;
   unsigned long framePeriod;
   unsigned long compressedPeriod;
   unsigned long multicastPeriod;

   timeval tv;                       // Time of these integrated values
};
177 : int networkInterfaceCount=0;
178 : std::vector<NetStat*> NetReceive;
179 : std::vector<NetStat*> NetTransmit;
180 : void ReadNetData();
181 :
182 :
183 : #ifdef HAVE_NVIDIA
184 : #include "nvml.h"
185 :
// Bit flags describing which readings a GPU supports; they are OR-ed
// together in GPU::feature_support.
enum feature {
   TEMPERATURE      = 0x01,
   COMPUTE_MODE     = 0x02,
   POWER_USAGE      = 0x04,
   MEMORY_INFO      = 0x08,
   CLOCK_INFO       = 0x10,
   FAN_INFO         = 0x20,
   UTILIZATION_INFO = 0x40
};
195 : struct GPU {
196 : unsigned index;
197 :
198 : nvmlDevice_t handle;
199 :
200 : nvmlPciInfo_t pci;
201 : nvmlComputeMode_t compute_mode;
202 : nvmlMemory_t memory;
203 : nvmlEventSet_t event_set;
204 : // Current device resource utilization rates (as percentages)
205 : nvmlUtilization_t util;
206 :
207 : // In Celsius
208 : unsigned temperature;
209 :
210 : // In milliwatts
211 : unsigned power_usage;
212 :
213 : // Maximum clock speeds, in MHz
214 : nvmlClockType_t clock[NVML_CLOCK_COUNT], max_clock[NVML_CLOCK_COUNT];
215 :
216 : // Fan speed, percentage
217 : unsigned fan;
218 :
219 : char name[NVML_DEVICE_NAME_BUFFER_SIZE];
220 : char serial[NVML_DEVICE_SERIAL_BUFFER_SIZE];
221 : char uuid[NVML_DEVICE_UUID_BUFFER_SIZE];
222 :
223 : // Bitmask of enum feature
224 : unsigned feature_support;
225 : };
226 : unsigned nGPUs=HAVE_NVIDIA;
227 : std::vector<GPU*> GPUs;
228 :
229 : // Return string representation of return code
230 : // Strings are directly from NVML documentation
231 :
232 : const char* nvml_error_code_string(nvmlReturn_t ret)
233 : {
234 : switch(ret) {
235 : case NVML_SUCCESS:
236 : return "The operation was successful";
237 : case NVML_ERROR_UNINITIALIZED:
238 : return "was not first initialized with nvmlInit()";
239 : case NVML_ERROR_INVALID_ARGUMENT:
240 : return "A supplied argument is invalid";
241 : case NVML_ERROR_NOT_SUPPORTED:
242 : return "The requested operation is not available on target device";
243 : case NVML_ERROR_NO_PERMISSION:
244 : return "The current user does not have permission for operation";
245 : case NVML_ERROR_ALREADY_INITIALIZED:
246 : return"Deprecated: Multiple initializations are now allowed through ref counting";
247 : case NVML_ERROR_NOT_FOUND:
248 : return "A query to find an object was unsuccessful";
249 : case NVML_ERROR_INSUFFICIENT_SIZE:
250 : return "An input argument is not large enough";
251 : case NVML_ERROR_INSUFFICIENT_POWER:
252 : return "A device’s external power cables are not properly attached";
253 : case NVML_ERROR_DRIVER_NOT_LOADED:
254 : return "NVIDIA driver is not loaded";
255 : case NVML_ERROR_TIMEOUT:
256 : return "User provided timeout passed";
257 : case NVML_ERROR_IRQ_ISSUE:
258 : return "NVIDIA Kernel detected an interrupt issue with a GPU";
259 : case NVML_ERROR_LIBRARY_NOT_FOUND:
260 : return "NVML Shared Library couldn’t be found or loaded";
261 : case NVML_ERROR_FUNCTION_NOT_FOUND:
262 : return"Local version of NVML doesn’t implement this function";
263 : case NVML_ERROR_CORRUPTED_INFOROM:
264 : return "infoROM is corrupted";
265 : case NVML_ERROR_GPU_IS_LOST:
266 : return "The GPU has fallen off the bus or has otherwise become inaccessible.";
267 : case NVML_ERROR_RESET_REQUIRED:
268 : return "The GPU requires a reset before it can be used again";
269 : case NVML_ERROR_OPERATING_SYSTEM:
270 : return "The GPU control device has been blocked by the operating system/cgroups.";
271 : case NVML_ERROR_LIB_RM_VERSION_MISMATCH:
272 : return "RM detects a driver/library version mismatch.";
273 : case NVML_ERROR_IN_USE:
274 : return "An operation cannot be performed because the GPU is currently in use.";
275 : case NVML_ERROR_MEMORY:
276 : return "Insufficient memory.";
277 : case NVML_ERROR_NO_DATA:
278 : return "No data.";
279 : case NVML_ERROR_VGPU_ECC_NOT_SUPPORTED:
280 : return "The requested vgpu operation is not available on target device, becasue ECC is enabled.";
281 : case NVML_ERROR_UNKNOWN:
282 : return "An internal driver error occurred";
283 : }
284 :
285 : return "Unknown error";
286 : }
287 :
288 :
289 : // Simple wrapper function to remove boiler plate code of checking
290 : // NVML API return codes.
291 : //
292 : // Returns non-zero on error, 0 otherwise
293 : static inline int nvml_try(nvmlReturn_t ret, const char* fn)
294 : {
295 : // We ignore the TIMEOUT error, as it simply indicates that
296 : // no events (errors) were triggered in the given interval.
297 : if(ret != NVML_SUCCESS && ret != NVML_ERROR_TIMEOUT) {
298 : fprintf(stderr, "%s: %s: %s\n", fn, nvml_error_code_string(ret),
299 : nvmlErrorString(ret));
300 : return 1;
301 : }
302 :
303 : return 0;
304 : }
305 :
306 : #define NVML_TRY(code) nvml_try(code, #code)
307 :
308 : #endif
309 :
310 : //Cycle through these 16 colours when installing History graphs
311 : std::string colours[16]={
312 : "#00AAFF", "#FF9000", "#FF00A0", "#00C030",
313 : "#A0C0D0", "#D0A060", "#C04010", "#807060",
314 : "#F0C000", "#2090A0", "#D040D0", "#90B000",
315 : "#B0B040", "#B0B0FF", "#FFA0A0", "#A0FFA0"};
316 :
317 :
318 : #ifdef HAVE_LM_SENSORS
319 : class LM_Sensors
320 : {
321 : private:
322 : class MySensor
323 : {
324 : private:
325 : const std::string SensorName;
326 : const std::string FeatureName;
327 : const sensors_chip_name* cn;
328 : const int number;
329 : public:
330 : MySensor(const std::string _SensorName, const std::string _name, const sensors_chip_name* _cn, int _number):
331 : SensorName(_SensorName), FeatureName(_name), cn(_cn), number(_number)
332 : {
333 : }
334 : double GetValue()
335 : {
336 : double value;
337 : sensors_get_value(this->cn,this->number,&value);
338 : return value;
339 : }
340 : std::string GetFullName(size_t limit)
341 : {
342 : std::string fullname = SensorName + "(" + FeatureName + ")";
343 : if (fullname.size()> limit)
344 : return fullname.substr(0,limit-1);
345 : return fullname;
346 : }
347 : };
348 : int status;
349 : std::vector<MySensor*> Temperatures;
350 : std::vector<MySensor*> Fans;
351 :
352 : public:
353 : LM_Sensors()
354 : {
355 : //FILE* = fopen("");
356 : status = sensors_init(NULL);
357 : if (status!=0)
358 : {
359 : printf("Issue with sensors\n");
360 : exit(status);
361 : }
362 : int nr = 0;
363 : while (const sensors_chip_name* cn = sensors_get_detected_chips(0, &nr))
364 : {
365 : int fnr = 0;
366 : while (const sensors_feature * cf = sensors_get_features(cn,&fnr))
367 : {
368 : int sfnr = 0;
369 : //std::cout << sensors_get_label(cn,cf) <<"\t";// <<std::endl;
370 : while (const sensors_subfeature * scf = sensors_get_all_subfeatures(cn , cf, &sfnr))
371 : {
372 : //For more sensor subfeature types, see full list:
373 : //https://github.com/lm-sensors/lm-sensors/blob/master/lib/sensors.h
374 : if (scf->type == SENSORS_SUBFEATURE_TEMP_INPUT )
375 : {
376 : Temperatures.push_back(
377 : new MySensor(
378 : sensors_get_label(cn,cf),
379 : scf->name,
380 : cn,
381 : scf->number)
382 : );
383 : }
384 : if (scf->type == SENSORS_SUBFEATURE_FAN_INPUT )
385 : {
386 : Fans.push_back(
387 : new MySensor(
388 : sensors_get_label(cn,cf),
389 : scf->name,
390 : cn,
391 : scf->number)
392 : );
393 : }
394 : }
395 : }
396 : }
397 : }
398 :
399 : void BuildHostTemperaturePlot()
400 : {
401 : //Insert per Temperature monitor graphs into the history
402 : int status, size;
403 : char path[256];
404 : int NVARS=Temperatures.size();
405 : /////////////////////////////////////////////////////
406 : // Setup variables to plot:
407 : /////////////////////////////////////////////////////
408 : size = 64;
409 : sprintf(path,"/History/Display/msysmon/%s-Temperature/Variables",equipment[0].info.frontend_host);
410 : {
411 : char vars[size*NVARS];
412 : memset(vars, 0, size*NVARS);
413 : for (int i=0; i<NVARS; i++)
414 : {
415 : sprintf(vars+size*i,"%s/TEMP:TEMP[%d]",equipment[0].name,i);
416 : }
417 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
418 : }
419 : assert(status == DB_SUCCESS);
420 :
421 : /////////////////////////////////////////////////////
422 : // Setup labels
423 : /////////////////////////////////////////////////////
424 : size = 32;
425 : sprintf(path,"/History/Display/msysmon/%s-Temperature/Label",equipment[0].info.frontend_host);
426 : {
427 : char vars[size*NVARS];
428 : memset(vars, 0, size*NVARS);
429 : for (int i=0; i<NVARS; i++)
430 : sprintf(vars+size*i,Temperatures.at(i)->GetFullName(size).c_str(),i+1);
431 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
432 : }
433 : assert(status == DB_SUCCESS);
434 :
435 : /////////////////////////////////////////////////////
436 : // Setup colours:
437 : /////////////////////////////////////////////////////
438 : size = 32;
439 : sprintf(path,"/History/Display/msysmon/%s-Temperature/Colour",equipment[0].info.frontend_host);
440 : {
441 : char vars[size*NVARS];
442 : memset(vars, 0, size*NVARS);
443 : for (int i=0; i<NVARS; i++)
444 : sprintf(vars+size*i,"%s",(colours[i%16]).c_str());
445 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
446 : }
447 : assert(status == DB_SUCCESS);
448 :
449 : /////////////////////////////////////////////////////
450 : // Setup time scale and range:
451 : /////////////////////////////////////////////////////
452 : sprintf(path,"/History/Display/msysmon/%s-Temperature/Timescale",equipment[0].info.frontend_host);
453 : status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
454 : double *m=new double();
455 : *m=0.;
456 : sprintf(path,"/History/Display/msysmon/%s-Temperature/Minimum",equipment[0].info.frontend_host);
457 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
458 : *m=100.;
459 : sprintf(path,"/History/Display/msysmon/%s-Temperature/Maximum",equipment[0].info.frontend_host);
460 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
461 : delete m;
462 : }
463 :
464 : char* ReadAndLogSensors(char* pevent)
465 : {
466 : //If sensors_init failed, do nothing
467 : if (status!=0)
468 : return pevent;
469 :
470 : double* v;
471 :
472 : if (Temperatures.size())
473 : {
474 : bk_create(pevent, "TEMP", TID_DOUBLE, (void**)&v);
475 : for ( MySensor* s: Temperatures)
476 : {
477 : *v = s->GetValue();
478 : v++;
479 : }
480 : bk_close(pevent,v);
481 : }
482 : if (Fans.size())
483 : {
484 : bk_create(pevent, "FANS", TID_DOUBLE, (void**)&v);
485 : for ( MySensor* s: Fans)
486 : {
487 : *v = s->GetValue();
488 : v++;
489 : }
490 : bk_close(pevent,v);
491 : }
492 : return pevent;
493 : }
494 :
495 : };
496 :
497 : LM_Sensors* sensors = NULL;
498 : #endif
499 :
500 :
501 : /********************************************************************\
502 : Callback routines for system transitions
503 :
504 : These routines are called whenever a system transition like start/
505 : stop of a run occurs. The routines are called on the following
506 : occasions:
507 :
508 : frontend_init: When the frontend program is started. This routine
509 : should initialize the hardware.
510 :
511 : frontend_exit: When the frontend program is shut down. Can be used
512 : to release any locked resources like memory, commu-
513 : nications ports etc.
514 :
515 : begin_of_run: When a new run is started. Clear scalers, open
516 : rungates, etc.
517 :
518 : end_of_run: Called on a request to stop a run. Can send
519 : end-of-run event and close run gates.
520 :
521 : pause_run: When a run is paused. Should disable trigger events.
522 :
523 : resume_run: When a run is resumed. Should enable trigger events.
524 :
525 : \********************************************************************/
526 :
527 : int event_size = 10*1024;
528 :
529 : /*-- Frontend Init -------------------------------------------------*/
530 :
531 : HNDLE hSet;
532 : int test_rb_wait_sleep = 1;
533 :
534 : // RPC handler
535 :
536 0 : INT rpc_callback(INT index, void *prpc_param[])
537 : {
538 0 : const char* cmd = CSTRING(0);
539 0 : const char* args = CSTRING(1);
540 0 : char* return_buf = CSTRING(2);
541 0 : int return_max_length = CINT(3);
542 :
543 0 : cm_msg(MINFO, "rpc_callback", "--------> rpc_callback: index %d, max_length %d, cmd [%s], args [%s]", index, return_max_length, cmd, args);
544 :
545 : //int example_int = strtol(args, NULL, 0);
546 : //int size = sizeof(int);
547 : //int status = db_set_value(hDB, 0, "/Equipment/" EQ_NAME "/Settings/example_int", &example_int, size, 1, TID_INT);
548 :
549 : char tmp[256];
550 0 : time_t now = time(NULL);
551 0 : sprintf(tmp, "{ \"current_time\" : [ %d, \"%s\"] }", (int)now, ctime(&now));
552 :
553 0 : mstrlcpy(return_buf, tmp, return_max_length);
554 :
555 0 : return RPC_SUCCESS;
556 : }
557 :
558 :
559 :
560 : #include "msystem.h"
561 :
562 0 : void BuildHostHistoryPlot()
563 : {
564 : //Insert myself into the history
565 :
566 : char path[256];
567 : int status;
568 : int size;
569 0 : int NVARS=5;
570 :
571 : /////////////////////////////////////////////////////
572 : // Setup variables to plot:
573 : /////////////////////////////////////////////////////
574 0 : size = 64; // String length in ODB
575 0 : sprintf(path,"/History/Display/msysmon/%s/Variables",equipment[0].info.frontend_host);
576 : {
577 0 : char vars[size*NVARS];
578 0 : memset(vars, 0, size*NVARS);
579 0 : sprintf(vars+size*0,"%s:LOAD[%d]",equipment[0].name,0);
580 0 : sprintf(vars+size*1,"%s:LOAD[%d]",equipment[0].name,1);
581 0 : sprintf(vars+size*2,"%s:LOAD[%d]",equipment[0].name,2);
582 0 : sprintf(vars+size*3,"%s:MEMP",equipment[0].name);
583 0 : sprintf(vars+size*4,"%s:SWAP",equipment[0].name);
584 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
585 0 : }
586 0 : assert(status == DB_SUCCESS);
587 :
588 : /////////////////////////////////////////////////////
589 : // Setup labels
590 : /////////////////////////////////////////////////////
591 0 : size = 32;
592 0 : sprintf(path,"/History/Display/msysmon/%s/Label",equipment[0].info.frontend_host);
593 : {
594 0 : char vars[size*NVARS];
595 0 : memset(vars, 0, size*NVARS);
596 0 : sprintf(vars+size*0,"NICE CPU Load (%%)");
597 0 : sprintf(vars+size*1,"USER CPU Load (%%)");
598 0 : sprintf(vars+size*2,"SYSTEM CPU Load (%%)");
599 0 : sprintf(vars+size*3,"Memory Usage (%%)");
600 0 : sprintf(vars+size*4,"Swap Usage (%%)");
601 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
602 0 : }
603 0 : assert(status == DB_SUCCESS);
604 :
605 : /////////////////////////////////////////////////////
606 : // Setup colours:
607 : /////////////////////////////////////////////////////
608 0 : size = 32;
609 0 : sprintf(path,"/History/Display/msysmon/%s/Colour",equipment[0].info.frontend_host);
610 : {
611 0 : char vars[size*NVARS];
612 0 : memset(vars, 0, size*NVARS);
613 0 : for (int i=0; i<NVARS; i++)
614 0 : sprintf(vars+size*i,"%s",(colours[i%16]).c_str());
615 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
616 0 : }
617 0 : assert(status == DB_SUCCESS);
618 :
619 : /////////////////////////////////////////////////////
620 : // Setup time scale and range:
621 : /////////////////////////////////////////////////////
622 0 : sprintf(path,"/History/Display/msysmon/%s/Timescale",equipment[0].info.frontend_host);
623 0 : status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
624 0 : double *m=new double();
625 0 : *m=0.;
626 0 : sprintf(path,"/History/Display/msysmon/%s/Minimum",equipment[0].info.frontend_host);
627 0 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
628 0 : *m=100.;
629 0 : sprintf(path,"/History/Display/msysmon/%s/Maximum",equipment[0].info.frontend_host);
630 0 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
631 0 : delete m;
632 0 : }
633 :
634 0 : void BuildHostCPUPlot()
635 : {
636 : //Insert per CPU graphs into the history
637 : int status, size;
638 : char path[256];
639 0 : int NVARS=cpuCount;
640 : /////////////////////////////////////////////////////
641 : // Setup variables to plot:
642 : /////////////////////////////////////////////////////
643 0 : size = 64;
644 0 : sprintf(path,"/History/Display/msysmon/%s-CPU/Variables",equipment[0].info.frontend_host);
645 : {
646 0 : char vars[size*NVARS];
647 0 : memset(vars, 0, size*NVARS);
648 : #ifdef CLASSIC_CPU_VARS
649 : for (int i=0; i<cpuCount; i++)
650 : {
651 : int icpu=i+1;
652 : int h='0'+icpu/100;
653 : int t='0'+(icpu%100)/10;
654 : int u='0'+icpu%10;
655 : if (icpu<10)
656 : sprintf(vars+size*i,"%s:CPU%c[3]",equipment[0].name,u);
657 : else if (icpu<100)
658 : sprintf(vars+size*i,"%s:CP%c%c[3]",equipment[0].name,t,u);
659 : else if (icpu<1000)
660 : sprintf(vars+size*i,"%s:C%c%c%c[3]",equipment[0].name,h,t,u);
661 : else
662 : {
663 : cm_msg(MERROR, frontend_name, "Cannot handle a system with more than 1000 CPUs");
664 : exit(FE_ERR_HW);
665 : }
666 : }
667 : #else
668 0 : for (int i=0; i<cpuCount; i++)
669 : {
670 0 : sprintf(vars+size*i,"%s:CPUA[%d]",equipment[0].name,i);
671 : }
672 : #endif
673 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
674 0 : }
675 0 : assert(status == DB_SUCCESS);
676 :
677 : /////////////////////////////////////////////////////
678 : // Setup labels
679 : /////////////////////////////////////////////////////
680 0 : size = 32;
681 0 : sprintf(path,"/History/Display/msysmon/%s-CPU/Label",equipment[0].info.frontend_host);
682 : {
683 0 : char vars[size*NVARS];
684 0 : memset(vars, 0, size*NVARS);
685 0 : for (int i=0; i<cpuCount; i++)
686 0 : sprintf(vars+size*i,"CPU%d Load (%%)",i+1);
687 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
688 0 : }
689 0 : assert(status == DB_SUCCESS);
690 :
691 : /////////////////////////////////////////////////////
692 : // Setup colours:
693 : /////////////////////////////////////////////////////
694 0 : size = 32;
695 0 : sprintf(path,"/History/Display/msysmon/%s-CPU/Colour",equipment[0].info.frontend_host);
696 : {
697 0 : char vars[size*NVARS];
698 0 : memset(vars, 0, size*NVARS);
699 0 : for (int i=0; i<NVARS; i++)
700 0 : sprintf(vars+size*i,"%s",(colours[i%16]).c_str());
701 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
702 0 : }
703 0 : assert(status == DB_SUCCESS);
704 : /////////////////////////////////////////////////////
705 : // Setup time scale and range:
706 : /////////////////////////////////////////////////////
707 0 : sprintf(path,"/History/Display/msysmon/%s-CPU/Timescale",equipment[0].info.frontend_host);
708 0 : status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
709 0 : double *m=new double();
710 0 : *m=0.;
711 0 : sprintf(path,"/History/Display/msysmon/%s-CPU/Minimum",equipment[0].info.frontend_host);
712 0 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
713 0 : *m=100.;
714 0 : sprintf(path,"/History/Display/msysmon/%s-CPU/Maximum",equipment[0].info.frontend_host);
715 0 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
716 0 : delete m;
717 0 : }
718 :
719 0 : void BuildHostNetPlot()
720 : {
721 : //Insert per CPU graphs into the history
722 : int status, size;
723 : char path[256];
724 0 : int NVARS=networkInterfaceCount*2;
725 : /////////////////////////////////////////////////////
726 : // Setup variables to plot:
727 : /////////////////////////////////////////////////////
728 0 : size = 64;
729 0 : sprintf(path,"/History/Display/msysmon/%s-net/Variables",equipment[0].info.frontend_host);
730 : {
731 0 : char vars[size*NVARS];
732 0 : memset(vars, 0, size*NVARS);
733 0 : for (int i=0; i<networkInterfaceCount; i++)
734 0 : sprintf(vars+size*i,"%s:NETR[%d]",equipment[0].name,i);
735 0 : for (int i=networkInterfaceCount; i<NVARS; i++)
736 0 : sprintf(vars+size*i,"%s:NETT[%d]",equipment[0].name,i-networkInterfaceCount);
737 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
738 0 : }
739 0 : assert(status == DB_SUCCESS);
740 :
741 : /////////////////////////////////////////////////////
742 : // Setup labels
743 : /////////////////////////////////////////////////////
744 0 : size = 32;
745 0 : sprintf(path,"/History/Display/msysmon/%s-net/Label",equipment[0].info.frontend_host);
746 : {
747 0 : char vars[size*NVARS];
748 0 : memset(vars, 0, size*NVARS);
749 0 : for (int i=0; i<networkInterfaceCount; i++)
750 0 : sprintf(vars+size*i,"%s Received (Mbps)",NetReceive.at(i)->face.c_str());
751 0 : for (int i=networkInterfaceCount; i<NVARS; i++)
752 0 : sprintf(vars+size*i,"%s Transmitted (Mbps)",NetReceive.at(i-networkInterfaceCount)->face.c_str());
753 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
754 0 : }
755 0 : assert(status == DB_SUCCESS);
756 :
757 : /////////////////////////////////////////////////////
758 : // Setup colours:
759 : /////////////////////////////////////////////////////
760 0 : size = 32;
761 0 : sprintf(path,"/History/Display/msysmon/%s-net/Colour",equipment[0].info.frontend_host);
762 : {
763 0 : char vars[size*NVARS];
764 0 : memset(vars, 0, size*NVARS);
765 0 : for (int i=0; i<NVARS; i++)
766 0 : sprintf(vars+size*i,"%s",(colours[i%16]).c_str());
767 0 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
768 0 : }
769 0 : assert(status == DB_SUCCESS);
770 : /////////////////////////////////////////////////////
771 : // Setup time scale and range:
772 : /////////////////////////////////////////////////////
773 0 : sprintf(path,"/History/Display/msysmon/%s-net/Timescale",equipment[0].info.frontend_host);
774 0 : status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
775 0 : double *m=new double();
776 0 : *m=0.;
777 0 : sprintf(path,"/History/Display/msysmon/%s-net/Minimum",equipment[0].info.frontend_host);
778 0 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
779 0 : *m=1.0/0.0; //infinity
780 0 : sprintf(path,"/History/Display/msysmon/%s-net/Maximum",equipment[0].info.frontend_host);
781 0 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
782 0 : delete m;
783 0 : }
784 :
785 :
786 : #ifdef HAVE_NVIDIA
787 : void BuildHostGPUPlot()
788 : {
789 : //Insert myself into the history
790 :
791 : char path[256];
792 : int status;
793 : int size;
794 : //5 vars per GPU
795 : int NVARS=5*HAVE_NVIDIA;
796 :
797 : /////////////////////////////////////////////////////
798 : // Setup variables to plot:
799 : /////////////////////////////////////////////////////
800 : size = 64; // String length in ODB
801 : sprintf(path,"/History/Display/msysmon/%s-GPU/Variables",equipment[0].info.frontend_host);
802 : {
803 : char vars[size*NVARS];
804 : memset(vars, 0, size*NVARS);
805 : for (int i=0; i<HAVE_NVIDIA; i++)
806 : {
807 : sprintf(vars+size*0+i*size*5,"%s:GPUT[%d]",equipment[0].name,i);
808 : sprintf(vars+size*1+i*size*5,"%s:GPUF[%d]",equipment[0].name,i);
809 : sprintf(vars+size*2+i*size*5,"%s:GPUP[%d]",equipment[0].name,i);
810 : sprintf(vars+size*3+i*size*5,"%s:GPUU[%d]",equipment[0].name,i);
811 : sprintf(vars+size*4+i*size*5,"%s:GPUM[%d]",equipment[0].name,i);
812 : }
813 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
814 : }
815 : assert(status == DB_SUCCESS);
816 :
817 : /////////////////////////////////////////////////////
818 : // Setup labels
819 : /////////////////////////////////////////////////////
820 : size = 32;
821 : sprintf(path,"/History/Display/msysmon/%s-GPU/Label",equipment[0].info.frontend_host);
822 : {
823 : char vars[size*NVARS];
824 : memset(vars, 0, size*NVARS);
825 : for (int i=0; i<HAVE_NVIDIA; i++)
826 : {
827 : sprintf(vars+size*0+i*size*5,"GPU %d Temperature (C)",i);
828 : sprintf(vars+size*1+i*size*5,"GPU %d FAN (%%)",i);
829 : sprintf(vars+size*2+i*size*5,"GPU %d Power (W)",i);
830 : sprintf(vars+size*3+i*size*5,"GPU %d Utilisation (%%)",i);
831 : sprintf(vars+size*4+i*size*5,"GPU %d Memory Usage (%%)",i);
832 : }
833 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
834 : }
835 : assert(status == DB_SUCCESS);
836 :
837 : /////////////////////////////////////////////////////
838 : // Setup colours:
839 : /////////////////////////////////////////////////////
840 : size = 32;
841 : sprintf(path,"/History/Display/msysmon/%s-GPU/Colour",equipment[0].info.frontend_host);
842 : {
843 : char vars[size*NVARS];
844 : memset(vars, 0, size*NVARS);
845 : for (int i=0; i<NVARS; i++)
846 : for (int j=0; j<HAVE_NVIDIA; j++)
847 : sprintf(vars+size*i+j*size*5,"%s",(colours[i%16]).c_str());
848 : status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
849 : }
850 : assert(status == DB_SUCCESS);
851 :
852 : /////////////////////////////////////////////////////
853 : // Setup time scale and range:
854 : /////////////////////////////////////////////////////
855 : sprintf(path,"/History/Display/msysmon/%s-GPU/Timescale",equipment[0].info.frontend_host);
856 : status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
857 : double *m=new double();
858 : *m=0.;
859 : sprintf(path,"/History/Display/msysmon/%s-GPU/Minimum",equipment[0].info.frontend_host);
860 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
861 : *m=100.;
862 : sprintf(path,"/History/Display/msysmon/%s-GPU/Maximum",equipment[0].info.frontend_host);
863 : status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
864 : delete m;
865 : }
866 : #endif
867 : void InitGPU();
868 0 : INT frontend_init()
869 : {
870 : int status;
871 0 : printf("frontend_init!\n");
872 :
873 0 : FILE* file = fopen(PROCSTATFILE, "r");
874 0 : if (file == NULL) {
875 0 : cm_msg(MERROR, frontend_name, "Cannot open " PROCSTATFILE);
876 0 : return FE_ERR_HW;
877 : }
878 : char buffer[256];
879 0 : int Ncpus = -1;
880 : do {
881 0 : Ncpus++;
882 0 : const char*s = fgets(buffer, 255, file);
883 0 : if (!s) // EOF
884 0 : break;
885 0 : } while (String_startsWith(buffer, "cpu"));
886 0 : fclose(file);
887 0 : cpuCount = MAX(Ncpus - 1, 1);
888 0 : printf("%d CPUs found\n",cpuCount);
889 : //Note, cpus[0] is a total for all CPUs
890 0 : for (int i = 0; i <= cpuCount; i++) {
891 0 : cpus.push_back(new CPUData);
892 : }
893 :
894 0 : file = fopen(PROCNETSTATFILE, "r");
895 0 : if (file == NULL) {
896 0 : cm_msg(MERROR, frontend_name, "Cannot open " PROCNETSTATFILE);
897 0 : return FE_ERR_HW;
898 : }
899 : do {
900 0 : if (!fgets(buffer, 255, file)) break;
901 0 : for (int i=0; i<255; i++)
902 : {
903 0 : if (!buffer[i]) break;
904 0 : if (buffer[i]==':')
905 : {
906 0 : NetStat* r=new NetStat;
907 0 : r->face=std::string(buffer,&buffer[i]);
908 0 : NetReceive.push_back(r);
909 :
910 0 : NetStat* t=new NetStat;
911 0 : t->face=std::string(buffer,&buffer[i]);
912 0 : NetTransmit.push_back(t);
913 :
914 0 : networkInterfaceCount++;
915 : }
916 : }
917 0 : } while (1);
918 0 : fclose(file);
919 0 : printf("%d network inferfaces found\n",networkInterfaceCount);
920 :
921 0 : ReadCPUData();
922 0 : ReadNetData();
923 0 : BuildHostHistoryPlot();
924 0 : BuildHostCPUPlot();
925 0 : BuildHostNetPlot();
926 :
927 : #ifdef HAVE_NVIDIA
928 : BuildHostGPUPlot();
929 : InitGPU();
930 : #endif
931 :
932 : #ifdef HAVE_LM_SENSORS
933 : if (!sensors)
934 : sensors= new LM_Sensors();
935 : sensors->BuildHostTemperaturePlot();
936 :
937 : #endif
938 :
939 : #ifdef RPC_JRPC
940 0 : status = cm_register_function(RPC_JRPC, rpc_callback);
941 0 : assert(status == SUCCESS);
942 : #endif
943 :
944 0 : return SUCCESS;
945 : }
946 :
947 : /*-- Frontend Exit -------------------------------------------------*/
948 :
949 0 : INT frontend_exit()
950 : {
951 0 : return SUCCESS;
952 : }
953 :
954 : /*-- Begin of Run --------------------------------------------------*/
955 :
956 0 : INT begin_of_run(INT run_number, char *error)
957 : {
958 0 : return SUCCESS;
959 : }
960 :
961 : /*-- End of Run ----------------------------------------------------*/
962 :
963 0 : INT end_of_run(INT run_number, char *error)
964 : {
965 0 : return SUCCESS;
966 : }
967 :
968 : /*-- Pause Run -----------------------------------------------------*/
969 :
970 0 : INT pause_run(INT run_number, char *error)
971 : {
972 0 : return SUCCESS;
973 : }
974 :
975 : /*-- Resume Run ----------------------------------------------------*/
976 :
977 0 : INT resume_run(INT run_number, char *error)
978 : {
979 0 : return SUCCESS;
980 : }
981 :
982 : /*-- Frontend Loop -------------------------------------------------*/
983 :
984 0 : INT frontend_loop()
985 : {
986 : /* if frontend_call_loop is true, this routine gets called when
987 : the frontend is idle or once between every event */
988 0 : ss_sleep(100); // don't eat all CPU
989 0 : return SUCCESS;
990 : }
991 :
992 : /*------------------------------------------------------------------*/
993 :
994 : /********************************************************************\
995 :
996 : Readout routines for different events
997 :
998 : \********************************************************************/
999 :
1000 0 : INT poll_event(INT source, INT count, BOOL test)
1001 : /* Polling routine for events. Returns TRUE if event
1002 : is available. If test equals TRUE, don't return. The test
1003 : flag is used to time the polling */
1004 : {
1005 0 : if (test) {
1006 0 : ss_sleep (count);
1007 : }
1008 0 : return (0);
1009 : }
1010 :
1011 : /*-- Interrupt configuration ---------------------------------------*/
1012 :
1013 0 : INT interrupt_configure(INT cmd, INT source, PTYPE adr)
1014 : {
1015 0 : printf("interrupt_configure!\n");
1016 :
1017 0 : switch(cmd)
1018 : {
1019 0 : case CMD_INTERRUPT_ENABLE:
1020 0 : break;
1021 0 : case CMD_INTERRUPT_DISABLE:
1022 0 : break;
1023 0 : case CMD_INTERRUPT_ATTACH:
1024 0 : break;
1025 0 : case CMD_INTERRUPT_DETACH:
1026 0 : break;
1027 : }
1028 0 : return SUCCESS;
1029 : }
1030 :
1031 0 : void ReadCPUData()
1032 : {
1033 : //Largely from htop: https://github.com/hishamhm/htop (GNU licence)
1034 0 : FILE* file = fopen(PROCSTATFILE, "r");
1035 0 : if (file == NULL) {
1036 0 : cm_msg(MERROR, frontend_name, "Cannot open " PROCSTATFILE);
1037 : }
1038 0 : for (int i = 0; i <= cpuCount; i++) {
1039 : char buffer[256];
1040 : int cpuid;
1041 : unsigned long long int ioWait, irq, softIrq, steal, guest, guestnice;
1042 : unsigned long long int systemalltime, idlealltime, totaltime, virtalltime;
1043 0 : ioWait = irq = softIrq = steal = guest = guestnice = 0;
1044 : // Dependending on your kernel version,
1045 : // 5, 7, 8 or 9 of these fields will be set.
1046 : // The rest will remain at zero.
1047 0 : const char*s = fgets(buffer, 255, file);
1048 0 : if (!s) // EOF
1049 0 : break;
1050 0 : if (i == 0)
1051 0 : sscanf(buffer, "cpu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu", &usertime, &nicetime, &systemtime, &idletime, &ioWait, &irq, &softIrq, &steal, &guest, &guestnice);
1052 : else {
1053 0 : sscanf(buffer, "cpu%4d %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu", &cpuid, &usertime, &nicetime, &systemtime, &idletime, &ioWait, &irq, &softIrq, &steal, &guest, &guestnice);
1054 0 : assert(cpuid == i - 1);
1055 : }
1056 : // Guest time is already accounted in usertime
1057 0 : usertime = usertime - guest;
1058 0 : nicetime = nicetime - guestnice;
1059 : // Fields existing on kernels >= 2.6
1060 : // (and RHEL's patched kernel 2.4...)
1061 0 : idlealltime = idletime + ioWait;
1062 0 : systemalltime = systemtime + irq + softIrq;
1063 0 : virtalltime = guest + guestnice;
1064 0 : totaltime = usertime + nicetime + systemalltime + idlealltime + steal + virtalltime;
1065 0 : CPUData* cpuData = cpus.at(i);
1066 0 : cpuData->userPeriod = usertime - cpuData->userTime;
1067 0 : cpuData->nicePeriod = nicetime - cpuData->niceTime;
1068 0 : cpuData->systemPeriod = systemtime - cpuData->systemTime;
1069 0 : cpuData->systemAllPeriod = systemalltime - cpuData->systemAllTime;
1070 0 : cpuData->idleAllPeriod = idlealltime - cpuData->idleAllTime;
1071 0 : cpuData->idlePeriod = idletime - cpuData->idleTime;
1072 0 : cpuData->ioWaitPeriod = ioWait - cpuData->ioWaitTime;
1073 0 : cpuData->irqPeriod = irq - cpuData->irqTime;
1074 0 : cpuData->softIrqPeriod = softIrq - cpuData->softIrqTime;
1075 0 : cpuData->stealPeriod = steal - cpuData->stealTime;
1076 0 : cpuData->guestPeriod = virtalltime - cpuData->guestTime;
1077 0 : cpuData->totalPeriod = totaltime - cpuData->totalTime;
1078 0 : cpuData->userTime = usertime;
1079 0 : cpuData->niceTime = nicetime;
1080 0 : cpuData->systemTime = systemtime;
1081 0 : cpuData->systemAllTime = systemalltime;
1082 0 : cpuData->idleAllTime = idlealltime;
1083 0 : cpuData->idleTime = idletime;
1084 0 : cpuData->ioWaitTime = ioWait;
1085 0 : cpuData->irqTime = irq;
1086 0 : cpuData->softIrqTime = softIrq;
1087 0 : cpuData->stealTime = steal;
1088 0 : cpuData->guestTime = virtalltime;
1089 0 : cpuData->totalTime = totaltime;
1090 : }
1091 0 : fclose(file);
1092 : //end htop code
1093 0 : }
1094 : #include <sys/time.h>
1095 : timeval tv, old_tv, new_tv;
1096 0 : void ReadNetData()
1097 : {
1098 0 : FILE* file = fopen(PROCNETSTATFILE,"r");
1099 0 : if (file == NULL) {
1100 0 : cm_msg(MERROR, frontend_name, "Cannot open " PROCNETSTATFILE);
1101 : }
1102 0 : gettimeofday(&new_tv, NULL);
1103 0 : timersub(&new_tv, &old_tv, &tv);
1104 : //Note, there are two title lines (hence +2)
1105 0 : const int title_lines=2;
1106 0 : for (int i = 0; i < networkInterfaceCount+title_lines; i++) {
1107 : char buffer[256];
1108 : char InterfaceName[20];
1109 : unsigned long int rbytes, rpackets, rerrs, rdrop, rfifo, rframe, rcompressed, rmulticast;
1110 : unsigned long int sbytes, spackets, serrs, sdrop, sfifo, sframe, scompressed, smulticast;
1111 : // Dependending on your kernel version,
1112 : // 5, 7, 8 or 9 of these fields will be set.
1113 : // The rest will remain at zero.
1114 0 : const char*s = fgets(buffer, 255, file);
1115 0 : if (!s) // EOF
1116 0 : break;
1117 0 : if (i < 2)
1118 0 : continue; //Title lines
1119 : else
1120 0 : sscanf(buffer, "%[^:]: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",InterfaceName, &rbytes, &rpackets, &rerrs, &rdrop, &rfifo, &rframe, &rcompressed, &rmulticast,&sbytes, &spackets, &serrs, &sdrop, &sfifo, &sframe, &scompressed, &smulticast);
1121 : #ifdef FE_DEBUG
1122 : printf("--------------------Parsing line %d from " PROCNETSTATFILE "---------------------\n",i);
1123 : printf("Intput: %s\n",buffer);
1124 : printf("Output: %s: %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu\n\n",InterfaceName, rbytes, rpackets, rerrs, rdrop, rfifo, rframe, rcompressed, rmulticast,sbytes, spackets, serrs, sdrop, sfifo, sframe, scompressed, smulticast);
1125 : printf("-------------------------------------------------------------------------------\n");
1126 : #endif
1127 0 : NetStat* RData = NetReceive.at(i-title_lines);
1128 0 : NetStat* SData = NetTransmit.at(i-title_lines);
1129 :
1130 0 : RData->bytesPeriod =rbytes-RData->bytes;
1131 0 : RData->packetsPeriod =rpackets-RData->packets;
1132 0 : RData->errsPeriod =rerrs-RData->errs;
1133 0 : RData->dropPeriod =rdrop-RData->drop;
1134 0 : RData->fifoPeriod =rfifo-RData->fifo;
1135 0 : RData->framePeriod =rframe-RData->frame;
1136 0 : RData->compressedPeriod =rcompressed-RData->compressed;
1137 0 : RData->multicastPeriod =rmulticast-RData->multicast;
1138 :
1139 0 : RData->bytes =rbytes;
1140 0 : RData->packets =rpackets;
1141 0 : RData->errs =rerrs;
1142 0 : RData->drop =rdrop;
1143 0 : RData->fifo =rfifo;
1144 0 : RData->frame =rframe;
1145 0 : RData->compressed =rcompressed;
1146 0 : RData->multicast =rmulticast;
1147 0 : RData->tv =tv;
1148 :
1149 0 : SData->bytesPeriod =sbytes-SData->bytes;
1150 0 : SData->packetsPeriod =spackets-SData->packets;
1151 0 : SData->errsPeriod =serrs-SData->errs;
1152 0 : SData->dropPeriod =sdrop-SData->drop;
1153 0 : SData->fifoPeriod =sfifo-SData->fifo;
1154 0 : SData->framePeriod =sframe-SData->frame;
1155 0 : SData->compressedPeriod =scompressed-SData->compressed;
1156 0 : SData->multicastPeriod =smulticast-SData->multicast;
1157 :
1158 0 : SData->bytes =sbytes;
1159 0 : SData->packets =spackets;
1160 0 : SData->errs =serrs;
1161 0 : SData->drop =sdrop;
1162 0 : SData->fifo =sfifo;
1163 0 : SData->frame =sframe;
1164 0 : SData->compressed =scompressed;
1165 0 : SData->multicast =smulticast;
1166 0 : SData->tv =tv;
1167 : }
1168 0 : old_tv = new_tv;
1169 0 : fclose(file);
1170 0 : }
1171 : #if HAVE_NVIDIA
1172 :
1173 : // Build the set of device features
1174 : static void get_device_features(GPU* dev)
1175 : {
1176 : if(nvmlDeviceGetTemperature(dev->handle, NVML_TEMPERATURE_GPU,
1177 : &dev->temperature) == NVML_SUCCESS) {
1178 : dev->feature_support |= TEMPERATURE;
1179 : }
1180 :
1181 : if(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory) == NVML_SUCCESS) {
1182 : dev->feature_support |= MEMORY_INFO;
1183 : }
1184 :
1185 : if(nvmlDeviceGetPowerUsage(dev->handle, &dev->power_usage) == NVML_SUCCESS) {
1186 : dev->feature_support |= POWER_USAGE;
1187 : }
1188 :
1189 : if(nvmlDeviceGetFanSpeed(dev->handle, &dev->fan) == NVML_SUCCESS) {
1190 : dev->feature_support |= FAN_INFO;
1191 : }
1192 :
1193 : if(nvmlDeviceGetUtilizationRates(dev->handle, &dev->util) == NVML_SUCCESS) {
1194 : dev->feature_support |= UTILIZATION_INFO;
1195 : }
1196 : }
1197 :
1198 : void InitGPU()
1199 : {
1200 : printf("Initialising NVIDIA monitoring\n");
1201 : // No point in continuing if we can't even initialize the library.
1202 : if(NVML_TRY(nvmlInit()))
1203 : exit(1);
1204 : NVML_TRY(nvmlDeviceGetCount(&nGPUs));
1205 :
1206 : for(unsigned i = 0; i < nGPUs; ++i) {
1207 : GPU* dev=new GPU();
1208 : GPUs.push_back(dev);
1209 :
1210 : dev->index = i;
1211 :
1212 : NVML_TRY(nvmlDeviceGetHandleByIndex(i, &dev->handle));
1213 :
1214 : NVML_TRY(nvmlDeviceGetName(dev->handle, dev->name, sizeof(dev->name)));
1215 : NVML_TRY(nvmlDeviceGetSerial(dev->handle, dev->serial, sizeof(dev->serial)));
1216 : NVML_TRY(nvmlDeviceGetUUID(dev->handle, dev->uuid, sizeof(dev->uuid)));
1217 :
1218 : NVML_TRY(nvmlDeviceGetPciInfo(dev->handle, &dev->pci));
1219 : NVML_TRY(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory));
1220 :
1221 : unsigned long long event_types;
1222 : NVML_TRY(nvmlEventSetCreate(&dev->event_set));
1223 : if(0 == NVML_TRY(nvmlDeviceGetSupportedEventTypes(dev->handle, &event_types))) {
1224 : NVML_TRY(nvmlDeviceRegisterEvents(dev->handle, event_types, dev->event_set));
1225 : } else {
1226 : dev->event_set = NULL;
1227 : }
1228 :
1229 : get_device_features(dev);
1230 :
1231 : }
1232 : printf("OK\n");
1233 : }
1234 :
1235 : void ReadGPUData()
1236 : {
1237 : unsigned i;
1238 :
1239 : for(i = 0; i < nGPUs; ++i) {
1240 : GPU* dev = GPUs[i];
1241 :
1242 : if(dev->feature_support & MEMORY_INFO) {
1243 : NVML_TRY(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory));
1244 : }
1245 :
1246 : if(dev->feature_support & TEMPERATURE) {
1247 : NVML_TRY(nvmlDeviceGetTemperature(dev->handle, NVML_TEMPERATURE_GPU,
1248 : &dev->temperature));
1249 : }
1250 :
1251 : if(dev->feature_support & POWER_USAGE) {
1252 : NVML_TRY(nvmlDeviceGetPowerUsage(dev->handle, &dev->power_usage));
1253 : }
1254 :
1255 : if(dev->feature_support & UTILIZATION_INFO) {
1256 : NVML_TRY(nvmlDeviceGetUtilizationRates(dev->handle, &dev->util));
1257 : }
1258 :
1259 : if(dev->feature_support & FAN_INFO) {
1260 : NVML_TRY(nvmlDeviceGetFanSpeed(dev->handle, &dev->fan));
1261 : }
1262 :
1263 : if(dev->event_set != NULL) {
1264 : nvmlEventData_t data;
1265 :
1266 : NVML_TRY(nvmlEventSetWait(dev->event_set, &data, 1));
1267 :
1268 : }
1269 : }
1270 :
1271 : }
1272 : #endif
1273 : /*-- Event readout -------------------------------------------------*/
#include <fstream>
#include <vector>
1275 0 : int read_system_load(char *pevent, int off)
1276 : {
1277 0 : bk_init32(pevent);
1278 :
1279 0 : ReadCPUData();
1280 :
1281 0 : ReadNetData();
1282 :
1283 : //Calculate load:
1284 : // The classic layout of CPU variables would be to log a bank for 4 doubles for each CPU core. This will not scale with very high core counts in the future
1285 : #ifdef CLASSIC_CPU_VARS
1286 : double CPULoadTotal[4]; //nice, user, system, total
1287 : for (int j=0; j<4; j++)
1288 : CPULoadTotal[j]=0;
1289 :
1290 : double CPULoad[4]; //nice, user, system, total
1291 : for (int i = 0; i <= cpuCount; i++) {
1292 : CPUData* cpuData = (cpus[i]);
1293 : double total = (double) ( cpuData->totalPeriod == 0 ? 1 : cpuData->totalPeriod);
1294 : CPULoad[0] = cpuData->nicePeriod / total * 100.0;
1295 : CPULoad[1] = cpuData->userPeriod / total * 100.0;
1296 : CPULoad[2] = cpuData->systemPeriod / total * 100.0;
1297 : CPULoad[3]=CPULoad[0]+CPULoad[1]+CPULoad[2];
1298 :
1299 : for (int j=0; j<4; j++)
1300 : {
1301 : CPULoadTotal[j]+=CPULoad[j];
1302 : }
1303 :
1304 : // This is a little long for just setting a bank name, but it
1305 : // avoids format-truncation warnings and supports machines with upto
1306 : // 1000 CPUs... another case can be put in when we reach that new limit
1307 : char name[5]="LOAD";
1308 : //i==0 is a total for ALL Cpus
1309 : if (i!=0)
1310 : {
1311 : int h='0'+i/100;
1312 : int t='0'+(i%100)/10;
1313 : int u='0'+i%10;
1314 : if (i<10)
1315 : snprintf(name,5,"CPU%c",u);
1316 : else if (i<100)
1317 : snprintf(name,5,"CP%c%c",t,u);
1318 : else if (i<1000)
1319 : snprintf(name,5,"C%c%c%c",h,t,u);
1320 : else
1321 : cm_msg(MERROR, frontend_name, "Cannot handle a system with more than 1000 CPUs");
1322 : }
1323 : double* a;
1324 : bk_create(pevent, name, TID_DOUBLE, (void**)&a);
1325 : for (int k=0; k<4; k++)
1326 : {
1327 : *a=CPULoad[k];
1328 : a++;
1329 : }
1330 : bk_close(pevent,a);
1331 :
1332 : }
1333 : #else
1334 : //Instead of the 'Classic' variables. Log 4 banks with N doubles, where N is the number of CPUs
1335 0 : double CPUN[cpuCount]; // % nice time
1336 0 : double CPUU[cpuCount]; // % user time
1337 0 : double CPUS[cpuCount]; // % system time
1338 0 : double CPUA[cpuCount]; // % total CPU time
1339 0 : for (int i = 0; i <= cpuCount; i++) {
1340 0 : CPUData* cpuData = (cpus[i]);
1341 0 : double total = (double) ( cpuData->totalPeriod == 0 ? 1 : cpuData->totalPeriod);
1342 0 : CPUN[i] = cpuData->nicePeriod / total * 100.0;
1343 0 : CPUU[i] = cpuData->userPeriod / total * 100.0;
1344 0 : CPUS[i] = cpuData->systemPeriod / total * 100.0;
1345 0 : CPUA[i] = CPUN[i]+CPUU[i]+CPUS[i];
1346 : }
1347 : double* c;
1348 0 : bk_create(pevent, "CPUN", TID_DOUBLE, (void**)&c);
1349 0 : for (int k=0; k<cpuCount; k++)
1350 : {
1351 0 : *c=CPUN[k];
1352 0 : c++;
1353 : }
1354 0 : bk_close(pevent,c);
1355 0 : bk_create(pevent, "CPUU", TID_DOUBLE, (void**)&c);
1356 0 : for (int k=0; k<cpuCount; k++)
1357 : {
1358 0 : *c=CPUU[k];
1359 0 : c++;
1360 : }
1361 0 : bk_close(pevent,c);
1362 :
1363 0 : bk_create(pevent, "CPUS", TID_DOUBLE, (void**)&c);
1364 0 : for (int k=0; k<cpuCount; k++)
1365 : {
1366 0 : *c=CPUS[k];
1367 0 : c++;
1368 : }
1369 0 : bk_close(pevent,c);
1370 :
1371 0 : bk_create(pevent, "CPUA", TID_DOUBLE, (void**)&c);
1372 0 : for (int k=0; k<cpuCount; k++)
1373 : {
1374 0 : *c=CPUA[k];
1375 0 : c++;
1376 : }
1377 0 : bk_close(pevent,c);
1378 0 : double TotalLoad[4]={0};
1379 0 : for (int k=0; k<cpuCount; k++)
1380 : {
1381 0 : TotalLoad[0]+=CPUN[k];
1382 0 : TotalLoad[1]+=CPUU[k];
1383 0 : TotalLoad[2]+=CPUS[k];
1384 0 : TotalLoad[3]+=CPUA[k];
1385 : }
1386 0 : bk_create(pevent, "LOAD", TID_DOUBLE, (void**)&c);
1387 0 : for (int k=0; k<4; k++)
1388 : {
1389 0 : *c=TotalLoad[k]/(double)cpuCount;
1390 0 : c++;
1391 : }
1392 0 : bk_close(pevent,c);
1393 : #endif
1394 :
1395 :
1396 : //Read and log system temperatures
1397 : #ifdef HAVE_LM_SENSORS
1398 : pevent = sensors->ReadAndLogSensors(pevent);
1399 : #endif
1400 :
1401 : double DataRecieve;
1402 : double DataTransmit;
1403 :
1404 : double* a;
1405 0 : char name[5]="NETR";
1406 0 : bk_create(pevent, name, TID_DOUBLE, (void**)&a);
1407 0 : for (int i=0; i<networkInterfaceCount; i++)
1408 : {
1409 0 : NetStat* RData = NetReceive.at(i);
1410 0 : double Rdt=RData->tv.tv_sec + (RData->tv.tv_usec * 1e-6);
1411 0 : DataRecieve=(double)(RData->bytesPeriod)*8./1024./1024. / Rdt; //Megabits / s
1412 0 : *a=DataRecieve;
1413 0 : a++;
1414 : }
1415 0 : bk_close(pevent,a);
1416 :
1417 : double* b;
1418 0 : sprintf(name,"NETT");
1419 0 : bk_create(pevent, name, TID_DOUBLE, (void**)&b);
1420 0 : for (int i=0; i<networkInterfaceCount; i++)
1421 : {
1422 0 : NetStat* SData = NetTransmit.at(i);
1423 0 : double Sdt=SData->tv.tv_sec + (SData->tv.tv_usec * 1e-6);
1424 0 : DataTransmit=(double)(SData->bytesPeriod)*8./1024./1024. / Sdt; //Megabits /s
1425 0 : *b=DataTransmit;
1426 0 : b++;
1427 : }
1428 0 : bk_close(pevent,b);
1429 :
1430 : //Again from htop:
1431 : unsigned long long int totalMem;
1432 : unsigned long long int usedMem;
1433 : unsigned long long int freeMem;
1434 : unsigned long long int sharedMem;
1435 : unsigned long long int buffersMem;
1436 : unsigned long long int cachedMem;
1437 : unsigned long long int totalSwap;
1438 : unsigned long long int usedSwap;
1439 : unsigned long long int freeSwap;
1440 0 : FILE* file = fopen(PROCMEMINFOFILE, "r");
1441 0 : if (file == NULL) {
1442 0 : cm_msg(MERROR, frontend_name, "Cannot open " PROCMEMINFOFILE);
1443 : }
1444 : char buffer[128];
1445 0 : while (fgets(buffer, 128, file)) {
1446 0 : switch (buffer[0]) {
1447 0 : case 'M':
1448 0 : if (String_startsWith(buffer, "MemTotal:"))
1449 0 : sscanf(buffer, "MemTotal: %32llu kB", &totalMem);
1450 0 : else if (String_startsWith(buffer, "MemFree:"))
1451 0 : sscanf(buffer, "MemFree: %32llu kB", &freeMem);
1452 0 : else if (String_startsWith(buffer, "MemShared:"))
1453 0 : sscanf(buffer, "MemShared: %32llu kB", &sharedMem);
1454 0 : break;
1455 0 : case 'B':
1456 0 : if (String_startsWith(buffer, "Buffers:"))
1457 0 : sscanf(buffer, "Buffers: %32llu kB", &buffersMem);
1458 0 : break;
1459 0 : case 'C':
1460 0 : if (String_startsWith(buffer, "Cached:"))
1461 0 : sscanf(buffer, "Cached: %32llu kB", &cachedMem);
1462 0 : break;
1463 0 : case 'S':
1464 0 : if (String_startsWith(buffer, "SwapTotal:"))
1465 0 : sscanf(buffer, "SwapTotal: %32llu kB", &totalSwap);
1466 0 : if (String_startsWith(buffer, "SwapFree:"))
1467 0 : sscanf(buffer, "SwapFree: %32llu kB", &freeSwap);
1468 0 : break;
1469 : }
1470 : }
1471 0 : fclose(file);
1472 : //end htop code
1473 :
1474 0 : usedMem = totalMem - cachedMem- freeMem;
1475 0 : usedSwap = totalSwap - freeSwap;
1476 0 : double mem_percent=100.*(double)usedMem/(double)totalMem;
1477 0 : double swap_percent=100;
1478 0 : if (totalSwap) //If there is an swap space, calculate... else always say 100% used
1479 0 : swap_percent=100*(double)usedSwap/(double)totalSwap;
1480 : #ifdef FE_DEBUG
1481 : printf("-----------------------------\n");
1482 : printf("MemUsed: %lld kB (%lld GB) (%.2f%%)\n",usedMem,usedMem/1024/1024,mem_percent);
1483 : printf("SwapUsed: %lld kB (%lld GB) (%.2f%%)\n",usedSwap,usedSwap/1024/1024,swap_percent);
1484 : printf("-----------------------------\n");
1485 : #endif
1486 : double* m;
1487 0 : bk_create(pevent, "MEMP", TID_DOUBLE, (void**)&m);
1488 0 : *m=mem_percent;
1489 0 : bk_close(pevent,m+1);
1490 :
1491 0 : if (totalSwap) //Only log SWAP if there is any
1492 : {
1493 0 : bk_create(pevent, "SWAP", TID_DOUBLE, (void**)&m);
1494 0 : *m=swap_percent;
1495 0 : bk_close(pevent,m+1);
1496 : }
1497 :
1498 : #if HAVE_NVIDIA
1499 : ReadGPUData();
1500 : int* t;
1501 :
1502 : //GPU Temperature
1503 : bk_create(pevent, "GPUT", TID_INT, (void**)&t);
1504 : for (unsigned i=0; i<nGPUs; i++)
1505 : {
1506 : *t=GPUs[i]->temperature;
1507 : t++;
1508 : }
1509 : bk_close(pevent,t);
1510 :
1511 : //GPU Fan speed
1512 : bk_create(pevent, "GPUF", TID_INT, (void**)&t);
1513 : for (unsigned i=0; i<nGPUs; i++)
1514 : {
1515 : *t=GPUs[i]->fan;
1516 : t++;
1517 : }
1518 : bk_close(pevent,t);
1519 :
1520 : //GPU Power (W)
1521 : bk_create(pevent, "GPUP", TID_INT, (void**)&t);
1522 : for (unsigned i=0; i<nGPUs; i++)
1523 : {
1524 : *t=GPUs[i]->power_usage/1000;
1525 : t++;
1526 : }
1527 : bk_close(pevent,t);
1528 :
1529 : //GPU Utilisiation (%)
1530 : bk_create(pevent, "GPUU", TID_INT, (void**)&t);
1531 : for (unsigned i=0; i<nGPUs; i++)
1532 : {
1533 : *t=GPUs[i]->util.gpu;
1534 : t++;
1535 : }
1536 : bk_close(pevent,t);
1537 :
1538 : //GPU Memory Utilisiation (%)
1539 : bk_create(pevent, "GPUM", TID_DOUBLE, (void**)&m);
1540 : for (unsigned i=0; i<nGPUs; i++)
1541 : {
1542 : *m=100.*(double)GPUs[i]->memory.used/(double)GPUs[i]->memory.total;
1543 : m++;
1544 : }
1545 : bk_close(pevent,m);
1546 :
1547 : #endif
1548 :
1549 0 : return bk_size(pevent);
1550 0 : }
1551 :
1552 : /* emacs
1553 : * Local Variables:
1554 : * tab-width: 8
1555 : * c-basic-offset: 3
1556 : * indent-tabs-mode: nil
1557 : * End:
1558 : */
|