MIDAS
Loading...
Searching...
No Matches
msysmon.cxx
Go to the documentation of this file.
1/*******************************************************************\
2
3 Name: msysmon.cxx
4 Created by: J.T.K.McKenna
5
6 Contents: Front end for monitoring CPU and Memory usage with MIDAS
7 *
8 * Parse /proc/stat and /proc/meminfo like htop
9 *
10 * Equipment names are assigned by the local hostname, so run an
11 * instance for each system you want to monitor... eg:
12 * ssh mydaq msysmon
13 * ssh myvme msysmon
14 * ssh mypi msysmon
15
16\********************************************************************/
17
18#ifndef PROCSTATFILE
19#define PROCSTATFILE "/proc/stat"
20#endif
21
22#ifndef PROCMEMINFOFILE
23#define PROCMEMINFOFILE "/proc/meminfo"
24#endif
25
26#ifndef PROCNETSTATFILE
27#define PROCNETSTATFILE "/proc/net/dev"
28#endif
29
// True when the C string `s` begins with the C string `match`.
// Only the first strlen(match) bytes of `s` are compared, so a line that does
// not start with `match` is rejected without searching the rest of it.
// `match` is evaluated twice; pass a plain string (e.g. a literal), not an
// expression with side effects.
#define String_startsWith(s, match) (strncmp((s), (match), strlen(match)) == 0)
31
32#undef NDEBUG // midas required assert() to be always enabled
33
34#include <stdio.h>
35#include <stdlib.h>
36#include <unistd.h>
37#include <stdint.h>
38#include <sys/time.h>
39#include <sys/types.h>
40#include <sys/stat.h>
41#include <fcntl.h>
42#include <errno.h>
43#include <math.h>
44#include <ctype.h>
45#include <assert.h>
46#include <string.h>
47#include <iostream>
48#include "midas.h"
49#include "mfe.h"
50#include "mstrlcpy.h"
51
52#ifdef HAVE_LM_SENSORS
53#include <sensors/sensors.h>
54#endif
55
56/*-- Globals -------------------------------------------------------*/
57
58/* The frontend name (client name) as seen by other MIDAS clients */
59#ifdef HAVE_LM_SENSORS
60const char *frontend_name = "msysmon-lmsensors";
61#else
62const char *frontend_name = "msysmon";
63#endif
64/* The frontend file name, don't change it */
66
67/* frontend_loop is called periodically if this variable is TRUE */
69
70/* a frontend status page is displayed with this frequency in ms */
71//INT display_period = 3000;
73
74/* maximum event size produced by this frontend */
75INT max_event_size = 4*1024*1024;
77
78/* buffer size to hold events */
79INT event_buffer_size = 10*1024*1024;
80
81/*-- Function declarations -----------------------------------------*/
82
85INT begin_of_run(INT run_number, char *error);
86INT end_of_run(INT run_number, char *error);
87INT pause_run(INT run_number, char *error);
88INT resume_run(INT run_number, char *error);
91INT interrupt_configure(INT cmd, INT source, PTYPE adr);
92
93int read_system_load(char *pevent, int off);
94
95/*-- Equipment list ------------------------------------------------*/
96
97#define EVID_MONITOR 63
98
100
102
103 { "${HOSTNAME}_msysmon", /* equipment name */ {
104 EVID_MONITOR, 0, /* event ID, trigger mask */
105 "SYSTEM", /* event buffer */
106 EQ_PERIODIC, /* equipment type */
107 0, /* event source */
108 "MIDAS", /* format */
109 TRUE, /* enabled */
110 RO_ALWAYS | RO_ODB, /* Read when running */
111 10000, /* poll every so milliseconds */
112 0, /* stop run after this event limit */
113 0, /* number of sub events */
114 1, /* history period */
115 "", "", ""
116 },
117 read_system_load,/* readout routine */
118 },
119 { "" }
120};
121
122// Not all items in struct are logged, but all are calculated
123// leaving options to log more if we want to...
124typedef struct CPUData_ {
125 unsigned long long int totalTime;
126 unsigned long long int userTime;
127 unsigned long long int systemTime;
128 unsigned long long int systemAllTime;
129 unsigned long long int idleAllTime;
130 unsigned long long int idleTime;
131 unsigned long long int niceTime;
132 unsigned long long int ioWaitTime;
133 unsigned long long int irqTime;
134 unsigned long long int softIrqTime;
135 unsigned long long int stealTime;
136 unsigned long long int guestTime;
137 unsigned long long int totalPeriod;
138 unsigned long long int userPeriod;
139 unsigned long long int systemPeriod;
140 unsigned long long int systemAllPeriod;
141 unsigned long long int idleAllPeriod;
142 unsigned long long int idlePeriod;
143 unsigned long long int nicePeriod;
144 unsigned long long int ioWaitPeriod;
145 unsigned long long int irqPeriod;
146 unsigned long long int softIrqPeriod;
147 unsigned long long int stealPeriod;
148 unsigned long long int guestPeriod;
151std::vector<CPUData*> cpus;
152void ReadCPUData();
153unsigned long long int usertime, nicetime, systemtime, idletime;
154
155
157{
158 std::string face;
159 unsigned long int bytes;
160 unsigned long int packets;
161 unsigned long int errs;
162 unsigned long int drop;
163 unsigned long int fifo;
164 unsigned long int frame;
165 unsigned long int compressed;
166 unsigned long int multicast;
167 unsigned long int bytesPeriod;
168 unsigned long int packetsPeriod;
169 unsigned long int errsPeriod;
170 unsigned long int dropPeriod;
171 unsigned long int fifoPeriod;
172 unsigned long int framePeriod;
173 unsigned long int compressedPeriod;
174 unsigned long int multicastPeriod;
175 timeval tv; //Time of these integrated values
176};
178std::vector<NetStat*> NetReceive;
179std::vector<NetStat*> NetTransmit;
180void ReadNetData();
181
182
183#ifdef HAVE_NVIDIA
184#include "nvml.h"
185
// Capability flags for one GPU. Every enumerator is a distinct single bit so
// that several can be OR-ed together; GPU::feature_support holds the result.
// get_device_features() sets TEMPERATURE, MEMORY_INFO, POWER_USAGE, FAN_INFO
// and UTILIZATION_INFO when the corresponding NVML query succeeds.
186enum feature {
187 TEMPERATURE = 1 << 0,
188 COMPUTE_MODE = 1 << 1,
189 POWER_USAGE = 1 << 2,
190 MEMORY_INFO = 1 << 3,
191 CLOCK_INFO = 1 << 4,
192 FAN_INFO = 1 << 5,
193 UTILIZATION_INFO = 1 << 6
194};
195struct GPU {
196 unsigned index;
197
198 nvmlDevice_t handle;
199
204 // Current device resource utilization rates (as percentages)
206
207 // In Celsius
208 unsigned temperature;
209
210 // In milliwatts
211 unsigned power_usage;
212
213 // Maximum clock speeds, in MHz
215
216 // Fan speed, percentage
217 unsigned fan;
218
222
223 // Bitmask of enum feature
224 unsigned feature_support;
225};
226unsigned nGPUs=HAVE_NVIDIA;
227std::vector<GPU*> GPUs;
228
229// Return string representation of return code
230// Strings are directly from NVML documentation
231
233{
234 switch(ret) {
235 case NVML_SUCCESS:
236 return "The operation was successful";
238 return "was not first initialized with nvmlInit()";
240 return "A supplied argument is invalid";
242 return "The requested operation is not available on target device";
244 return "The current user does not have permission for operation";
246 return"Deprecated: Multiple initializations are now allowed through ref counting";
248 return "A query to find an object was unsuccessful";
250 return "An input argument is not large enough";
252 return "A device’s external power cables are not properly attached";
254 return "NVIDIA driver is not loaded";
256 return "User provided timeout passed";
258 return "NVIDIA Kernel detected an interrupt issue with a GPU";
260 return "NVML Shared Library couldn’t be found or loaded";
262 return"Local version of NVML doesn’t implement this function";
264 return "infoROM is corrupted";
266 return "The GPU has fallen off the bus or has otherwise become inaccessible.";
268 return "The GPU requires a reset before it can be used again";
270 return "The GPU control device has been blocked by the operating system/cgroups.";
272 return "RM detects a driver/library version mismatch.";
274 return "An operation cannot be performed because the GPU is currently in use.";
276 return "Insufficient memory.";
278 return "No data.";
280 return "The requested vgpu operation is not available on target device, becasue ECC is enabled.";
282 return "An internal driver error occurred";
283 }
284
285 return "Unknown error";
286}
287
288
289// Simple wrapper function to remove boiler plate code of checking
290// NVML API return codes.
291//
292// Returns non-zero on error, 0 otherwise
293static inline int nvml_try(nvmlReturn_t ret, const char* fn)
294{
295 // We ignore the TIMEOUT error, as it simply indicates that
296 // no events (errors) were triggered in the given interval.
297 if(ret != NVML_SUCCESS && ret != NVML_ERROR_TIMEOUT) {
298 fprintf(stderr, "%s: %s: %s\n", fn, nvml_error_code_string(ret),
299 nvmlErrorString(ret));
300 return 1;
301 }
302
303 return 0;
304}
305
306#define NVML_TRY(code) nvml_try(code, #code)
307
308#endif
309
310//Cycle through these 16 colours when installing History graphs
// Each entry is a "#RRGGBB" hex string. The plot builders in this file pick
// the colour of trace i as colours[i%16], so a panel with more than 16 traces
// starts reusing colours from the beginning of the table.
311std::string colours[16]={
312 "#00AAFF", "#FF9000", "#FF00A0", "#00C030",
313 "#A0C0D0", "#D0A060", "#C04010", "#807060",
314 "#F0C000", "#2090A0", "#D040D0", "#90B000",
315 "#B0B040", "#B0B0FF", "#FFA0A0", "#A0FFA0"};
316
317
318#ifdef HAVE_LM_SENSORS
319class LM_Sensors
320{
321 private:
322 class MySensor
323 {
324 private:
325 const std::string SensorName;
326 const std::string FeatureName;
327 const sensors_chip_name* cn;
328 const int number;
329 public:
330 MySensor(const std::string _SensorName, const std::string _name, const sensors_chip_name* _cn, int _number):
332 {
333 }
334 double GetValue()
335 {
336 double value;
337 sensors_get_value(this->cn,this->number,&value);
338 return value;
339 }
340 std::string GetFullName(size_t limit)
341 {
342 std::string fullname = SensorName + "(" + FeatureName + ")";
343 if (fullname.size()> limit)
344 return fullname.substr(0,limit-1);
345 return fullname;
346 }
347 };
348 int status;
349 std::vector<MySensor*> Temperatures;
350 std::vector<MySensor*> Fans;
351
352 public:
353 LM_Sensors()
354 {
355 //FILE* = fopen("");
357 if (status!=0)
358 {
359 printf("Issue with sensors\n");
360 exit(status);
361 }
362 int nr = 0;
364 {
365 int fnr = 0;
366 while (const sensors_feature * cf = sensors_get_features(cn,&fnr))
367 {
368 int sfnr = 0;
369 //std::cout << sensors_get_label(cn,cf) <<"\t";// <<std::endl;
371 {
372 //For more sensor subfeature types, see full list:
373 //https://github.com/lm-sensors/lm-sensors/blob/master/lib/sensors.h
375 {
376 Temperatures.push_back(
377 new MySensor(
379 scf->name,
380 cn,
381 scf->number)
382 );
383 }
384 if (scf->type == SENSORS_SUBFEATURE_FAN_INPUT )
385 {
386 Fans.push_back(
387 new MySensor(
389 scf->name,
390 cn,
391 scf->number)
392 );
393 }
394 }
395 }
396 }
397 }
398
400 {
401 //Insert per Temperature monitor graphs into the history
402 int status, size;
403 char path[256];
404 int NVARS=Temperatures.size();
406 // Setup variables to plot:
408 size = 64;
409 sprintf(path,"/History/Display/msysmon/%s-Temperature/Variables",equipment[0].info.frontend_host);
410 {
411 char vars[size*NVARS];
412 memset(vars, 0, size*NVARS);
413 for (int i=0; i<NVARS; i++)
414 {
415 sprintf(vars+size*i,"%s/TEMP:TEMP[%d]",equipment[0].name,i);
416 }
417 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
418 }
419 assert(status == DB_SUCCESS);
420
422 // Setup labels
424 size = 32;
425 sprintf(path,"/History/Display/msysmon/%s-Temperature/Label",equipment[0].info.frontend_host);
426 {
427 char vars[size*NVARS];
428 memset(vars, 0, size*NVARS);
429 for (int i=0; i<NVARS; i++)
430 sprintf(vars+size*i,Temperatures.at(i)->GetFullName(size).c_str(),i+1);
431 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
432 }
433 assert(status == DB_SUCCESS);
434
436 // Setup colours:
438 size = 32;
439 sprintf(path,"/History/Display/msysmon/%s-Temperature/Colour",equipment[0].info.frontend_host);
440 {
441 char vars[size*NVARS];
442 memset(vars, 0, size*NVARS);
443 for (int i=0; i<NVARS; i++)
444 sprintf(vars+size*i,"%s",(colours[i%16]).c_str());
445 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
446 }
447 assert(status == DB_SUCCESS);
448
450 // Setup time scale and range:
452 sprintf(path,"/History/Display/msysmon/%s-Temperature/Timescale",equipment[0].info.frontend_host);
453 status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
454 double *m=new double();
455 *m=0.;
456 sprintf(path,"/History/Display/msysmon/%s-Temperature/Minimum",equipment[0].info.frontend_host);
457 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
458 *m=100.;
459 sprintf(path,"/History/Display/msysmon/%s-Temperature/Maximum",equipment[0].info.frontend_host);
460 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
461 delete m;
462 }
463
464 char* ReadAndLogSensors(char* pevent)
465 {
466 //If sensors_init failed, do nothing
467 if (status!=0)
468 return pevent;
469
470 double* v;
471
472 if (Temperatures.size())
473 {
474 bk_create(pevent, "TEMP", TID_DOUBLE, (void**)&v);
475 for ( MySensor* s: Temperatures)
476 {
477 *v = s->GetValue();
478 v++;
479 }
480 bk_close(pevent,v);
481 }
482 if (Fans.size())
483 {
484 bk_create(pevent, "FANS", TID_DOUBLE, (void**)&v);
485 for ( MySensor* s: Fans)
486 {
487 *v = s->GetValue();
488 v++;
489 }
490 bk_close(pevent,v);
491 }
492 return pevent;
493 }
494
495};
496
498#endif
499
500
501/********************************************************************\
502 Callback routines for system transitions
503
504 These routines are called whenever a system transition like start/
505 stop of a run occurs. The routines are called on the following
506 occasions:
507
508 frontend_init: When the frontend program is started. This routine
509 should initialize the hardware.
510
511 frontend_exit: When the frontend program is shut down. Can be used
512 to release any locked resources like memory, commu-
513 nications ports etc.
514
515 begin_of_run: When a new run is started. Clear scalers, open
516 rungates, etc.
517
518 end_of_run: Called on a request to stop a run. Can send
519 end-of-run event and close run gates.
520
521 pause_run: When a run is paused. Should disable trigger events.
522
523 resume_run: When a run is resumed. Should enable trigger events.
524
525\********************************************************************/
526
527int event_size = 10*1024;
528
529/*-- Frontend Init -------------------------------------------------*/
530
533
534// RPC handler
535
537{
538 const char* cmd = CSTRING(0);
539 const char* args = CSTRING(1);
540 char* return_buf = CSTRING(2);
541 int return_max_length = CINT(3);
542
543 cm_msg(MINFO, "rpc_callback", "--------> rpc_callback: index %d, max_length %d, cmd [%s], args [%s]", index, return_max_length, cmd, args);
544
545 //int example_int = strtol(args, NULL, 0);
546 //int size = sizeof(int);
547 //int status = db_set_value(hDB, 0, "/Equipment/" EQ_NAME "/Settings/example_int", &example_int, size, 1, TID_INT);
548
549 char tmp[256];
550 time_t now = time(NULL);
551 sprintf(tmp, "{ \"current_time\" : [ %d, \"%s\"] }", (int)now, ctime(&now));
552
554
555 return RPC_SUCCESS;
556}
557
558
559
560#include "msystem.h"
561
563{
564 //Insert myself into the history
565
566 char path[256];
567 int status;
568 int size;
569 int NVARS=5;
570
572 // Setup variables to plot:
574 size = 64; // String length in ODB
575 sprintf(path,"/History/Display/msysmon/%s/Variables",equipment[0].info.frontend_host);
576 {
577 char vars[size*NVARS];
578 memset(vars, 0, size*NVARS);
579 sprintf(vars+size*0,"%s:LOAD[%d]",equipment[0].name,0);
580 sprintf(vars+size*1,"%s:LOAD[%d]",equipment[0].name,1);
581 sprintf(vars+size*2,"%s:LOAD[%d]",equipment[0].name,2);
582 sprintf(vars+size*3,"%s:MEMP",equipment[0].name);
583 sprintf(vars+size*4,"%s:SWAP",equipment[0].name);
584 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
585 }
586 assert(status == DB_SUCCESS);
587
589 // Setup labels
591 size = 32;
592 sprintf(path,"/History/Display/msysmon/%s/Label",equipment[0].info.frontend_host);
593 {
594 char vars[size*NVARS];
595 memset(vars, 0, size*NVARS);
596 sprintf(vars+size*0,"NICE CPU Load (%%)");
597 sprintf(vars+size*1,"USER CPU Load (%%)");
598 sprintf(vars+size*2,"SYSTEM CPU Load (%%)");
599 sprintf(vars+size*3,"Memory Usage (%%)");
600 sprintf(vars+size*4,"Swap Usage (%%)");
601 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
602 }
603 assert(status == DB_SUCCESS);
604
606 // Setup colours:
608 size = 32;
609 sprintf(path,"/History/Display/msysmon/%s/Colour",equipment[0].info.frontend_host);
610 {
611 char vars[size*NVARS];
612 memset(vars, 0, size*NVARS);
613 for (int i=0; i<NVARS; i++)
614 sprintf(vars+size*i,"%s",(colours[i%16]).c_str());
615 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
616 }
617 assert(status == DB_SUCCESS);
618
620 // Setup time scale and range:
622 sprintf(path,"/History/Display/msysmon/%s/Timescale",equipment[0].info.frontend_host);
623 status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
624 double *m=new double();
625 *m=0.;
626 sprintf(path,"/History/Display/msysmon/%s/Minimum",equipment[0].info.frontend_host);
627 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
628 *m=100.;
629 sprintf(path,"/History/Display/msysmon/%s/Maximum",equipment[0].info.frontend_host);
630 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
631 delete m;
632}
633
635{
636 //Insert per CPU graphs into the history
637 int status, size;
638 char path[256];
639 int NVARS=cpuCount;
641 // Setup variables to plot:
643 size = 64;
644 sprintf(path,"/History/Display/msysmon/%s-CPU/Variables",equipment[0].info.frontend_host);
645 {
646 char vars[size*NVARS];
647 memset(vars, 0, size*NVARS);
648#ifdef CLASSIC_CPU_VARS
649 for (int i=0; i<cpuCount; i++)
650 {
651 int icpu=i+1;
652 int h='0'+icpu/100;
653 int t='0'+(icpu%100)/10;
654 int u='0'+icpu%10;
655 if (icpu<10)
656 sprintf(vars+size*i,"%s:CPU%c[3]",equipment[0].name,u);
657 else if (icpu<100)
658 sprintf(vars+size*i,"%s:CP%c%c[3]",equipment[0].name,t,u);
659 else if (icpu<1000)
660 sprintf(vars+size*i,"%s:C%c%c%c[3]",equipment[0].name,h,t,u);
661 else
662 {
663 cm_msg(MERROR, frontend_name, "Cannot handle a system with more than 1000 CPUs");
664 exit(FE_ERR_HW);
665 }
666 }
667#else
668 for (int i=0; i<cpuCount; i++)
669 {
670 sprintf(vars+size*i,"%s:CPUA[%d]",equipment[0].name,i);
671 }
672#endif
673 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
674 }
675 assert(status == DB_SUCCESS);
676
678 // Setup labels
680 size = 32;
681 sprintf(path,"/History/Display/msysmon/%s-CPU/Label",equipment[0].info.frontend_host);
682 {
683 char vars[size*NVARS];
684 memset(vars, 0, size*NVARS);
685 for (int i=0; i<cpuCount; i++)
686 sprintf(vars+size*i,"CPU%d Load (%%)",i+1);
687 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
688 }
689 assert(status == DB_SUCCESS);
690
692 // Setup colours:
694 size = 32;
695 sprintf(path,"/History/Display/msysmon/%s-CPU/Colour",equipment[0].info.frontend_host);
696 {
697 char vars[size*NVARS];
698 memset(vars, 0, size*NVARS);
699 for (int i=0; i<NVARS; i++)
700 sprintf(vars+size*i,"%s",(colours[i%16]).c_str());
701 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
702 }
703 assert(status == DB_SUCCESS);
705 // Setup time scale and range:
707 sprintf(path,"/History/Display/msysmon/%s-CPU/Timescale",equipment[0].info.frontend_host);
708 status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
709 double *m=new double();
710 *m=0.;
711 sprintf(path,"/History/Display/msysmon/%s-CPU/Minimum",equipment[0].info.frontend_host);
712 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
713 *m=100.;
714 sprintf(path,"/History/Display/msysmon/%s-CPU/Maximum",equipment[0].info.frontend_host);
715 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
716 delete m;
717}
718
720{
721 //Insert per CPU graphs into the history
722 int status, size;
723 char path[256];
726 // Setup variables to plot:
728 size = 64;
729 sprintf(path,"/History/Display/msysmon/%s-net/Variables",equipment[0].info.frontend_host);
730 {
731 char vars[size*NVARS];
732 memset(vars, 0, size*NVARS);
733 for (int i=0; i<networkInterfaceCount; i++)
734 sprintf(vars+size*i,"%s:NETR[%d]",equipment[0].name,i);
735 for (int i=networkInterfaceCount; i<NVARS; i++)
736 sprintf(vars+size*i,"%s:NETT[%d]",equipment[0].name,i-networkInterfaceCount);
737 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
738 }
739 assert(status == DB_SUCCESS);
740
742 // Setup labels
744 size = 32;
745 sprintf(path,"/History/Display/msysmon/%s-net/Label",equipment[0].info.frontend_host);
746 {
747 char vars[size*NVARS];
748 memset(vars, 0, size*NVARS);
749 for (int i=0; i<networkInterfaceCount; i++)
750 sprintf(vars+size*i,"%s Received (Mbps)",NetReceive.at(i)->face.c_str());
751 for (int i=networkInterfaceCount; i<NVARS; i++)
752 sprintf(vars+size*i,"%s Transmitted (Mbps)",NetReceive.at(i-networkInterfaceCount)->face.c_str());
753 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
754 }
755 assert(status == DB_SUCCESS);
756
758 // Setup colours:
760 size = 32;
761 sprintf(path,"/History/Display/msysmon/%s-net/Colour",equipment[0].info.frontend_host);
762 {
763 char vars[size*NVARS];
764 memset(vars, 0, size*NVARS);
765 for (int i=0; i<NVARS; i++)
766 sprintf(vars+size*i,"%s",(colours[i%16]).c_str());
767 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
768 }
769 assert(status == DB_SUCCESS);
771 // Setup time scale and range:
773 sprintf(path,"/History/Display/msysmon/%s-net/Timescale",equipment[0].info.frontend_host);
774 status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
775 double *m=new double();
776 *m=0.;
777 sprintf(path,"/History/Display/msysmon/%s-net/Minimum",equipment[0].info.frontend_host);
778 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
779 *m=1.0/0.0; //infinity
780 sprintf(path,"/History/Display/msysmon/%s-net/Maximum",equipment[0].info.frontend_host);
781 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
782 delete m;
783}
784
785
786#ifdef HAVE_NVIDIA
787void BuildHostGPUPlot()
788{
789 //Insert myself into the history
790
791 char path[256];
792 int status;
793 int size;
794 //5 vars per GPU
795 int NVARS=5*HAVE_NVIDIA;
796
798 // Setup variables to plot:
800 size = 64; // String length in ODB
801 sprintf(path,"/History/Display/msysmon/%s-GPU/Variables",equipment[0].info.frontend_host);
802 {
803 char vars[size*NVARS];
804 memset(vars, 0, size*NVARS);
805 for (int i=0; i<HAVE_NVIDIA; i++)
806 {
807 sprintf(vars+size*0+i*size*5,"%s:GPUT[%d]",equipment[0].name,i);
808 sprintf(vars+size*1+i*size*5,"%s:GPUF[%d]",equipment[0].name,i);
809 sprintf(vars+size*2+i*size*5,"%s:GPUP[%d]",equipment[0].name,i);
810 sprintf(vars+size*3+i*size*5,"%s:GPUU[%d]",equipment[0].name,i);
811 sprintf(vars+size*4+i*size*5,"%s:GPUM[%d]",equipment[0].name,i);
812 }
813 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
814 }
815 assert(status == DB_SUCCESS);
816
818 // Setup labels
820 size = 32;
821 sprintf(path,"/History/Display/msysmon/%s-GPU/Label",equipment[0].info.frontend_host);
822 {
823 char vars[size*NVARS];
824 memset(vars, 0, size*NVARS);
825 for (int i=0; i<HAVE_NVIDIA; i++)
826 {
827 sprintf(vars+size*0+i*size*5,"GPU %d Temperature (C)",i);
828 sprintf(vars+size*1+i*size*5,"GPU %d FAN (%%)",i);
829 sprintf(vars+size*2+i*size*5,"GPU %d Power (W)",i);
830 sprintf(vars+size*3+i*size*5,"GPU %d Utilisation (%%)",i);
831 sprintf(vars+size*4+i*size*5,"GPU %d Memory Usage (%%)",i);
832 }
833 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
834 }
835 assert(status == DB_SUCCESS);
836
838 // Setup colours:
840 size = 32;
841 sprintf(path,"/History/Display/msysmon/%s-GPU/Colour",equipment[0].info.frontend_host);
842 {
843 char vars[size*NVARS];
844 memset(vars, 0, size*NVARS);
845 for (int i=0; i<NVARS; i++)
846 for (int j=0; j<HAVE_NVIDIA; j++)
847 sprintf(vars+size*i+j*size*5,"%s",(colours[i%16]).c_str());
848 status = db_set_value(hDB, 0, path, vars, size*NVARS, NVARS, TID_STRING);
849 }
850 assert(status == DB_SUCCESS);
851
853 // Setup time scale and range:
855 sprintf(path,"/History/Display/msysmon/%s-GPU/Timescale",equipment[0].info.frontend_host);
856 status = db_set_value(hDB,0,path,"1h",3,1,TID_STRING);
857 double *m=new double();
858 *m=0.;
859 sprintf(path,"/History/Display/msysmon/%s-GPU/Minimum",equipment[0].info.frontend_host);
860 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
861 *m=100.;
862 sprintf(path,"/History/Display/msysmon/%s-GPU/Maximum",equipment[0].info.frontend_host);
863 status = db_set_value(hDB,0,path,m,sizeof(double),1,TID_DOUBLE);
864 delete m;
865}
866#endif
867void InitGPU();
869{
870 int status;
871 printf("frontend_init!\n");
872
873 FILE* file = fopen(PROCSTATFILE, "r");
874 if (file == NULL) {
875 cm_msg(MERROR, frontend_name, "Cannot open " PROCSTATFILE);
876 return FE_ERR_HW;
877 }
878 char buffer[256];
879 int Ncpus = -1;
880 do {
881 Ncpus++;
882 const char*s = fgets(buffer, 255, file);
883 if (!s) // EOF
884 break;
885 } while (String_startsWith(buffer, "cpu"));
886 fclose(file);
887 cpuCount = MAX(Ncpus - 1, 1);
888 printf("%d CPUs found\n",cpuCount);
889 //Note, cpus[0] is a total for all CPUs
890 for (int i = 0; i <= cpuCount; i++) {
891 cpus.push_back(new CPUData);
892 }
893
894 file = fopen(PROCNETSTATFILE, "r");
895 if (file == NULL) {
896 cm_msg(MERROR, frontend_name, "Cannot open " PROCNETSTATFILE);
897 return FE_ERR_HW;
898 }
899 do {
900 if (!fgets(buffer, 255, file)) break;
901 for (int i=0; i<255; i++)
902 {
903 if (!buffer[i]) break;
904 if (buffer[i]==':')
905 {
906 NetStat* r=new NetStat;
907 r->face=std::string(buffer,&buffer[i]);
908 NetReceive.push_back(r);
909
910 NetStat* t=new NetStat;
911 t->face=std::string(buffer,&buffer[i]);
912 NetTransmit.push_back(t);
913
915 }
916 }
917 } while (1);
918 fclose(file);
919 printf("%d network inferfaces found\n",networkInterfaceCount);
920
921 ReadCPUData();
922 ReadNetData();
926
927#ifdef HAVE_NVIDIA
929 InitGPU();
930#endif
931
932#ifdef HAVE_LM_SENSORS
933 if (!sensors)
934 sensors= new LM_Sensors();
935 sensors->BuildHostTemperaturePlot();
936
937#endif
938
939#ifdef RPC_JRPC
941 assert(status == SUCCESS);
942#endif
943
944 return SUCCESS;
945}
946
947/*-- Frontend Exit -------------------------------------------------*/
948
950{
951 return SUCCESS;
952}
953
954/*-- Begin of Run --------------------------------------------------*/
955
957{
958 return SUCCESS;
959}
960
961/*-- End of Run ----------------------------------------------------*/
962
964{
965 return SUCCESS;
966}
967
968/*-- Pause Run -----------------------------------------------------*/
969
971{
972 return SUCCESS;
973}
974
975/*-- Resume Run ----------------------------------------------------*/
976
978{
979 return SUCCESS;
980}
981
982/*-- Frontend Loop -------------------------------------------------*/
983
985{
986 /* if frontend_call_loop is true, this routine gets called when
987 the frontend is idle or once between every event */
988 ss_sleep(100); // don't eat all CPU
989 return SUCCESS;
990}
991
992/*------------------------------------------------------------------*/
993
994/********************************************************************\
995
996 Readout routines for different events
997
998\********************************************************************/
999
1001/* Polling routine for events. Returns TRUE if event
1002 is available. If test equals TRUE, don't return. The test
1003 flag is used to time the polling */
1004{
1005 if (test) {
1006 ss_sleep (count);
1007 }
1008 return (0);
1009}
1010
1011/*-- Interrupt configuration ---------------------------------------*/
1012
1014{
1015 printf("interrupt_configure!\n");
1016
1017 switch(cmd)
1018 {
1020 break;
1022 break;
1024 break;
1026 break;
1027 }
1028 return SUCCESS;
1029}
1030
1032{
1033 //Largely from htop: https://github.com/hishamhm/htop (GNU licence)
1034 FILE* file = fopen(PROCSTATFILE, "r");
1035 if (file == NULL) {
1036 cm_msg(MERROR, frontend_name, "Cannot open " PROCSTATFILE);
1037 }
1038 for (int i = 0; i <= cpuCount; i++) {
1039 char buffer[256];
1040 int cpuid;
1041 unsigned long long int ioWait, irq, softIrq, steal, guest, guestnice;
1042 unsigned long long int systemalltime, idlealltime, totaltime, virtalltime;
1043 ioWait = irq = softIrq = steal = guest = guestnice = 0;
1044 // Depending on your kernel version,
1045 // 5, 7, 8 or 9 of these fields will be set.
1046 // The rest will remain at zero.
1047 const char*s = fgets(buffer, 255, file);
1048 if (!s) // EOF
1049 break;
1050 if (i == 0)
1051 sscanf(buffer, "cpu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu", &usertime, &nicetime, &systemtime, &idletime, &ioWait, &irq, &softIrq, &steal, &guest, &guestnice);
1052 else {
1053 sscanf(buffer, "cpu%4d %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu", &cpuid, &usertime, &nicetime, &systemtime, &idletime, &ioWait, &irq, &softIrq, &steal, &guest, &guestnice);
1054 assert(cpuid == i - 1);
1055 }
1056 // Guest time is already accounted in usertime
1059 // Fields existing on kernels >= 2.6
1060 // (and RHEL's patched kernel 2.4...)
1065 CPUData* cpuData = cpus.at(i);
1066 cpuData->userPeriod = usertime - cpuData->userTime;
1067 cpuData->nicePeriod = nicetime - cpuData->niceTime;
1068 cpuData->systemPeriod = systemtime - cpuData->systemTime;
1069 cpuData->systemAllPeriod = systemalltime - cpuData->systemAllTime;
1070 cpuData->idleAllPeriod = idlealltime - cpuData->idleAllTime;
1071 cpuData->idlePeriod = idletime - cpuData->idleTime;
1072 cpuData->ioWaitPeriod = ioWait - cpuData->ioWaitTime;
1073 cpuData->irqPeriod = irq - cpuData->irqTime;
1074 cpuData->softIrqPeriod = softIrq - cpuData->softIrqTime;
1075 cpuData->stealPeriod = steal - cpuData->stealTime;
1076 cpuData->guestPeriod = virtalltime - cpuData->guestTime;
1077 cpuData->totalPeriod = totaltime - cpuData->totalTime;
1078 cpuData->userTime = usertime;
1079 cpuData->niceTime = nicetime;
1080 cpuData->systemTime = systemtime;
1081 cpuData->systemAllTime = systemalltime;
1082 cpuData->idleAllTime = idlealltime;
1083 cpuData->idleTime = idletime;
1084 cpuData->ioWaitTime = ioWait;
1085 cpuData->irqTime = irq;
1086 cpuData->softIrqTime = softIrq;
1087 cpuData->stealTime = steal;
1088 cpuData->guestTime = virtalltime;
1089 cpuData->totalTime = totaltime;
1090 }
1091 fclose(file);
1092 //end htop code
1093}
1094#include <sys/time.h>
1097{
1098 FILE* file = fopen(PROCNETSTATFILE,"r");
1099 if (file == NULL) {
1100 cm_msg(MERROR, frontend_name, "Cannot open " PROCNETSTATFILE);
1101 }
1103 timersub(&new_tv, &old_tv, &tv);
1104 //Note, there are two title lines (hence +2)
1105 const int title_lines=2;
1106 for (int i = 0; i < networkInterfaceCount+title_lines; i++) {
1107 char buffer[256];
1108 char InterfaceName[20];
1109 unsigned long int rbytes, rpackets, rerrs, rdrop, rfifo, rframe, rcompressed, rmulticast;
1110 unsigned long int sbytes, spackets, serrs, sdrop, sfifo, sframe, scompressed, smulticast;
1111 // Depending on your kernel version,
1112 // 5, 7, 8 or 9 of these fields will be set.
1113 // The rest will remain at zero.
1114 const char*s = fgets(buffer, 255, file);
1115 if (!s) // EOF
1116 break;
1117 if (i < 2)
1118 continue; //Title lines
1119 else
1120 sscanf(buffer, "%[^:]: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",InterfaceName, &rbytes, &rpackets, &rerrs, &rdrop, &rfifo, &rframe, &rcompressed, &rmulticast,&sbytes, &spackets, &serrs, &sdrop, &sfifo, &sframe, &scompressed, &smulticast);
1121#ifdef FE_DEBUG
1122 printf("--------------------Parsing line %d from " PROCNETSTATFILE "---------------------\n",i);
1123 printf("Intput: %s\n",buffer);
1124 printf("Output: %s: %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu\n\n",InterfaceName, rbytes, rpackets, rerrs, rdrop, rfifo, rframe, rcompressed, rmulticast,sbytes, spackets, serrs, sdrop, sfifo, sframe, scompressed, smulticast);
1125 printf("-------------------------------------------------------------------------------\n");
1126#endif
1129
1130 RData->bytesPeriod =rbytes-RData->bytes;
1131 RData->packetsPeriod =rpackets-RData->packets;
1132 RData->errsPeriod =rerrs-RData->errs;
1133 RData->dropPeriod =rdrop-RData->drop;
1134 RData->fifoPeriod =rfifo-RData->fifo;
1135 RData->framePeriod =rframe-RData->frame;
1136 RData->compressedPeriod =rcompressed-RData->compressed;
1137 RData->multicastPeriod =rmulticast-RData->multicast;
1138
1139 RData->bytes =rbytes;
1140 RData->packets =rpackets;
1141 RData->errs =rerrs;
1142 RData->drop =rdrop;
1143 RData->fifo =rfifo;
1144 RData->frame =rframe;
1145 RData->compressed =rcompressed;
1146 RData->multicast =rmulticast;
1147 RData->tv =tv;
1148
1149 SData->bytesPeriod =sbytes-SData->bytes;
1150 SData->packetsPeriod =spackets-SData->packets;
1151 SData->errsPeriod =serrs-SData->errs;
1152 SData->dropPeriod =sdrop-SData->drop;
1153 SData->fifoPeriod =sfifo-SData->fifo;
1154 SData->framePeriod =sframe-SData->frame;
1155 SData->compressedPeriod =scompressed-SData->compressed;
1156 SData->multicastPeriod =smulticast-SData->multicast;
1157
1158 SData->bytes =sbytes;
1159 SData->packets =spackets;
1160 SData->errs =serrs;
1161 SData->drop =sdrop;
1162 SData->fifo =sfifo;
1163 SData->frame =sframe;
1164 SData->compressed =scompressed;
1165 SData->multicast =smulticast;
1166 SData->tv =tv;
1167 }
1168 old_tv = new_tv;
1169 fclose(file);
1170}
1171#if HAVE_NVIDIA
1172
1173// Build the set of device features
//
// Probes the optional NVML queries once for the given device and records, as
// bits OR-ed into dev->feature_support, which of them returned NVML_SUCCESS.
// Each probe passes the address of the matching dev field (memory,
// power_usage, fan, util, temperature) as its output argument.
// ReadGPUData() tests these same bits before repeating a query.
//
// dev: GPU record to probe; dev->handle is handed to the NVML calls visible
//      here.
//
// NOTE(review): feature_support is only OR-ed into in this function, never
// cleared; presumably it starts at zero when the GPU record is created -
// confirm against the GPU type definition.
1174static void get_device_features(GPU* dev)
1175{
// NOTE(review): the embedded listing numbers jump from 1175 to 1177, so the
// line that opens this first check is not visible here. Presumably it is the
// temperature query on dev->handle - confirm against the original file.
1177 &dev->temperature) == NVML_SUCCESS) {
1178 dev->feature_support |= TEMPERATURE;
1179 }
1180
1181 if(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory) == NVML_SUCCESS) {
1182 dev->feature_support |= MEMORY_INFO;
1183 }
1184
1185 if(nvmlDeviceGetPowerUsage(dev->handle, &dev->power_usage) == NVML_SUCCESS) {
1186 dev->feature_support |= POWER_USAGE;
1187 }
1188
1189 if(nvmlDeviceGetFanSpeed(dev->handle, &dev->fan) == NVML_SUCCESS) {
1190 dev->feature_support |= FAN_INFO;
1191 }
1192
1193 if(nvmlDeviceGetUtilizationRates(dev->handle, &dev->util) == NVML_SUCCESS) {
1194 dev->feature_support |= UTILIZATION_INFO;
1195 }
1196}
1197
// Initialise NVIDIA GPU monitoring.
//
// Prints a start-up message, initialises the NVML library and then, for each
// of the nGPUs devices, allocates a GPU record, appends it to the global GPUs
// list, stores its index and queries its name, serial, UUID, PCI info and
// memory info. An NVML event set is created per device. Prints "OK" when the
// loop has finished.
//
// The process exits with status 1 if NVML_TRY(nvmlInit()) evaluates to true.
// NVML_TRY is defined outside this chunk; from its use here it presumably
// yields non-zero when the wrapped call fails - confirm.
//
// NOTE(review): several lines are absent from this listing (the embedded
// numbering skips 1204, 1212, 1223 and 1229). The statements that set nGPUs
// and dev->handle, the `if` belonging to the visible `} else {`, and whatever
// follows that if/else are therefore not visible here.
// NOTE(review): GPU records are created with `new` and kept as raw pointers
// in GPUs; no matching release is visible in this chunk.
1198void InitGPU()
1199{
1200 printf("Initialising NVIDIA monitoring\n");
1201 // No point in continuing if we can't even initialize the library.
1202 if(NVML_TRY(nvmlInit()))
1203 exit(1);
// (listing line 1204 missing - presumably where nGPUs is obtained; confirm)
1205
1206 for(unsigned i = 0; i < nGPUs; ++i) {
1207 GPU* dev=new GPU();
1208 GPUs.push_back(dev);
1209
1210 dev->index = i;
1211
// (listing line 1212 missing - presumably where dev->handle is obtained,
//  since every call below already uses it; confirm)
1213
1214 NVML_TRY(nvmlDeviceGetName(dev->handle, dev->name, sizeof(dev->name)));
1215 NVML_TRY(nvmlDeviceGetSerial(dev->handle, dev->serial, sizeof(dev->serial)));
1216 NVML_TRY(nvmlDeviceGetUUID(dev->handle, dev->uuid, sizeof(dev->uuid)));
1217
1218 NVML_TRY(nvmlDeviceGetPciInfo(dev->handle, &dev->pci));
1219 NVML_TRY(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory));
1220
// event_types is declared without an initialiser; it is presumably filled in
// by the condition on the missing listing line 1223 before being passed to
// nvmlDeviceRegisterEvents() - confirm.
1221 unsigned long long event_types;
1222 NVML_TRY(nvmlEventSetCreate(&dev->event_set));
1224 NVML_TRY(nvmlDeviceRegisterEvents(dev->handle, event_types, dev->event_set));
1225 } else {
// A NULL event_set makes ReadGPUData() skip the event wait for this device.
1226 dev->event_set = NULL;
1227 }
1228
// (listing line 1229 missing)
1230
1231 }
1232 printf("OK\n");
1233}
1234
// Refresh the cached readings of every GPU record in GPUs.
//
// For each of the nGPUs devices, a reading is re-queried only when the
// corresponding bit is set in dev->feature_support (MEMORY_INFO, TEMPERATURE,
// POWER_USAGE, UTILIZATION_INFO, FAN_INFO). Results are written back into the
// device record (memory, temperature, power_usage, fan). If the device has a
// non-NULL event_set, it is waited on with a timeout argument of 1.
//
// NOTE(review): the embedded listing numbers skip 1247, 1256 and 1264, so the
// start of the temperature call, the body of the utilisation branch and the
// declaration of `data` are not visible here.
1235void ReadGPUData()
1236{
1237 unsigned i;
1238
1239 for(i = 0; i < nGPUs; ++i) {
1240 GPU* dev = GPUs[i];
1241
1242 if(dev->feature_support & MEMORY_INFO) {
1243 NVML_TRY(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory));
1244 }
1245
1246 if(dev->feature_support & TEMPERATURE) {
// (listing line 1247 missing - the call whose last argument follows)
1248 &dev->temperature));
1249 }
1250
1251 if(dev->feature_support & POWER_USAGE) {
1252 NVML_TRY(nvmlDeviceGetPowerUsage(dev->handle, &dev->power_usage));
1253 }
1254
1255 if(dev->feature_support & UTILIZATION_INFO) {
// (listing line 1256 missing - presumably the utilisation query that fills
//  dev->util; confirm)
1257 }
1258
1259 if(dev->feature_support & FAN_INFO) {
1260 NVML_TRY(nvmlDeviceGetFanSpeed(dev->handle, &dev->fan));
1261 }
1262
1263 if(dev->event_set != NULL) {
// (listing line 1264 missing - presumably declares `data`; confirm)
1265
// The event read into `data` is not used further in the visible lines.
// NOTE(review): the third argument is presumably a timeout in milliseconds
// per the NVML API - confirm.
1266 NVML_TRY(nvmlEventSetWait(dev->event_set, &data, 1));
1267
1268 }
1269 }
1270
1271}
1272#endif
1273/*-- Event readout -------------------------------------------------*/
1274#include <fstream>
// Event readout routine: fills the event at pevent with system-load banks
// and returns the event size reported by bk_size().
//
// Banks written, in order:
//   - CPU load: one 4-double bank per CPU ("LOAD", "CPUn", "CPnn", "Cnnn")
//     when CLASSIC_CPU_VARS is defined; otherwise "CPUN", "CPUU", "CPUS",
//     "CPUA" (one double per entry) followed by a 4-double "LOAD" bank
//   - whatever sensors->ReadAndLogSensors() adds, when HAVE_LM_SENSORS is
//     defined
//   - "NETR" / "NETT": receive / transmit rate, one double per interface
//   - "MEMP": memory used in percent
//   - "SWAP": swap used in percent, only when totalSwap is non-zero
//   - "GPUT", "GPUF", "GPUP", "GPUU" (int) and "GPUM" (double), one value per
//     GPU, when HAVE_NVIDIA is enabled
//
// pevent: event buffer handed to the bk_* bank functions
// off:    not used in this function
//
// NOTE(review): this listing is incomplete - the embedded numbering skips
// 1301, 1389, 1474-1475 and 1479. The affected spots are marked below.
1275int read_system_load(char *pevent, int off)
1276{
1277 bk_init32(pevent);
1278
// Refresh the per-CPU and per-interface counters that the code below reads
// through cpus, NetReceive and NetTransmit.
1279 ReadCPUData();
1280
1281 ReadNetData();
1282
1283 //Calculate load:
1284 // The classic layout of CPU variables would be to log a bank for 4 doubles for each CPU core. This will not scale with very high core counts in the future
1285#ifdef CLASSIC_CPU_VARS
1286 double CPULoadTotal[4]; //nice, user, system, total
1287 for (int j=0; j<4; j++)
1288 CPULoadTotal[j]=0;
1289
1290 double CPULoad[4]; //nice, user, system, total
1291 for (int i = 0; i <= cpuCount; i++) {
1292 CPUData* cpuData = (cpus[i]);
// A zero totalPeriod is replaced by 1 so the divisions below cannot divide by
// zero.
1293 double total = (double) ( cpuData->totalPeriod == 0 ? 1 : cpuData->totalPeriod);
1294 CPULoad[0] = cpuData->nicePeriod / total * 100.0;
1295 CPULoad[1] = cpuData->userPeriod / total * 100.0;
1296 CPULoad[2] = cpuData->systemPeriod / total * 100.0;
1297 CPULoad[3]=CPULoad[0]+CPULoad[1]+CPULoad[2];
1298
1299 for (int j=0; j<4; j++)
1300 {
// (listing line 1301 missing - the body of this loop is not visible;
//  CPULoadTotal is not read anywhere in the visible lines)
1302 }
1303
1304 // This is a little long for just setting a bank name, but it
1305 // avoids format-truncation warnings and supports machines with up to
1306 // 1000 CPUs... another case can be put in when we reach that new limit
1307 char name[5]="LOAD";
1308 //i==0 is a total for ALL Cpus
1309 if (i!=0)
1310 {
// ASCII digits for the hundreds, tens and units of i.
1311 int h='0'+i/100;
1312 int t='0'+(i%100)/10;
1313 int u='0'+i%10;
1314 if (i<10)
1315 snprintf(name,5,"CPU%c",u);
1316 else if (i<100)
1317 snprintf(name,5,"CP%c%c",t,u);
1318 else if (i<1000)
1319 snprintf(name,5,"C%c%c%c",h,t,u);
1320 else
// NOTE(review): in this case name keeps its initial value "LOAD", so the bank
// created below reuses the name given to the i==0 bank.
1321 cm_msg(MERROR, frontend_name, "Cannot handle a system with more than 1000 CPUs");
1322 }
1323 double* a;
1324 bk_create(pevent, name, TID_DOUBLE, (void**)&a);
1325 for (int k=0; k<4; k++)
1326 {
1327 *a=CPULoad[k];
1328 a++;
1329 }
1330 bk_close(pevent,a);
1331
1332 }
1333#else
1334 //Instead of the 'Classic' variables. Log 4 banks with N doubles, where N is the number of CPUs
// NOTE(review): cpuCount is a run-time int, so these are variable-length
// arrays (a compiler extension in C++). Each holds cpuCount elements, yet the
// fill loop below runs i = 0..cpuCount inclusive, so its last iteration
// writes index cpuCount, one element past the end of every array.
1335 double CPUN[cpuCount]; // % nice time
1336 double CPUU[cpuCount]; // % user time
1337 double CPUS[cpuCount]; // % system time
1338 double CPUA[cpuCount]; // % total CPU time
1339 for (int i = 0; i <= cpuCount; i++) {
1340 CPUData* cpuData = (cpus[i]);
// A zero totalPeriod is replaced by 1 so the divisions below cannot divide by
// zero.
1341 double total = (double) ( cpuData->totalPeriod == 0 ? 1 : cpuData->totalPeriod);
1342 CPUN[i] = cpuData->nicePeriod / total * 100.0;
1343 CPUU[i] = cpuData->userPeriod / total * 100.0;
1344 CPUS[i] = cpuData->systemPeriod / total * 100.0;
1345 CPUA[i] = CPUN[i]+CPUU[i]+CPUS[i];
1346 }
// Each bank below logs entries 0..cpuCount-1 only.
// NOTE(review): if cpus[0] is the all-CPU aggregate here as well (the classic
// branch says "i==0 is a total for ALL Cpus"), element 0 of each bank is that
// aggregate and the entry for index cpuCount is never logged - confirm.
1347 double* c;
1348 bk_create(pevent, "CPUN", TID_DOUBLE, (void**)&c);
1349 for (int k=0; k<cpuCount; k++)
1350 {
1351 *c=CPUN[k];
1352 c++;
1353 }
1354 bk_close(pevent,c);
1355 bk_create(pevent, "CPUU", TID_DOUBLE, (void**)&c);
1356 for (int k=0; k<cpuCount; k++)
1357 {
1358 *c=CPUU[k];
1359 c++;
1360 }
1361 bk_close(pevent,c);
1362
1363 bk_create(pevent, "CPUS", TID_DOUBLE, (void**)&c);
1364 for (int k=0; k<cpuCount; k++)
1365 {
1366 *c=CPUS[k];
1367 c++;
1368 }
1369 bk_close(pevent,c);
1370
1371 bk_create(pevent, "CPUA", TID_DOUBLE, (void**)&c);
1372 for (int k=0; k<cpuCount; k++)
1373 {
1374 *c=CPUA[k];
1375 c++;
1376 }
1377 bk_close(pevent,c);
// Sum the four percentages over entries 0..cpuCount-1.
1378 double TotalLoad[4]={0};
1379 for (int k=0; k<cpuCount; k++)
1380 {
1381 TotalLoad[0]+=CPUN[k];
1382 TotalLoad[1]+=CPUU[k];
1383 TotalLoad[2]+=CPUS[k];
1384 TotalLoad[3]+=CPUA[k];
1385 }
1386 bk_create(pevent, "LOAD", TID_DOUBLE, (void**)&c);
1387 for (int k=0; k<4; k++)
1388 {
// (listing line 1389 missing - the value stored through c for each of the
//  four "LOAD" entries is not visible here)
1390 c++;
1391 }
1392 bk_close(pevent,c);
1393#endif
1394
1395
1396//Read and log system temperatures
1397#ifdef HAVE_LM_SENSORS
// NOTE(review): pevent is replaced by the return value, and all later bk_*
// calls (including bk_size) use the new pointer. What ReadAndLogSensors
// returns is not visible in this chunk - confirm it is still the event start.
1398 pevent = sensors->ReadAndLogSensors(pevent);
1399#endif
1400
1401 double DataRecieve;
1402 double DataTransmit;
1403
1404 double* a;
1405 char name[5]="NETR";
1406 bk_create(pevent, name, TID_DOUBLE, (void**)&a);
1407 for (int i=0; i<networkInterfaceCount; i++)
1408 {
1409 NetStat* RData = NetReceive.at(i);
// Rdt is the stored interval in seconds; the rate is bytes*8/1024/1024 per
// second.
// NOTE(review): no guard against Rdt == 0 is visible before the division.
1410 double Rdt=RData->tv.tv_sec + (RData->tv.tv_usec * 1e-6);
1411 DataRecieve=(double)(RData->bytesPeriod)*8./1024./1024. / Rdt; //Megabits / s
1412 *a=DataRecieve;
1413 a++;
1414 }
1415 bk_close(pevent,a);
1416
1417 double* b;
1418 sprintf(name,"NETT");
1419 bk_create(pevent, name, TID_DOUBLE, (void**)&b);
1420 for (int i=0; i<networkInterfaceCount; i++)
1421 {
1422 NetStat* SData = NetTransmit.at(i);
// Same computation as for the receive side.
1423 double Sdt=SData->tv.tv_sec + (SData->tv.tv_usec * 1e-6);
1424 DataTransmit=(double)(SData->bytesPeriod)*8./1024./1024. / Sdt; //Megabits /s
1425 *b=DataTransmit;
1426 b++;
1427 }
1428 bk_close(pevent,b);
1429
1430 //Again from htop:
// NOTE(review): none of these counters has an initialiser; each one is only
// assigned when its key is found (and parsed) in PROCMEMINFOFILE.
1431 unsigned long long int totalMem;
1432 unsigned long long int usedMem;
1433 unsigned long long int freeMem;
1434 unsigned long long int sharedMem;
1435 unsigned long long int buffersMem;
1436 unsigned long long int cachedMem;
1437 unsigned long long int totalSwap;
1438 unsigned long long int usedSwap;
1439 unsigned long long int freeSwap;
1440 FILE* file = fopen(PROCMEMINFOFILE, "r");
1441 if (file == NULL) {
// NOTE(review): only a message is logged on failure; execution continues and
// the NULL file pointer is passed to fgets() and fclose() below.
1442 cm_msg(MERROR, frontend_name, "Cannot open " PROCMEMINFOFILE);
1443 }
1444 char buffer[128];
// Dispatch on the first character of each line, then match the full key.
1445 while (fgets(buffer, 128, file)) {
1446 switch (buffer[0]) {
1447 case 'M':
1448 if (String_startsWith(buffer, "MemTotal:"))
1449 sscanf(buffer, "MemTotal: %32llu kB", &totalMem);
1450 else if (String_startsWith(buffer, "MemFree:"))
1451 sscanf(buffer, "MemFree: %32llu kB", &freeMem);
1452 else if (String_startsWith(buffer, "MemShared:"))
1453 sscanf(buffer, "MemShared: %32llu kB", &sharedMem);
1454 break;
1455 case 'B':
1456 if (String_startsWith(buffer, "Buffers:"))
1457 sscanf(buffer, "Buffers: %32llu kB", &buffersMem);
1458 break;
1459 case 'C':
1460 if (String_startsWith(buffer, "Cached:"))
1461 sscanf(buffer, "Cached: %32llu kB", &cachedMem);
1462 break;
1463 case 'S':
1464 if (String_startsWith(buffer, "SwapTotal:"))
1465 sscanf(buffer, "SwapTotal: %32llu kB", &totalSwap);
1466 if (String_startsWith(buffer, "SwapFree:"))
1467 sscanf(buffer, "SwapFree: %32llu kB", &freeSwap);
1468 break;
1469 }
1470 }
1471 fclose(file);
1472 //end htop code
1473
// (listing lines 1474-1475 missing - presumably where usedMem and usedSwap
//  are derived from the parsed values; confirm. sharedMem, buffersMem and
//  cachedMem are not read anywhere in the visible lines.)
// NOTE(review): no guard against totalMem == 0 is visible before the division.
1476 double mem_percent=100.*(double)usedMem/(double)totalMem;
1477 double swap_percent=100;
1478 if (totalSwap) //If there is an swap space, calculate... else always say 100% used
// (listing line 1479 missing - the statement controlled by the `if` above,
//  presumably the swap_percent calculation; confirm)
1480#ifdef FE_DEBUG
1481 printf("-----------------------------\n");
1482 printf("MemUsed: %lld kB (%lld GB) (%.2f%%)\n",usedMem,usedMem/1024/1024,mem_percent);
1483 printf("SwapUsed: %lld kB (%lld GB) (%.2f%%)\n",usedSwap,usedSwap/1024/1024,swap_percent);
1484 printf("-----------------------------\n");
1485#endif
// Single-value banks: write one double, then close one element past it.
1486 double* m;
1487 bk_create(pevent, "MEMP", TID_DOUBLE, (void**)&m);
1488 *m=mem_percent;
1489 bk_close(pevent,m+1);
1490
1491 if (totalSwap) //Only log SWAP if there is any
1492 {
1493 bk_create(pevent, "SWAP", TID_DOUBLE, (void**)&m);
1494 *m=swap_percent;
1495 bk_close(pevent,m+1);
1496 }
1497
1498#if HAVE_NVIDIA
// Refresh the GPU records, then log one value per GPU in each bank.
1499 ReadGPUData();
1500 int* t;
1501
1502 //GPU Temperature
1503 bk_create(pevent, "GPUT", TID_INT, (void**)&t);
1504 for (unsigned i=0; i<nGPUs; i++)
1505 {
1506 *t=GPUs[i]->temperature;
1507 t++;
1508 }
1509 bk_close(pevent,t);
1510
1511 //GPU Fan speed
1512 bk_create(pevent, "GPUF", TID_INT, (void**)&t);
1513 for (unsigned i=0; i<nGPUs; i++)
1514 {
1515 *t=GPUs[i]->fan;
1516 t++;
1517 }
1518 bk_close(pevent,t);
1519
1520 //GPU Power (W)
1521 bk_create(pevent, "GPUP", TID_INT, (void**)&t);
1522 for (unsigned i=0; i<nGPUs; i++)
1523 {
// Integer division by 1000; presumably converts the NVML reading from
// milliwatts to watts - confirm.
1524 *t=GPUs[i]->power_usage/1000;
1525 t++;
1526 }
1527 bk_close(pevent,t);
1528
1529 //GPU Utilisation (%)
1530 bk_create(pevent, "GPUU", TID_INT, (void**)&t);
1531 for (unsigned i=0; i<nGPUs; i++)
1532 {
1533 *t=GPUs[i]->util.gpu;
1534 t++;
1535 }
1536 bk_close(pevent,t);
1537
1538 //GPU Memory Utilisation (%)
1539 bk_create(pevent, "GPUM", TID_DOUBLE, (void**)&m);
1540 for (unsigned i=0; i<nGPUs; i++)
1541 {
1542 *m=100.*(double)GPUs[i]->memory.used/(double)GPUs[i]->memory.total;
1543 m++;
1544 }
1545 bk_close(pevent,m);
1546
1547#endif
1548
1549 return bk_size(pevent);
1550}
1551
1552/* emacs
1553 * Local Variables:
1554 * tab-width: 8
1555 * c-basic-offset: 3
1556 * indent-tabs-mode: nil
1557 * End:
1558 */
#define FALSE
Definition cfortran.h:309
struct usb_device * dev
Definition feccusb.cxx:80
INT bk_close(void *event, void *pdata)
Definition midas.cxx:16780
void bk_init32(void *event)
Definition midas.cxx:16469
void bk_create(void *event, const char *name, WORD type, void **pdata)
Definition midas.cxx:16561
INT bk_size(const void *event)
Definition midas.cxx:16495
INT cm_register_function(INT id, INT(*func)(INT, void **))
Definition midas.cxx:5790
#define DB_SUCCESS
Definition midas.h:631
#define RPC_SUCCESS
Definition midas.h:698
#define CMD_INTERRUPT_ATTACH
Definition midas.h:822
#define FE_ERR_HW
Definition midas.h:719
#define CMD_INTERRUPT_DISABLE
Definition midas.h:821
#define CMD_INTERRUPT_ENABLE
Definition midas.h:820
#define CMD_INTERRUPT_DETACH
Definition midas.h:823
#define SUCCESS
Definition mcstd.h:54
#define TID_DOUBLE
Definition midas.h:343
#define RO_ODB
Definition midas.h:438
#define EQ_PERIODIC
Definition midas.h:414
#define MINFO
Definition midas.h:560
#define TID_STRING
Definition midas.h:346
#define MERROR
Definition midas.h:559
#define TID_INT
Definition midas.h:338
#define RO_ALWAYS
Definition midas.h:436
#define MAX(a, b)
Definition midas.h:509
INT ss_sleep(INT millisec)
Definition system.cxx:3628
INT cm_msg(INT message_type, const char *filename, INT line, const char *routine, const char *format,...)
Definition midas.cxx:915
INT db_set_value(HNDLE hDB, HNDLE hKeyRoot, const char *key_name, const void *data, INT data_size, INT num_values, DWORD type)
Definition odb.cxx:5261
#define RPC_JRPC
Definition mrpc.h:130
void ** info
Definition fesimdaq.cxx:41
INT run_number[2]
Definition mana.cxx:246
INT index
Definition mana.cxx:271
void * data
Definition mana.cxx:268
HNDLE hDB
main ODB handle
Definition mana.cxx:207
double count
Definition mdump.cxx:33
INT i
Definition mdump.cxx:32
INT HNDLE
Definition midas.h:132
#define CINT(_i)
Definition midas.h:1622
DWORD BOOL
Definition midas.h:105
int INT
Definition midas.h:129
#define CSTRING(_i)
Definition midas.h:1646
#define PTYPE
Definition midas.h:170
#define TRUE
Definition midas.h:182
#define resume_run
#define name(x)
Definition midas_macro.h:24
#define pause_run
#define equipment(name, id, type, source, readon, period, readout, cd, driver)
Definition midas_macro.h:60
program test
Definition miniana.f:6
INT serial
Definition minife.c:20
int gettimeofday(struct timeval *tp, void *tzp)
#define PROCNETSTATFILE
Definition msysmon.cxx:27
BOOL frontend_call_loop
Definition msysmon.cxx:68
INT rpc_callback(INT index, void *prpc_param[])
Definition msysmon.cxx:536
const char * frontend_file_name
Definition msysmon.cxx:65
INT max_event_size
Definition msysmon.cxx:75
void ReadNetData()
Definition msysmon.cxx:1096
INT frontend_exit()
Frontend exit.
Definition msysmon.cxx:949
int test_rb_wait_sleep
Definition msysmon.cxx:532
timeval new_tv
Definition msysmon.cxx:1095
#define EVID_MONITOR
Definition msysmon.cxx:97
HNDLE hSet
Definition msysmon.cxx:531
INT frontend_init()
Frontend initialization.
Definition msysmon.cxx:868
std::vector< CPUData * > cpus
Definition msysmon.cxx:151
INT event_buffer_size
Definition msysmon.cxx:79
std::string colours[16]
Definition msysmon.cxx:311
int read_system_load(char *pevent, int off)
Definition msysmon.cxx:1275
INT max_event_size_frag
Definition msysmon.cxx:76
INT interrupt_configure(INT cmd, INT source, PTYPE adr)
Definition msysmon.cxx:1013
std::vector< NetStat * > NetReceive
Definition msysmon.cxx:178
unsigned long long int nicetime
Definition msysmon.cxx:153
BOOL equipment_common_overwrite
Definition msysmon.cxx:99
timeval old_tv
Definition msysmon.cxx:1095
INT poll_event(INT source, INT count, BOOL test)
Definition msysmon.cxx:1000
timeval tv
Definition msysmon.cxx:1095
EQUIPMENT equipment[]
Definition msysmon.cxx:101
unsigned long long int idletime
Definition msysmon.cxx:153
#define PROCMEMINFOFILE
Definition msysmon.cxx:23
#define String_startsWith(s, match)
Definition msysmon.cxx:30
void ReadCPUData()
Definition msysmon.cxx:1031
struct CPUData_ CPUData
const char * frontend_name
Definition msysmon.cxx:62
INT display_period
Definition msysmon.cxx:72
void InitGPU()
void BuildHostCPUPlot()
Definition msysmon.cxx:634
int networkInterfaceCount
Definition msysmon.cxx:177
void BuildHostHistoryPlot()
Definition msysmon.cxx:562
void BuildHostNetPlot()
Definition msysmon.cxx:719
INT begin_of_run(INT run_number, char *error)
Begin of Run.
Definition msysmon.cxx:956
INT frontend_loop()
Frontend loop.
Definition msysmon.cxx:984
int event_size
Definition msysmon.cxx:527
std::vector< NetStat * > NetTransmit
Definition msysmon.cxx:179
INT end_of_run(INT run_number, char *error)
End of Run.
Definition msysmon.cxx:963
#define PROCSTATFILE
Definition msysmon.cxx:19
int cpuCount
Definition msysmon.cxx:150
unsigned long long int systemtime
Definition msysmon.cxx:153
unsigned long long int usertime
Definition msysmon.cxx:153
#define ctime
Definition msystem.h:264
double total[100]
Definition odbhist.cxx:42
INT j
Definition odbhist.cxx:40
double value[100]
Definition odbhist.cxx:42
INT k
Definition odbhist.cxx:40
DWORD status
Definition odbhist.cxx:39
TH1X EXPRT * h1_book(const char *name, const char *title, int bins, double min, double max)
Definition rmidas.h:24
unsigned long long int ioWaitPeriod
Definition msysmon.cxx:144
unsigned long long int ioWaitTime
Definition msysmon.cxx:132
unsigned long long int softIrqTime
Definition msysmon.cxx:134
unsigned long long int userPeriod
Definition msysmon.cxx:138
unsigned long long int totalPeriod
Definition msysmon.cxx:137
unsigned long long int stealPeriod
Definition msysmon.cxx:147
unsigned long long int guestPeriod
Definition msysmon.cxx:148
unsigned long long int systemAllTime
Definition msysmon.cxx:128
unsigned long long int irqPeriod
Definition msysmon.cxx:145
unsigned long long int systemPeriod
Definition msysmon.cxx:139
unsigned long long int nicePeriod
Definition msysmon.cxx:143
unsigned long long int irqTime
Definition msysmon.cxx:133
unsigned long long int systemAllPeriod
Definition msysmon.cxx:140
unsigned long long int idleAllPeriod
Definition msysmon.cxx:141
unsigned long long int stealTime
Definition msysmon.cxx:135
unsigned long long int niceTime
Definition msysmon.cxx:131
unsigned long long int idleTime
Definition msysmon.cxx:130
unsigned long long int userTime
Definition msysmon.cxx:126
unsigned long long int guestTime
Definition msysmon.cxx:136
unsigned long long int systemTime
Definition msysmon.cxx:127
unsigned long long int totalTime
Definition msysmon.cxx:125
unsigned long long int idlePeriod
Definition msysmon.cxx:142
unsigned long long int softIrqPeriod
Definition msysmon.cxx:146
unsigned long long int idleAllTime
Definition msysmon.cxx:129
unsigned long int drop
Definition msysmon.cxx:162
unsigned long int multicast
Definition msysmon.cxx:166
timeval tv
Definition msysmon.cxx:175
unsigned long int frame
Definition msysmon.cxx:164
unsigned long int framePeriod
Definition msysmon.cxx:172
unsigned long int compressed
Definition msysmon.cxx:165
unsigned long int packetsPeriod
Definition msysmon.cxx:168
unsigned long int errs
Definition msysmon.cxx:161
unsigned long int dropPeriod
Definition msysmon.cxx:170
unsigned long int errsPeriod
Definition msysmon.cxx:169
unsigned long int fifo
Definition msysmon.cxx:163
unsigned long int multicastPeriod
Definition msysmon.cxx:174
unsigned long int packets
Definition msysmon.cxx:160
unsigned long int bytes
Definition msysmon.cxx:159
unsigned long int compressedPeriod
Definition msysmon.cxx:173
unsigned long int fifoPeriod
Definition msysmon.cxx:171
unsigned long int bytesPeriod
Definition msysmon.cxx:167
std::string face
Definition msysmon.cxx:158
char c
Definition system.cxx:1310
static double e(void)
Definition tinyexpr.c:136