XRootD
XrdCmsManager.cc
Go to the documentation of this file.
1 /******************************************************************************/
2 /* */
3 /* X r d C m s M a n a g e r . c c */
4 /* */
5 /* (c) 2007 by the Board of Trustees of the Leland Stanford, Jr., University */
6 /* All Rights Reserved */
7 /* Produced by Andrew Hanushevsky for Stanford University under contract */
8 /* DE-AC02-76-SFO0515 with the Department of Energy */
9 /* */
10 /* This file is part of the XRootD software suite. */
11 /* */
12 /* XRootD is free software: you can redistribute it and/or modify it under */
13 /* the terms of the GNU Lesser General Public License as published by the */
14 /* Free Software Foundation, either version 3 of the License, or (at your */
15 /* option) any later version. */
16 /* */
17 /* XRootD is distributed in the hope that it will be useful, but WITHOUT */
18 /* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
19 /* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
20 /* License for more details. */
21 /* */
22 /* You should have received a copy of the GNU Lesser General Public License */
23 /* along with XRootD in a file called COPYING.LESSER (LGPL license) and file */
24 /* COPYING (GPL license). If not, see <http://www.gnu.org/licenses/>. */
25 /* */
26 /* The copyright holder's institutional names and contributor's names may not */
27 /* be used to endorse or promote products derived from this software without */
28 /* specific prior written permission of the institution or contributor. */
29 /******************************************************************************/
30 
31 #include <cstdlib>
32 #include <cstdio>
33 #include <unistd.h>
34 #include <netinet/in.h>
35 #include <sys/types.h>
36 
37 #include "Xrd/XrdScheduler.hh"
38 
39 #include "XrdCms/XrdCmsConfig.hh"
40 #include "XrdCms/XrdCmsManager.hh"
41 #include "XrdCms/XrdCmsManTree.hh"
42 #include "XrdCms/XrdCmsNode.hh"
43 #include "XrdCms/XrdCmsProtocol.hh"
44 #include "XrdCms/XrdCmsRouting.hh"
45 #include "XrdCms/XrdCmsUtils.hh"
46 #include "XrdCms/XrdCmsTrace.hh"
47 
48 #include "XrdNet/XrdNetAddr.hh"
49 
50 #include "XrdOuc/XrdOucTList.hh"
52 
53 #include "XrdSys/XrdSysError.hh"
54 #include "XrdSys/XrdSysTimer.hh"
55 
56 /******************************************************************************/
57 /* G l o b a l O b j e c t s */
58 /******************************************************************************/
59 
60 namespace XrdCms
61 {
63 
65 }
66 
67 using namespace XrdCms;
68 
69 /******************************************************************************/
70 /* S t a t i c M e m b e r s */
71 /******************************************************************************/
72 
73 XrdSysMutex XrdCmsManager::MTMutex;
74 XrdCmsNode *XrdCmsManager::MastTab[MTMax] = {0};
75 char XrdCmsManager::MastSID[MTMax] = {0};
76 int XrdCmsManager::MTHi = -1;
77 
78 /******************************************************************************/
79 /* L o c a l C l a s s e s */
80 /******************************************************************************/
81 
83 {
84 public:
85 
86  void DoIt() {nodeP->Delete(XrdCmsManager::MTMutex);
87  delete this;
88  }
89 
90  XrdCmsDelNode(XrdCmsNode *nP) : XrdJob("delete node"), nodeP(nP)
91  {Sched->Schedule((XrdJob *)this);}
92 
94 
96 };
97 
98 /******************************************************************************/
99 /* C o n s t r u c t o r */
100 /******************************************************************************/
101 
103 {
104  myMans = 0;
105  ManTree = 0;
106  curManCnt = 0;
107  curManList= mlP;
108  newManList= 0;
109  theSite = 0;
110  theHost = 0;
111  theSID = 0;
112  siteID = snum;
113  wasRedir = false;
114 }
115 
116 /******************************************************************************/
117 /* A d d */
118 /******************************************************************************/
119 
120 XrdCmsNode *XrdCmsManager::Add(XrdLink *lp, int Lvl, bool &xit)
121 {
122  EPNAME("Add")
123  XrdCmsNode *nP;
124  int i;
125 
126 // Check if there is a pending reconfiguration. If so, return no node but
127 // tell the caller to finish so we can proceed with the reconfiguration
128 //
129  MTMutex.Lock();
130  lp->setID("manager",0);
131  if (newManList) {MTMutex.UnLock(); xit = true; return 0;}
132  xit = false;
133 
134 // Find available ID for this node
135 //
136  for (i = 0; i < MTMax; i++) if (!MastTab[i]) break;
137 
138 // Check if we have too many here
139 //
140  if (i >= MTMax)
141  {MTMutex.UnLock();
142  Say.Emsg("Manager", "Login to", lp->Name(), "failed; too many managers");
143  return 0;
144  }
145 
146 // Obtain a new a new node object
147 //
148  if (!(nP = new XrdCmsNode(lp, 0, 0, 0, Lvl, i)))
149  {Say.Emsg("Manager", "Unable to obtain node object."); return 0;}
150 
151 // Assign new manager
152 //
153  MastTab[i] = nP;
154  MastSID[i] = siteID;
155  if (i > MTHi) MTHi = i;
156  nP->isOffline = 0;
157  nP->isNoStage = 0;
158  nP->isBad = 0;
159  nP->isBound = 1;
160  nP->isConn = 1;
161  nP->isMan = (Config.asManager() ? 1 : 0);
162  nP->setManager(this);
163  MTMutex.UnLock();
164 
165 // Document login
166 //
167  DEBUG(nP->Name() <<" to manager config; id=" <<i);
168  return nP;
169 }
170 
171 /******************************************************************************/
172 /* D e l e t e */
173 /******************************************************************************/
174 
176 {
177  new XrdCmsDelNode(nodeP);
178 }
179 
180 /******************************************************************************/
181 /* F i n i s h e d */
182 /******************************************************************************/
183 
184 void XrdCmsManager::Finished(const char *manP, int mPort)
185 {
186  XrdOucTList *mP;
187  char mbuff[16];
188 
189 // Indicate what we are disbanding
190 //
191  sprintf(mbuff, ":%d", mPort);
192  Say.Say("Config ", "Manager ", manP, mbuff, " unconfigured.");
193 
194 // Serialize
195 //
196  MTMutex.Lock();
197 
198 // If this is this is the last manager connection and we have a pending new
199 // list of managers, run those now. We waited so as to not overwhelm the system.
200 //
201  curManCnt--;
202  if (curManCnt > 0 || !newManList) {MTMutex.UnLock(); return;}
203 
204 // Remove all vestigial information
205 //
206  for (int i = 0; i <= MTHi; i++)
207  {if (MastSID[i] == siteID) {MastTab[i] = 0; MastSID[i] = 0;}}
208 
209 // Readjust the high water mark
210 //
211  while(MTHi >= 0 && !MastTab[MTHi]) MTHi--;
212 
213 // Delete the current manager list, it is safe to do so
214 //
215  while((mP = curManList)) {curManList = curManList->next; delete mP;}
216  curManList = newManList;
217  newManList = 0;
218 
219 // Run the new manager setup
220 //
221  Say.Say("Config ","Manager subsystem reconfiguration completed; restarting.");
222  Run(curManList);
223 
224 // All done
225 //
226  MTMutex.UnLock();
227 }
228 
229 /******************************************************************************/
230 /* I n f o r m */
231 /******************************************************************************/
232 
233 void XrdCmsManager::Inform(const char *What, const char *Data, int Dlen)
234 {
235  EPNAME("Inform");
236  XrdCmsNode *nP;
237  int i;
238 
239 // Obtain a lock on the table
240 //
241  MTMutex.Lock();
242 
243 // Run through the table looking for managers to send messages to
244 //
245  for (i = 0; i <= MTHi; i++)
246  {if ((nP=MastTab[i]) && !nP->isOffline)
247  {nP->Lock();
248  MTMutex.UnLock();
249  DEBUG(nP->Name() <<" " <<What);
250  nP->Send(Data, Dlen);
251  nP->UnLock();
252  MTMutex.Lock();
253  }
254  }
255  MTMutex.UnLock();
256 }
257 
258 /******************************************************************************/
259 
260 void XrdCmsManager::Inform(const char *What, struct iovec *vP, int vN, int vT)
261 {
262  EPNAME("Inform");
263  int i;
264  XrdCmsNode *nP;
265 
266 // Obtain a lock on the table
267 //
268  MTMutex.Lock();
269 
270 // Run through the table looking for managers to send messages to
271 //
272  for (i = 0; i <= MTHi; i++)
273  {if ((nP=MastTab[i]) && !nP->isOffline)
274  {nP->Lock();
275  MTMutex.UnLock();
276  DEBUG(nP->Name() <<" " <<What);
277  nP->Send(vP, vN, vT);
278  nP->UnLock();
279  MTMutex.Lock();
280  }
281  }
282  MTMutex.UnLock();
283 }
284 
285 /******************************************************************************/
286 
288  const char *Arg, int Alen)
289 {
290  CmsRRHdr Hdr = {0, (kXR_char)rCode, (kXR_char)rMod,
291  htons(static_cast<unsigned short>(Alen))};
292  struct iovec ioV[2] = {{(char *)&Hdr, sizeof(Hdr)},
293  {(char *)Arg, (size_t)Alen}};
294 
295  Inform(Router.getName((int)rCode), ioV, (Arg ? 2 : 1), Alen+sizeof(Hdr));
296 }
297 
298 /******************************************************************************/
299 
300 void XrdCmsManager::Inform(CmsRRHdr &Hdr, const char *Arg, int Alen)
301 {
302  struct iovec ioV[2] = {{(char *)&Hdr, sizeof(Hdr)},
303  {(char *)Arg, (size_t)Alen}};
304 
305  Hdr.datalen = htons(static_cast<unsigned short>(Alen));
306 
307  Inform(Router.getName(Hdr.rrCode), ioV, (Arg ? 2 : 1), Alen+sizeof(Hdr));
308 }
309 
310 /******************************************************************************/
311 /* R e m o v e */
312 /******************************************************************************/
313 
314 void XrdCmsManager::Remove(XrdCmsNode *nP, const char *reason)
315 {
316  EPNAME("Remove")
317  int sinst, sent = nP->ID(sinst);
318 
319 // Obtain a lock on the servtab
320 //
321  MTMutex.Lock();
322 
323 // Make sure this node is the right one
324 //
325  if (!(nP == MastTab[sent]))
326  {MTMutex.UnLock();
327  DEBUG("manager " <<sent <<'.' <<sinst <<" failed.");
328  return;
329  }
330 
331 // Remove node from the manager table
332 //
333  MastTab[sent] = 0;
334  MastSID[sent] = 0;
335  nP->isOffline = 1;
336  DEBUG("completed " <<nP->Name() <<" manager " <<sent <<'.' <<sinst);
337 
338 // Readjust MTHi
339 //
340  if (sent == MTHi) while(MTHi >= 0 && !MastTab[MTHi]) MTHi--;
341  MTMutex.UnLock();
342 
343 // Document removal
344 // .
345  if (reason) Say.Emsg("Manager", nP->Ident, "removed;", reason);
346 }
347 
348 /******************************************************************************/
349 /* R e r u n */
350 /******************************************************************************/
351 
352 void XrdCmsManager::Rerun(char *newMans)
353 {
354  static CmsDiscRequest discRequest = {{0, kYR_disc, 0, 0}};
355  XrdOucTList *tP;
356  const char *eText;
357  char *hP;
358  int newManCnt = 0;
359 
360 // Lock ourselves
361 //
362  MTMutex.Lock();
363  wasRedir = true;
364 
365 // If we already have a pending new sequence, then just return
366 //
367  if (newManList) {MTMutex.UnLock(); return;}
368 
369 // Indicate that we will be re-initialzing
370 //
371  Say.Say("Config ", "Manager subsystem reconfiguring using ", newMans);
372 
373 // Process the new man list
374 //
375  XrdNetAddr manAddr;
376  XrdOucTokenizer mList((char *)newMans);
377  hP = mList.GetLine();
378 
379 // Add each manager in the list. These have already been expanded and are
380 // gaurenteed to contain a port number as the list is provided by the cmsd.
381 // However, we will check for duplicates and ignore any overage.
382 //
383  while((hP = mList.GetToken()))
384  {if ((eText = manAddr.Set(hP)))
385  {Say.Emsg("Config","Ignoring manager", hP, eText); continue;}
386  tP = newManList;
387  while(tP && strcmp(hP, tP->text)) tP = tP->next;
388  if (tP) {Say.Emsg("Config","Ignoring duplicate manager", hP);
389  continue;
390  }
391  if (newManCnt >=MTMax)
392  {Say.Emsg("Config","Ignoring manager", hP,
393  "and remaining entries; limit exceeded!");
394  break;
395  }
396  newManList = new XrdOucTList(manAddr.Name(),manAddr.Port(),newManList);
397  newManCnt++;
398  }
399 
400 // If we have managers then tell the cluster builder to abort as we will
401 // be restarting this whole process (we don't want any hung nodes here).
402 //
403  if (newManCnt) ManTree->Abort();
404 
405 // Now run through the node table and doom all current site connections as we
406 // need to reinitialize the whole manager subsystem. Note that none of these
407 // objects can escape without us removing them from the table.
408 //
409  if (newManCnt)
410  {for (int i = 0; i <= MTHi; i++)
411  if (MastTab[i] && (MastSID[i] == siteID))
412  {MastTab[i]->isBad |= XrdCmsNode::isBlisted|XrdCmsNode::isDoomed;
413  MastTab[i]->Send((char *)&discRequest, sizeof(discRequest));
414  }
415  }
416 
417 // We are done
418 //
419  MTMutex.UnLock();
420 }
421 
422 /******************************************************************************/
423 /* R e s e t */
424 /******************************************************************************/
425 
427 {
428  EPNAME("Reset");
429  static CmsStatusRequest myState = {{0, kYR_status,
431  static const int szReqst = sizeof(CmsStatusRequest);
432  XrdCmsNode *nP;
433  int i;
434 
435 // Obtain a lock on the table
436 //
437  MTMutex.Lock();
438 
439 // Run through the table looking for managers to send a reset request
440 //
441  for (i = 0; i <= MTHi; i++)
442  {if ((nP=MastTab[i]) && !nP->isOffline && nP->isKnown)
443  {nP->Lock();
444  nP->isKnown = 0;
445  MTMutex.UnLock();
446  DEBUG("sent to " <<nP->Name());
447  nP->Send((char *)&myState, szReqst);
448  nP->UnLock();
449  MTMutex.Lock();
450  }
451  }
452  MTMutex.UnLock();
453 }
454 
455 /******************************************************************************/
456 /* Private: R u n */
457 /******************************************************************************/
458 
459 int XrdCmsManager::Run(XrdOucTList *manL)
460 {
461  XrdOucTList *tP = manL;
462  XrdJob *jP, *jFirst = 0, *jLast = 0;
463 
464 // This method is either called during initial start-up or if we were wholly
465 // redirected elsewhere due to a blacklist. In the latter case, the caller
466 // must have obtained all the required locks
467 //
468  curManCnt = 0;
469  if (!manL) return 0;
470 
471 // Prime the manager subsystem. We check here to make sure we will not be
472 // tying to connect to ourselves. This is possible if the manager and meta-
473 // manager were defined to be the same and we are a manager. We would have
474 // liked to screen this out earlier but port discovery prevents it.
475 //
476  while(tP)
477  {if (strcmp(tP->text, Config.myName) || tP->val != Config.PortTCP)
478  {jP = (XrdJob *)XrdCmsProtocol::Alloc(Config.myRole, this,
479  tP->text, tP->val);
480  if (!jFirst) jFirst = jLast = jP;
481  else {jLast->NextJob = jP; jLast = jP;}
482  curManCnt++;
483  } else {
484  char buff[512];
485  sprintf(buff, "%s:%d", tP->text, tP->val);
486  Say.Emsg("Config", "Circular connection to", buff, "ignored.");
487  }
488  tP = tP->next;
489  }
490 
491 // Make sure we have something to start up
492 //
493  if (!curManCnt)
494  {Say.Emsg("Config","No managers can be started; we are now unreachable!");
495  return 0;
496  }
497 
498 // We now know there is no pandering going on, so we need to initialize the
499 // the tree management subsystem to get it into a fresh state.
500 //
501  if (myMans) delete myMans;
502  myMans = new XrdCmsManList;
503  if (ManTree) delete ManTree;
504  ManTree = new XrdCmsManTree(curManCnt);
505  if (theSID) {free(theSID); theSID = 0;}
506  if (theSite) {free(theSite); theSite = 0;}
507 
508 // Now start up all of the threads
509 //
510  if (jFirst == jLast) Sched->Schedule(jFirst);
511  else Sched->Schedule(curManCnt, jFirst, jLast);
512 
513 // All done
514 //
515  return curManCnt;
516 }
517 
518 /******************************************************************************/
519 /* S t a r t */
520 /******************************************************************************/
521 
523 {
524  XrdOucTList *manVec[MTMax] = {0};
525  XrdCmsManager *manP;
526  char buff[1024];
527  int n, sid, snum = 0, mtot = 0, mnum = 0, xnum = 0;
528 
529 // If there is no manager list then we must not be connecting to anyone
530 //
531  if (!mL) return true;
532 
533 // Segregate the manager list by site and run them that way. Unfortunately,
534 // that means we have to copy the TList. This ok as this happens once.
535 //
536  while(mL)
537  {sid = mL->ival[1]; mtot++;
538  if (sid >= MTMax)
539  {sprintf(buff, "%d", sid);
540  Say.Say("Config ", "Invalid site ID ", buff, " for ", mL->text);
541  } else {
542  manVec[sid] = new XrdOucTList(mL->text, mL->val, manVec[sid]);
543  mnum++;
544  }
545  mL = mL->next;
546  }
547 
548 // Count how many sites we have
549 //
550  for (n = 0; n < MTMax; n++) if (manVec[n]) snum++;
551 
552 // Indicate what we are about to do
553 //
554  snprintf(buff, sizeof(buff),"%d manager%s and %d site%s.", mnum,
555  (mnum != 1 ? "s":""), snum, (snum != 1 ? "s":""));
556  Say.Say("Config Connecting to ", buff);
557 
558 // Now run each one
559 //
560  for (n = 0; n < MTMax; n++)
561  {if (manVec[n])
562  {manP = new XrdCmsManager(manVec[n], n);
563  xnum += manP->Run(manVec[n]);
564  }
565  }
566 
567 // Check if we should issue a warning
568 //
569  if (xnum < mtot)
570  {snprintf(buff, sizeof(buff), "%d of %d", xnum, mtot);
571  Say.Say("Config Warning! Only ", buff, " manager(s) will be contacted!");
572  }
573 
574 // All done
575 //
576  return xnum == mtot;
577 }
578 
579 /******************************************************************************/
580 /* V e r i f y */
581 /******************************************************************************/
582 
583 bool XrdCmsManager::Verify(XrdLink *lP, const char *sid, const char *sname)
584 {
585  XrdSysMutexHelper hMutex(MTMutex);
586  const char *sidP;
587 
588 // Trim off the type of service in the sid
589 //
590  if ((sidP = index(sid, ' '))) sidP++;
591  else sidP = sid;
592 
593 // If we have no sid, just record it
594 //
595  if (!theSID)
596  {theSID = strdup(sidP);
597  if (theSite) free(theSite);
598  theHost = strdup(lP->Host());
599  theSite = (sname ? strdup(sname) : strdup("anonymous"));
600  return true;
601  }
602 
603 // Make sure we are connecting to the same cluster as before
604 //
605  if (!strcmp(theSID, sidP)) return true;
606 
607 // Compute the offending site configuration
608 //
609  char mBuff[1024];
610  snprintf(mBuff,sizeof(mBuff),"%s for site %s; "
611  "making file location unpredictable!", theHost,
612  (wasRedir ? theSite : XrdCmsUtils::SiteName(siteID)));
613 
614 // There seems to be a configuration error here
615 //
616  Say.Emsg("Manager", lP->Host(), "manager configuration differs from", mBuff);
617  return false;
618 }
unsigned char kXR_char
Definition: XPtypes.hh:65
#define DEBUG(x)
Definition: XrdBwmTrace.hh:54
#define EPNAME(x)
Definition: XrdBwmTrace.hh:56
const char * myName
XrdCmsDelNode(XrdCmsNode *nP)
XrdCmsNode * nodeP
static void Inform(const char *What, const char *Data, int Dlen)
XrdCmsNode * Add(XrdLink *lp, int Lvl, bool &xit)
void Rerun(char *newMans)
void Finished(const char *manP, int mPort)
static void Reset()
static bool Start(const XrdOucTList *mL)
void Delete(XrdCmsNode *nodeP)
XrdCmsManager(XrdOucTList *mlP, int snum)
void Remove(XrdCmsNode *nP, const char *reason=0)
bool Verify(XrdLink *lP, const char *sid, const char *sname)
void setManager(XrdCmsManager *mP)
Definition: XrdCmsNode.hh:189
char * Ident
Definition: XrdCmsNode.hh:61
char isKnown
Definition: XrdCmsNode.hh:70
char isConn
Definition: XrdCmsNode.hh:71
int Send(const char *buff, int blen=0)
Definition: XrdCmsNode.hh:184
void Lock()
Definition: XrdCmsNode.hh:175
char isBad
Definition: XrdCmsNode.hh:63
char isOffline
Definition: XrdCmsNode.hh:64
int ID(int &INum)
Definition: XrdCmsNode.hh:139
char isNoStage
Definition: XrdCmsNode.hh:66
char isMan
Definition: XrdCmsNode.hh:67
void UnLock()
Definition: XrdCmsNode.hh:177
char isBound
Definition: XrdCmsNode.hh:69
static const char isDoomed
Definition: XrdCmsNode.hh:82
static const char isBlisted
Definition: XrdCmsNode.hh:79
char * Name()
Definition: XrdCmsNode.hh:158
static XrdCmsProtocol * Alloc(const char *theRole="", XrdCmsManager *mP=0, const char *theMan=0, int thePort=0)
const char * getName(int Code)
static const char * SiteName(int snum)
Definition: XrdCmsUtils.cc:310
Definition: XrdJob.hh:43
XrdJob * NextJob
Definition: XrdJob.hh:46
const char * Name(const char *eName=0, const char **eText=0)
int Port(int pNum=-1)
Definition: XrdNetAddr.cc:156
const char * Set(const char *hSpec, int pNum=PortInSpec)
Definition: XrdNetAddr.cc:216
XrdOucTList * next
Definition: XrdOucTList.hh:45
char * text
Definition: XrdOucTList.hh:46
char * GetToken(char **rest=0, int lowcase=0)
void Schedule(XrdJob *jp)
int Emsg(const char *esfx, int ecode, const char *text1, const char *text2=0)
Definition: XrdSysError.cc:95
void Say(const char *text1, const char *text2=0, const char *txt3=0, const char *text4=0, const char *text5=0, const char *txt6=0)
Definition: XrdSysError.cc:141
XrdCmsRouter Router
kXR_unt16 datalen
Definition: YProtocol.hh:86
XrdScheduler * Sched
XrdSysError Say
kXR_char rrCode
Definition: YProtocol.hh:84
XrdSysTrace Trace("cms")
XrdCmsConfig Config
CmsReqCode
Definition: YProtocol.hh:90
@ kYR_disc
Definition: YProtocol.hh:103
@ kYR_status
Definition: YProtocol.hh:112
XrdOucSid * sidP
Definition: XrdPss.cc:107