XRootD
Loading...
Searching...
No Matches
XrdCmsCluster.cc
Go to the documentation of this file.
1/******************************************************************************/
2/* */
3/* X r d C m s C l u s t e r . c c */
4/* */
5/* (c) 2007 by the Board of Trustees of the Leland Stanford, Jr., University */
6/* All Rights Reserved */
7/* Produced by Andrew Hanushevsky for Stanford University under contract */
8/* DE-AC02-76-SFO0515 with the Department of Energy */
9/* */
10/* This file is part of the XRootD software suite. */
11/* */
12/* XRootD is free software: you can redistribute it and/or modify it under */
13/* the terms of the GNU Lesser General Public License as published by the */
14/* Free Software Foundation, either version 3 of the License, or (at your */
15/* option) any later version. */
16/* */
17/* XRootD is distributed in the hope that it will be useful, but WITHOUT */
18/* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
19/* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
20/* License for more details. */
21/* */
22/* You should have received a copy of the GNU Lesser General Public License */
23/* along with XRootD in a file called COPYING.LESSER (LGPL license) and file */
24/* COPYING (GPL license). If not, see <http://www.gnu.org/licenses/>. */
25/* */
26/* The copyright holder's institutional names and contributor's names may not */
27/* be used to endorse or promote products derived from this software without */
28/* specific prior written permission of the institution or contributor. */
29/******************************************************************************/
30
31#include <cerrno>
32#include <fcntl.h>
33#include <cstdio>
34#include <cstdlib>
35#include <random>
36#include <unistd.h>
37#include <netinet/in.h>
38#include <sys/types.h>
39
41
42#include "Xrd/XrdJob.hh"
43#include "Xrd/XrdLink.hh"
44#include "Xrd/XrdScheduler.hh"
45
48#include "XrdCms/XrdCmsCache.hh"
52#include "XrdCms/XrdCmsNode.hh"
53#include "XrdCms/XrdCmsRole.hh"
54#include "XrdCms/XrdCmsRRQ.hh"
55#include "XrdCms/XrdCmsState.hh"
57#include "XrdCms/XrdCmsTrace.hh"
58#include "XrdCms/XrdCmsTypes.hh"
59
60#include "XrdOuc/XrdOucPup.hh"
61
64#include "XrdSys/XrdSysTimer.hh"
65
66using namespace XrdCms;
67
68/******************************************************************************/
69/* G l o b a l O b j e c t s */
70/******************************************************************************/
71
72 XrdCmsCluster XrdCms::Cluster;
73
74/******************************************************************************/
75/* L o c a l S t r u c t u r e s */
76/******************************************************************************/
77
79{
80public:
81
82 void DoIt() {if (nodeP)
83 {nodeP->Delete(Cluster.STMutex);
84 delete this;
85 } else {
86 if (!Cluster.Drop(nodeEnt, nodeInst, this)) delete this;
87 }
88 }
89
90 XrdCmsDrop(XrdCmsNode *nP) : XrdJob("delete node"), nodeP(nP),
91 nodeEnt(0), nodeInst(0)
92 {Sched->Schedule((XrdJob *)this);}
93
94 XrdCmsDrop(int nid, int inst) : XrdJob("drop node"), nodeP(0),
95 nodeEnt(nid), nodeInst(inst)
96 {Sched->Schedule((XrdJob *)this, time(0)+Config.DRPDelay);}
97
99
103};
104
105/******************************************************************************/
106/* C o n s t r u c t o r */
107/******************************************************************************/
108
110{
111 memset((void *)NodeTab, 0, sizeof(NodeTab));
112 memset((void *)AltMans, (int)' ', sizeof(AltMans));
113 AltMend = AltMans;
114 AltMent = -1;
115 NodeCnt = 0;
116 STHi = -1;
117 SelWtot = 0;
118 SelRtot = 0;
119 SelTcnt = 0;
120 peerHost = 0;
121 peerMask = ~peerHost;
122}
123
124/******************************************************************************/
125/* A d d */
126/******************************************************************************/
127
// Add: admit the node connecting over link lp into the node table.
//
// lp      the link the node is connecting on (lp->ID is used in messages).
// port    passed to isNode(), setName() and the XrdCmsNode constructor.
// Status  bit flags; this routine tests CMS_isMan, CMS_isPeer, CMS_isSuper,
//         CMS_noStage and CMS_Suspend.
// sport   passed to setAltMan()/AddAlt() and recorded in the node's subsPort.
// theNID  passed to isNode(), the cluster ID lookup and the node constructor.
// theIF   passed to setName(), AddAlt() and the node constructor.
//
// Returns the admitted node with its node lock held, or 0 when the node is
// already logged in, could not be added as an alternate, could not be attached
// to its cluster ID, or no table slot could be found for it.
//
// NOTE(review): this listing carries an original line number at the start of
// every line and skips numbers 235 and 264, so at least two statements of the
// real function are not shown here.
//
128XrdCmsNode *XrdCmsCluster::Add(XrdLink *lp, int port, int Status, int sport,
129 const char *theNID, const char *theIF)
130
131{
132 EPNAME("Add")
133 const char *act = "";
134 XrdCmsNode *nP = 0;
135 XrdCmsClustID *cidP = 0;
// NOTE(review): presumably the 'false' argument selects write mode and the
// helper releases STMutex when it goes out of scope (the early returns below
// never unlock explicitly) -- confirm against XrdSysRWLockHelper.
136 XrdSysRWLockHelper STMHelper(STMutex, false); // Need write lock!
137 int tmp, Slot, Free = -1, Bump1 = -1, Bump2 = -1, Bump3 = -1, aSet = 0;
// Special: the incoming node is a manager or a peer.
// SpecAlt: it is a manager or peer that is not flagged CMS_isSuper; only such
//          nodes are considered for alternate placement via AddAlt().
138 bool Special = (Status & (CMS_isMan|CMS_isPeer));
139 bool SpecAlt = (Special && !(Status & CMS_isSuper));
140 bool Hidden = false;
141
142// Find available slot for this node. Here are the priorities:
143// Slot = Reconnecting node
144// Free = Available slot ( 1st in table)
145// Bump1 = Disconnected server (last in table)
146// Bump2 = Connected server (last in table) if new one is managr/peer
147// Bump3 = Disconnected managr/peer ( 1st in table) if new one is managr/peer
148//
// The loop stops early (Slot < STMax) only when an entry matching this node
// is found; otherwise Slot ends up equal to STMax.
149 for (Slot = 0; Slot < STMax; Slot++)
150 if (NodeTab[Slot])
151 {if (NodeTab[Slot]->isNode(lp, theNID, port)) break;
152/*Conn*/ if (NodeTab[Slot]->isConn)
153 {if (!NodeTab[Slot]->isPerm && Special)
154 Bump2 = Slot; // Last conn Server
155/*Disc*/ } else {
156 if ( NodeTab[Slot]->isPerm)
157 {if (Bump3 < 0 && Special) Bump3 = Slot;}// 1st disc Man/Pr
158 else Bump1 = Slot; // Last disc Server
159 }
160 } else if (Free < 0) Free = Slot; // 1st free slot
161
162// Check if node is already logged in or is a relogin
163//
164 if (Slot < STMax)
165 {if (NodeTab[Slot] && NodeTab[Slot]->isBound)
166 {Say.Emsg("Cluster", lp->ID, "already logged in.");
167 return 0;
168 } else { // Rehook node to previous unconnected entry
169 nP = NodeTab[Slot];
170 nP->Link = lp;
171 nP->isOffline = 0;
172 nP->isBad &= ~XrdCmsNode::isSuspend;
173 nP->isConn = 1;
// Each reconnect gets a new instance number for the same slot.
174 nP->Instance++;
175 nP->setName(lp, theIF, port); // Just in case it changed
176 act = "Reconnect ";
177 }
178 }
179
180// First see if this node may be an alternate
181//
// Only tried when no existing entry was re-hooked and the cluster ID already
// exists and is not empty. A failure in AddAlt() rejects the login.
182 if (!nP && SpecAlt)
183 {if ((cidP = XrdCmsClustID::Find(theNID)) && !(cidP->IsEmpty()))
184 {if (!(nP = AddAlt(cidP, lp, port, Status, sport, theNID, theIF)))
185 return 0;
186 aSet = 1; Slot = nP->NodeID;
// If AddAlt() did not install the new node in the table slot, it is a hidden
// alternate and is not counted below.
187 if (nP != NodeTab[Slot]) {Hidden = true; act = "Alternate ";}
188 }
189 }
190
191// Reuse an old ID if we must or redirect the incoming node
192//
193 if (!nP)
194 {if (Free >= 0) Slot = Free;
195 else {if (Bump1 >= 0) Slot = Bump1;
196 else Slot = (Bump2 >= 0 ? Bump2 : Bump3);
// No free slot and nobody to bump: a peer only gets an error message, any
// other node is sent the alternate list via sendAList(). Either way the
// login is refused.
197 if (Slot < 0)
198 {if (Status & CMS_isPeer) Say.Emsg("Cluster", "Add peer", lp->ID,
199 "failed; too many subscribers.");
200 else {sendAList(lp);
201 DEBUG(lp->ID <<" redirected; too many subscribers.");
202 }
203 return 0;
204 }
205
206 if (Status & CMS_isMan) {setAltMan(Slot, lp, sport); aSet=1;}
207 if (NodeTab[Slot] && !(Status & CMS_isPeer))
208 sendAList(NodeTab[Slot]->Link);
209
// The bump candidates were only recorded for occupied slots, so NodeTab[Slot]
// is dereferenced here without a further null check. Remove() is called with
// immed < 0 and the victim's node lock held.
210 DEBUG(lp->ID << " bumps " << NodeTab[Slot]->Ident <<" #" <<Slot);
211 NodeTab[Slot]->Lock();
212 Remove("redirected", NodeTab[Slot], -1);
213 act = "Shoved ";
214 }
// Create the node in the chosen slot and attach it to its cluster ID; if the
// cluster ID refuses it, undo the table entry and reject the login.
215 NodeTab[Slot] = nP = new XrdCmsNode(lp, theIF, theNID, port, 0, Slot);
216 if (!cidP) cidP = XrdCmsClustID::AddID(theNID);
217 if ((cidP->AddNode(nP, SpecAlt))) nP->cidP = cidP;
218 else {delete nP; NodeTab[Slot] = 0; return 0;} // OK to do delete!
219 }
220
221// Indicate whether this node can be redirected
222//
// isPerm is set for managers and peers; the slot search above never picks a
// connected isPerm node as a bump candidate.
223 nP->isPerm = (Status & (CMS_isMan | CMS_isPeer)) ? 1 : 0;
224
225// Assign new server
226//
227 if (!aSet && (Status & CMS_isSuper)) setAltMan(Slot, lp, sport);
228 if (Slot > STHi) STHi = Slot;
229 nP->isBound = 1;
230 nP->isConn = 1;
231 nP->isNoStage = 0 != (Status & CMS_noStage);
232 nP->isBad |= (Status & CMS_Suspend ? XrdCmsNode::isSuspend : 0);
233 nP->isMan = 0 != (Status & CMS_isMan);
234 nP->isPeer = 0 != (Status & CMS_isPeer);
// NOTE(review): original line 235 is not present in this listing.
236 nP->subsPort = sport;
237
238// If this is an actual non-hidden node, count it
239//
// When a supervisor level percentage is configured, the supervisor count is
// only ever raised here, never lowered. Hidden alternates are not counted and
// get bit 0x02 set in isMan instead.
240 if (!Hidden)
241 {NodeCnt++;
242 if (Config.SUPLevel
243 && (tmp = NodeCnt*Config.SUPLevel/100) > Config.SUPCount)
244 {Config.SUPCount=tmp; CmsState.Set(tmp);}
245 } else nP->isMan |= 0x02;
246
247// Compute new peer mask, as needed
248//
// peerMask is always kept as the complement of peerHost.
249 if (nP->isPeer) peerHost |= nP->NodeMask;
250 else peerHost &= ~nP->NodeMask;
251 peerMask = ~peerHost;
252
253// Document login
254//
255 if (QTRACE(Debug))
256 {DEBUG(act <<nP->Ident <<" to cluster " <<nP->myNID <<" slot "
257 <<Slot <<'.' <<nP->Instance <<" (nodecnt=" <<NodeCnt
258 <<" supn=" <<Config.SUPCount <<")");
259 }
260
261// Compute new state of all nodes if we are a reporting manager.
262//
// NOTE(review): original line 264 (the call these two arguments belong to) is
// not present in this listing. The visible arguments are 1 when the node is
// not suspended / allows staging and 0 otherwise.
263 if (Config.asManager() && !Hidden)
265 nP->isBad & XrdCmsNode::isSuspend ? 0 : 1,
266 nP->isNoStage ? 0 : 1);
267
268// All done. Return the node locked.
269//
270 nP->Lock();
271 return nP;
272}
273
274/******************************************************************************/
275/* Private: A d d A l t */
276/******************************************************************************/
277
278// Warning STMutex must be held in write mode by the caller!
279
280XrdCmsNode *XrdCmsCluster::AddAlt(XrdCmsClustID *cidP, XrdLink *lp,
281 int port, int Status, int sport,
282 const char *theNID, const char *theIF)
283
284{
285 EPNAME("AddAlt")
286 XrdCmsNode *pP, *nP = 0;
287 int slot = cidP->Slot();
288
289// Check if this node is already in the alternate table
290//
291 if (cidP->Exists(lp, theNID, port))
292 {Say.Emsg(epname, lp->ID, "already logged in.");
293 return 0;
294 }
295
296// Add this node if there is room
297//
298 if (cidP->Avail())
299 {nP = new XrdCmsNode(lp, theIF, theNID, port, 0, slot);
300 if (!(cidP->AddNode(nP, true))) {delete nP; nP = 0;} // OK to do delete!
301 }
302
303// Check if we were actually able to add this node
304//
305 if (!nP)
306 {Say.Emsg(epname, "Add alternate manager", lp->ID,
307 "failed; too many subscribers.");
308 return 0;
309 }
310
311// Check if the existing lead dead and we can substiture this one
312//
313 if ((pP = NodeTab[slot]) && !(pP->isBound))
314 {setAltMan(nP->NodeID, nP->Link, sport);
315 Say.Emsg("AddAlt", nP->Ident, "replacing dropped", pP->Ident);
316 NodeTab[slot] = nP;
317 pP->DropJob = new XrdCmsDrop(pP); // Schedule deletion
318 }
319
320// Hook the node to the cluster table and return
321//
322 nP->cidP = cidP;
323 return nP;
324}
325
326/******************************************************************************/
327/* B l a c k L i s t */
328/******************************************************************************/
329
331{
332 static CmsDiscRequest discRequest = {{0, kYR_disc, 0, 0}};
333 XrdCmsNode *nP;
334 const char *etxt = "blacklisted.";
335 int i, blRD = 0;
336 bool inBL;
337
338// Obtain a lock on the table. We need this in write mode!
339//
340 STMutex.WriteLock();
341
342// Run through the table looking for nodes to put into or take out of the blacklist
343//
344 for (i = 0; i <= STHi; i++)
345 {if ((nP = NodeTab[i]))
346 {inBL = (blP && (blRD = XrdCmsBlackList::Present(nP->Name(), blP)));
347 if ((!inBL && !(nP->isBad & XrdCmsNode::isBlisted))
348 || ( inBL && (nP->isBad & XrdCmsNode::isBlisted))) continue;
349 nP->g2nLock(STMutex); // Downgrade to only node lock
350 if (inBL)
352 if (blRD < -1)
353 {if (kYR_Version > nP->myVersion)
354 etxt = "blacklisted; redirect unsupported.";
355 else etxt = "blacklisted with redirect.";
357 nP->Send((char *)&discRequest, sizeof(discRequest));
358 }
359 Say.Emsg("Manager", nP->Name(), etxt);
360 } else {
362 Say.Emsg("Manager", nP->Name(), "removed from blacklist.");
363 }
364 nP->n2gLock(STMutex);
365 }
366 }
367 STMutex.UnLock();
368}
369
370/******************************************************************************/
371/* B r o a d c a s t */
372/******************************************************************************/
373
374SMask_t XrdCmsCluster::Broadcast(SMask_t smask, const struct iovec *iod,
375 int iovcnt, int iotot)
376{
377 EPNAME("Broadcast")
378 int i;
379 XrdCmsNode *nP;
380 SMask_t bmask, unQueried(0);
381
382// Obtain a lock on the table and screen out peer nodes
383//
384 STMutex.ReadLock(); // Sufficient to prevent modifications
385 bmask = smask & peerMask;
386
387// Run through the table looking for nodes to send messages to. We don't need
388// the node lock for this but we do need to up the reference count to keep the
389// node pointer valid for the duration of the send() (may or may not block).
390//
391 for (i = 0; i <= STHi; i++)
392 {if ((nP = NodeTab[i]) && nP->isNode(bmask))
393 {if (nP->isOffline) unQueried |= nP->Mask();
394 else {nP->Ref();
395 STMutex.UnLock();
396 if (nP->Send(iod, iovcnt, iotot) < 0)
397 {unQueried |= nP->Mask();
398 DEBUG(nP->Ident <<" is unreachable");
399 }
400 nP->unRef();
401 STMutex.ReadLock();
402 }
403 }
404 }
405 STMutex.UnLock();
406 return unQueried;
407}
408
409/******************************************************************************/
410
412 char *Data, int Dlen)
413{
414 struct iovec ioV[3], *iovP = &ioV[1];
415 unsigned short Temp;
416 int Blen;
417
418// Construct packed data for the character argument. If data is a string then
419// Dlen must include the null byte if it is specified at all.
420//
421 Blen = XrdOucPup::Pack(&iovP, Data, Temp, (Dlen ? strlen(Data)+1 : Dlen));
422 Hdr.datalen = htons(static_cast<unsigned short>(Blen));
423
424// Complete the iovec and send off the data
425//
426 ioV[0].iov_base = (char *)&Hdr; ioV[0].iov_len = sizeof(Hdr);
427 return Broadcast(smask, ioV, 3, Blen+sizeof(Hdr));
428}
429
430/******************************************************************************/
431
433 void *Data, int Dlen)
434{
435 struct iovec ioV[2] = {{(char *)&Hdr, sizeof(Hdr)},
436 {(char *)Data, (size_t)Dlen}};
437
438// Send off the data as everything was constructed properly
439//
440 Hdr.datalen = htons(static_cast<unsigned short>(Dlen));
441 return Broadcast(smask, ioV, 2, Dlen+sizeof(Hdr));
442}
443
444/******************************************************************************/
445/* B r o a d s e n d */
446/******************************************************************************/
447
448// Send message to first eligible node!
449
451 void *Data, int Dlen)
452{
453 EPNAME("Broadsend");
454 static int Start = 0;
455 XrdCmsNode *nP;
456 struct iovec ioV[2] = {{(char *)&Hdr, sizeof(Hdr)},
457 {(char *)Data, (size_t)Dlen}};
458 int i, Beg, Fin, ioTot = Dlen+sizeof(Hdr);
459
460// Send off the data as everything was constructed properly
461//
462 Hdr.datalen = htons(static_cast<unsigned short>(Dlen));
463
464// Obtain a lock on the table and get the starting and ending position. Note
465// that the mechanism we use will necessarily skip newly added nodes.
466//
467 STMutex.ReadLock(); // Sufficient to prevent modifications
468 Beg = Start = (Start <= STHi ? Start+1 : 0);
469 Fin = STHi;
470
471// Run through the table looking for a node to send a message to. We don't need
472// the node lock for this but we do need to up the reference count to keep the
473// node pointer valid for the duration of the send() (may or may not block).
474//
475do{for (i = Beg; i <= Fin; i++)
476 {if ((nP = NodeTab[i]) && nP->isNode(Who))
477 {if (nP->isOffline) continue;
478 nP->Ref();
479 STMutex.UnLock();
480 if (nP->Send(ioV, 2, ioTot) >= 0) {nP->unRef(); return 1;}
481 DEBUG(nP->Ident <<" is unreachable");
482 nP->unRef();
483 STMutex.ReadLock();
484 }
485 }
486 if (!Beg) break;
487 Fin = Beg-1; Beg = 0;
488 } while(1);
489
490// Did not send to anyone
491//
492 STMutex.UnLock();
493 return 0;
494}
495
496/******************************************************************************/
497/* g e t M a s k */
498/******************************************************************************/
499
501{
502 int i;
503 XrdCmsNode *nP;
504 SMask_t smask(0);
505
506// Obtain a lock on the table
507//
508 STMutex.ReadLock();
509
510// Run through the table looking for a node with matching IP address
511//
512 for (i = 0; i <= STHi; i++)
513 if ((nP = NodeTab[i]) && nP->isNode(addr))
514 {smask = nP->NodeMask; break;}
515
516// All done
517//
518 STMutex.UnLock();
519 return smask;
520}
521
522/******************************************************************************/
523
525{
526 return XrdCmsClustID::Mask(Cid);
527}
528
529/******************************************************************************/
530/* L i s t */
531/******************************************************************************/
532
534{
535 static const int iSize = XrdCmsSelected::IdentSize;
536 XrdCmsNode *nP;
537 XrdCmsSelected *sipp = 0, *sip;
539 XrdNetIF::ifType ifGet = ifType;
540 int i, destLen;
541 bool retName = (opts & LS_IDNT) != 0;
542 bool retAny = (opts & LS_ANY ) != 0;
543 bool retDest = retName || (opts & LS_IPO);
544
545// If only one is wanted, then select appropriately
546//
547 oksel = false;
548 STMutex.ReadLock();
549 for (i = 0; i <= STHi; i++)
550 if ((nP=NodeTab[i]) && (nP->NodeMask & mask))
551 {oksel = true;
552 if (retDest)
553 { if (nP->netIF.HasDest(ifType)) ifGet = ifType;
554 else if (!retAny) continue;
555 else {ifGet = (XrdNetIF::ifType)(ifType ^ XrdNetIF::PrivateIF);
556 if (!nP->netIF.HasDest(ifGet)) continue;
557 }
558 }
559 sip = new XrdCmsSelected(sipp);
560 if (retDest) destLen = nP->netIF.GetDest(sip->Ident, iSize,
561 ifGet, retName);
562 else if (nP->myNlen >= XrdCmsSelected::IdentSize) destLen = 0;
563 else {strcpy(sip->Ident, nP->myName); destLen = nP->myNlen;}
564 if (!destLen) {delete sip; continue;}
565
566 sip->IdentLen = destLen;
567 sip->Mask = nP->NodeMask;
568 sip->Id = nP->NodeID;
569 sip->Port = nP->netIF.Port();
570 sip->RefTotW = nP->RefTotW;
571 sip->RefTotR = nP->RefTotR;
572 sip->Shrin = nP->Shrin;
573 sip->Share = nP->Share;
574 sip->RoleID = nP->RoleID;
575 sip->Status = (nP->isOffline ? XrdCmsSelected::Offline : 0);
577 sip->Status |= XrdCmsSelected::Disable;
578 if (nP->isNoStage) sip->Status |= XrdCmsSelected::NoStage;
580 sip->Status |= XrdCmsSelected::Suspend;
581 if (nP->isRW ) sip->Status |= XrdCmsSelected::isRW;
582 if (nP->isMan ) sip->Status |= XrdCmsSelected::isMangr;
583 sipp = sip;
584 }
585 STMutex.UnLock();
586
587// Return result
588//
589 return sipp;
590}
591
592/******************************************************************************/
593/* L o c a t e */
594/******************************************************************************/
595
597{
598 EPNAME("Locate");
599 XrdCmsPInfo pinfo;
600 SMask_t qfVec(0);
601 char *Path;
602 int retc = 0;
603
604// Check if this is a locate for all current servers
605//
606 if (*Sel.Path.Val != '*') Path = Sel.Path.Val;
607 else {if (*(Sel.Path.Val+1) == '\0')
608 {Sel.Vec.hf = ~0LL; Sel.Vec.pf = Sel.Vec.wf = 0;
609 return 0;
610 }
611 Path = Sel.Path.Val+1;
612 }
613
614// Find out who serves this path
615//
616 if (!Cache.Paths.Find(Path, pinfo) || !pinfo.rovec)
617 {Sel.Vec.hf = Sel.Vec.pf = Sel.Vec.wf = 0;
618 return NotFound;
619 } else Sel.Vec.wf = pinfo.rwvec;
620
621// Check if this was a non-lookup request
622//
623 if (*Sel.Path.Val == '*')
624 {Sel.Vec.hf = pinfo.rovec; Sel.Vec.pf = 0;
625 Sel.Vec.wf = pinfo.rwvec;
626 return 0;
627 }
628
629// Complete the request info object if we have one
630//
631 if (Sel.InfoP)
632 {Sel.InfoP->rwVec = pinfo.rwvec;
633 Sel.InfoP->isLU = 1;
634 }
635
636// If we are running a shared file system, perform an optional restricted
637// pre-selection and then do a standard selection.
638//
639 if (baseFS.isDFS())
640 {SMask_t amask, smask, pmask;
641 amask = pmask = pinfo.rovec;
642 smask = (Sel.Opts & XrdCmsSelect::Online ? 0 : pinfo.ssvec & amask);
643 Sel.Resp.DLen = 0;
644 if (!(retc = SelDFS(Sel, amask, pmask, smask, 1)))
645 return (Sel.Opts & XrdCmsSelect::Asap && Sel.InfoP
646 ? Cache.WT4File(Sel,Sel.Vec.hf) : Config.LUPDelay);
647 if (retc < 0) return NotFound;
648 return 0;
649 }
650
651// First check if we have seen this file before. If so, get nodes that have it.
652// A Refresh request kills this because it's as if we hadn't seen it before.
653// If the file was found but either a query is in progress or we have a server
654// bounce; the client must wait.
655//
656 if (Sel.Opts & XrdCmsSelect::Refresh
657 || !(retc = Cache.GetFile(Sel, pinfo.rovec)))
658 {Cache.AddFile(Sel, 0);
659 qfVec = pinfo.rovec; Sel.Vec.hf = 0;
660 } else qfVec = Sel.Vec.bf;
661
662// Compute the delay, if any
663//
664 if ((!qfVec && retc >= 0) || (Sel.Vec.hf && Sel.InfoP)) retc = 0;
665 else if (!(retc = Cache.WT4File(Sel, Sel.Vec.hf))) retc = Wait4CBk;
666
667// Check if we have to ask any nodes if they have the file
668//
669 if (qfVec)
670 {CmsStateRequest QReq = {{Sel.Path.Hash, kYR_state, kYR_raw, 0}};
671 if (Sel.Opts & XrdCmsSelect::Refresh)
673 TRACE(Files, "seeking " <<Sel.Path.Val);
674 qfVec = Cluster.Broadcast(qfVec, QReq.Hdr,
675 (void *)Sel.Path.Val, Sel.Path.Len+1);
676 if (qfVec) Cache.UnkFile(Sel, qfVec);
677 }
678 return retc;
679}
680
681/******************************************************************************/
682/* M o n P e r f */
683/******************************************************************************/
684
686{
687 CmsUsageRequest Usage = {{0, kYR_usage, 0, 0}};
688 struct iovec ioV[] = {{(char *)&Usage, sizeof(Usage)}};
689 int ioVnum = sizeof(ioV)/sizeof(struct iovec);
690 int ioVtot = sizeof(Usage);
691 SMask_t allNodes(~0);
692 int uInterval = Config.AskPing*Config.AskPerf;
693
694// Sleep for the indicated amount of time, then ask for load on each server
695//
696 while(uInterval)
697 {XrdSysTimer::Snooze(uInterval);
698 Broadcast(allNodes, ioV, ioVnum, ioVtot);
699 }
700 return (void *)0;
701}
702
703/******************************************************************************/
704/* M o n R e f s */
705/******************************************************************************/
706
708{
709 XrdCmsNode *nP;
710 int snooze_interval = 60, snooze_total = 0;
711 int rCnt = 0, wCnt = 0;
712 bool resetW, resetR, resetRW;
713
714// Sleep for the snooze interval. If a reset was requested then do a selective
715// reset unless we reached our snooze maximum and enough selections have gone
716// by; in which case, do a global reset.
717//
718 do {XrdSysTimer::Snooze(snooze_interval);
719 int totR = 0, totW = 0;
720
721 STMutex.ReadLock();
722 for (int i = 0; i <= STHi; i++)
723 {if ((nP = NodeTab[i]))
724 {totR += nP->RefTotR;
725 totW += nP->RefTotW;
726 }
727 }
728 STMutex.UnLock();
729
730 rCnt += (totR - SelRtot); SelRtot = totR;
731 wCnt += (totW - SelWtot); SelWtot = totW;
732 snooze_total += snooze_interval;
733
734 resetR = (rCnt >= Config.RefTurn);
735 resetW = (wCnt >= Config.RefTurn);
736 resetRW = (snooze_total >= Config.RefReset && (resetW || resetR));
737 if (resetRW)
738 {ResetRef((SMask_t)0);
739 if (resetR) rCnt = 0;
740 if (resetW) wCnt = 0;
741 snooze_total = 0;
742 }
743 } while(1);
744
745 return (void *)0;
746}
747
748/******************************************************************************/
749/* R e m o v e */
750/******************************************************************************/
751
752// Warning! The node object must be locked upon entry. The lock is released
753// upon deletion of the object.
754
756{
757 theNode->DropJob = new XrdCmsDrop(theNode);
758}
759
760// Warning! The node object must be locked upon entry. The lock is released
761// prior to returning to the caller. This entry obtains the node
762// table lock. When immed != 0 then the node is immediately dropped.
763// When immed is < 0 then the caller already holds the STMutex in
764// write mode and it is not released upon exit.
765
// Remove: take theNode out of service. Depending on the node's state this
// closes its connection, drops it as a non-primary (alternate) node, replaces
// it by a working alternate, drops it immediately, or schedules a delayed
// drop.
//
// reason   text passed to Disc() and used in the "scheduled for removal"
//          message; may be null (a debug trace is issued instead).
// theNode  the node to remove; its node lock is held by the caller.
// immed    0 allows a delayed drop, non-zero forces an immediate drop, and a
//          negative value additionally means the caller already holds STMutex
//          (it is then neither acquired nor released here).
//
// NOTE(review): this listing carries an original line number at the start of
// every line and skips numbers 852 and 865, so at least two statements of the
// real function are not shown here.
//
766void XrdCmsCluster::Remove(const char *reason, XrdCmsNode *theNode, int immed)
767{
768 EPNAME("Remove_Node")
// Scope guard: the constructor obtains the locks, the destructor releases
// them (and schedules deletion of the node when doDrop was set) on every
// return path of this function.
769 struct theLocks
770 {XrdSysRWLock *myMutex;
771 XrdCmsNode *myNode;
772 int myNID;
773 int myInst;
774 bool hasLK;
775 bool doDrop;
776 char myIdent[510];
777
// Snapshot the node's identity, ID and instance before any lock is released.
// Unless the caller already holds the table lock, the node lock is given up
// while the table lock is taken in write mode and then re-taken; the
// reference keeps the node object alive across that window.
778 theLocks(XrdSysRWLock *mtx, XrdCmsNode *node, int immed)
779 : myMutex(mtx), myNode(node), hasLK(immed < 0),
780 doDrop(false)
781 {strlcpy(myIdent, node->Ident, sizeof(myIdent));
782 myNID = node->ID(myInst);
783 if (!hasLK)
784 {myNode->Ref(); // Keep alive
785 myNode->UnLock();
786 myMutex->WriteLock(); // Get global lock
787 myNode->Lock();
788 myNode->unRef(); // Can't escape now
789 }
790 }
// myNode is set to 0 by the immediate-drop path below, in which case the node
// is not touched here at all.
791 ~theLocks()
792 {if (myNode)
793 {if (doDrop)
794 {myNode->isBound = 0;
795 myNode->DropTime = 0;
796 myNode->DropJob = new XrdCmsDrop(myNode);
797 myNode->UnLock();
798 } else myNode->UnLock();
799 }
800 if (!hasLK) myMutex->UnLock();
801 }
802 } LockHandler(&STMutex, theNode, immed);
803
804 XrdCmsNode *altNode = 0;
805 int Inst, NodeID = theNode->ID(Inst);
806
807// The LockHandler makes sure that the proper locks are obtained in a deadlock
808// free order. However, this may require that the node lock be released and
809// then re-aquired. We check if we are still dealing with same node at entry.
810// If not, issue message and high-tail it out.
811//
// NOTE(review): the branch below reports "removal aborted." but contains no
// return, so processing continues with the removal -- confirm whether a
// return is intended here.
812 if (LockHandler.myNID != NodeID || LockHandler.myInst != Inst)
813 {Say.Emsg("Manager", LockHandler.myIdent, "removal aborted.");
814 DEBUG(LockHandler.myIdent <<" node " <<NodeID <<'.' <<Inst <<" != "
815 << LockHandler.myNID <<'.' <<LockHandler.myInst <<" at entry.");
816 }
817
818// Mark node as being offline and remove any drop job from it
819//
// NOTE(review): only the offline flag is set here; no drop job is removed in
// the code visible in this listing.
820 theNode->isOffline = 1; // Global lock is held here
821
822// If the node is connected we simply close the connection. This will cause
823// the connection handler to re-initiate the node removal. This condition
824// exists only if one node is being displaced by another node. The Disc()
825// may take a long time, but it's done async by default on the WAN and sync
826// on the LAN (local connections are fast enough and error-free for this).
827//
828 if (theNode->isConn)
829 {theNode->Disc(reason, 0);
830 theNode->isGone = 1; // Disc() sets the isOffline flag
831 return;
832 }
833
834// If we are not the primary node, then get rid of this node post-haste
835//
// The node is not the one installed in the table slot for its ID, so it is
// handed to the guard for deletion without touching the table.
836 if (!(NodeTab[NodeID] == theNode))
837 {const char *why = (theNode->isMan ? "dropped as alternate."
838 : "dropped and redirected.");
839 Say.Emsg("Remove_Node", theNode->Ident, why);
840 LockHandler.doDrop = true;
841 return;
842 }
843
844
845// If the node is part of the cluster, do not count it anymore and
846// indicate new state of this nodes if we are a reporting manager
847//
// NOTE(review): original line 852 (the call these two arguments belong to) is
// not present in this listing. The visible arguments are -1 when the node was
// not suspended / allowed staging and 0 otherwise.
848 if (theNode->isBound)
849 {theNode->isBound = 0;
850 NodeCnt--;
851 if (Config.asManager())
853 theNode->isBad & XrdCmsNode::isSuspend ? 0 : -1,
854 theNode->isNoStage ? 0 : -1);
855 }
856
857// If we have a working alternate, substitute it here and immediately drop
858// the former primary. This allows the cache to remain warm.
859//
// RemNode() is only consulted for manager nodes whose cluster ID holds more
// than a single node; a non-null result becomes the new table entry.
// NOTE(review): original line 865 (the call the two altNode arguments belong
// to) is not present in this listing.
860 if (theNode->isMan && theNode->cidP && !(theNode->cidP->IsSingle())
861 && (altNode = theNode->cidP->RemNode(theNode)))
862 {if (altNode->isBound) NodeCnt++;
863 NodeTab[NodeID] = altNode;
864 if (Config.asManager())
866 altNode->isBad & XrdCmsNode::isSuspend ? 0 : 1,
867 altNode->isNoStage ? 0 : 1);
868 setAltMan(altNode->NodeID, altNode->Link, altNode->subsPort);
869 Say.Emsg("Manager",altNode->Ident,"replacing dropped",theNode->Ident);
870 LockHandler.doDrop = true;
871 return;
872 }
873
874// If this is an immediate drop request, do so now. Drop() will delete
875// the node object, so remove the node lock and tell LockHandler that.
876//
// Also taken when no drop delay is configured or the node is marked doomed.
877 if (immed || !Config.DRPDelay || theNode->isBad & XrdCmsNode::isDoomed)
878 {theNode->UnLock();
879 LockHandler.myNode = 0;
880 Drop(NodeID, Inst);
881 return;
882 }
883
884// If a drop job is already scheduled, update the instance field. Otherwise,
885// Schedule a node drop at a future time.
886//
887 theNode->DropTime = time(0)+Config.DRPDelay;
888 if (theNode->DropJob) theNode->DropJob->nodeInst = Inst;
889 else theNode->DropJob = new XrdCmsDrop(NodeID, Inst);
890
891// Document removal
892//
893 if (reason)
894 Say.Emsg("Manager", theNode->Ident, "scheduled for removal;", reason);
895 else DEBUG(theNode->Ident <<" node " <<NodeID <<'.' <<Inst);
896}
897
898/******************************************************************************/
899/* R e s e t R e f */
900/******************************************************************************/
901
902void XrdCmsCluster::ResetRef(SMask_t nMask, bool isLocked)
903{
904 XrdCmsNode *nP;
905 bool doAll (nMask == 0);
906
907// Obtain a lock on the table if not already locked
908//
909 if (!isLocked) STMutex.ReadLock();
910
911// Reset reference counts as needed. We can do this with a read lock as the
912// reference counters are atomic.
913//
914 for (int i = 0; i <= STHi; i++)
915 {if ((nP = NodeTab[i]) && (doAll || nP->isNode(nMask)))
916 {nP->RefW = 0;
917 nP->RefR = 0;
918 nP->Shrem = nP->Share;
919 }
920 }
921
922// Unlock table and exit
923//
924 if (!isLocked) STMutex.UnLock();
925}
926
927/******************************************************************************/
928/* S e l e c t */
929/******************************************************************************/
930
932{
933 EPNAME("Select");
934 XrdCmsPInfo pinfo;
935 const char *Amode;
936 int dowt = 0, retc = 0, isRW, fRD, noSel = (Sel.Opts & XrdCmsSelect::Defer);
937 SMask_t amask, smask, pmask;
938
939// Establish some local options
940//
941 if (Sel.Opts & XrdCmsSelect::Write)
942 {isRW = 1; Amode = "write";
943 if (Config.RWDelay)
944 if (Sel.Opts & XrdCmsSelect::Create && Config.RWDelay < 2) fRD = 1;
945 else fRD = 0;
946 else fRD = 1;
947 }
948 else {isRW = 0; Amode = "read"; fRD = 1;}
949
950// Find out who serves this path
951//
952 if (!Cache.Paths.Find(Sel.Path.Val, pinfo)
953 || (amask = ((isRW ? pinfo.rwvec : pinfo.rovec) & ~Sel.nmask)) == 0)
954 {Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
955 "No servers %s %s access to the file",
956 (isRW && Config.forceRO ? "allowed" : "have"), Amode)+1;
957 Sel.Resp.Port = kYR_ENOENT;
958 return EReplete;
959 }
960
961// If we are running a shared file system, perform an optional restricted
962// pre-selection and then do a standard selection. Since all nodes are equal,
963// make sure the client is not needlessly avoiding them as this signals an error.
964//
965 if (baseFS.isDFS())
966 {if (Sel.nmask && !(Sel.Opts & XrdCmsSelect::NoTryLim))
967 {pmask = (isRW ? pinfo.rwvec : pinfo.rovec) & Sel.nmask;
968 if (!(Sel.Opts & XrdCmsSelect::Online))
969 pmask |= pinfo.ssvec & Sel.nmask;
970 if (pmask && maxBits(pmask, baseFS.dfsTries()))
971 {Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
972 "Too many attempts to gain dfs %s access to the file", Amode)+1;
973 return RetryErr;
974 }
975 }
976 pmask = amask;
977 smask = (Sel.Opts & XrdCmsSelect::Online ? 0 : pinfo.ssvec & amask);
978 if (baseFS.Trim())
979 {Sel.Resp.DLen = 0;
980 if (!(retc = SelDFS(Sel, amask, pmask, smask, isRW)))
981 return (fRD ? Cache.WT4File(Sel,Sel.Vec.hf) : Config.LUPDelay);
982 if (retc < 0) return retc;
983 } else if (noSel) return 0;
984 return SelNode(Sel, pmask, smask);
985 }
986
987// If either a refresh is wanted or we didn't find the file, re-prime the cache
988// which will force the client to wait. Otherwise, compute the primary and
989// secondary selections. If there are none, the client may have to wait if we
990// have servers that we can query regarding the file. Note that for files being
991// opened in write mode, only one writable copy may exist unless this is a
992// meta-operation (e.g., remove) in which case the file itself remain unmodified
993// or a replica request, in which case we select a new target server.
994//
995 if (!(Sel.Opts & XrdCmsSelect::Refresh)
996 && (retc = Cache.GetFile(Sel, pinfo.rovec)))
997 {if (isRW)
998 { if (retc<0) return Config.LUPDelay;
999 else if (Sel.Opts & XrdCmsSelect::Replica)
1000 {pmask = amask & ~(Sel.Vec.hf | Sel.Vec.bf); smask = 0;
1001 if (!pmask && !Sel.Vec.bf) return SelFail(Sel,eNoRep);
1002 }
1003 else if (Sel.Vec.bf) pmask = smask = 0;
1004 else if (Sel.Vec.hf)
1005 {if (Sel.Opts & XrdCmsSelect::NewFile) return SelFail(Sel,eExists);
1006 if (!(Sel.Opts & XrdCmsSelect::MWFiles))
1007 {if (!(Sel.Opts & XrdCmsSelect::isMeta)
1008 && maxBits(Sel.Vec.hf,2)) return SelFail(Sel,eDups);
1009 if ((Sel.Vec.hf & pinfo.rwvec)
1010 != (Sel.Vec.hf & pinfo.rovec)) return SelFail(Sel,eROfs);
1011 }
1012 if (!(pmask = Sel.Vec.hf & amask)) return SelFail(Sel,eNoSel);
1013 smask = 0;
1014 }
1016 {pmask = amask; smask = 0;}
1017 else if ((smask = pinfo.ssvec & amask)) pmask = 0;
1018 else pmask = smask = 0;
1019 } else {
1020 pmask = Sel.Vec.hf & amask;
1021 if (Sel.Opts & XrdCmsSelect::Online) {pmask &= ~Sel.Vec.pf; smask=0;}
1022 else smask = (retc < 0 ? 0 : pinfo.ssvec & amask);
1023 }
1024 if (Sel.Vec.hf & Sel.nmask) Cache.UnkFile(Sel, Sel.nmask);
1025 } else {
1026 Cache.AddFile(Sel, 0);
1027 Sel.Vec.bf = pinfo.rovec;
1028 Sel.Vec.hf = Sel.Vec.pf = pmask = smask = 0;
1029 retc = 0;
1030 }
1031
1032// A wait is required if we don't have any primary or seconday servers
1033//
1034 dowt = (!pmask && !smask);
1035
1036// If we can query additional servers, do so now. The client will be placed
1037// in the callback queue only if we have no possible selections
1038//
1039 if (Sel.Vec.bf)
1040 {CmsStateRequest QReq = {{Sel.Path.Hash, kYR_state, kYR_raw, 0}};
1041 if (Sel.Opts & XrdCmsSelect::Refresh)
1043 if (dowt) retc= (fRD ? Cache.WT4File(Sel,Sel.Vec.hf) : Config.LUPDelay);
1044 TRACE(Files, "seeking " <<Sel.Path.Val);
1045 amask = Cluster.Broadcast(Sel.Vec.bf, QReq.Hdr,
1046 (void *)Sel.Path.Val,Sel.Path.Len+1);
1047 if (amask) Cache.UnkFile(Sel, amask);
1048 if (dowt) return retc;
1049 } else if (dowt && retc < 0 && !noSel)
1050 return (fRD ? Cache.WT4File(Sel,Sel.Vec.hf) : Config.LUPDelay);
1051
1052// Broadcast a freshen up request if wanted
1053//
1054 if ((Sel.Opts & XrdCmsSelect::Freshen) && (amask = pmask & ~Sel.Vec.bf))
1056 Cluster.Broadcast(amask, Qupt.Hdr,(void *)Sel.Path.Val,Sel.Path.Len+1);
1057 }
1058
1059// If we need to defer selection, simply return as this is a mindless prepare
1060//
1061 if (noSel) return 0;
1062
1063// Check if we have no useable servers
1064//
1065 if (dowt) return Unuseable(Sel);
1066
1067// Check if should eliminate staging servers. We may need to do this if the
1068// client has been eliminating too many of them as they all should be equal.
1069//
1070 if (Sel.nmask && pinfo.ssvec && !(Sel.Opts & XrdCmsSelect::NoTryLim)
1071 && maxBits(Sel.nmask & pinfo.ssvec, baseFS.stgTries()))
1072 {if (!pmask)
1073 {Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
1074 "Too many attempts to stage %s access to the file", Amode)+1;
1075 return RetryErr;
1076 }
1077 smask = 0;
1078 }
1079
1080// Select a node
1081//
1082 return SelNode(Sel, pmask, smask);
1083}
1084
1085/******************************************************************************/
1086
1087int XrdCmsCluster::Select(SMask_t pmask, int &port, char *hbuff, int &hlen,
1088 int isrw, int isMulti, int ifWant)
1089{
1090 static const SMask_t smLow(255);
1091 XrdCmsSelector selR;
1092 XrdCmsNode *nP = 0;
1093 SMask_t tmask;
1094 int Snum = 0;
1095 XrdNetIF::ifType nType = static_cast<XrdNetIF::ifType>(ifWant);
1096
1097// If there is nothing to select from, return failure
1098//
1099 if (!pmask) return 0;
1100
1101// Obtain the network we need for the client
1102//
1103 selR.needNet = XrdNetIF::Mask(nType);
1104
1105// Initialize
1106//
1107 selR.needSpace = 0;
1108
1109// Packed selection can never occur in this code path so we turn it off
1110//
1111 selR.selPack = 0;
1112
1113// If we are exporting a shared-everything system then the incoming mask
1114// may have more than one server indicated. So, we need to do a full select.
1115// This is forced when isMulti is true, indicating a choice may exist. Note
1116// that the node, if any, is returned unlocked but we have the global mutex.
1117//
1118 if (isMulti || baseFS.isDFS())
1119 {STMutex.ReadLock();
1120 nP = (Config.sched_RR ? SelbyRef(pmask,selR)
1121 : Config.sched_LoadR == 0 ? SelbyLoad(pmask,selR)
1122 : SelbyLoadR(pmask, selR));
1123
1124 if (nP) hlen = nP->netIF.GetName(hbuff, port, nType) + 1;
1125 else hlen = 0;
1126 STMutex.UnLock();
1127 return hlen != 1;
1128 }
1129
1130// In shared-nothing systems the incoming mask will only have a single node.
1131// Compute the a single node number that is contained in the mask.
1132//
1133 do {if (!(tmask = pmask & smLow)) Snum += 8;
1134 else {while((tmask = tmask>>1)) Snum++; break;}
1135 } while((pmask = pmask >> 8));
1136
1137// See if the node passes muster
1138//
1139 STMutex.ReadLock();
1140 if ((nP = NodeTab[Snum]))
1141 { if (nP->isBad) nP = 0;
1142 else if (!Config.sched_RR && (nP->myLoad > Config.MaxLoad)) nP = 0;
1143 else if (!(selR.needNet & nP->hasNet)) nP = 0;
1144 if (nP)
1145 {if (isrw)
1146 if (nP->isNoStage || nP->DiskFree < nP->DiskMinF) nP = 0;
1147 else {nP->RefTotW++; nP->RefW++;}
1148 else {nP->RefTotR++; nP->RefR++;}
1149 }
1150 }
1151
1152// At this point either we have a node or we do not
1153//
1154 if (nP)
1155 {hlen = nP->netIF.GetName(hbuff, port, nType) + 1;
1156 nP->RefR++;
1157 STMutex.UnLock();
1158 return hlen != 1;
1159 }
1160 STMutex.UnLock();
1161 return 0;
1162}
1163
1164/******************************************************************************/
1165/* S e l F a i l */
1166/******************************************************************************/
1167
1168int XrdCmsCluster::SelFail(XrdCmsSelect &Sel, int rc)
1169{
1170//
1171 const char *etext, *Item = "file";
1172
1173 switch(rc)
1174 {case eExists: if (Sel.Opts & XrdCmsSelect::isMeta) Item = "directory";
1175 etext = "Unable to create %s; it already exists.";
1176 Sel.Resp.Port = kYR_RWConflict;
1177 break;
1178 case eROfs: etext = "Unable to modify %s; r/o copy already exists.";
1179 Sel.Resp.Port = kYR_RWConflict;
1180 break;
1181 case eDups: etext = "Unable to modify %s; multiple copies exist.";
1182 Sel.Resp.Port = kYR_RWConflict;
1183 break;
1184 case eNoRep: etext = "Unable to replicate %s; no new sites available.";
1185 Sel.Resp.Port = kYR_noReplicas;
1186 break;
1187 case eNoSel: if (Sel.Vec.hf & Sel.nmask)
1188 {etext = "Unable to access %s; eligible servers shunned.";
1189 if (Sel.Opts & XrdCmsSelect::isDir) Item = "directory";
1190 } else {
1191 if (Sel.Opts & XrdCmsSelect::Write)
1192 {etext = "Unable to write %s; r/w exports not found.";
1193 } else {
1194 etext = "Unable to access %s; it does not exist.";
1195 if (Sel.Opts & XrdCmsSelect::isDir) Item = "directory";
1196 }
1197 }
1198 Sel.Resp.Port = kYR_ENOENT;
1199 break;
1200 default: etext = "Unable to access %s; it does not exist.";
1201 Sel.Resp.Port = kYR_ENOENT;
1202 break;
1203 };
1204
1205 int n = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data), etext, Item);
1206 if (n < (int)sizeof(Sel.Resp.Data)) Sel.Resp.DLen = n+1;
1207 else Sel.Resp.DLen = sizeof(Sel.Resp.Data);
1208
1209 return EReplete;
1210}
1211
1212/******************************************************************************/
1213/* S p a c e */
1214/******************************************************************************/
1215
// Accumulate disk space information from the node table into sData.
// NOTE(review): the line carrying this function's signature is not visible
// in this listing; sData and smask are presumably its parameters - confirm.
//
1217{
1218 XrdCmsNode *nP;
1219 SMask_t bmask;
1220 int i;
// For a non-DFS setup the totals of every qualifying node are summed. For a
// DFS only the first qualifying node is counted (see the Total test below).
1221 bool doAll = !baseFS.isDFS();
1222
1223// Obtain a lock on the table and screen out peer nodes
1224//
1225 STMutex.ReadLock();
1226 bmask = smask & peerMask;
1227
1228// Run through the table getting space information
1229//
// Only nodes that match bmask and are not offline contribute.
1230 for (i = 0; i <= STHi; i++)
1231 if ((nP = NodeTab[i]) && nP->isNode(bmask) && !(nP->isOffline))
// When doAll is false, totals are added only while Total is still zero.
1232 {if (doAll || !sData.Total)
1233 {sData.Total += nP->DiskTotal;
1234 sData.TotFr += nP->DiskFree;
1235 }
// Count nodes that allow staging and remember the largest free space seen
// among them together with that node's utilization.
1236 if (nP->isRW & XrdCmsNode::allowsSS)
1237 {sData.sNum++;
1238 if (sData.sFree < nP->DiskFree)
1239 {sData.sFree = nP->DiskFree; sData.sUtil = nP->DiskUtil;}
1240 }
// Same for nodes that allow r/w access; the minimum free value of the node
// with the most free space is recorded as well.
1241 if (nP->isRW & XrdCmsNode::allowsRW)
1242 {sData.wNum++;
1243 if (sData.wFree < nP->DiskFree)
1244 {sData.wFree = nP->DiskFree; sData.wUtil = nP->DiskUtil;
1245 sData.wMinF = nP->DiskMinF;
1246 }
1247 }
1248 }
1249 STMutex.UnLock();
1250}
1251
1252/******************************************************************************/
1253/* S t a t s */
1254/******************************************************************************/
1255
1256int XrdCmsCluster::Stats(char *bfr, int bln)
1257{
1258 static const char statfmt1[] = "<stats id=\"cms\">"
1259 "<role>%s</role></stats>";
1260 int mlen;
1261
1262// Check if actual length wanted
1263//
1264 if (!bfr) return sizeof(statfmt1) + 8;
1265
1266// Format the statistics (not much here for now)
1267//
1268 mlen = snprintf(bfr, bln, statfmt1, Config.myRType);
1269
1270 if ((bln -= mlen) <= 0) return 0;
1271 return mlen;
1272}
1273
1274/******************************************************************************/
1275/* S t a t t */
1276/******************************************************************************/
1277
1278int XrdCmsCluster::Statt(char *bfr, int bln)
1279{
1280 static const char statfmt0[] = "</stats>";
1281 static const char statfmt1[] = "<stats id=\"cmsm\">"
1282 "<role>%s</role><sel><t>%lld</t><r>%lld</r><w>%lld</w></sel>"
1283 "<node>%d";
1284 static const char statfmt2[] = "<stats id=\"%d\">"
1285 "<host>%s</host><role>%s</role>"
1286 "<run>%s</run><ref><r>%d</r><w>%d</w></ref>%s</stats>";
1287 static const char statfmt3[] = "<shr>%d<use>%d</use></shr>";
1288 static const char statfmt4[] = "</node>";
1289 static const char statfmt5[] =
1290 "<frq><add>%lld<d>%lld</d></add><rsp>%lld<m>%lld</m></rsp>"
1291 "<lf>%lld</lf><ls>%lld</ls><rf>%lld</rf><rs>%lld</rs></frq>";
1292
1293 static int AddFrq = (Config.RepStats & XrdCmsConfig::RepStat_frq);
1294 static int AddShr = (Config.RepStats & XrdCmsConfig::RepStat_shr)
1295 && Config.asMetaMan();
1296
1297 XrdCmsRRQ::Info Frq;
1298 XrdCmsSelected *sp;
1299 int mlen, tlen, n = 0;
1300 char shrBuff[80], stat[6], *stp;
1301 bool oksel;
1302
1303 class spmngr {
1304 public: XrdCmsSelected *sp;
1305
1306 spmngr() {sp = 0;}
1307 ~spmngr() {XrdCmsSelected *xsp;
1308 while((xsp = sp)) {sp = sp->next; delete xsp;}
1309 }
1310 } mngrsp;
1311
1312// Check if actual length wanted
1313//
1314 if (!bfr)
1315 {n = sizeof(statfmt0) +
1316 sizeof(statfmt1) + 12*3 + 3 + 3 +
1317 (sizeof(statfmt2) + 10*2 + 256 + 16) * STMax + sizeof(statfmt4);
1318 if (AddShr) n += sizeof(statfmt3) + 12;
1319 if (AddFrq) n += sizeof(statfmt4) + (10*8);
1320 return n;
1321 }
1322
1323// Get the statistics
1324//
1325 if (AddFrq) RRQ.Statistics(Frq);
1326 mngrsp.sp = sp = List(FULLMASK, LS_NULL, oksel);
1327
1328// Count number of nodes we have
1329//
1330 while(sp) {n++; sp = sp->next;}
1331 sp = mngrsp.sp;
1332
1333// Format the statistics
1334//
1335 long long lclTcnt = SelTcnt, lclRtot = SelRtot, lclWtot = SelWtot;
1336 mlen = snprintf(bfr, bln, statfmt1,
1337 Config.myRType, lclTcnt, lclRtot, lclWtot, n);
1338
1339 if ((bln -= mlen) <= 0) return 0;
1340 tlen = mlen; bfr += mlen; n = 0; *shrBuff = 0;
1341
1342 while(sp && bln > 0)
1343 {stp = stat;
1344 if (sp->Status & XrdCmsSelected::Offline) *stp++ = 'o';
1345 else if (sp->Status & XrdCmsSelected::Suspend) *stp++ = 's';
1346 else if (sp->Status & XrdCmsSelected::Disable) *stp++ = 'd';
1347 else *stp++ = 'a';
1348 if (sp->Status & XrdCmsSelected::isRW) *stp++ = 'w';
1349 if (sp->Status & XrdCmsSelected::NoStage) *stp++ = 'n';
1350 *stp = 0;
1351 if (AddShr) snprintf(shrBuff, sizeof(shrBuff), statfmt3,
1352 (sp->Share ? sp->Share : 100), sp->Shrin);
1353 mlen = snprintf(bfr, bln, statfmt2, n, sp->Ident,
1354 XrdCmsRole::Type(static_cast<XrdCmsRole::RoleID>(sp->RoleID)),
1355 stat, sp->RefTotR, sp->RefTotW, shrBuff);
1356 bfr += mlen; bln -= mlen; tlen += mlen;
1357 sp = sp->next; n++;
1358 }
1359
1360 if (bln <= (int)sizeof(statfmt4)) return 0;
1361 strcpy(bfr, statfmt4); mlen = sizeof(statfmt4) - 1;
1362 bfr += mlen; bln -= mlen; tlen += mlen;
1363
1364 if (AddFrq && bln > 0)
1365 {mlen = snprintf(bfr, bln, statfmt5, Frq.Add2Q, Frq.PBack, Frq.Resp,
1366 Frq.Multi, Frq.luFast, Frq.luSlow, Frq.rdFast, Frq.rdSlow);
1367 bfr += mlen; bln -= mlen; tlen += mlen;
1368 }
1369
1370// See if we overflowed. otherwise finish up
1371//
1372 if (sp || bln < (int)sizeof(statfmt0)) return 0;
1373 strcpy(bfr, statfmt0);
1374 return tlen + sizeof(statfmt0) - 1;
1375}
1376
1377/******************************************************************************/
1378/* P r i v a t e M e t h o d s */
1379/******************************************************************************/
1380/******************************************************************************/
1381/* c a l c D e l a y */
1382/******************************************************************************/
1383
1384XrdCmsNode *XrdCmsCluster::calcDelay(XrdCmsSelector &selR)
1385{
1386 if (!selR.nPick) {selR.delay = 0;
1387 selR.reason = (selR.xNoNet
1388 ? "no eligible servers reachable for"
1389 : "no eligible servers for");
1390 }
1391 else if (selR.xFull) {selR.delay = Config.DiskWT;
1392 selR.reason = "no eligible servers have space for";
1393 }
1394 else if (selR.xOvld) {selR.delay = Config.MaxDelay;
1395 selR.reason = "eligible servers overloaded for";
1396 }
1397 else if (selR.xSusp) {selR.delay = Config.SUSDelay;
1398 selR.reason = "eligible servers suspended for";
1399 }
1400 else if (selR.xOff) {selR.delay = Config.SUPDelay;
1401 selR.reason = "eligible servers offline for";
1402 }
1403 else {selR.delay = Config.SUPDelay;
1404 selR.reason = "server selection error for";
1405 }
1406 return (XrdCmsNode *)0;
1407}
1408
1409/******************************************************************************/
1410/* D r o p */
1411/******************************************************************************/
1412
1413// Warning: STMutex must be locked in write upon entry and the caller must
1414// release it if this method is called directly. Otherwise, the mutex
1415// will be obtained and released. Also, this method may only be called
1416// via Remove() either directly or via a deferred job scheduled by that
1417// method. This method actually deletes the node object.
1418
1419int XrdCmsCluster::Drop(int sent, int sinst, XrdCmsDrop *djp)
1420{
1421 EPNAME("Drop_Node")
1422 XrdCmsNode *nP;
1423 char hname[512];
1424
1425// If we are being called outside of a scheduled job, obtain the mutex
1426//
1427 if (djp) STMutex.WriteLock();
1428
1429// Make sure this node is the right one
1430//
1431 if (!(nP = NodeTab[sent]) || nP->Inst() != sinst)
1432 {if (nP && djp == nP->DropJob) {nP->DropJob = 0; nP->DropTime = 0;}
1433 if (djp) STMutex.UnLock();
1434 DEBUG(sent <<'.' <<sinst <<" cancelled.");
1435 return 0;
1436 }
1437
1438// Check if the drop has been rescheduled
1439//
1440 if (djp && time(0) < nP->DropTime)
1441 {Sched->Schedule((XrdJob *)djp, nP->DropTime);
1442 if (djp) STMutex.UnLock();
1443 return 1;
1444 }
1445
1446// Save the node name (don't want to hold a lock across a message)
1447//
1448 strlcpy(hname, nP->Ident, sizeof(hname));
1449
1450// Cleanup status
1451//
1452 NodeTab[sent] = 0;
1453 nP->isOffline = 1; // STMutex is locked in write mode
1454 nP->DropTime = 0;
1455 nP->DropJob = 0;
1456 nP->isBound = 0;
1457
1458// Remove node from the peer list (if it is one)
1459//
1460 if (nP->isPeer) {peerHost &= nP->NodeMask; peerMask = ~peerHost;}
1461
1462// Remove node entry from the alternate list and readjust the end pointer.
1463//
1464 if (nP->isMan)
1465 {memset((void *)&AltMans[sent*AltSize], (int)' ', AltSize);
1466 if (sent == AltMent)
1467 {AltMent--;
1468 while(AltMent >= 0 && NodeTab[AltMent]
1469 && !NodeTab[AltMent]->isMan) AltMent--;
1470 if (AltMent < 0) AltMend = AltMans;
1471 else AltMend = AltMans + ((AltMent+1)*AltSize);
1472 }
1473 }
1474
1475// Readjust STHi
1476//
1477 if (sent == STHi) while(STHi >= 0 && !NodeTab[STHi]) STHi--;
1478
1479// Invalidate any cached entries for this node
1480//
1481 if (nP->NodeMask) Cache.Drop(nP->NodeMask, sent, STHi);
1482
1483// We can now delete the node object if we were called via a job as we are on
1484// a different thread. Direct calls require that we schedule the deletion as
1485// it may take a long time if there are oustanding references to this node.
1486//
1487 if (djp) {STMutex.UnLock(); nP->Delete(STMutex);}
1488 else nP->DropJob = new XrdCmsDrop(nP);
1489
1490// Document the drop
1491//
1492 Say.Emsg("Drop_Node", hname, "dropped.");
1493 return 0;
1494}
1495
1496/******************************************************************************/
1497/* M u l t i p l e */
1498/******************************************************************************/
1499
1500int XrdCmsCluster::Multiple(SMask_t mVec)
1501{
1502 static const unsigned long long Left32 = 0xffffffff00000000LL;
1503 static const unsigned long long Right32 = 0x00000000ffffffffLL;
1504 static const unsigned long long Left16 = 0x00000000ffff0000LL;
1505 static const unsigned long long Right16 = 0x000000000000ffffLL;
1506 static const unsigned long long Left08 = 0x000000000000ff00LL;
1507 static const unsigned long long Right08 = 0x00000000000000ffLL;
1508 static const unsigned long long Left04 = 0x00000000000000f0LL;
1509 static const unsigned long long Right04 = 0x000000000000000fLL;
1510// 0 1 2 3 4 5 6 7 8 9 A B C D E F
1511 static const int isMult[16] = {0,0,0,1,0,1,1,1,0,1,1,1,1,1,1,1};
1512
1513 if (mVec & Left32) {if (mVec & Right32) return 1;
1514 else mVec = mVec >> 32LL;
1515 }
1516 if (mVec & Left16) {if (mVec & Right16) return 1;
1517 else mVec = mVec >> 16LL;
1518 }
1519 if (mVec & Left08) {if (mVec & Right08) return 1;
1520 else mVec = mVec >> 8LL;
1521 }
1522 if (mVec & Left04) {if (mVec & Right04) return 1;
1523 else mVec = mVec >> 4LL;
1524 }
1525 return isMult[mVec];
1526}
1527
1528/******************************************************************************/
1529/* m a x B i t s */
1530/******************************************************************************/
1531
1532bool XrdCmsCluster::maxBits(SMask_t mVec, int mbits)
1533{
1534 int count = 0;
1535
1536// Count bits. This is the fastest way assuming few bits are set
1537//
1538 while(mVec)
1539 {mVec &= (mVec - 1);
1540 count++;
1541 if (count >= mbits) return true;
1542 }
1543
1544// Indicate we have not reached the maximum bits set
1545//
1546 return false;
1547}
1548
1549/******************************************************************************/
1550/* R e c o r d */
1551/******************************************************************************/
1552
1553void XrdCmsCluster::Record(char *path, const char *reason, bool force)
1554{
1555 EPNAME("Record")
1556 static int msgcnt = 255;
1557 static XrdSysMutex mcMutex;
1558 int skipmsg;
1559
1560 DEBUG(reason <<path);
1561 mcMutex.Lock();
1562 msgcnt++; skipmsg = msgcnt & (force ? 0x0f : 0xff);
1563 mcMutex.UnLock();
1564
1565 if (!skipmsg) Say.Emsg(epname, "client deferred;", reason, path);
1566}
1567
1568/******************************************************************************/
1569/* S e l N o d e */
1570/******************************************************************************/
1571
// Select a node for the request described by Sel and fill in the response.
// pmask holds the primary candidates and amask the alternate candidates
// (the existing comment below says that alternates do staging). Peer nodes
// are tried last and only if Sel.Opts has XrdCmsSelect::Peers set.
// Returns 0 when a node was selected (Sel.Resp holds its name and port), a
// delay value when the client should wait, or the result of Unreachable()
// or Unuseable() when nothing can be done.
//
1572int XrdCmsCluster::SelNode(XrdCmsSelect &Sel, SMask_t pmask, SMask_t amask)
1573{
1574 EPNAME("SelNode")
1575 const char *act=0;
1576 int affsel = 1, count = 0, isalt = 0, pass = 2;
1577 SMask_t mask;
1578 XrdCmsNode *nP = 0;
1579 XrdCmsSelector selR;
1580 XrdNetIF::ifType nType=(XrdNetIF::ifType)(Sel.Opts & XrdCmsSelect::ifWant);
1581
1582// Obtain the network we need for the client
1583//
1584 selR.needNet = XrdNetIF::Mask(nType);
1585
1586// Indicate whether or not stable selection is required
1587//
// With Pack set, the path hash (or the alternate hash when UseAH is set)
// modulo the number of bits set in pmask, plus one, becomes selPack; with
// fewer than two candidates packing is turned off.
1588 if (!(Sel.Opts & XrdCmsSelect::Pack)) selR.selPack = 0;
1589 else {unsigned int theHash = (Sel.Opts & XrdCmsSelect::UseAH
1590 ? Sel.AltHash : Sel.Path.Hash);
1591 SMask_t sVec = pmask;
1592 for (count = 0; sVec; count++) sVec &= (sVec - 1);
1593 if (count > 1) selR.selPack = affsel = (theHash % count) + 1;
1594 else selR.selPack = 0;
1595 }
1596
1597// There is a difference bwteen needing space and needing r/w access. The former
1598// is needed when we will be writing data the latter for inode modifications.
1599//
1600 if (Sel.Opts & XrdCmsSelect::isMeta) selR.needSpace = 0;
1601 else selR.needSpace = (Sel.Opts & XrdCmsSelect::Write
1602 ? XrdCmsNode::allowsRW : 0);
1603
1604// Scan for a primary and alternate node (alternates do staging). At this
1605// point we omit all peer nodes as they are our last resort. Note that Selbyxxx
1606// returns the node unlocked but we have the global mutex so that is OK.
1607//
// The first pass uses the primary mask and the second the alternate mask.
// NOTE(review): SelbyRef() is given mask but SelbyLoad() and SelbyLoadR()
// are given pmask, so for them the peer screening and the second pass
// alternate mask have no effect - confirm whether mask was intended.
1608 STMutex.ReadLock();
1609 mask = pmask & peerMask;
1610 while(pass--)
1611 {if (mask)
1612 {nP = (Config.sched_RR || (Sel.Opts & XrdCmsSelect::UseRef)
1613 ? SelbyRef(mask,selR)
1614 : Config.sched_LoadR == 0 ? SelbyLoad(pmask,selR)
1615 : SelbyLoadR(pmask, selR));
1616 if (nP || (selR.nPick && selR.delay)
1617 || NodeCnt < Config.SUPCount) break;
1618 }
1619 mask = amask & peerMask; isalt = XrdCmsNode::allowsSS;
1620 if (!(Sel.Opts & XrdCmsSelect::isMeta)) selR.needSpace |= isalt;
1621 }
1622
1623// Produce affinity result trace
1624//
1625 if (Sel.Opts & XrdCmsSelect::Pack && nP)
1626 {TRACE(Redirect, "affinity " <<affsel <<'/' <<count <<'/'
1627 <<(int)selR.selPack <<(selR.selPack ? " go " : " ng ")
1628 <<nP->Name() <<' ' <<Sel.Path.Val);
1629 }
1630
1631// If we found an eligible node then dispatch the client to it. We will
1632// swap the global mutex for the node mutex to minimize interefrence.
1633//
1634 if (nP)
1635 {nP->g2nLock(STMutex);
1636 Sel.Resp.DLen = nP->netIF.GetName(Sel.Resp.Data, Sel.Resp.Port, nType);
1637 if (!Sel.Resp.DLen) {nP->UnLock(); return Unreachable(Sel, false);}
1638 Sel.Resp.DLen++; Sel.smask = nP->NodeMask;
1639
1640 // If a message is to be sent to the selected server, send it.
1641 //
1642 if (Sel.iovN && Sel.iovP) nP->Send(Sel.iovP, Sel.iovN);
1643
1644 // Do special post proccessing when any of:
1645 // a) isalt true: Secondary selection occurred
1646 // b) Create set: File creation will occur
1647 //
// NOTE(review): the line that opens the block closed below is not visible
// in this listing.
1648 if (isalt || (Sel.Opts & XrdCmsSelect::Create))
1650 if (Sel.Opts & XrdCmsSelect::noBind) act = " handling ";
1651 else Cache.AddFile(Sel, nP->NodeMask);
1652 }
1653
1654 // Determine what we are actually doing here
1655 //
1656 nP->UnLock();
1657 if (!act)
1658 {if (isalt) act = (Sel.iovN ? " staging " : " assigned ");
1659 else act = " serving ";
1660 }
1661 TRACE(Stage, Sel.Resp.Data <<act <<Sel.Path.Val);
1662 return 0;
1663 }
1664
1665// No node so check if we have a sufficient number to continue. Note that we
1666// do not forward to a peer unless we have a suffficient number of local nodes.
1667//
1668 if (!selR.delay && NodeCnt < Config.SUPCount)
1669 {STMutex.UnLock();
1670 Record(Sel.Path.Val, "insufficient number of nodes", true);
1671 return Config.SUPDelay;
1672 }
1673
1674// Return delay if we should avoid selecting a peer manager
1675//
1676 if (selR.delay && selR.delay < Config.PSDelay)
1677 {STMutex.UnLock();
1678 Record(Sel.Path.Val, selR.reason);
1679 return selR.delay;
1680 }
1681
1682// At this point, we attempt a peer node selection (choice of last resort). Note
1683// that we are still holding the global lock! If we find a peer node we will
1684// swap it with the node lock.
1685//
// The delay, reason and xNoNet values of the local selection are saved first
// and restored if the peer selection yields neither a node nor a delay.
1686 if (Sel.Opts & XrdCmsSelect::Peers)
1687 {const char *reason1 = selR.reason;
1688 int delay1 = selR.delay;
1689 bool noNet = selR.xNoNet;
1690 if ((mask = (pmask | amask) & peerHost)) nP = SelbyCost(mask, selR);
1691 if (nP)
1692 {nP->g2nLock(STMutex);
1693 Sel.Resp.DLen = nP->netIF.GetName(Sel.Resp.Data,Sel.Resp.Port,nType);
1694 if (!Sel.Resp.DLen) {nP->UnLock(); return Unreachable(Sel, false);}
1695 Sel.Resp.DLen++; Sel.smask = nP->NodeMask;
1696 if (Sel.iovN && Sel.iovP) nP->Send(Sel.iovP, Sel.iovN);
1697 nP->UnLock();
1698 TRACE(Stage, "Peer " <<Sel.Resp.Data <<" handling " <<Sel.Path.Val);
1699 return 0;
1700 }
1701 if (!selR.delay)
1702 {selR.delay = delay1; selR.reason = reason1; selR.xNoNet = noNet;}
1703 }
1704
1705// At this point we don't need the global lock so let it go.
1706//
1707 STMutex.UnLock();
1708
1709// At this point we either don't have enough nodes or simply can't handle this
1710//
1711 if (selR.delay)
1712 {Record(Sel.Path.Val, selR.reason);
1713 return selR.delay;
1714 }
1715
1716// Return appropriate error message
1717//
1718 if (selR.xNoNet) return Unreachable(Sel, true);
1719 return Unuseable(Sel);
1720}
1721
1722/******************************************************************************/
1723/* R e f C o u n t */
1724/******************************************************************************/
1725
1726// This snippet of code occurs often enough so that we make it a macro as we
1727// want to execute this inline.
1728//
// Count a selection of node sP: the write counters are bumped when NeedSpace
// is true and the read counters otherwise. When sPMulti is true and the node
// has a Share, the share countdown (Shrem) is decremented; once it was found
// to be zero both reference counts are advanced by Shrip, the countdown is
// reloaded from Share and Shrin is incremented.
// The body is wrapped in do/while(0) so that the macro acts as one statement
// (safe in an unbraced if/else) and every argument is parenthesized.
//
#define RefCount(sP, sPMulti, NeedSpace) \
   do {if (NeedSpace) {(sP)->RefTotW++; (sP)->RefW++;} \
          else        {(sP)->RefTotR++; (sP)->RefR++;} \
       if ((sPMulti) && (sP)->Share && !(sP)->Shrem--) \
          {(sP)->RefW += (sP)->Shrip; (sP)->RefR += (sP)->Shrip; \
           (sP)->Shrem = (sP)->Share; (sP)->Shrin++; \
          } \
      } while(0)
1736
1737/******************************************************************************/
1738/* S e l b y C o s t */
1739/******************************************************************************/
1740
1741// Cost selection is used only for peer node selection as peers do not
1742// report a load and handle their own scheduling.
1743
1744// Caller must have the STMutex locked. The returned node, if any, is unlocked.
1745
1746XrdCmsNode *XrdCmsCluster::SelbyCost(SMask_t mask, XrdCmsSelector &selR)
1747{
1748 XrdCmsNode *np, *sp = 0;
1749 bool Multi = false;
1750
1751// Scan for a node (sp points to the selected one)
1752//
1753 selR.Reset(); SelTcnt++;
1754 for (int i = 0; i <= STHi; i++)
1755 if ((np = NodeTab[i]) && (np->NodeMask & mask))
1756 {if (!(selR.needNet & np->hasNet)) {selR.xNoNet= true; continue;}
1757 selR.nPick++;
1758 if (np->isOffline) {selR.xOff = true; continue;}
1759 if (np->isBad) {selR.xSusp = true; continue;}
1760 if (selR.needSpace && np->isNoStage) {selR.xFull = true; continue;}
1761 if (!sp) sp = np;
1762 else{if (abs(sp->myCost - np->myCost) <= Config.P_fuzz)
1763 { if (selR.selPack)
1764 {if (--selR.selPack) sp=np;
1765 else break;
1766 }
1767 else if (selR.needSpace)
1768 {if (sp->RefW > (np->RefW+Config.DiskLinger))
1769 sp=np;
1770 }
1771 else if (sp->RefR > np->RefR) sp=np;
1772 }
1773 else if (sp->myCost > np->myCost) sp=np;
1774 Multi = true;
1775 }
1776 }
1777
1778// Check for overloaded node and return result
1779//
1780 if (!sp) return calcDelay(selR);
1781 RefCount(sp, Multi, selR.needSpace);
1782 return sp;
1783}
1784
1785/******************************************************************************/
1786/* S e l b y L o a d */
1787/******************************************************************************/
1788
1789// Caller must have the STMutex locked. The returned node, if any, is unlocked.
1790
1791XrdCmsNode *XrdCmsCluster::SelbyLoad(SMask_t mask, XrdCmsSelector &selR)
1792{
1793 XrdCmsNode *np, *sp = 0;
1794 bool Multi = false, reqSS = (selR.needSpace & XrdCmsNode::allowsSS) != 0;
1795
1796// Scan for a node (preset possible, suspended, overloaded, full, and dead)
1797//
1798 selR.Reset(); SelTcnt++;
1799 for (int i = 0; i <= STHi; i++)
1800 if ((np = NodeTab[i]) && (np->NodeMask & mask))
1801 {if (!(selR.needNet & np->hasNet)) {selR.xNoNet= true; continue;}
1802 selR.nPick++;
1803 if (np->isOffline) {selR.xOff = true; continue;}
1804 if (np->isBad) {selR.xSusp = true; continue;}
1805 if (np->myLoad > Config.MaxLoad) {selR.xOvld = true; continue;}
1806 if (selR.needSpace && (np->DiskFree < np->DiskMinF
1807 || (reqSS && np->isNoStage)))
1808 {selR.xFull = true; continue;}
1809 if (!sp) sp = np;
1810 else{if (selR.needSpace)
1811 {if (abs(sp->myMass - np->myMass) <= Config.P_fuzz)
1812 {if (sp->RefW > (np->RefW+Config.DiskLinger)) sp=np;}
1813 else if (sp->myMass > np->myMass) sp=np;
1814 } else {
1815 if (abs(sp->myLoad - np->myLoad) <= Config.P_fuzz)
1816 {if (selR.selPack)
1817 {if (--selR.selPack) sp=np;
1818 else break;
1819 }
1820 else if (sp->RefR > np->RefR) sp=np;
1821 }
1822 else if (sp->myLoad > np->myLoad) sp=np;
1823 }
1824 Multi = true;
1825 }
1826 }
1827
1828// Check for overloaded node and return result
1829//
1830 if (!sp) return calcDelay(selR);
1831 RefCount(sp, Multi, selR.needSpace);
1832 return sp;
1833}
1834
1835/******************************************************************************/
1836/* S e l b y L o a d R */
1837/******************************************************************************/
1838
1839// Caller must have the STMutex locked. The returned node, if any, is unlocked.
1840
1841XrdCmsNode *XrdCmsCluster::SelbyLoadR(SMask_t mask, XrdCmsSelector &selR)
1842{
1843 static std::random_device rand_dev;
1844 static std::default_random_engine generator(rand_dev());
1845
1846 XrdCmsNode *np = nullptr, *sp = nullptr;
1847 bool reqSS = (selR.needSpace & XrdCmsNode::allowsSS) != 0;
1848
1849 // Scan for a node (preset possible, suspended, overloaded, full, and dead)
1850
1851 selR.Reset();
1852 SelTcnt++;
1853
1854 int totWeight = 0;
1855
1856 for (int i = 0; i <= STHi; ++i) {
1857 NodeWeight[i] = 0; // make node unselectable first
1858
1859 if (!((np = NodeTab[i]) && (np->NodeMask & mask)))
1860 continue;
1861
1862 if (!(selR.needNet & np->hasNet)) { selR.xNoNet = true; continue; }
1863
1864 selR.nPick++;
1865
1866 if (np->isOffline) { selR.xOff = true; continue; }
1867 if (np->isBad) { selR.xSusp = true; continue; }
1868 if (np->myLoad > Config.MaxLoad) { selR.xOvld = true; continue; }
1869
1870 if (selR.needSpace) {
1871 if (np->DiskFree < np->DiskMinF || (reqSS && np->isNoStage)) {
1872 selR.xFull = true;
1873 continue;
1874 }
1875 }
1876
1877 // If node passes filters, give it a weight
1878 totWeight += Config.P_fuzz + (100 - np->myLoad);
1879 NodeWeight[i] = totWeight;
1880 }
1881
1882 std::uniform_int_distribution<int> distr(1, totWeight);
1883 int selected = distr(generator);
1884
1885 for (int i = 0; i <= STHi; ++i) {
1886 if (NodeWeight[i] < selected)
1887 continue;
1888
1889 sp = NodeTab[i];
1890 break;
1891 }
1892
1893 return sp ? sp : calcDelay(selR);
1894}
1895
1896/******************************************************************************/
1897/* S e l b y R e f */
1898/******************************************************************************/
1899
1900// Caller must have the STMutex locked. The returned node, if any, is unlocked.
1901
1902XrdCmsNode *XrdCmsCluster::SelbyRef(SMask_t mask, XrdCmsSelector &selR)
1903{
1904 XrdCmsNode *np, *sp = 0;
1905 bool Multi = false, reqSS = (selR.needSpace & XrdCmsNode::allowsSS) != 0;
1906
1907// Scan for a node (sp points to the selected one)
1908//
1909 selR.Reset(); SelTcnt++;
1910 for (int i = 0; i <= STHi; i++)
1911 if ((np = NodeTab[i]) && (np->NodeMask & mask))
1912 {if (!(selR.needNet & np->hasNet)) {selR.xNoNet= true; continue;}
1913 selR.nPick++;
1914 if (np->isOffline) {selR.xOff = true; continue;}
1915 if (np->isBad) {selR.xSusp = true; continue;}
1916 if (selR.needSpace && (np->DiskFree < np->DiskMinF
1917 || (reqSS && np->isNoStage)))
1918 {selR.xFull = true; continue;}
1919 if (!sp) sp = np;
1920 else {Multi = true;
1921 if (selR.selPack)
1922 {if (--selR.selPack) sp=np;
1923 else break;
1924 }
1925 else if (selR.needSpace)
1926 {if (sp->RefW > (np->RefW+Config.DiskLinger)) sp=np;}
1927 else if (sp->RefR > np->RefR) sp=np;
1928 }
1929 }
1930
1931// Check for overloaded node and return result
1932//
1933 if (!sp) return calcDelay(selR);
1934 RefCount(sp, Multi, selR.needSpace);
1935 return sp;
1936}
1937
1938/******************************************************************************/
1939/* S e l D F S */
1940/******************************************************************************/
1941
// Determine the state of the file named by Sel for a distributed file
// system and adjust the selection masks accordingly.
// Returns 0 when a state query was sent to the nodes in amask, 1 when the
// caller may go on using pmask and smask, and the result of SelFail() when
// the request cannot be satisfied.
//
1942int XrdCmsCluster::SelDFS(XrdCmsSelect &Sel, SMask_t amask,
1943 SMask_t &pmask, SMask_t &smask, int isRW)
1944{
1945 EPNAME("SelDFS");
1946 static const SMask_t allNodes(~0);
1947 int oldOpts, rc;
1948
1949// The first task is to find out if the file exists somewhere. If we are doing
1950// local queries, then the answer will be immediate. Otherwise, forward it.
1951//
1952 if ((Sel.Opts & XrdCmsSelect::Refresh) || !(rc = Cache.GetFile(Sel, amask)))
1953 {if (!baseFS.Local())
1954 {CmsStateRequest QReq = {{Sel.Path.Hash, kYR_state, kYR_raw, 0}};
1955 TRACE(Files, "seeking " <<Sel.Path.Val);
1956 Cache.AddFile(Sel, 0);
// NOTE(review): the statement controlled by the following if is not visible
// in this listing.
1957 if (Sel.Opts & XrdCmsSelect::Refresh)
1959 Cluster.Broadsend(amask, QReq.Hdr, Sel.Path.Val, Sel.Path.Len+1);
1960 return 0;
1961 }
// Local check: a negative result clears all file vectors; otherwise every
// node in amask is recorded as having the file (and as having it pending
// when Exists() returned CmsHaveRequest::Pending). Sel.Opts is restored
// after the cache was updated.
1962 if ((rc = baseFS.Exists(Sel.Path.Val, -Sel.Path.Len)) < 0)
1963 {Cache.AddFile(Sel, 0);
1964 Sel.Vec.bf = Sel.Vec.pf = Sel.Vec.wf = Sel.Vec.hf = 0;
1965 } else {
1966 Sel.Vec.hf = amask; Sel.Vec.wf = (isRW ? amask : 0);
1967 oldOpts = Sel.Opts;
1968 if (rc != CmsHaveRequest::Pending) Sel.Vec.pf = 0;
1969 else {Sel.Vec.pf = amask; Sel.Opts |= XrdCmsSelect::Pending;}
1970 Cache.AddFile(Sel, allNodes);
1971 Sel.Opts = oldOpts;
1972 }
1973 }
1974
1975// Screen out online requests where the file is pending
1976//
1977 if (Sel.Opts & XrdCmsSelect::Online && Sel.Vec.pf)
1978 {pmask = smask = 0;
1979 return 1;
1980 }
1981
1982// If the file is to be written and the files exists then it can't be a new file
1983//
1984 if (isRW && Sel.Vec.hf)
1985 {if (Sel.Opts & XrdCmsSelect::NewFile) return SelFail(Sel,eExists);
1986 if (Sel.Opts & XrdCmsSelect::Trunc) smask = 0;
1987 return 1;
1988 }
1989
1990// Final verification that we have something to select
1991//
// A file nobody has is acceptable only for a write with Trunc or NewFile.
1992 if (!Sel.Vec.hf
1993 && (!isRW || !(Sel.Opts & (XrdCmsSelect::Trunc | XrdCmsSelect::NewFile))))
1994 return SelFail(Sel, eNoEnt);
1995 return 1;
1996}
1997
1998/******************************************************************************/
1999/* s e n d A L i s t */
2000/******************************************************************************/
2001
2002// Single entry at a time, protected by STMutex in write mode!
2003
2004void XrdCmsCluster::sendAList(XrdLink *lp)
2005{
2006 static CmsTryRequest Req = {{0, kYR_try, 0, 0}, 0};
2007 static int HdrSize = sizeof(Req.Hdr) + sizeof(Req.sLen);
2008 static char *AltNext = AltMans;
2009 static struct iovec iov[4] = {{(caddr_t)&Req, (size_t)HdrSize},
2010 {0, 0},
2011 {AltMans, 0},
2012 {(caddr_t)"\0", 1}};
2013 int dlen;
2014
2015// Calculate what to send
2016//
2017 AltNext = AltNext + AltSize;
2018 if (AltNext >= AltMend)
2019 {AltNext = AltMans;
2020 iov[1].iov_len = 0;
2021 iov[2].iov_len = dlen = AltMend - AltMans;
2022 } else {
2023 iov[1].iov_base = (caddr_t)AltNext;
2024 iov[1].iov_len = AltMend - AltNext;
2025 iov[2].iov_len = AltNext - AltMans;
2026 dlen = iov[1].iov_len + iov[2].iov_len;
2027 }
2028
2029// Complete the request (account for trailing null character)
2030//
2031 dlen++;
2032 Req.Hdr.datalen = htons(static_cast<unsigned short>(dlen+sizeof(Req.sLen)));
2033 Req.sLen = htons(static_cast<unsigned short>(dlen));
2034
2035// Send the list of alternates (rotated once)
2036//
2037 lp->Send(iov, 4, dlen+HdrSize);
2038}
2039
2040/******************************************************************************/
2041/* s e t A l t M a n */
2042/******************************************************************************/
2043
2044// Single entry at a time, protected by STMutex in write mode!
2045
2046void XrdCmsCluster::setAltMan(int snum, XrdLink *lp, int port)
2047{
2048 XrdNetAddr altAddr = *(lp->NetAddr());
2049 char *ap = &AltMans[snum*AltSize];
2050 int i;
2051
2052// Preset the buffer and pre-screen the port number
2053//
2054 if (!port || (port > 0x0000ffff)) port = Config.PortTCP;
2055 memset(ap, int(' '), AltSize);
2056
2057// First tr to use the hostname:port which may be too large (unlikely). Else
2058// Insert the ip address of this node into the list of nodes. We made sure that
2059// the size of he buffer was big enough so no need to check for overflow.
2060//
2061 altAddr.Port(port);
2062 if (Config.DoHnTry) i = altAddr.Format(ap, AltSize, XrdNetAddr::fmtName);
2063 else i = 0;
2064 if (!i) i=altAddr.Format(ap,AltSize,XrdNetAddr::fmtAddr,XrdNetAddr::prefipv4);
2065 ap[i] = ' ';
2066
2067// Compute new fence
2068//
2069 if (ap >= AltMend) {AltMend = ap + AltSize; AltMent = snum;}
2070}
2071
2072/******************************************************************************/
2073/* U n r e a c h a b l e */
2074/******************************************************************************/
2075
2076int XrdCmsCluster::Unreachable(XrdCmsSelect &Sel, bool none)
2077{
2079 const char *Amode = (Sel.Opts & XrdCmsSelect::Write ? "write" : "read");
2080 const char *Xmode = (Sel.Opts & XrdCmsSelect::Online ? "immediately " : "");
2081
2082 if (none)
2083 {Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
2084 "No servers are reachable via %s network to %s%s the file.",
2085 XrdNetIF::Name(nType), Xmode, Amode) + 1;
2086 } else {
2087 Sel.Resp.DLen = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data)-1,
2088 "Eligible server is unreachable via %s network to %s%s the file.",
2089 XrdNetIF::Name(nType), Xmode, Amode) + 1;
2090 }
2091 Sel.Resp.Port = kYR_ENETUNREACH;
2092 return EReplete;
2093}
2094
2095/******************************************************************************/
2096/* U n u s e a b l e */
2097/******************************************************************************/
2098
2099int XrdCmsCluster::Unuseable(XrdCmsSelect &Sel)
2100{
2101 const char *Amode = (Sel.Opts & XrdCmsSelect::Write ? "write" : "read");
2102 const char *Xmode = (Sel.Opts & XrdCmsSelect::Online ? "immediately " : "");
2103 const char *EType = (Sel.Opts & XrdCmsSelect::isDir ? "directory" : "file");
2104
2105 int n = snprintf(Sel.Resp.Data, sizeof(Sel.Resp.Data),
2106 "No servers are available to %s%s the %s.",
2107 Xmode, Amode, EType);
2108 if (n < (int)sizeof(Sel.Resp.Data)) Sel.Resp.DLen = n+1;
2109 else Sel.Resp.DLen = sizeof(Sel.Resp.Data);
2110
2111 Sel.Resp.Port = kYR_ENOENT;
2112 return EReplete;
2113}
void Usage(const char *msg)
#define DEBUG(x)
#define EPNAME(x)
#define RefCount(sP, sPMulti, NeedSpace)
#define QTRACE(act)
#define STMax
unsigned long long SMask_t
#define FULLMASK
#define stat(a, b)
Definition XrdPosix.hh:96
bool Debug
bool Exists
XrdOucString Path
struct myOpts opts
if(ec< 0) ec
size_t strlcpy(char *dst, const char *src, size_t sz)
#define TRACE(act, x)
Definition XrdTrace.hh:63
int Exists(XrdCmsRRData &Arg, XrdCmsPInfo &Who, int noLim=0)
static int Present(const char *hName, XrdOucTList *bList=0, char *rbuff=0, int rblen=0)
int GetFile(XrdCmsSelect &Sel, SMask_t mask)
int AddFile(XrdCmsSelect &Sel, SMask_t mask)
XrdCmsPList_Anchor Paths
int UnkFile(XrdCmsSelect &Sel, SMask_t mask)
void Drop(SMask_t mask, int SNum, int xHi)
int WT4File(XrdCmsSelect &Sel, SMask_t mask)
static XrdCmsClustID * AddID(const char *cID)
static SMask_t Mask(const char *cID)
XrdCmsNode * RemNode(XrdCmsNode *nP)
static XrdCmsClustID * Find(const char *cID)
bool AddNode(XrdCmsNode *nP, bool isMan)
SMask_t getMask(const XrdNetAddr *addr)
void Space(XrdCms::SpaceData &sData, SMask_t smask)
int Broadsend(SMask_t smask, XrdCms::CmsRRHdr &Hdr, void *Data, int Dlen)
int Select(XrdCmsSelect &Sel)
int Locate(XrdCmsSelect &Sel)
static const int EReplete
void ResetRef(SMask_t smask, bool isLocked=false)
SMask_t Broadcast(SMask_t, const struct iovec *, int, int tot=0)
XrdCmsSelected * List(SMask_t mask, CmsLSOpts opts, bool &oksel)
static const int RetryErr
XrdCmsNode * Add(XrdLink *lp, int dport, int Status, int sport, const char *theNID, const char *theIF)
static const int NotFound
static const int Wait4CBk
void Remove(XrdCmsNode *theNode)
int Stats(char *bfr, int bln)
virtual void BlackList(XrdOucTList *blP)
friend class XrdCmsDrop
int Statt(char *bfr, int bln)
static const int RepStat_shr
static const int RepStat_frq
XrdCmsDrop(int nid, int inst)
XrdCmsNode * nodeP
XrdCmsDrop(XrdCmsNode *nP)
unsigned int Hash
Definition XrdCmsKey.hh:53
char * Val
Definition XrdCmsKey.hh:52
short Len
Definition XrdCmsKey.hh:54
static const char allowsRW
Definition XrdCmsNode.hh:84
char * Ident
Definition XrdCmsNode.hh:61
void Delete(XrdSysRWLock &gMutex)
void n2gLock(XrdSysRWLock &gMutex, bool rdlock=false)
int Send(const char *buff, int blen=0)
static const char allowsSS
Definition XrdCmsNode.hh:85
void unRef()
void Lock()
static const char isDisabled
Definition XrdCmsNode.hh:80
int isNode(SMask_t smask)
SMask_t Mask()
char * Name()
char isOffline
Definition XrdCmsNode.hh:64
void g2nLock(XrdSysRWLock &gMutex)
static const char isSuspend
Definition XrdCmsNode.hh:81
unsigned int DiskTotal
Definition XrdCmsNode.hh:87
int ID(int &INum)
char isNoStage
Definition XrdCmsNode.hh:66
void Disc(const char *reason=0, int needLock=1)
void UnLock()
void setName(XrdLink *lnkp, const char *theIF, int port)
static const char isDoomed
Definition XrdCmsNode.hh:82
static const char isBlisted
Definition XrdCmsNode.hh:79
SMask_t ssvec
SMask_t rovec
SMask_t rwvec
int Find(const char *pname, XrdCmsPInfo &masks)
SMask_t rwVec
Definition XrdCmsRRQ.hh:59
void Statistics(Info &Data)
Definition XrdCmsRRQ.hh:144
static const char * Type(RoleID rid)
Definition XrdCmsRole.hh:78
struct XrdCmsSelect::@93 Resp
struct XrdCmsSelect::@92 Vec
XrdCmsRRQInfo * InfoP
XrdCmsKey Path
static const int IdentSize
char Ident[IdentSize]
XrdCmsSelected * next
const char * reason
void Update(StateType StateT, int ActivVal, int StageVal=0)
void Set(int ncount)
static const int prefipv4
Use if mapped IPV4 actual format.
int Format(char *bAddr, int bLen, fmtUse fmtType=fmtAuto, int fmtOpts=0)
@ fmtAddr
Address using suitable ipv4 or ipv6 format.
@ fmtName
Hostname if it is resolvable o/w use fmtAddr.
int Port(int pNum=-1)
char Mask()
Definition XrdNetIF.hh:242
static const char * Name(ifType ifT)
Definition XrdNetIF.hh:266
int Port()
Definition XrdNetIF.hh:276
bool HasDest(ifType ifT=PublicV6)
Definition XrdNetIF.hh:221
int GetName(const char *&name, ifType ifT=PublicV6)
Definition XrdNetIF.hh:102
int GetDest(char *dest, int dlen, ifType ifT=PublicV6, bool prefn=false)
Definition XrdNetIF.cc:389
ifType
The enum that is used to index into ifData to get appropriate interface.
Definition XrdNetIF.hh:64
@ PrivateIF
Definition XrdNetIF.hh:68
static int Pack(struct iovec **, const char *, unsigned short &buff)
Definition XrdOucPup.cc:52
void Schedule(XrdJob *jp)
int Emsg(const char *esfx, int ecode, const char *text1, const char *text2=0)
static void Snooze(int seconds)
XrdCmsRRQ RRQ
Definition XrdCmsRRQ.cc:55
XrdCmsCache Cache
static const unsigned char kYR_Version
Definition YProtocol.hh:80
kXR_unt16 datalen
Definition YProtocol.hh:86
@ kYR_ENETUNREACH
Definition YProtocol.hh:158
@ kYR_noReplicas
Definition YProtocol.hh:164
@ kYR_ENOENT
Definition YProtocol.hh:150
@ kYR_RWConflict
Definition YProtocol.hh:163
static const int CMS_isSuper
static const int CMS_noStage
kXR_char modifier
Definition YProtocol.hh:85
XrdScheduler * Sched
XrdCmsBaseFS baseFS
XrdSysError Say
XrdCmsState CmsState
static const int CMS_isMan
XrdCmsConfig Config
@ kYR_state
Definition YProtocol.hh:110
@ kYR_usage
Definition YProtocol.hh:116
static const int CMS_isPeer
static const int CMS_Suspend
long long luSlow
Definition XrdCmsRRQ.hh:139
long long rdSlow
Definition XrdCmsRRQ.hh:141
long long Resp
Definition XrdCmsRRQ.hh:136
long long luFast
Definition XrdCmsRRQ.hh:138
long long Add2Q
Definition XrdCmsRRQ.hh:134
long long Multi
Definition XrdCmsRRQ.hh:137
long long rdFast
Definition XrdCmsRRQ.hh:140
long long PBack
Definition XrdCmsRRQ.hh:135