XRootD
XrdXmlMetaLink.cc
Go to the documentation of this file.
1 /******************************************************************************/
2 /* */
3 /* X r d X m l M e t a L i n k . c c */
4 /* */
5 /* (c) 2015 by the Board of Trustees of the Leland Stanford, Jr., University */
6 /* Produced by Andrew Hanushevsky for Stanford University under contract */
7 /* DE-AC02-76-SFO0515 with the Department of Energy */
8 /* */
9 /* This file is part of the XRootD software suite. */
10 /* */
11 /* XRootD is free software: you can redistribute it and/or modify it under */
12 /* the terms of the GNU Lesser General Public License as published by the */
13 /* Free Software Foundation, either version 3 of the License, or (at your */
14 /* option) any later version. */
15 /* */
16 /* XRootD is distributed in the hope that it will be useful, but WITHOUT */
17 /* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
18 /* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
19 /* License for more details. */
20 /* */
21 /* You should have received a copy of the GNU Lesser General Public License */
22 /* along with XRootD in a file called COPYING.LESSER (LGPL license) and file */
23 /* COPYING (GPL license). If not, see <http://www.gnu.org/licenses/>. */
24 /* */
25 /* The copyright holder's institutional names and contributor's names may not */
26 /* be used to endorse or promote products derived from this software without */
27 /* specific prior written permission of the institution or contributor. */
28 /******************************************************************************/
29 
30 #include <cctype>
31 #include <cstdio>
32 #include <unistd.h>
33 #include <limits.h>
34 
35 #include "XrdSys/XrdSysAtomics.hh"
36 #include "XrdSys/XrdSysE2T.hh"
37 #include "XrdSys/XrdSysFD.hh"
38 #include "XrdSys/XrdSysPthread.hh"
39 #include "XrdXml/XrdXmlMetaLink.hh"
40 
41 /******************************************************************************/
42 /* L o c a l D e f i n i t i o n s */
43 /******************************************************************************/
44 
// Number of elements in a true array (it must not be applied to a pointer).
// The expansion and its argument are parenthesized so that the macro can be
// used safely as part of a larger expression.
//
#define SizeOfVec(x) (sizeof(x)/sizeof((x)[0]))
46 
47 namespace
48 {
49 char tmpPath[40];
50 
51 unsigned int GenTmpPath()
52 {
53 // The below will not generate a result more than 31 characters.
54 //
55  snprintf(tmpPath, sizeof(tmpPath), "/tmp/.MetaLink%8x.%d.",
56  static_cast<int>(time(0)), static_cast<int>(getpid()));
57  return 0;
58 }
59 
60 XrdSysMutex xMutex;
61 
62 unsigned int seqNo = GenTmpPath();
63 }
64 
65 /******************************************************************************/
66 /* L o c a l C l a s s e s */
67 /******************************************************************************/
68 
69 namespace
70 {
71 class CleanUp
72 {
73 public:
74 
75 XrdXmlReader **delRDR;
76 char *delTFN;
77 
78  CleanUp() : delRDR(0), delTFN(0) {}
79  ~CleanUp() {if (delRDR) {delete *delRDR; *delRDR = 0;}
80  if (delTFN) unlink(delTFN);
81  }
82 };
83 
84 class vecMon
85 {
86 public:
87 
88  vecMon(char **vec, int vecn)
89  : theVec(vec), vecNum(vecn) {}
90  ~vecMon() {if (theVec)
91  for (int i = 0; i < vecNum; i++)
92  if (theVec[i]) free(theVec[i]);
93  }
94 private:
95 char **theVec;
96 int vecNum;
97 };
98 }
99 
100 /******************************************************************************/
101 /* C o n v e r t */
102 /******************************************************************************/
103 
104 XrdOucFileInfo *XrdXmlMetaLink::Convert(const char *fname, int blen)
105 {
106  static const char *mlV3NS = "http://www.metalinker.org/";
107  static const char *mlV4NS = "urn:ietf:params:xml:ns:metalink";
108  static const char *mlV3[] = {"metalink", "files", 0};
109  static const char *mTag[] = {"", "metalink", 0};
110  static const char *mAtr[] = {"xmlns", 0};
111  const char *scope = "metalink";
112  char *mVal[] = {0};
113  CleanUp onReturn;
114  XrdOucFileInfo *fP;
115  const char *gLFN;
116  char *colon, gHdr[272];
117  bool chkG;
118 
119 // If we are converting a buffer, then generate the file
120 //
121  if (blen > 0)
122  {if (!PutFile(fname, blen)) return 0;
123  onReturn.delTFN = tmpFn;
124  fname = tmpFn;
125  }
126 
127 // Check if we should add a global file entry
128 //
129  if (rdHost && (rdProt || (prots && (colon = index(prots,':')))))
130  {if (!rdProt) {rdProt = prots; *(colon+1) = 0;}
131  else colon = 0;
132  snprintf(gHdr, sizeof(gHdr), "%s//%s/", rdProt, rdHost);
133  if (colon) *(colon+1) = ':';
134  chkG = true;
135  } else chkG = false;
136 
137 // Get a file reader
138 //
139  if (!(reader = XrdXmlReader::GetReader(fname, encType)))
140  {eCode = errno;
141  snprintf(eText, sizeof(eText), "%s trying to read %s",
142  (errno ? XrdSysE2T(errno) : "Unknown error"), fname);
143  return 0;
144  }
145 
146 // Make sure we delete the reader should we return
147 //
148  onReturn.delRDR = &reader;
149 
150 // We must find the metalink tag
151 //
152  if (!reader->GetElement(mTag, true))
153  {GetRdrError("looking for 'metalink' tag");
154  return 0;
155  }
156 
157 // The input can be in metalink 3 or metalink 4 format. The metalink tag will
158 // tell us which one it is. It better be in the document with the xmlns attribute
159 //
160  if (!reader->GetAttributes(mAtr, mVal))
161  {strcpy(eText, "Required metalink tag attribute 'xmlns' not found");
162  eCode = ENOMSG;
163  return 0;
164  }
165 
166 // The namespace tells us what format we are using here. For v3 formt we must
167 // alignh ourselves on the "files" tag. There can only be one of those present.
168 //
169  if (!strcmp(mVal[0], mlV3NS))
170  {if (!reader->GetElement(mlV3, true))
171  GetRdrError("looking for 'files' tag");
172  scope = "files";
173  }
174  else if ( strcmp((const char *)mVal[0], mlV4NS))
175  {strcpy(eText, "Metalink format not supported");
176  eCode = EPFNOSUPPORT;
177  }
178 
179 // Check if can continue
180 //
181  free(mVal[0]);
182  if (eCode) return 0;
183 
184 // Get one or more files
185 //
186  currFile = 0; fileCnt = 0; noUrl = true;
187  do{if (!GetFile(scope)) break;
188  currFile = new XrdOucFileInfo;
189  if (GetFileInfo("file"))
190  {if (lastFile) lastFile ->nextFile = currFile;
191  else fileList = currFile;
192  lastFile = currFile;
193  if (chkG && (gLFN = currFile->GetLfn()))
194  {char lfnBuff[2048];
195  snprintf(lfnBuff, sizeof(lfnBuff), "%s%s", gHdr, gLFN);
196  currFile->AddUrl(lfnBuff, 0, INT_MAX);
197  currFile->AddProtocol(rdProt);
198  }
199  currFile = 0;
200  fileCnt++; noUrl = true;
201  }
202  } while(doAll);
203 
204 // The loop ends when we cannot find a file tag. So, the current file is invalid
205 //
206  if (currFile) {delete currFile; currFile = 0;}
207 
208 // Check if we have any files at all
209 //
210  if (!fileCnt)
211  {strcpy(eText, "No applicable urls specified for the file entry");
212  eCode = EDESTADDRREQ;
213  }
214 
215 // If this is an all call then return to execute the postantem
216 //
217  fP = fileList; lastFile = fileList = 0;
218  if (doAll) return fP;
219 
220 // Check if we have clean status. If not, undo all we have and return failure
221 //
222  if (!eCode) return fP;
223  if (fP) delete fP;
224  return 0;
225 }
226 
227 /******************************************************************************/
228 /* C o n v e r t A l l */
229 /******************************************************************************/
230 
231 XrdOucFileInfo **XrdXmlMetaLink::ConvertAll(const char *fname, int &count,
232  int blen)
233 {
234  CleanUp onReturn;
235  XrdOucFileInfo *fP, **fvP;
236 
237 // Indicate this is a call from here
238 //
239  doAll = true;
240  count = 0;
241 
242 // If we are converting a buffer, then generate the file
243 //
244  if (blen > 0)
245  {if (!PutFile(fname, blen)) return 0;
246  onReturn.delTFN = tmpFn;
247  fname = tmpFn;
248  }
249 
250 // Perform the conversion
251 //
252  if (!(fP = Convert(fname))) return 0;
253 
254 // Check if we have clean status, if not return nothing
255 //
256  if (eCode)
257  {XrdOucFileInfo *fnP = fP->nextFile;
258  while((fP = fnP))
259  {fnP = fP->nextFile;
260  delete fP;
261  }
262  return 0;
263  }
264 
265 // Return a vector of the file info objects
266 //
267  fvP = new XrdOucFileInfo* [fileCnt];
268  for (int i = 0; i < fileCnt; i++) {fvP[i] = fP; fP = fP->nextFile;}
269  count = fileCnt;
270  return fvP;
271 }
272 
273 /******************************************************************************/
274 /* D e l e t e A l l */
275 /******************************************************************************/
276 
278 {
279 // Delete each object in the vector
280 //
281  for (int i = 0; i < vecn; i++)
282  delete vecp[i];
283 
284 // Now delete the vector
285 //
286  delete []vecp;
287 }
288 
289 /******************************************************************************/
290 /* Private: G e t F i l e */
291 /******************************************************************************/
292 
293 bool XrdXmlMetaLink::GetFile(const char *scope)
294 {
295  const char *fileElem[] = {scope, "file", 0};
296  const char *etext;
297  bool needFile = fileCnt == 0;
298 
299 // We align on "file" this is true at this point regardless of version.
300 //
301  if (!reader->GetElement(fileElem, needFile))
302  {if ((etext = reader->GetError(eCode)))
303  {size_t len = strlen(etext);
304  if(len > sizeof(eText)-1) len=sizeof(eText)-1;
305  memcpy(eText, etext, len);
306  eText[len]=0;
307  }
308  return false;
309  }
310 
311 // We are now aligned on a file tag
312 //
313  return true;
314 }
315 
316 /******************************************************************************/
317 /* Private: G e t F i l e I n f o */
318 /******************************************************************************/
319 
320 bool XrdXmlMetaLink::GetFileInfo(const char *scope)
321 {
322  static const char *fileScope = "file";
323  const char *fsubElem[] = {scope, "url", "hash", "size",
324  "verification", "resources", "glfn", 0};
325  int ePos;
326 
327  if(strncmp(scope, fileScope, 4) == 0) GetName();
328 
329 // Process the elements in he file section. Both formats have the same tags,
330 // though not the same attributes. We will take care of the differences later.
331 //
332  while((ePos = reader->GetElement(fsubElem)))
333  switch(ePos)
334  {case 1: if (!GetUrl()) return false;
335  break;
336  case 2: if (!GetHash()) return false;
337  break;
338  case 3: if (!GetSize()) return false;
339  break;
340  case 4: GetFileInfo("verification");
341  if (eCode) return false;
342  break;
343  case 5: GetFileInfo("resources");
344  if (eCode) return false;
345  break;
346  case 6: if (!GetGLfn()) return false;
347  break;
348  default: break;
349  }
350 
351 // Return success if we had at least one url
352 //
353  return !noUrl;
354 }
355 
356 /******************************************************************************/
357 /* Private: G e t G L f n */
358 /******************************************************************************/
359 
360 bool XrdXmlMetaLink::GetGLfn()
361 {
362  static const char *gAttr[] = {"name", 0};
363  char *gAVal[] = {0};
364  vecMon monVec(gAVal, SizeOfVec(gAVal));
365 
366 // Get the name
367 //
368  if (!reader->GetAttributes(gAttr, gAVal))
369  {strcpy(eText, "Required glfn tag name attribute not found");
370  eCode = ENOMSG;
371  return false;
372  }
373 
374 // Add the the glfn
375 //
376  currFile->AddLfn(gAVal[0]);
377 
378 // All done
379 //
380  return true;
381 }
382 
383 /******************************************************************************/
384 /* Private: G e t H a s h */
385 /******************************************************************************/
386 
387 bool XrdXmlMetaLink::GetHash()
388 {
389  static const char *hAttr[] = {"type", 0};
390  char *hAVal[] = {0};
391  vecMon monVec(hAVal, SizeOfVec(hAVal));
392  char *value;
393 
394 // Get the hash type
395 //
396  if (!reader->GetAttributes(hAttr, hAVal))
397  {strcpy(eText, "Required hash tag type attribute not found");
398  eCode = ENOMSG;
399  return false;
400  }
401 
402 // Now get the hash value
403 //
404  if (!(value = reader->GetText("hash", true))) return false;
405 
406 // Add a new digest
407 //
408  currFile->AddDigest(hAVal[0], value);
409 
410 // All done
411 //
412  free(value);
413  return true;
414 }
415 
416 /******************************************************************************/
417 /* G e t R d r E r r o r */
418 /******************************************************************************/
419 
420 void XrdXmlMetaLink::GetRdrError(const char *why)
421 {
422  const char *etext = reader->GetError(eCode);
423 
424  if (etext)
425  {size_t len = strlen(etext);
426  if(len > sizeof(eText)-1) len = sizeof(eText)-1;
427  memcpy(eText, etext, len);
428  eText[len]=0;
429  }
430  else {snprintf(eText, sizeof(eText), "End of xml while %s", why);
431  eCode = EIDRM;
432  }
433 }
434 
435 /******************************************************************************/
436 /* Private: G e t S i z e */
437 /******************************************************************************/
438 
439 bool XrdXmlMetaLink::GetSize()
440 {
441  char *eP, *value;
442  long long fsz;
443 
444 // Now get the size value
445 //
446  if (!(value = reader->GetText("size", true))) return false;
447 
448 // Convert size, it must convert clean and be non-negatie
449 //
450  fsz = strtoll(value, &eP, 10);
451  if (fsz < 0 || *eP != 0)
452  {snprintf(eText,sizeof(eText), "Size tag value '%s' is invalid", value);
453  eCode = EINVAL;
454  free(value);
455  return false;
456  }
457 
458 // Set the size and return
459 //
460  currFile->SetSize(fsz);
461  free(value);
462  return true;
463 }
464 
465 /******************************************************************************/
466 /* Private: G e t U r l */
467 /******************************************************************************/
468 
469 bool XrdXmlMetaLink::GetUrl()
470 {
471  static const char *uAttr[] = {"location", "priority", "preference", 0};
472  char *uAVal[] = {0, 0, 0};
473  vecMon monVec(uAVal, SizeOfVec(uAVal));
474  char *value;
475  int prty = 0;
476 
477 // Get the optional attributes
478 //
479  reader->GetAttributes(uAttr, uAVal);
480 
481 // Now get the url value. There might be one, that is valid and we ignore it.
482 //
483  if (!(value = reader->GetText("url"))) return true;
484 
485 // Check if we need to screen url protocols
486 //
487  if (!UrlOK(value))
488  {free(value);
489  return true;
490  }
491 
492 // Process priority or preference (we ignore errors here)
493 //
494  if (uAVal[1]) prty = atoi(uAVal[1]);
495  else if (uAVal[2])
496  {prty = 100 - atoi(uAVal[2]);
497  if (prty < 0) prty = 0;
498  }
499 
500 // Add the url to the flle
501 //
502  currFile->AddUrl(value, uAVal[0], prty);
503  free(value);
504 
505 // All done
506 //
507  noUrl = false;
508  return true;
509 }
510 
511 /******************************************************************************/
512 /* Private: G e t N a m e */
513 /******************************************************************************/
514 
515 void XrdXmlMetaLink::GetName()
516 {
517  static const char *mAtr[] = {"name", 0};
518  char *mVal[] = {0};
519  reader->GetAttributes(mAtr, mVal);
520  currFile->AddFileName(mVal[0]);
521  free(mVal[0]);
522 }
523 
524 /******************************************************************************/
525 /* Private: P u t F i l e */
526 /******************************************************************************/
527 
528 bool XrdXmlMetaLink::PutFile(const char *buff, int blen)
529 {
530  static const int oFlags = O_EXCL | O_CREAT | O_TRUNC | O_WRONLY;
531  const char *what = "opening";
532  unsigned int fSeq;
533  int fd;
534 
535 // Get a unique sequence number
536 //
537  AtomicBeg(xMutex);
538  fSeq = AtomicInc(seqNo);
539  AtomicEnd(xMutex);
540 
541 // Generate a unique filepath. Unfortunately, mktemp is unsafe and mkstemp may
542 // leak a file descriptor. So, we roll our own using above sequence number.
543 // Note that the target buffer is 64 characters which is suffcient for us.
544 //
545  snprintf(tmpFn, sizeof(tmpFn), "%s%u", tmpPath, fSeq);
546 
547 // Open the file for output, write out the buffer, and close the file
548 //
549  if ((fd = XrdSysFD_Open(tmpFn, oFlags, S_IRUSR|S_IWUSR)) > 0)
550  {what = "writing";
551  if (write(fd, buff, blen) == blen)
552  {what = "closing";
553  if (!close(fd)) return true;
554  }
555  }
556 
557 // We failed
558 //
559  eCode = errno;
560  snprintf(eText, sizeof(eText), "%s %s %s", XrdSysE2T(eCode), what, tmpFn);
561  unlink(tmpFn);
562  return false;
563 }
564 
565 /******************************************************************************/
566 /* Private: U r l O K */
567 /******************************************************************************/
568 
569 bool XrdXmlMetaLink::UrlOK(char *url)
570 {
571  char *colon, pBuff[16];
572  int n;
573 
574 // Find the colon and get the length of the protocol
575 //
576  if (!(colon = index(url, ':'))) return false;
577  n = colon - url + 1;
578  if (n >= (int)sizeof(pBuff)) return false;
579  strncpy(pBuff, url, n);
580  pBuff[n] = 0;
581 
582 // Add this protocol to the list we found
583 //
584  currFile->AddProtocol(pBuff);
585 
586 // Return whether or not this os one of the acceptable protocols
587 //
588  if (prots) return (strstr(prots, pBuff) != 0);
589  return true;
590 }
int unlink(const char *path)
ssize_t write(int fildes, const void *buf, size_t nbyte)
#define close(a)
Definition: XrdPosix.hh:43
#define AtomicInc(x)
#define AtomicBeg(Mtx)
#define AtomicEnd(Mtx)
const char * XrdSysE2T(int errcode)
Definition: XrdSysE2T.cc:104
XrdOucFileInfo * nextFile
Link field to simplify multiple file processing.
void AddFileName(const char *filename)
void AddProtocol(const char *protname)
void AddLfn(const char *lfn)
void AddUrl(const char *url, const char *cntry=0, int prty=0, bool fifo=true)
void AddDigest(const char *hname, const char *hval)
void SetSize(long long fsz)
const char * GetLfn()
virtual char * GetText(const char *ename, bool reqd=false)=0
static XrdXmlReader * GetReader(const char *fname, const char *enc=0, const char *impl=0)
Definition: XrdXmlReader.cc:43
virtual int GetElement(const char **ename, bool reqd=false)=0
virtual const char * GetError(int &ecode)=0
virtual bool GetAttributes(const char **aname, char **aval)=0