XRootD
XrdClZipArchive.cc
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // Copyright (c) 2011-2014 by European Organization for Nuclear Research (CERN)
3 // Author: Michal Simon <michal.simon@cern.ch>
4 //-----------------------------------------------------------------------------
5 // This file is part of the XRootD software suite.
6 //
7 // XRootD is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Lesser General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // XRootD is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with XRootD. If not, see <http://www.gnu.org/licenses/>.
19 //
20 // In applying this licence, CERN does not waive the privileges and immunities
21 // granted to it by virtue of its status as an Intergovernmental Organization
22 // or submit itself to any jurisdiction.
23 //-----------------------------------------------------------------------------
24 
27 #include "XrdCl/XrdClZipArchive.hh"
28 #include "XrdCl/XrdClLog.hh"
29 #include "XrdCl/XrdClDefaultEnv.hh"
30 #include "XrdCl/XrdClConstants.hh"
31 #include "XrdCl/XrdClUtils.hh"
33 
34 #include <sys/stat.h>
35 
36 namespace XrdCl
37 {
38  using namespace XrdZip;
39 
40  //---------------------------------------------------------------------------
41  // Read data from a given file
42  //---------------------------------------------------------------------------
// Common implementation for reading from a file stored in the archive. RSP is
// the response type handed to the user handler.
// NOTE(review): listing lines 44, 53, 59 and 66 are absent from this extract
// (the function name with its first parameter, and the first line of three
// early returns). Presumably this is `ReadFromImpl( ZipArchive &me, ...`, the
// helper called by ReadFrom (RSP = ChunkInfo) and PgReadFrom (RSP = PageInfo)
// further down in this file - confirm against the repository.
//
// fn             : name of the file inside the archive
// relativeOffset : offset within that file (not within the archive)
// size           : number of bytes requested; clamped below to what is left
//                  of the uncompressed file
// usrbuff        : destination buffer supplied by the caller
// usrHandler     : receives the status and the RSP response
// timeout        : passed to the asynchronous pipelines
43 template<typename RSP>
45 const std::string &fn,
46 uint64_t relativeOffset,
47 uint32_t size,
48 void *usrbuff,
49 ResponseHandler *usrHandler,
50 uint16_t timeout )
51 {
// the archive has to be open and its central directory already parsed
52 if( me.openstage != ZipArchive::Done || !me.archive.IsOpen() )
54 
55 Log *log = DefaultEnv::GetLog();
56 
// look the file up in the central directory (name -> index into cdvec)
57 auto cditr = me.cdmap.find( fn );
58 if( cditr == me.cdmap.end() )
60 errNotFound, "File not found." );
61 
62 CDFH *cdfh = me.cdvec[cditr->second].get();
63 
64 // check if the file is compressed, for now we only support uncompressed and inflate/deflate compression
65 if( cdfh->compressionMethod != 0 && cdfh->compressionMethod != Z_DEFLATED )
67 0, "The compression algorithm is not supported!" );
68 
69 // Now the problem is that at the beginning of our
70 // file there is the Local-file-header, which size
71 // is not known because of the variable size 'extra'
72 // field, so we need to know the offset of the next
73 // record and shift it by the file size.
74 // The next record is either the next LFH (next file)
75 // or the start of the Central-directory.
76 uint64_t cdOffset = me.zip64eocd ? me.zip64eocd->cdOffset : me.eocd->cdOffset;
77 uint64_t nextRecordOffset = ( cditr->second + 1 < me.cdvec.size() ) ?
78 CDFH::GetOffset( *me.cdvec[cditr->second + 1] ) : cdOffset;
// a 32-bit size field equal to 0xFFFFFFFF means the real value is kept in
// the 'extra' record (when there is one)
79 uint64_t filesize = cdfh->compressedSize;
80 if( filesize == std::numeric_limits<uint32_t>::max() && cdfh->extra )
81 filesize = cdfh->extra->compressedSize;
82 uint16_t descsize = cdfh->HasDataDescriptor() ?
83 DataDescriptor::GetSize( cdfh->IsZIP64() ) : 0;
// fileoff: archive offset of the first data byte of the file,
// offset : archive offset that corresponds to relativeOffset
84 uint64_t fileoff = nextRecordOffset - filesize - descsize;
85 uint64_t offset = fileoff + relativeOffset;
86 uint64_t uncompressedSize = cdfh->uncompressedSize;
87 if( uncompressedSize == std::numeric_limits<uint32_t>::max() && cdfh->extra )
88 uncompressedSize = cdfh->extra->uncompressedSize;
// never hand out more than what remains of the (uncompressed) file
89 uint64_t sizeTillEnd = relativeOffset > uncompressedSize ?
90 0 : uncompressedSize - relativeOffset;
91 if( size > sizeTillEnd ) size = sizeTillEnd;
92 
93 // if it is a compressed file use ZIP cache to read from the file
94 if( cdfh->compressionMethod == Z_DEFLATED )
95 {
96 log->Dump( ZipMsg, "[%p] Reading compressed data.", &me );
97 // check if respective ZIP cache exists
98 bool empty = me.zipcache.find( fn ) == me.zipcache.end();
99 // if the entry does not exist, it will be created using
100 // default constructor
101 ZipCache &cache = me.zipcache[fn];
102 
103 if( relativeOffset > uncompressedSize )
104 {
105 // we are reading past the end of file,
106 // we can serve the request right away!
// NOTE(review): usrHandler is dereferenced here without a null check, while
// the uncompressed paths below test `if( usrHandler )` first - confirm that
// a null handler cannot reach this branch.
107 RSP *r = new RSP( relativeOffset, 0, usrbuff );
108 AnyObject *rsp = new AnyObject();
109 rsp->Set( r );
110 usrHandler->HandleResponse( new XRootDStatus(), rsp );
111 return XRootDStatus();
112 }
113 
// register the user request with the cache before any data is fed into it
114 uint32_t sizereq = size;
115 if( relativeOffset + size > uncompressedSize )
116 sizereq = uncompressedSize - relativeOffset;
117 cache.QueueReq( relativeOffset, sizereq, usrbuff, usrHandler );
118 
119 // if we have the whole ZIP archive we can populate the cache
120 // straight away
121 if( empty && me.buffer)
122 {
123 auto begin = me.buffer.get() + fileoff;
124 auto end = begin + filesize ;
125 buffer_t buff( begin, end );
126 cache.QueueRsp( XRootDStatus(), 0, std::move( buff ) );
127 return XRootDStatus();
128 }
129 
130 // if we don't have the data we need to issue a remote read
131 if( !me.buffer )
132 {
// NOTE(review): relativeOffset is an offset into the uncompressed file (it
// is compared with uncompressedSize above) but is compared with the
// compressed size here and used as the offset into the compressed data -
// presumably callers read sequentially; confirm.
133 if( relativeOffset > filesize ) return XRootDStatus(); // there's nothing to do,
134 // we already have all the data locally
135 uint32_t rdsize = size;
136 // check if this is the last read (we reached the end of
137 // file from user perspective)
138 if( relativeOffset + size >= uncompressedSize )
139 {
140 // if yes, make sure we readout all the compressed data
141 // Note: In a pathological case the compressed size may
142 // be greater than the uncompressed size
143 rdsize = filesize > relativeOffset ?
144 filesize - relativeOffset :
145 0;
146 }
147 // make sure we are not reading past the end of
148 // compressed data
149 if( relativeOffset + size > filesize )
150 rdsize = filesize - relativeOffset;
151 
152 
153 // now read the data ...
// rdbuff is shared with the callback so that the buffer stays alive until
// the read completes.
// NOTE(review): cache and me are captured by reference - this assumes the
// archive object and its cache entry outlive the asynchronous read; confirm.
154 auto rdbuff = std::make_shared<ZipCache::buffer_t>( rdsize );
155 Pipeline p = XrdCl::RdWithRsp<RSP>( me.archive, offset, rdbuff->size(), rdbuff->data() ) >>
156 [relativeOffset, rdbuff, &cache, &me]( XRootDStatus &st, RSP &rsp )
157 {
158 Log *log = DefaultEnv::GetLog();
159 log->Dump( ZipMsg, "[%p] Read %u bytes of remote data at offset %llu.",
160 &me, rsp.GetLength(), (unsigned long long) rsp.GetOffset() );
161 cache.QueueRsp( st, relativeOffset, std::move( *rdbuff ) );
162 };
163 Async( std::move( p ), timeout );
164 }
165 
166 return XRootDStatus();
167 }
168 
169 // check if we have the whole file in our local buffer
// (a zero-length request is also answered here, without touching the buffer)
170 if( me.buffer || size == 0 )
171 {
172 if( size )
173 {
174 memcpy( usrbuff, me.buffer.get() + offset, size );
175 log->Dump( ZipMsg, "[%p] Serving read from local cache.", &me );
176 }
177 
178 if( usrHandler )
179 {
180 XRootDStatus *st = ZipArchive::make_status();
181 RSP *rsp = new RSP( relativeOffset, size, usrbuff );
182 ZipArchive::Schedule( usrHandler, st, rsp );
183 }
184 return XRootDStatus();
185 }
186 
// uncompressed file that is not held locally: read it straight into the
// user buffer and report the offset relative to the file, not the archive
187 Pipeline p = XrdCl::RdWithRsp<RSP>( me.archive, offset, size, usrbuff ) >>
188 [=, &me]( XRootDStatus &st, RSP &r )
189 {
190 log->Dump( ZipMsg, "[%p] Read %u bytes of remote data at "
191 "offset %llu.", &me, r.GetLength(), (unsigned long long) r.GetOffset() );
192 if( usrHandler )
193 {
194 XRootDStatus *status = ZipArchive::make_status( st );
195 RSP *rsp = nullptr;
196 if( st.IsOK() )
197 rsp = new RSP( relativeOffset, r.GetLength(), r.GetBuffer() );
198 usrHandler->HandleResponse( status, ZipArchive::PkgRsp( rsp ) );
199 }
200 };
201 Async( std::move( p ), timeout );
202 return XRootDStatus();
203 }
204 
205  //---------------------------------------------------------------------------
206  // Constructor
207  //---------------------------------------------------------------------------
208  ZipArchive::ZipArchive( bool enablePlugIns) : archive( enablePlugIns ),
209  archsize( 0 ),
210  cdexists( false ),
211  updated( false ),
212  cdoff( 0 ),
213  orgcdsz( 0 ),
214  orgcdcnt( 0 ),
215  openstage( None ),
216  ckpinit( false )
217  {
218  }
219 
220  //---------------------------------------------------------------------------
221  // Destructor
222  //---------------------------------------------------------------------------
// Empty body: nothing is released explicitly here.
// NOTE(review): the signature line (listing line 223) is absent from this
// extract - presumably `ZipArchive::~ZipArchive()`; confirm.
224 {
225 }
226 
227  //---------------------------------------------------------------------------
228  // Open the ZIP archive in read-only mode without parsing the central
229  // directory.
230  //---------------------------------------------------------------------------
// Opens the archive file asynchronously without reading the central
// directory; on success the archive size is recorded and the open stage
// becomes NotParsed.
//
// url     : location of the ZIP archive
// update  : not used on any line visible here
// handler : if not null, receives the status of the open (null response)
// timeout : passed to the asynchronous pipeline
// returns : an OK status; the outcome of the open is reported via handler
//
// NOTE(review): listing line 236 is absent from this extract - presumably it
// defines `flags` (used below) from `update`; confirm.
231 XRootDStatus ZipArchive::OpenOnly( const std::string &url,
232 bool update,
233 ResponseHandler *handler,
234 uint16_t timeout )
235 {
237 Pipeline open_only = XrdCl::Open( archive, url, flags ) >>
238 [=]( XRootDStatus &st, StatInfo &info )
239 {
240 Log *log = DefaultEnv::GetLog();
241 // check the status is OK
242 if( st.IsOK() )
243 {
244 archsize = info.GetSize();
245 openstage = NotParsed;
246 log->Debug( ZipMsg, "[%p] Opened (only) a ZIP archive (%s).",
247 this, url.c_str() );
248 }
249 else
250 {
251 log->Error( ZipMsg, "[%p] Failed to open-only a ZIP archive (%s): %s",
252 this, url.c_str(), st.ToString().c_str() );
253 }
254 
// the handler is notified on success as well as on failure
255 if( handler )
256 handler->HandleResponse( make_status( st ), nullptr );
257 };
258 
259 Async( std::move( open_only ), timeout );
260 return XRootDStatus();
261 }
262 
263  //---------------------------------------------------------------------------
264  // Open ZIP Archive (and parse the Central Directory)
265  //---------------------------------------------------------------------------
// Opens the archive and parses its central directory asynchronously.
// The pipeline is: open -> read (repeated as needed) -> final callback.
// The read callback is a small state machine driven by `openstage`:
//   HaveEocdBlk       : locate and parse the EOCD record in the tail block
//   HaveZip64EocdlBlk : parse the ZIP64 EOCD locator
//   HaveZip64EocdBlk  : parse the ZIP64 EOCD record
//   HaveCdRecords     : parse the central directory records, then Done
//
// url     : location of the ZIP archive
// flags   : open flags passed to the file open
// handler : if not null, receives the final status (null response)
// timeout : passed to the asynchronous pipeline
// returns : an OK status; the outcome is reported via handler
//
// NOTE(review): listing lines 276, 395 and 464 are absent from this extract
// (the rest of the maxrdsz initializer, one statement at the end of the
// "read more data" branch of HaveZip64EocdlBlk, and one line before the end
// of the switch) - confirm against the repository.
266 XRootDStatus ZipArchive::OpenArchive( const std::string &url,
267 OpenFlags::Flags flags,
268 ResponseHandler *handler,
269 uint16_t timeout )
270 {
271 Log *log = DefaultEnv::GetLog();
272 Fwd<uint32_t> rdsize; // number of bytes to be read
273 Fwd<uint64_t> rdoff; // offset for the read request
274 Fwd<void*> rdbuff; // buffer for data to be read
275 uint32_t maxrdsz = EOCD::maxCommentLength + EOCD::eocdBaseSize +
277 
278 Pipeline open_archive = // open the archive
279 XrdCl::Open( archive, url, flags ) >>
280 [=]( XRootDStatus &status, StatInfo &info ) mutable
281 {
282 // check the status is OK
283 if( !status.IsOK() ) return;
284 
285 archsize = info.GetSize();
286 // if it is an empty file (possibly a new file) there's nothing more to do
287 if( archsize == 0 )
288 {
289 cdexists = false;
290 openstage = Done;
291 log->Dump( ZipMsg, "[%p] Opened a ZIP archive (file empty).", this );
// NOTE(review): presumably Pipeline::Stop() does not return to this point;
// otherwise the statements below would run with archsize == 0 - confirm.
292 Pipeline::Stop();
293 }
294 // prepare the arguments for the subsequent read
// read at most maxrdsz bytes from the tail of the archive
295 rdsize = ( archsize <= maxrdsz ? archsize : maxrdsz );
296 rdoff = archsize - *rdsize;
297 buffer.reset( new char[*rdsize] );
298 rdbuff = buffer.get();
299 openstage = HaveEocdBlk;
300 log->Dump( ZipMsg, "[%p] Opened a ZIP archive, reading "
301 "Central Directory at offset: %llu.", this, (unsigned long long) *rdoff );
302 }
303 // read the Central Directory (in several stages if necessary)
304 | XrdCl::Read( archive, rdoff, rdsize, rdbuff ) >>
305 [=]( XRootDStatus &status, ChunkInfo &chunk ) mutable
306 {
307 // check the status is OK
308 if( !status.IsOK() ) return;
309 
// buff: current parse position inside the chunk that has just been read
310 const char *buff = reinterpret_cast<char*>( chunk.buffer );
311 while( true )
312 {
313 switch( openstage )
314 {
315 case HaveEocdBlk:
316 {
317 // Parse the EOCD record
318 const char *eocdBlock = EOCD::Find( buff, chunk.length );
319 if( !eocdBlock )
320 {
321 XRootDStatus error( stError, errDataError, 0,
322 "End-of-central-directory signature not found." );
323 Pipeline::Stop( error );
324 }
325 try{
326 eocd.reset( new EOCD( eocdBlock, chunk.length - uint32_t(eocdBlock - buff) ) );
327 log->Dump( ZipMsg, "[%p] EOCD record parsed: %s", this,
328 eocd->ToString().c_str() );
// reject a central directory that would lie outside of the archive
// NOTE(review): depending on the field types the sum below may wrap
// around - confirm the types of cdOffset and cdSize.
329 if(eocd->cdOffset > archsize || eocd->cdOffset + eocd->cdSize > archsize)
330 throw bad_data();
331 }
332 catch(const bad_data &ex){
333 XRootDStatus error( stError, errDataError, 0,
334 "End-of-central-directory signature corrupted." );
335 Pipeline::Stop( error );
336 }
337 // Do we have the whole archive?
338 if( chunk.length == archsize )
339 {
340 // If we managed to download the whole archive we don't need to
341 // worry about zip64, it is so small that standard EOCD will do
342 cdoff = eocd->cdOffset;
343 orgcdsz = eocd->cdSize;
344 orgcdcnt = eocd->nbCdRec;
345 buff = buff + cdoff;
346 openstage = HaveCdRecords;
347 continue;
348 }
349 
350 // Let's see if it is ZIP64 (if yes, the EOCD will be preceded with ZIP64 EOCD locator)
351 const char *zip64EocdlBlock = eocdBlock - ZIP64_EOCDL::zip64EocdlSize;
352 // make sure there is enough data to assume there's a ZIP64 EOCD locator
353 if( zip64EocdlBlock > buffer.get() )
354 {
355 uint32_t signature = to<uint32_t>( zip64EocdlBlock );
356 if( signature == ZIP64_EOCDL::zip64EocdlSign )
357 {
358 buff = zip64EocdlBlock;
359 openstage = HaveZip64EocdlBlk;
360 continue;
361 }
362 }
363 
364 // It's not ZIP64, we already know where the CD records are
365 // we need to read more data
366 cdoff = eocd->cdOffset;
367 orgcdsz = eocd->cdSize;
368 orgcdcnt = eocd->nbCdRec;
369 rdoff = eocd->cdOffset;
370 rdsize = eocd->cdSize;
371 buffer.reset( new char[*rdsize] );
372 rdbuff = buffer.get();
373 openstage = HaveCdRecords;
374 log->Dump( ZipMsg, "[%p] Reading additional data at offset: %llu.",
375 this, (unsigned long long) *rdoff );
376 Pipeline::Repeat(); break; // the break is really not needed ...
377 }
378 
379 case HaveZip64EocdlBlk:
380 {
381 std::unique_ptr<ZIP64_EOCDL> eocdl( new ZIP64_EOCDL( buff ) );
382 log->Dump( ZipMsg, "[%p] EOCDL record parsed: %s",
383 this, eocdl->ToString().c_str() );
384 
// the ZIP64 EOCD record starts before the chunk we hold
385 if( chunk.offset > eocdl->zip64EocdOffset )
386 {
387 // we need to read more data, adjust the read arguments
388 rdsize = archsize - eocdl->zip64EocdOffset;
389 rdoff = eocdl->zip64EocdOffset;
390 buffer.reset( new char[*rdsize] );
391 rdbuff = buffer.get();
392 openstage = HaveZip64EocdBlk;
393 log->Dump( ZipMsg, "[%p] Reading additional data at offset: %llu.",
394 this, (unsigned long long) *rdoff );
396 }
397 
// the ZIP64 EOCD record is already in the buffer, continue parsing there
398 buff = buffer.get() + ( eocdl->zip64EocdOffset - chunk.offset );
399 openstage = HaveZip64EocdBlk;
400 continue;
401 }
402 
403 case HaveZip64EocdBlk:
404 {
405 uint32_t signature = to<uint32_t>( buff );
406 if( signature != ZIP64_EOCD::zip64EocdSign )
407 {
408 XRootDStatus error( stError, errDataError, 0,
409 "ZIP64 End-of-central-directory signature not found." );
410 Pipeline::Stop( error );
411 }
412 zip64eocd.reset( new ZIP64_EOCD( buff ) );
413 log->Dump( ZipMsg, "[%p] ZIP64EOCD record parsed: %s",
414 this, zip64eocd->ToString().c_str() );
415 
416 // now we can read the CD records, adjust the read arguments
417 cdoff = zip64eocd->cdOffset;
418 orgcdsz = zip64eocd->cdSize;
419 orgcdcnt = zip64eocd->nbCdRec;
420 rdoff = zip64eocd->cdOffset;
421 rdsize = zip64eocd->cdSize;
422 buffer.reset( new char[*rdsize] );
423 rdbuff = buffer.get();
424 openstage = HaveCdRecords;
425 log->Dump( ZipMsg, "[%p] Reading additional data at offset: %llu.",
426 this, (unsigned long long) *rdoff );
427 Pipeline::Repeat(); break; // the break is really not needed ...
428 }
429 
430 case HaveCdRecords:
431 {
432 // make a copy of the original CDFH records
433 orgcdbuf.reserve( orgcdsz );
434 std::copy( buff, buff + orgcdsz, std::back_inserter( orgcdbuf ) );
435 try
436 {
437 if( zip64eocd )
438 std::tie( cdvec, cdmap ) = CDFH::Parse( buff, zip64eocd->cdSize, zip64eocd->nbCdRec );
439 else
440 std::tie( cdvec, cdmap ) = CDFH::Parse( buff, eocd->cdSize, eocd->nbCdRec );
441 log->Dump( ZipMsg, "[%p] CD records parsed.", this );
// sanity checks: every record, and the sum of all compressed sizes, has
// to fit within the archive
// NOTE(review): the per-record check uses the plain offset/compressedSize
// fields even for ZIP64 records, while the sum uses extra->compressedSize,
// which is dereferenced without a null check - confirm this is intended.
442 uint64_t sumCompSize = 0;
443 for (auto it = cdvec.begin(); it != cdvec.end(); it++)
444 {
445 sumCompSize += (*it)->IsZIP64() ? (*it)->extra->compressedSize : (*it)->compressedSize;
446 if ((*it)->offset > archsize || (*it)->offset + (*it)->compressedSize > archsize)
447 throw bad_data();
448 }
449 if (sumCompSize > archsize)
450 throw bad_data();
451 }
452 catch( const bad_data &ex )
453 {
454 XRootDStatus error( stError, errDataError, 0,
455 "ZIP Central Directory corrupted." );
456 Pipeline::Stop( error );
457 }
// keep the buffer only if it holds the whole archive
458 if( chunk.length != archsize ) buffer.reset();
459 openstage = Done;
460 cdexists = true;
461 break;
462 }
463 
465 }
466 
467 break;
468 }
469 }
470 | XrdCl::Final( [=]( const XRootDStatus &status )
471 { // finalize the pipeline by calling the user callback
472 if( status.IsOK() )
473 log->Debug( ZipMsg, "[%p] Opened a ZIP archive (%s): %s",
474 this, url.c_str(), status.ToString().c_str() );
475 else
476 log->Error( ZipMsg, "[%p] Failed to open a ZIP archive (%s): %s",
477 this, url.c_str(), status.ToString().c_str() );
478 if( handler )
479 handler->HandleResponse( make_status( status ), nullptr );
480 } );
481 
482 Async( std::move( open_archive ), timeout );
483 return XRootDStatus();
484 }
485 
486  //---------------------------------------------------------------------------
487  // Open a file within the ZIP Archive
488  //---------------------------------------------------------------------------
489  XRootDStatus ZipArchive::OpenFile( const std::string &fn,
490  OpenFlags::Flags flags,
491  uint64_t size,
492  uint32_t crc32 )
493  {
494  if( !openfn.empty() || openstage != Done || !archive.IsOpen() )
495  return XRootDStatus( stError, errInvalidOp );
496 
497  Log *log = DefaultEnv::GetLog();
498  auto itr = cdmap.find( fn );
499  if( itr == cdmap.end() )
500  {
501  // the file does not exist in the archive so it only makes sense
502  // if our user is opening for append
503  if( flags & OpenFlags::New )
504  {
505  openfn = fn;
506  lfh.reset( new LFH( fn, crc32, size, time( 0 ) ) );
507  log->Dump( ZipMsg, "[%p] File %s opened for append.",
508  this, fn.c_str() );
509  return XRootDStatus();
510  }
511  log->Dump( ZipMsg, "[%p] Open failed: %s not in the ZIP archive.",
512  this, fn.c_str() );
513  return XRootDStatus( stError, errNotFound );
514  }
515 
516  // the file name exist in the archive but our user wants to append
517  // a file with the same name
518  if( flags & OpenFlags::New )
519  {
520  log->Dump( ZipMsg, "[%p] Open failed: file exists %s, cannot append.",
521  this, fn.c_str() );
522 
523  return XRootDStatus( stError, errInvalidOp, EEXIST, "The file already exists in the ZIP archive." );
524  }
525 
526  openfn = fn;
527  log->Dump( ZipMsg, "[%p] File %s opened for reading.",
528  this, fn.c_str() );
529  return XRootDStatus();
530  }
531 
532  //---------------------------------------------------------------------------
533  // Get a buffer with central directory of the ZIP archive
534  //---------------------------------------------------------------------------
// Builds the end-of-archive metadata as one buffer. The records are appended
// in this order: central directory records, ZIP64 EOCD (if any), ZIP64 EOCD
// locator (if any), EOCD. The `eocd` member (and `zip64eocd` when ZIP64 is
// needed) is re-created as a side effect.
// `size` is only used to reserve capacity for the returned buffer.
//
// NOTE(review): listing line 550 is absent from this extract - presumably it
// adds the size of the ZIP64 EOCD locator to `size`; confirm.
535 buffer_t ZipArchive::GetCD()
536 {
537 uint32_t size = 0;
538 uint32_t cdsize = CDFH::CalcSize( cdvec, orgcdsz, orgcdcnt );
539 // first create the EOCD record
540 eocd.reset( new EOCD( cdoff, cdvec.size(), cdsize ) );
541 size += eocd->eocdSize ;
542 size += eocd->cdSize;
543 // then create zip64eocd & zip64eocdl if necessary
544 std::unique_ptr<ZIP64_EOCDL> zip64eocdl;
545 if( eocd->useZip64 )
546 {
547 zip64eocd.reset( new ZIP64_EOCD( cdoff, cdvec.size(), cdsize ) );
548 size += zip64eocd->zip64EocdTotalSize;
549 zip64eocdl.reset( new ZIP64_EOCDL( *eocd, *zip64eocd ) );
551 }
552 
553 // Now serialize all records into a buffer
554 buffer_t metadata;
555 metadata.reserve( size );
556 CDFH::Serialize( orgcdcnt, orgcdbuf, cdvec, metadata );
// NOTE(review): zip64eocd is a member, so a ZIP64 EOCD left over from
// parsing the archive is serialized even when useZip64 is false while the
// locator is not - confirm this is intended.
557 if( zip64eocd )
558 zip64eocd->Serialize( metadata );
559 if( zip64eocdl )
560 zip64eocdl->Serialize( metadata );
561 eocd->Serialize( metadata );
562 
563 return metadata;
564 }
565 
566  //---------------------------------------------------------------------------
567  // Set central directory for the ZIP archive
568  //---------------------------------------------------------------------------
// Initializes the archive metadata from a buffer laid out as: central
// directory records, optional ZIP64 EOCD, optional ZIP64 EOCD locator, EOCD.
// Does nothing unless the open stage is NotParsed; on completion the open
// stage is Done and cdexists is set.
//
// buffer : the metadata to be parsed
//
// NOTE(review): listing line 592 is absent from this extract - presumably the
// statement that advances `buff` past the ZIP64 EOCD locator; confirm.
569 void ZipArchive::SetCD( const buffer_t &buffer )
570 {
571 if( openstage != NotParsed ) return;
572 
573 const char *buff = buffer.data();
574 size_t size = buffer.size();
575 
576 // parse Central Directory records
// presumably Parse advances `buff` past the records it consumed (the code
// below treats `buff` as the end of the CD records) - confirm
577 std::tie(cdvec, cdmap ) = CDFH::Parse( buff, size );
578 // make a copy of the original CDFH records
579 orgcdsz = buff - buffer.data();
580 orgcdcnt = cdvec.size();
581 orgcdbuf.reserve( orgcdsz );
582 std::copy( buffer.data(), buff, std::back_inserter( orgcdbuf ) );
583 // parse ZIP64EOCD record if exists
584 uint32_t signature = to<uint32_t>( buff );
585 if( signature == ZIP64_EOCD::zip64EocdSign )
586 {
587 zip64eocd.reset( new ZIP64_EOCD( buff ) );
588 buff += zip64eocd->zip64EocdTotalSize;
589 // now shift the buffer by EOCDL size if necessary
590 signature = to<uint32_t>( buff );
591 if( signature == ZIP64_EOCDL::zip64EocdlSign )
593 }
594 // parse EOCD record
595 eocd.reset( new EOCD( buff ) );
596 // update the state of the ZipArchive object
597 openstage = XrdCl::ZipArchive::Done;
598 cdexists = true;
599 }
600 
601  //---------------------------------------------------------------------------
602  // Create the central directory at the end of ZIP archive and close it
603  //---------------------------------------------------------------------------
// Closes the archive. If files were appended (`updated`), the Local File
// Headers flagged for overwriting and the central directory are written
// first, then the checkpoint is committed (if one was started) and the file
// is closed. Otherwise the file is just closed.
// The handler, when not null, receives the final status (null response);
// the function itself returns an OK status.
//
// NOTE(review): the first signature line (listing line 604) is absent from
// this extract - presumably
// `XRootDStatus ZipArchive::CloseArchive( ResponseHandler *handler,`; confirm.
605 uint16_t timeout )
606 {
607 Log *log = DefaultEnv::GetLog();
608 
609 //-------------------------------------------------------------------------
610 // If the file was updated, we need to write the Central Directory before
611 // closing the file.
612 //-------------------------------------------------------------------------
613 if( updated )
614 {
615 ChunkList chunks;
616 std::vector<std::shared_ptr<buffer_t>> wrtbufs;
617 for( auto &p : newfiles )
618 {
619 NewFile &nf = p.second;
620 if( !nf.overwrt ) continue;
// NOTE(review): this reads the member `lfh`, not `nf.lfh`. WriteImpl moves
// `lfh` into `newfiles`, so the member may be null here - presumably
// `nf.lfh->lfhSize` was meant (the value is only used for reserve()).
621 uint32_t lfhlen = lfh->lfhSize;
622 auto lfhbuf = std::make_shared<buffer_t>();
623 lfhbuf->reserve( lfhlen );
624 nf.lfh->Serialize( *lfhbuf );
625 chunks.emplace_back( nf.offset, lfhbuf->size(), lfhbuf->data() );
626 wrtbufs.emplace_back( std::move( lfhbuf ) );
627 }
628 
// the buffers are kept in wrtbufs (copied into the final callback below)
// so that they stay alive until the writes are done
629 auto wrtbuff = std::make_shared<buffer_t>( GetCD() );
630 Pipeline p = XrdCl::Write( archive, cdoff,
631 wrtbuff->size(),
632 wrtbuff->data() );
633 wrtbufs.emplace_back( std::move( wrtbuff ) );
634 
635 std::vector<ChunkList> listsvec;
636 XrdCl::Utils::SplitChunks( listsvec, chunks, 262144, 1024 );
637 
// prepend the vector writes so that they run before the CD write,
// in the order in which SplitChunks produced them
638 for(auto itr = listsvec.rbegin(); itr != listsvec.rend(); ++itr)
639 {
640 p = XrdCl::VectorWrite( archive, *itr ) | p;
641 }
642 if( ckpinit )
643 p |= XrdCl::Checkpoint( archive, ChkPtCode::COMMIT );
644 p |= Close( archive ) >>
645 [=]( XRootDStatus &st )
646 {
647 if( st.IsOK() ) Clear();
648 else openstage = Error;
649 }
650 | XrdCl::Final( [=]( const XRootDStatus &st ) mutable
651 {
652 if( st.IsOK() )
653 log->Dump( ZipMsg, "[%p] Successfully closed ZIP archive "
654 "(CD written).", this );
655 else
656 log->Error( ZipMsg, "[%p] Failed to close ZIP archive: %s",
657 this, st.ToString().c_str() );
658 wrtbufs.clear();
659 if( handler ) handler->HandleResponse( make_status( st ), nullptr );
660 } );
661 
662 Async( std::move( p ), timeout );
663 return XRootDStatus();
664 }
665 
666 //-------------------------------------------------------------------------
667 // Otherwise, just close the ZIP archive
668 //-------------------------------------------------------------------------
669 Pipeline p = Close( archive ) >>
670 [=]( XRootDStatus &st )
671 {
672 if( st.IsOK() )
673 {
674 Clear();
675 log->Dump( ZipMsg, "[%p] Successfully closed "
676 "ZIP archive.", this );
677 }
678 else
679 {
680 openstage = Error;
681 log->Error( ZipMsg, "[%p] Failed to close ZIP archive:"
682 " %s", this, st.ToString().c_str() );
683 }
684 if( handler )
685 handler->HandleResponse( make_status( st ), nullptr );
686 };
687 Async( std::move( p ), timeout );
688 return XRootDStatus();
689 }
690 
691  //---------------------------------------------------------------------------
692  // Read data from a given file
693  //---------------------------------------------------------------------------
694  XRootDStatus ZipArchive::ReadFrom( const std::string &fn,
695  uint64_t offset,
696  uint32_t size,
697  void *buffer,
698  ResponseHandler *handler,
699  uint16_t timeout )
700  {
701  return ReadFromImpl<ChunkInfo>( *this, fn, offset, size, buffer, handler, timeout );
702  }
703 
704  //---------------------------------------------------------------------------
705  // PgRead data from a given file
706  //---------------------------------------------------------------------------
707  XRootDStatus ZipArchive::PgReadFrom( const std::string &fn,
708  uint64_t offset,
709  uint32_t size,
710  void *buffer,
711  ResponseHandler *handler,
712  uint16_t timeout )
713  {
714  return ReadFromImpl<PageInfo>( *this, fn, offset, size, buffer, handler, timeout );
715  }
716 
717  //---------------------------------------------------------------------------
718  // List files in the ZIP archive
719  //---------------------------------------------------------------------------
// Produces a directory listing with one entry per central directory record.
// A new DirectoryList is allocated and stored in `list`; its parent name is
// the path of the archive's "LastURL" property, and every entry carries the
// host id of that URL, the file name from the record and stat information
// made from the archive's stat info plus the uncompressed size of the file.
//
// NOTE(review): listing lines 720 and 723 are absent from this extract (the
// signature - presumably `XRootDStatus ZipArchive::List( DirectoryList *&list )`
// - and the first line of the early return below); confirm.
721 {
722 if( openstage != Done )
724 0, "Archive not opened." );
725 
726 std::string value;
727 archive.GetProperty( "LastURL", value );
728 URL url( value );
729 
// NOTE(review): the status returned by Stat is not checked; if it fails and
// leaves infoptr null, `*info` below dereferences a null pointer - confirm.
730 StatInfo *infoptr = 0;
731 XRootDStatus st = archive.Stat( false, infoptr );
732 std::unique_ptr<StatInfo> info( infoptr );
733 
734 list = new DirectoryList();
735 list->SetParentName( url.GetPath() );
736 
737 auto itr = cdvec.begin();
738 for( ; itr != cdvec.end() ; ++itr )
739 {
740 CDFH *cdfh = itr->get();
// a 32-bit size of 0xFFFFFFFF means the real value is in the 'extra' record
741 uint64_t uncompressedSize = cdfh->uncompressedSize;
742 if( uncompressedSize == std::numeric_limits<uint32_t>::max() && cdfh->extra )
743 uncompressedSize = cdfh->extra->uncompressedSize;
744 StatInfo *entry_info = make_stat( *info, uncompressedSize );
745 DirectoryList::ListEntry *entry =
746 new DirectoryList::ListEntry( url.GetHostId(), cdfh->filename, entry_info );
747 list->Add( entry );
748 }
749 
750 return XRootDStatus();
751 }
752 
753  //-----------------------------------------------------------------------
754  // Append data to a new file, implementation
755  //-----------------------------------------------------------------------
756  XRootDStatus ZipArchive::WriteImpl( uint32_t size,
757  const void *buffer,
758  ResponseHandler *handler,
759  uint16_t timeout )
760  {
761  Log *log = DefaultEnv::GetLog();
762  std::vector<iovec> iov( 2 );
763 
764  //-------------------------------------------------------------------------
765  // If there is a LFH we need to write it first ahead of the write-buffer
766  // itself.
767  //-------------------------------------------------------------------------
768  std::shared_ptr<buffer_t> lfhbuf;
769  if( lfh )
770  {
771  uint32_t lfhlen = lfh->lfhSize;
772  lfhbuf = std::make_shared<buffer_t>();
773  lfhbuf->reserve( lfhlen );
774  lfh->Serialize( *lfhbuf );
775  iov[0].iov_base = lfhbuf->data();
776  iov[0].iov_len = lfhlen;
777  log->Dump( ZipMsg, "[%p] Will write LFH.", this );
778  }
779  //-------------------------------------------------------------------------
780  // If there is no LFH just make the first chunk empty.
781  //-------------------------------------------------------------------------
782  else
783  {
784  iov[0].iov_base = nullptr;
785  iov[0].iov_len = 0;
786  }
787  //-------------------------------------------------------------------------
788  // In the second chunk write the user data
789  //-------------------------------------------------------------------------
790  iov[1].iov_base = const_cast<void*>( buffer );
791  iov[1].iov_len = size;
792 
793  uint64_t wrtoff = cdoff; // we only support appending
794  uint32_t wrtlen = iov[0].iov_len + iov[1].iov_len;
795 
796  Pipeline p;
797  auto wrthandler = [=]( const XRootDStatus &st ) mutable
798  {
799  if( st.IsOK() ) updated = true;
800  lfhbuf.reset();
801  if( handler )
802  handler->HandleResponse( make_status( st ), nullptr );
803  };
804 
805  //-------------------------------------------------------------------------
806  // If we are overwriting an existing CD we need to use checkpointed version
807  // of WriteV.
808  //-------------------------------------------------------------------------
809  if( archsize > cdoff )
810  p = XrdCl::ChkptWrtV( archive, wrtoff, iov ) | XrdCl::Final( wrthandler );
811  //-------------------------------------------------------------------------
812  // Otherwise use the ordinary WriteV.
813  //-------------------------------------------------------------------------
814  else
815  p = XrdCl::WriteV( archive, wrtoff, iov ) | XrdCl::Final( wrthandler );
816  //-----------------------------------------------------------------------
817  // If needed make sure the checkpoint is initialized
818  //-----------------------------------------------------------------------
819  if( archsize > cdoff && !ckpinit )
820  {
821  p = XrdCl::Checkpoint( archive, ChkPtCode::BEGIN ) | p;
822  ckpinit = true;
823  }
824 
825  archsize += wrtlen;
826  cdoff += wrtlen;
827 
828  //-------------------------------------------------------------------------
829  // If we have written the LFH, add respective CDFH record
830  //-------------------------------------------------------------------------
831  if( lfh )
832  {
833  mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
834  cdvec.emplace_back( new CDFH( lfh.get(), mode, wrtoff ) );
835  cdmap[openfn] = cdvec.size() - 1;
836  // make sure we keep track of all appended files
837  newfiles.emplace( std::piecewise_construct,
838  std::forward_as_tuple( lfh->filename ),
839  std::forward_as_tuple( wrtoff, std::move( lfh ) )
840  );
841  }
842  Async( std::move( p ), timeout );
843  return XRootDStatus();
844  }
845 
846  //-----------------------------------------------------------------------
847  // Update the metadata of the currently open file
848  //-----------------------------------------------------------------------
// Stores the given crc32 for the currently open file, both in its central
// directory record and in the Local File Header kept in `newfiles`.
// Returns errInvalidOp if the archive is not fully opened, no file is open,
// or the open file is not found in the central directory or in `newfiles`.
//
// NOTE(review): the signature line (listing line 849) is absent from this
// extract - presumably
// `XRootDStatus ZipArchive::UpdateMetadata( uint32_t crc32 )`; confirm.
850 {
851 if( openstage != Done || openfn.empty() )
852 return XRootDStatus( stError, errInvalidOp, 0, "Archive not opened." );
853 
854 //---------------------------------------------------------------------
855 // Firstly, update the crc32 in the central directory
856 //---------------------------------------------------------------------
857 auto itr = cdmap.find( openfn );
858 if( itr == cdmap.end() )
859 return XRootDStatus( stError, errInvalidOp );
860 cdvec[itr->second]->ZCRC32 = crc32;
861 
862 //---------------------------------------------------------------------
863 // Secondly, update the crc32 in the LFH and mark it as needing
864 // overwriting
865 //---------------------------------------------------------------------
866 auto itr2 = newfiles.find( openfn );
867 if( itr2 == newfiles.end() )
868 return XRootDStatus( stError, errInvalidOp );
// NOTE(review): no visible line marks the entry as needing overwriting
// (CloseArchive only rewrites entries whose `overwrt` flag is set) -
// presumably `itr2->second.overwrt = true;` is missing; confirm.
869 itr2->second.lfh->ZCRC32 = crc32;
870 
871 return XRootDStatus();
872 }
873 
874  //-----------------------------------------------------------------------
875  // Create a new file in the ZIP archive and append the data
876  //-----------------------------------------------------------------------
877  XRootDStatus ZipArchive::AppendFile( const std::string &fn,
878  uint32_t crc32,
879  uint32_t size,
880  const void *buffer,
881  ResponseHandler *handler,
882  uint16_t timeout )
883  {
884  Log *log = DefaultEnv::GetLog();
885  auto itr = cdmap.find( fn );
886  // check if the file already exists in the archive
887  if( itr != cdmap.end() )
888  {
889  log->Dump( ZipMsg, "[%p] Open failed: file exists %s, cannot append.",
890  this, fn.c_str() );
891  return XRootDStatus( stError, errInvalidOp );
892  }
893 
894  log->Dump( ZipMsg, "[%p] Appending file: %s.", this, fn.c_str() );
895  //-------------------------------------------------------------------------
896  // Create Local File Header record
897  //-------------------------------------------------------------------------
898  lfh.reset( new LFH( fn, crc32, size, time( 0 ) ) );
899  //-------------------------------------------------------------------------
900  // And write it all
901  //-------------------------------------------------------------------------
902  return WriteImpl( size, buffer, handler, timeout );
903  }
904 
905 } /* namespace XrdCl */
void Set(Type object, bool own=true)
static Log * GetLog()
Get default log.
void Add(ListEntry *entry)
Add an entry to the list - takes ownership.
void SetParentName(const std::string &parent)
Set name of the parent directory.
bool IsOpen() const
Check if the file is open.
Definition: XrdClFile.cc:846
bool GetProperty(const std::string &name, std::string &value) const
Definition: XrdClFile.cc:878
XRootDStatus Stat(bool force, ResponseHandler *handler, uint16_t timeout=0) XRD_WARN_UNUSED_RESULT
Definition: XrdClFile.cc:177
Handle diagnostics.
Definition: XrdClLog.hh:101
void Error(uint64_t topic, const char *format,...)
Report an error.
Definition: XrdClLog.cc:231
void Dump(uint64_t topic, const char *format,...)
Print a dump message.
Definition: XrdClLog.cc:299
void Debug(uint64_t topic, const char *format,...)
Print a debug message.
Definition: XrdClLog.cc:282
static void Repeat()
Repeat current operation.
static void Stop(const XRootDStatus &status=XrdCl::XRootDStatus())
Handle an async response.
virtual void HandleResponse(XRootDStatus *status, AnyObject *response)
Object stat info.
URL representation.
Definition: XrdClURL.hh:31
std::string GetHostId() const
Get the host part of the URL (user:password@host:port)
Definition: XrdClURL.hh:99
const std::string & GetPath() const
Get the path.
Definition: XrdClURL.hh:217
static void SplitChunks(std::vector< ChunkList > &listsvec, const ChunkList &chunks, const uint32_t maxcs, const size_t maxc)
Split chunks in a ChunkList into one or more ChunkLists.
Definition: XrdClUtils.cc:875
Write operation.
XRootDStatus ReadFrom(const std::string &fn, uint64_t offset, uint32_t size, void *buffer, ResponseHandler *handler, uint16_t timeout=0)
XRootDStatus UpdateMetadata(uint32_t crc32)
XRootDStatus OpenArchive(const std::string &url, OpenFlags::Flags flags, ResponseHandler *handler, uint16_t timeout=0)
XRootDStatus List(DirectoryList *&list)
XRootDStatus AppendFile(const std::string &fn, uint32_t crc32, uint32_t size, const void *buffer, ResponseHandler *handler, uint16_t timeout=0)
virtual ~ZipArchive()
Destructor.
XRootDStatus CloseArchive(ResponseHandler *handler, uint16_t timeout=0)
Create the central directory at the end of ZIP archive and close it.
XRootDStatus GetOffset(const std::string &fn, uint64_t &offset)
XRootDStatus PgReadFrom(const std::string &fn, uint64_t offset, uint32_t size, void *buffer, ResponseHandler *handler, uint16_t timeout=0)
ZipArchive(bool enablePlugIns=true)
Constructor.
XRootDStatus OpenFile(const std::string &fn, OpenFlags::Flags flags=OpenFlags::None, uint64_t size=0, uint32_t crc32=0)
Utility class for inflating a compressed buffer.
void QueueRsp(const XRootDStatus &st, uint64_t offset, buffer_t &&buffer)
void QueueReq(uint64_t offset, uint32_t length, void *buffer, ResponseHandler *handler)
VectorWriteImpl< false > VectorWrite(Ctx< File > file, Arg< ChunkList > chunks, uint16_t timeout=0)
Factory for creating VectorWriteImpl objects.
CheckpointImpl< false > Checkpoint(Ctx< File > file, Arg< ChkPtCode > code, uint16_t timeout=0)
Factory for creating CheckpointImpl objects.
WriteImpl< false > Write(Ctx< File > file, Arg< uint64_t > offset, Arg< uint32_t > size, Arg< const void * > buffer, uint16_t timeout=0)
Factory for creating WriteImpl objects.
const uint16_t stError
An error occurred that could potentially be retried.
Definition: XrdClStatus.hh:32
std::future< XRootDStatus > Async(Pipeline pipeline, uint16_t timeout=0)
ReadImpl< false > Read(Ctx< File > file, Arg< uint64_t > offset, Arg< uint32_t > size, Arg< void * > buffer, uint16_t timeout=0)
Factory for creating ReadImpl objects.
const uint16_t errNotFound
Definition: XrdClStatus.hh:100
const uint64_t ZipMsg
ChkptWrtVImpl< false > ChkptWrtV(Ctx< File > file, Arg< uint64_t > offset, Arg< std::vector< iovec >> iov, uint16_t timeout=0)
Factory for creating ChkptWrtVImpl objects.
const uint16_t errDataError
data is corrupted
Definition: XrdClStatus.hh:63
OpenImpl< false > Open(Ctx< File > file, Arg< std::string > url, Arg< OpenFlags::Flags > flags, Arg< Access::Mode > mode=Access::None, uint16_t timeout=0)
Factory for creating OpenImpl objects.
const uint16_t errInvalidOp
Definition: XrdClStatus.hh:51
std::vector< ChunkInfo > ChunkList
List of chunks.
const uint16_t errNotSupported
Definition: XrdClStatus.hh:62
FinalOperation Final
WriteVImpl< false > WriteV(Ctx< File > file, Arg< uint64_t > offset, Arg< std::vector< iovec >> iov, uint16_t timeout=0)
Factory for creating WriteVImpl objects.
XRootDStatus ReadFromImpl(ZipArchive &me, const std::string &fn, uint64_t relativeOffset, uint32_t size, void *usrbuff, ResponseHandler *usrHandler, uint16_t timeout)
CloseImpl< false > Close(Ctx< File > file, uint16_t timeout=0)
Factory for creating CloseImpl objects.
none object for initializing empty Optional
std::vector< char > buffer_t
Definition: XrdZipUtils.hh:56
Describe a data chunk for vector read.
Flags
Open flags, may be or'd when appropriate.
@ Read
Open only for reading.
@ Update
Open for reading and writing.
bool IsOK() const
We're fine.
Definition: XrdClStatus.hh:124
std::string ToString() const
Create a string representation.
Definition: XrdClStatus.cc:97
char * data
Definition: XrdOucIOVec.hh:45
uint32_t uncompressedSize
Definition: XrdZipCDFH.hh:336
static uint64_t GetOffset(const CDFH &cdfh)
Definition: XrdZipCDFH.hh:227
std::unique_ptr< Extra > extra
Definition: XrdZipCDFH.hh:345
std::string filename
Definition: XrdZipCDFH.hh:344
static std::tuple< cdvec_t, cdmap_t > Parse(const char *buffer, uint32_t bufferSize, uint16_t nbCdRecords)
Definition: XrdZipCDFH.hh:75
uint16_t compressionMethod
Definition: XrdZipCDFH.hh:332
bool HasDataDescriptor()
Definition: XrdZipCDFH.hh:324
static size_t CalcSize(const cdvec_t &cdvec, uint32_t orgcdsz, uint32_t orgcdcnt)
Definition: XrdZipCDFH.hh:132
static void Serialize(uint32_t orgcdcnt, const buffer_t &orgcdbuf, const cdvec_t &cdvec, buffer_t &buffer)
Definition: XrdZipCDFH.hh:144
uint32_t compressedSize
Definition: XrdZipCDFH.hh:335
bool IsZIP64() const
Definition: XrdZipCDFH.hh:316
static uint8_t GetSize(bool zip64)
static const uint16_t eocdBaseSize
Definition: XrdZipEOCD.hh:153
static const uint16_t maxCommentLength
Definition: XrdZipEOCD.hh:154
static const char * Find(const char *buffer, uint64_t size)
Definition: XrdZipEOCD.hh:41
A data structure representing ZIP Local File Header.
Definition: XrdZipLFH.hh:42
A data structure representing the ZIP64 end of central directory locator.
static const uint32_t zip64EocdlSign
static const uint16_t zip64EocdlSize
static const uint32_t zip64EocdSign