XRootD
XrdClZipArchive.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // Copyright (c) 2011-2014 by European Organization for Nuclear Research (CERN)
3 // Author: Michal Simon <michal.simon@cern.ch>
4 //------------------------------------------------------------------------------
5 // This file is part of the XRootD software suite.
6 //
7 // XRootD is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Lesser General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // XRootD is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with XRootD. If not, see <http://www.gnu.org/licenses/>.
19 //
20 // In applying this licence, CERN does not waive the privileges and immunities
21 // granted to it by virtue of its status as an Intergovernmental Organization
22 // or submit itself to any jurisdiction.
23 //------------------------------------------------------------------------------
24 
25 #ifndef SRC_XRDZIP_XRDZIPARCHIVE_HH_
26 #define SRC_XRDZIP_XRDZIPARCHIVE_HH_
27 
28 #include "XrdCl/XrdClFile.hh"
30 #include "XrdCl/XrdClJobManager.hh"
31 #include "XrdCl/XrdClDefaultEnv.hh"
32 #include "XrdCl/XrdClPostMaster.hh"
33 #include "XrdZip/XrdZipEOCD.hh"
34 #include "XrdZip/XrdZipCDFH.hh"
36 #include "XrdZip/XrdZipLFH.hh"
37 #include "XrdCl/XrdClZipCache.hh"
38 
39 #include <memory>
40 #include <unordered_map>
41 
42 //-----------------------------------------------------------------------------
43 // Forward declaration needed for friendship
44 //-----------------------------------------------------------------------------
45 namespace XrdEc{ class StrmWriter; class Reader; template<bool> class OpenOnlyImpl; };
46 class MicroTest;
47 class XrdEcTests;
48 
49 namespace XrdCl
50 {
51  using namespace XrdZip;
52 
53  //---------------------------------------------------------------------------
54  // ZipArchive provides following functionalities:
55  // - parsing of existing ZIP archive
56  // - reading data from existing ZIP archive
57  // - appending data to existing ZIP archive
58  // - querying stat info and checksum for given file in ZIP archive
59  //---------------------------------------------------------------------------
60  class ZipArchive
61  {
62  friend class XrdEc::StrmWriter;
63  friend class XrdEc::Reader;
64  template<bool>
65  friend class XrdEc::OpenOnlyImpl;
66  friend class ::MicroTest;
67  friend class ::XrdEcTests;
68 
69  template<typename RSP>
70  friend XRootDStatus ReadFromImpl( ZipArchive&, const std::string&, uint64_t, uint32_t, void*, ResponseHandler*, uint16_t );
71 
72  public:
73  //-----------------------------------------------------------------------
75  //-----------------------------------------------------------------------
76  ZipArchive( bool enablePlugIns = true );
77 
78  //-----------------------------------------------------------------------
80  //-----------------------------------------------------------------------
81  virtual ~ZipArchive();
82 
83  //-----------------------------------------------------------------------
91  //-----------------------------------------------------------------------
92  XRootDStatus OpenArchive( const std::string &url,
93  OpenFlags::Flags flags,
95  uint16_t timeout = 0 );
96 
97  //-----------------------------------------------------------------------
105  //-----------------------------------------------------------------------
106  XRootDStatus OpenFile( const std::string &fn,
108  uint64_t size = 0,
109  uint32_t crc32 = 0 );
110 
111  //-----------------------------------------------------------------------
120  //-----------------------------------------------------------------------
121  inline
122  XRootDStatus Read( uint64_t offset,
123  uint32_t size,
124  void *buffer,
126  uint16_t timeout = 0 )
127  {
128  if( openfn.empty() ) return XRootDStatus( stError, errInvalidOp );
129  return ReadFrom( openfn, offset, size, buffer, handler, timeout );
130  }
131 
132  //-----------------------------------------------------------------------
141  //-----------------------------------------------------------------------
142  inline
143  XRootDStatus PgRead( uint64_t offset,
144  uint32_t size,
145  void *buffer,
147  uint16_t timeout = 0 )
148  {
149  if( openfn.empty() ) return XRootDStatus( stError, errInvalidOp );
150  return PgReadFrom( openfn, offset, size, buffer, handler, timeout );
151  }
152 
153  //-----------------------------------------------------------------------
163  //-----------------------------------------------------------------------
164  XRootDStatus ReadFrom( const std::string &fn,
165  uint64_t offset,
166  uint32_t size,
167  void *buffer,
169  uint16_t timeout = 0 );
170 
171  //-----------------------------------------------------------------------
181  //-----------------------------------------------------------------------
182  XRootDStatus PgReadFrom( const std::string &fn,
183  uint64_t offset,
184  uint32_t size,
185  void *buffer,
187  uint16_t timeout = 0 );
188 
189  //-----------------------------------------------------------------------
197  //-----------------------------------------------------------------------
198  inline XRootDStatus Write( uint32_t size,
199  const void *buffer,
201  uint16_t timeout = 0 )
202  {
203  if( openstage != Done || openfn.empty() )
204  return XRootDStatus( stError, errInvalidOp, 0, "Archive not opened." );
205 
206  return WriteImpl( size, buffer, handler, timeout );
207  }
208 
209  //-----------------------------------------------------------------------
214  //-----------------------------------------------------------------------
215  XRootDStatus UpdateMetadata( uint32_t crc32 );
216 
217  //-----------------------------------------------------------------------
227  //-----------------------------------------------------------------------
228  XRootDStatus AppendFile( const std::string &fn,
229  uint32_t crc32,
230  uint32_t size,
231  const void *buffer,
233  uint16_t timeout = 0 );
234 
235  //-----------------------------------------------------------------------
241  //-----------------------------------------------------------------------
242  inline XRootDStatus Stat( const std::string &fn, StatInfo *&info )
243  { // make sure archive has been opened and CD has been parsed
244  if( openstage != Done )
245  return XRootDStatus( stError, errInvalidOp );
246  // make sure the file is part of the archive
247  auto cditr = cdmap.find( fn );
248  if( cditr == cdmap.end() )
249  return XRootDStatus( stError, errNotFound );
250  // create the result
251  info = make_stat( fn );
252  if (info)
253  return XRootDStatus();
254  else // have difficult to access the openned archive.
255  return XRootDStatus( stError, errNotFound );
256  }
257 
258  //-----------------------------------------------------------------------
263  //-----------------------------------------------------------------------
264  inline XRootDStatus Stat( StatInfo *&info )
265  {
266  if( openfn.empty() )
267  return XRootDStatus( stError, errInvalidOp );
268  return Stat( openfn, info );
269  }
270 
271  //-----------------------------------------------------------------------
277  //-----------------------------------------------------------------------
278  inline XRootDStatus GetCRC32( const std::string &fn, uint32_t &cksum )
279  { // make sure archive has been opened and CD has been parsed
280  if( openstage != Done )
281  return XRootDStatus( stError, errInvalidOp );
282  // make sure the file is part of the archive
283  auto cditr = cdmap.find( fn );
284  if( cditr == cdmap.end() )
285  return XRootDStatus( stError, errNotFound );
286  cksum = cdvec[cditr->second]->ZCRC32;
287  return XRootDStatus();
288  }
289 
290  inline XRootDStatus GetOffset( const std::string &fn, uint64_t &offset){
291  if( openstage != XrdCl::ZipArchive::Done || !archive.IsOpen() )
293 
294  auto cditr = cdmap.find( fn );
295  if( cditr == cdmap.end() )
297  XrdCl::errNotFound, "File not found." );
298 
299  XrdCl::CDFH *cdfh = cdvec[cditr->second].get();
300 
301  // check if the file is compressed, for now we only support uncompressed and inflate/deflate compression
302  if( cdfh->compressionMethod != 0 && cdfh->compressionMethod != Z_DEFLATED )
304  0, "The compression algorithm is not supported!" );
305 
306  // Now the problem is that at the beginning of our
307  // file there is the Local-file-header, which size
308  // is not known because of the variable size 'extra'
309  // field, so we need to know the offset of the next
310  // record and shift it by the file size.
311  // The next record is either the next LFH (next file)
312  // or the start of the Central-directory.
313  uint64_t cdOffset = zip64eocd ? zip64eocd->cdOffset : eocd->cdOffset;
314  uint64_t nextRecordOffset = ( cditr->second + 1 < cdvec.size() ) ?
315  XrdCl::CDFH::GetOffset( *cdvec[cditr->second + 1] ) : cdOffset;
316  uint64_t filesize = cdfh->compressedSize;
317  if( filesize == std::numeric_limits<uint32_t>::max() && cdfh->extra )
318  filesize = cdfh->extra->compressedSize;
319  uint16_t descsize = cdfh->HasDataDescriptor() ?
321  offset = nextRecordOffset - filesize - descsize;
322  return XrdCl::XRootDStatus();
323  }
324 
325  //-----------------------------------------------------------------------
327  //
331  //-----------------------------------------------------------------------
333  uint16_t timeout = 0 );
334 
335  //-----------------------------------------------------------------------
338  //-----------------------------------------------------------------------
340  {
341  if( openstage != Done || openfn.empty() )
343  0, "Archive not opened." );
344  openfn.clear();
345  lfh.reset();
346  return XRootDStatus();
347  }
348 
349  //-----------------------------------------------------------------------
352  //-----------------------------------------------------------------------
353  XRootDStatus List( DirectoryList *&list );
354 
355  //-----------------------------------------------------------------------
357  //-----------------------------------------------------------------------
358  inline bool IsOpen()
359  {
360  return openstage == Done;
361  }
362 
363  //------------------------------------------------------------------------
365  //------------------------------------------------------------------------
366  inline bool IsSecure()
367  {
368  return archive.IsSecure();
369  }
370 
371  //-----------------------------------------------------------------------
373  //-----------------------------------------------------------------------
374  inline bool SetProperty( const std::string &name, const std::string &value )
375  {
376  return archive.SetProperty( name, value );
377  }
378 
379  //-----------------------------------------------------------------------
381  //-----------------------------------------------------------------------
382  inline bool GetProperty( const std::string &name, std::string &value )
383  {
384  return archive.GetProperty( name, value );
385  }
386 
387  //-----------------------------------------------------------------------
389  //-----------------------------------------------------------------------
390  inline File& GetFile()
391  {
392  return archive;
393  }
394 
395  private:
396 
397  //-----------------------------------------------------------------------
405  //-----------------------------------------------------------------------
406  XRootDStatus WriteImpl( uint32_t size,
407  const void *buffer,
409  uint16_t timeout );
410 
411  //-----------------------------------------------------------------------
419  //-----------------------------------------------------------------------
420  XRootDStatus OpenOnly( const std::string &url,
421  bool update,
423  uint16_t timeout = 0 );
424 
425  //-----------------------------------------------------------------------
429  //-----------------------------------------------------------------------
430  buffer_t GetCD();
431 
432  //-----------------------------------------------------------------------
436  //-----------------------------------------------------------------------
437  void SetCD( const buffer_t &buffer );
438 
439  //-----------------------------------------------------------------------
444  //-----------------------------------------------------------------------
445  template<typename Response>
446  inline static AnyObject* PkgRsp( Response *rsp )
447  {
448  if( !rsp ) return nullptr;
449  AnyObject *pkg = new AnyObject();
450  pkg->Set( rsp );
451  return pkg;
452  }
453 
454  //-----------------------------------------------------------------------
456  //-----------------------------------------------------------------------
457  template<typename Response>
458  inline static void Free( XRootDStatus *st, Response *rsp )
459  {
460  delete st;
461  delete rsp;
462  }
463 
464  //-----------------------------------------------------------------------
471  //-----------------------------------------------------------------------
472  template<typename Response>
473  inline static void Schedule( ResponseHandler *handler, XRootDStatus *st, Response *rsp = nullptr )
474  {
475  if( !handler ) return Free( st, rsp );
476  ResponseJob *job = new ResponseJob( handler, st, PkgRsp( rsp ), 0 );
478  }
479 
480  //-----------------------------------------------------------------------
486  //-----------------------------------------------------------------------
487  inline static StatInfo* make_stat( const StatInfo &starch, uint64_t size )
488  {
489  StatInfo *info = new StatInfo( starch );
490  uint32_t flags = info->GetFlags();
491  info->SetFlags( flags & ( ~StatInfo::IsWritable ) ); // make sure it is not listed as writable
492  info->SetSize( size );
493  return info;
494  }
495 
496  //-----------------------------------------------------------------------
501  //-----------------------------------------------------------------------
502  inline StatInfo* make_stat( const std::string &fn )
503  {
504  StatInfo *infoptr = 0;
505  XRootDStatus st = archive.Stat( false, infoptr );
506  if (!st.IsOK()) return nullptr;
507  std::unique_ptr<StatInfo> stinfo( infoptr );
508  auto itr = cdmap.find( fn );
509  if( itr == cdmap.end() ) return nullptr;
510  size_t index = itr->second;
511  uint64_t uncompressedSize = cdvec[index]->uncompressedSize;
512  if( cdvec[index]->extra && uncompressedSize == std::numeric_limits<uint32_t>::max() )
513  uncompressedSize = cdvec[index]->extra->uncompressedSize;
514  return make_stat( *stinfo, uncompressedSize );
515  }
516 
517  //-----------------------------------------------------------------------
519  //-----------------------------------------------------------------------
520  inline static XRootDStatus* make_status( const XRootDStatus &status = XRootDStatus() )
521  {
522  return new XRootDStatus( status );
523  }
524 
525  //-----------------------------------------------------------------------
527  //-----------------------------------------------------------------------
528  inline void Clear()
529  {
530  buffer.reset();
531  eocd.reset();
532  cdvec.clear();
533  cdmap.clear();
534  zip64eocd.reset();
535  openstage = None;
536  }
537 
538  //-----------------------------------------------------------------------
540  //-----------------------------------------------------------------------
541  enum OpenStages
542  {
543  None = 0, //< opening/parsing not started
544  HaveEocdBlk, //< we have the End of Central Directory record
545  HaveZip64EocdlBlk, //< we have the ZIP64 End of Central Directory locator record
546  HaveZip64EocdBlk, //< we have the ZIP64 End of Central Directory record
547  HaveCdRecords, //< we have Central Directory records
548  Done, //< we are done parsing the Central Directory
549  Error, //< opening/parsing failed
550  NotParsed //< the ZIP archive has been opened but Central Directory is not parsed
551  };
552 
553  //-----------------------------------------------------------------------
555  //-----------------------------------------------------------------------
556  struct NewFile
557  {
558  NewFile( uint64_t offset, std::unique_ptr<LFH> lfh ) : offset( offset ),
559  lfh( std::move( lfh ) ),
560  overwrt( false )
561  {
562  }
563 
564  NewFile( NewFile && nf ) : offset( nf.offset ),
565  lfh( std::move( nf.lfh ) ),
566  overwrt( nf.overwrt )
567  {
568  }
569 
570  uint64_t offset; // the offset of the LFH of the file
571  std::unique_ptr<LFH> lfh; // LFH of the file
572  bool overwrt; // if true the LFH needs to be overwritten on close
573  };
574 
575  //-----------------------------------------------------------------------
577  //-----------------------------------------------------------------------
578  typedef std::unordered_map<std::string, ZipCache> zipcache_t;
579  typedef std::unordered_map<std::string, NewFile> new_files_t;
580 
581  File archive; //> File object for handling the ZIP archive
582  uint64_t archsize; //> size of the ZIP archive
583  bool cdexists; //> true if Central Directory exists, false otherwise
584  bool updated; //> true if the ZIP archive has been updated, false otherwise
585  std::unique_ptr<char[]> buffer; //> buffer for keeping the data to be parsed or raw data
586  std::unique_ptr<EOCD> eocd; //> End of Central Directory record
587  cdvec_t cdvec; //> vector of Central Directory File Headers
588  cdmap_t cdmap; //> mapping of file name to CDFH index
589  uint64_t cdoff; //> Central Directory offset
590  uint32_t orgcdsz; //> original CD size
591  uint32_t orgcdcnt; //> original number CDFH records
592  buffer_t orgcdbuf; //> buffer with the original CDFH records
593  std::unique_ptr<ZIP64_EOCD> zip64eocd; //> ZIP64 End of Central Directory record
594  OpenStages openstage; //> stage of opening / parsing a ZIP archive
595  std::string openfn; //> file name of opened file
596  zipcache_t zipcache; //> cache for inflating compressed data
597  std::unique_ptr<LFH> lfh; //> Local File Header record for the newly appended file
598  bool ckpinit; //> a flag indicating whether a checkpoint has been initialized
599  new_files_t newfiles; //> all newly appended files
600  };
601 
602 } /* namespace XrdCl */
603 
604 #endif /* SRC_XRDZIP_XRDZIPARCHIVE_HH_ */
int extra
Definition: XrdAccTest.cc:63
XrdOucString File
void Set(Type object, bool own=true)
uint16_t timeout
Operation timeout.
static PostMaster * GetPostMaster()
Get default post master.
A file.
Definition: XrdClFile.hh:46
void QueueJob(Job *job, void *arg=0)
Add a job to be run.
std::unique_ptr< PipelineHandler > handler
Operation handler.
JobManager * GetJobManager()
Get the job manager object used by the post master.
Handle an async response.
Object stat info.
@ IsWritable
Write access is allowed.
Write operation (.
File & GetFile()
Get the underlying File object.
XRootDStatus PgRead(uint64_t offset, uint32_t size, void *buffer, ResponseHandler *handler, uint16_t timeout=0)
bool IsSecure()
Check if the underlying file is using an encrypted connection.
XRootDStatus Stat(const std::string &fn, StatInfo *&info)
XRootDStatus Write(uint32_t size, const void *buffer, ResponseHandler *handler, uint16_t timeout=0)
bool GetProperty(const std::string &name, std::string &value)
Get property on the underlying File object.
bool SetProperty(const std::string &name, const std::string &value)
Set property on the underlying File object.
XRootDStatus Stat(StatInfo *&info)
XRootDStatus GetOffset(const std::string &fn, uint64_t &offset)
XRootDStatus CloseFile()
XRootDStatus GetCRC32(const std::string &fn, uint32_t &cksum)
XRootDStatus Read(uint64_t offset, uint32_t size, void *buffer, ResponseHandler *handler, uint16_t timeout=0)
StatImpl< false > Stat(Ctx< File > file, Arg< bool > force, uint16_t timeout=0)
CloseArchiveImpl< false > CloseArchive(Ctx< ZipArchive > zip, uint16_t timeout=0)
Factory for creating CloseFileImpl objects.
const uint16_t stError
An error occurred that could potentially be retried.
Definition: XrdClStatus.hh:32
const uint16_t errNotFound
Definition: XrdClStatus.hh:100
ZipReadFromImpl< false > ReadFrom(Ctx< ZipArchive > zip, Arg< std::string > fn, Arg< uint64_t > offset, Arg< uint32_t > size, Arg< void * > buffer, uint16_t timeout=0)
Factory for creating ArchiveReadImpl objects.
const uint16_t errInvalidOp
Definition: XrdClStatus.hh:51
OpenFileImpl< false > OpenFile(Ctx< ZipArchive > zip, Arg< std::string > fn, Arg< OpenFlags::Flags > flags=OpenFlags::None, Arg< uint64_t > size=0, Arg< uint32_t > crc32=0, uint16_t timeout=0)
Factory for creating OpenFileImpl objects.
const uint16_t errNotSupported
Definition: XrdClStatus.hh:62
AppendFileImpl< false > AppendFile(Ctx< ZipArchive > zip, Arg< std::string > fn, Arg< uint32_t > crc32, Arg< uint32_t > size, Arg< const void * > buffer, uint16_t timeout=0)
Factory for creating ArchiveReadImpl objects.
XRootDStatus ReadFromImpl(ZipArchive &me, const std::string &fn, uint64_t relativeOffset, uint32_t size, void *usrbuff, ResponseHandler *usrHandler, uint16_t timeout)
OpenArchiveImpl< false > OpenArchive(Ctx< ZipArchive > zip, Arg< std::string > fn, Arg< OpenFlags::Flags > flags, uint16_t timeout=0)
Factory for creating OpenArchiveImpl objects.
ZipListImpl< false > List(Ctx< ZipArchive > zip)
Factory for creating ZipStatImpl objects.
OpenOnlyImpl< false > OpenOnly(XrdCl::Ctx< XrdCl::ZipArchive > zip, XrdCl::Arg< std::string > fn, XrdCl::Arg< bool > updt, uint16_t timeout=0)
Definition: XrdEcReader.cc:105
std::vector< std::unique_ptr< CDFH > > cdvec_t
Definition: XrdZipCDFH.hh:46
std::vector< char > buffer_t
Definition: XrdZipUtils.hh:56
std::unordered_map< std::string, size_t > cdmap_t
Definition: XrdZipCDFH.hh:56
Flags
Open flags, may be or'd when appropriate.
static uint64_t GetOffset(const CDFH &cdfh)
Definition: XrdZipCDFH.hh:227
std::unique_ptr< Extra > extra
Definition: XrdZipCDFH.hh:345
uint16_t compressionMethod
Definition: XrdZipCDFH.hh:332
bool HasDataDescriptor()
Definition: XrdZipCDFH.hh:324
uint32_t compressedSize
Definition: XrdZipCDFH.hh:335
bool IsZIP64() const
Definition: XrdZipCDFH.hh:316
static uint8_t GetSize(bool zip64)