XRootD
XrdHttpReadRangeHandler.cc
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of XrdHTTP: A pragmatic implementation of the
3 // HTTP/WebDAV protocol for the Xrootd framework
4 //
5 // Copyright (c) 2013 by European Organization for Nuclear Research (CERN)
6 // Authors: Cedric Caffy <ccaffy@cern.ch>, David Smith
7 // File Date: Aug 2023
8 //------------------------------------------------------------------------------
9 // XRootD is free software: you can redistribute it and/or modify
10 // it under the terms of the GNU Lesser General Public License as published by
11 // the Free Software Foundation, either version 3 of the License, or
12 // (at your option) any later version.
13 //
14 // XRootD is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public License
20 // along with XRootD. If not, see <http://www.gnu.org/licenses/>.
21 //------------------------------------------------------------------------------
22 
23 #include "XProtocol/XPtypes.hh"
25 #include "XrdOuc/XrdOuca2x.hh"
26 #include "XrdOuc/XrdOucTUtils.hh"
27 #include "XrdOuc/XrdOucUtils.hh"
28 
29 #include <algorithm>
30 #include <climits>
31 #include <cstdint>
32 #include <cstdlib>
33 #include <cstring>
34 #include <memory>
35 #include <sstream>
36 
37 //------------------------------------------------------------------------------
41 //------------------------------------------------------------------------------
43 (
44  XrdSysError &Eroute,
45  const char *const parms,
46  Configuration &cfg)
47 {
    // Fill cfg from a "<readv_ior_max>,<readv_iov_max>" parameter string.
    // Returns 0 on success, and also when parms is null or holds fewer than
    // two comma separated fields (cfg is left untouched in that case).
    // Returns -1 when a2i reports a failure for either value.
48  if( !parms ) return 0;
49 
50  std::vector<std::string> splitArgs;
51  XrdOucTUtils::splitString( splitArgs, parms, "," );
52  if( splitArgs.size() < 2 ) return 0;
53 
54  //----------------------------------------------------------------------------
55  // params is expected to be "<readv_ior_max>,<readv_iov_max>"
    // Only the first two fields are used; any further fields are ignored.
56  //----------------------------------------------------------------------------
57  std::string iorstr = splitArgs[0];
58  std::string iovstr = splitArgs[1];
59  XrdOucUtils::trim( iorstr );
60  XrdOucUtils::trim( iovstr );
61 
    // a2i is called with minv=1 and maxv=-1.
    // NOTE(review): presumably maxv=-1 means "no upper bound" - confirm
    // against XrdOuca2x::a2i.
62  int val;
63  if( XrdOuca2x::a2i( Eroute, "Error reading specific value of readv_ior_max",
64  iorstr.c_str(), &val, 1, -1 ) )
65  {
66  return -1;
67  }
68 
    // readv_ior_max is stored before readv_iov_max is parsed, so a failure on
    // the second value returns -1 with cfg already partially updated.
69  cfg.readv_ior_max = val;
70  if( XrdOuca2x::a2i( Eroute, "Error reading specific value of readv_iov_max",
71  iovstr.c_str(), &val, 1, -1 ) )
72  {
73  return -1;
74  }
75 
    // Both values parsed: record them and flag that explicit sizes are present.
76  cfg.readv_iov_max = val;
77  cfg.reqs_max = RREQ_MAXSIZE;
78  cfg.haveSizes = true;
79 
80  return 0;
81 }
82 
83 //------------------------------------------------------------------------------
85 //------------------------------------------------------------------------------
87 {
    // Hand back the handler's error object; it is returned as-is, whether or
    // not an error has been recorded.
88  return error_;
89 }
90 
91 //------------------------------------------------------------------------------
93 //------------------------------------------------------------------------------
95 {
    // True when no raw user ranges are stored, i.e. none were parsed or the
    // header was discarded; the resolved ranges are not consulted here.
96  return rawUserRanges_.empty();
97 }
98 
99 //------------------------------------------------------------------------------
101 //------------------------------------------------------------------------------
103 {
    // Resolve the raw ranges on first use so the count below is meaningful.
104  if( !rangesResolved_ )
105  resolveRanges();
106 
    // Zero resolved ranges also counts as "single" here.
107  return( resolvedUserRanges_.size() <= 1 );
108 }
109 
110 //------------------------------------------------------------------------------
112 //------------------------------------------------------------------------------
114 {
    // Returns the resolved user ranges, resolving them first if needed.
    // A shared static empty list is returned when the handler is in error,
    // so the reference handed back is always valid.
115  static const UserRangeList emptyList;
116 
117  if( !rangesResolved_ )
118  resolveRanges();
119 
    // resolveRanges() may itself have set the error (e.g. 416), so the check
    // comes after it.
120  if( error_ )
121  return emptyList;
122 
123  return resolvedUserRanges_;
124 }
125 
126 //------------------------------------------------------------------------------
128 //------------------------------------------------------------------------------
130 {
    // Returns the next list of chunks to read. An empty list is returned when
    // the handler is in error or when every resolved range has already been
    // split into chunks.
131  static const XrdHttpIOList emptyList;
132 
133  if( !rangesResolved_ )
134  resolveRanges();
135 
136  if( error_ )
137  return emptyList;
138 
    // A non-empty splitRange_ means the previously issued list has not been
    // fully acknowledged through NotifyReadResult().
139  if( !splitRange_.empty() )
140  {
141  if( currSplitRangeIdx_ == 0 && currSplitRangeOff_ == 0 )
142  {
143  //------------------------------------------------------------------------
144  // Nothing read: Prevent scenario where data is expected but none is
145  // actually read E.g. Accessing files which return the results of a script
146  //------------------------------------------------------------------------
147  error_.set( 500, "Stopping request because more data is expected "
148  "but no data has been read." );
149  return emptyList;
150  }
151 
152  //--------------------------------------------------------------------------
153  // we may have some unacknowledged portion of the last range; maybe due to a
154  // short read. so remove what was received and potentially reissue.
155  //--------------------------------------------------------------------------
156 
157  trimSplit();
158  if( !splitRange_.empty() )
159  return splitRange_;
160  }
161 
    // No chunks outstanding: stop once all resolved ranges have been split.
162  if( splitRangeIdx_ >= resolvedUserRanges_.size() )
163  return emptyList;
164 
    // Otherwise build the next batch of chunks into splitRange_.
165  splitRanges();
166 
167  return splitRange_;
168 }
169 
170 //------------------------------------------------------------------------------
172 //------------------------------------------------------------------------------
174 {
    // Put the handler into an error state. An error that is already recorded
    // is kept, so the original cause is not overwritten.
175  if( error_ )
176  return;
177 
178  error_.set( 500, "An error occured." );
179 }
180 
181 //------------------------------------------------------------------------------
183 //------------------------------------------------------------------------------
185 (
186  const ssize_t ret,
187  const UserRange** const urp,
188  bool &start,
189  bool &allend
190 )
191 {
    // Account for ret bytes received against the chunk list currently issued.
    // Returns 0 on success and -1 on error; every -1 path below other than the
    // first also records a 500 error.
    //   urp    - if non-null, receives a pointer to the resolved user range
    //            the bytes belong to.
    //   start  - set true when these bytes begin a resolved user range.
    //   allend - set true when these bytes complete the last resolved range.
    // On the early returns below (existing error, ret <= 0, unresolved or
    // empty state) start and allend are NOT written.
192  if( error_ )
193  return -1;
194 
    // A zero-byte result is accepted without advancing any counter.
195  if( ret == 0 )
196  return 0;
197 
198  if( ret < 0 )
199  {
200  error_.set( 500, "Range handler read failure." );
201  return -1;
202  }
203 
204  if( !rangesResolved_ )
205  {
206  error_.set( 500, "Range handler ranges not yet resolved." );
207  return -1;
208  }
209 
210  if( splitRange_.empty() )
211  {
212  error_.set( 500, "No ranges being read." );
213  return -1;
214  }
215 
216  start = false;
217  allend = false;
218 
    // Both cursors must point at valid entries before they are dereferenced.
219  if( currSplitRangeIdx_ >= splitRange_.size() ||
220  resolvedRangeIdx_ >= resolvedUserRanges_.size() )
221  {
222  error_.set( 500, "Range handler index invalid." );
223  return -1;
224  }
225 
226  if( urp )
227  *urp = &resolvedUserRanges_[resolvedRangeIdx_];
228 
    // Offset zero within the user range means this is its first data.
229  if( resolvedRangeOff_ == 0 )
230  start = true;
231 
    // clen: size of the chunk currently being filled.
    // ulen: inclusive length of the user range currently being filled.
232  const int clen = splitRange_[currSplitRangeIdx_].size;
233 
234  const off_t ulen = resolvedUserRanges_[resolvedRangeIdx_].end
235  - resolvedUserRanges_[resolvedRangeIdx_].start + 1;
236 
237  currSplitRangeOff_ += ret;
238  resolvedRangeOff_ += ret;
239 
    // A single notification may not span more than the current chunk or the
    // current user range.
240  if( currSplitRangeOff_ > clen || resolvedRangeOff_ > ulen )
241  {
242  error_.set( 500, "Range handler read crossing chunk boundary." );
243  return -1;
244  }
245 
    // Chunk complete: move to the next one; once the last chunk is complete
    // the issued list is cleared.
246  if( currSplitRangeOff_ == clen )
247  {
248  currSplitRangeOff_ = 0;
249  currSplitRangeIdx_++;
250 
251  if( currSplitRangeIdx_ >= splitRange_.size() )
252  {
253  currSplitRangeIdx_ = 0;
254  splitRange_.clear();
255  }
256  }
257 
    // User range complete: move to the next one and report allend after the
    // final range.
258  if( resolvedRangeOff_ == ulen )
259  {
260  resolvedRangeIdx_++;
261  resolvedRangeOff_ = 0;
262  if( resolvedRangeIdx_ >= resolvedUserRanges_.size() )
263  allend = true;
264  }
265 
266  return 0;
267 }
268 
269 //------------------------------------------------------------------------------
271 //------------------------------------------------------------------------------
272 void XrdHttpReadRangeHandler::ParseContentRange(const char* const line)
273 {
274  char *str1, *saveptr1, *token;
275 
276  std::unique_ptr< char, decltype(std::free)* >
277  line_copy { strdup( line ), std::free };
278 
279  //----------------------------------------------------------------------------
280  // line_copy is argument of the Range header.
281  //
282  // e.g. "bytes=15-17,20-25"
283  // We skip the unit prefix (upto first '='). We don't
284  // enforce this prefix nor check what it is (e.g. 'bytes')
285  //----------------------------------------------------------------------------
286 
287  str1 = line_copy.get();
288  token = strchr(str1,'=');
289  if (token) str1 = token + 1;
290 
291  //----------------------------------------------------------------------------
292  // break up the ranges and process each
293  //----------------------------------------------------------------------------
294 
295  for( ; ; str1 = NULL )
296  {
297  token = strtok_r( str1, " ,\n\r", &saveptr1 );
298  if( token == NULL )
299  break;
300 
301  if( !strlen(token) ) continue;
302 
303  const int rc = parseOneRange( token );
304  if( rc )
305  {
306  //------------------------------------------------------------------------
307  // on error we ignore the whole range header
308  //------------------------------------------------------------------------
309  rawUserRanges_.clear();
310  return;
311  }
312  }
313 }
314 
315 //------------------------------------------------------------------------------
317 //------------------------------------------------------------------------------
319 {
    // Return the handler to its initial state: clear the error, drop all
    // raw/resolved/split ranges (also requesting that their storage be
    // released), zero every cursor and forget the file size.
320  error_.reset();
321  rawUserRanges_.clear();
322  rawUserRanges_.shrink_to_fit();
323  resolvedUserRanges_.clear();
324  resolvedUserRanges_.shrink_to_fit();
325  splitRange_.clear();
326  splitRange_.shrink_to_fit();
327  rangesResolved_ = false;
328  splitRangeIdx_ = 0;
329  splitRangeOff_ = 0;
330  currSplitRangeIdx_ = 0;
331  currSplitRangeOff_ = 0;
332  resolvedRangeIdx_ = 0;
333  resolvedRangeOff_ = 0;
334  filesize_ = 0;
335 }
336 
337 //------------------------------------------------------------------------------
339 //------------------------------------------------------------------------------
341 {
    // Record the file size used when resolving ranges.
    // Returns 0 on success and -1 if the handler is already in error or the
    // ranges were already resolved.
342  if( error_ )
343  return -1;
344 
    // Resolved ranges were computed from the previous size, so a late update
    // is rejected and recorded as a 500 error.
345  if( rangesResolved_ )
346  {
347  error_.set( 500, "Filesize notified after ranges resolved." );
348  return -1;
349  }
350 
351  filesize_ = fs;
352  return 0;
353 }
354 
355 //------------------------------------------------------------------------------
357 //------------------------------------------------------------------------------
358 int XrdHttpReadRangeHandler::parseOneRange(char* const str)
359 {
360  UserRange ur;
361  char *sep;
362 
363  //----------------------------------------------------------------------------
364  // expected input is an individual range, e.g.
365  // 5-6
366  // 5-
367  // -2
368  //----------------------------------------------------------------------------
369 
370  sep = strchr( str, '-' );
371  if( !sep )
372  {
373  //--------------------------------------------------------------------------
374  // Unexpected range format
375  //--------------------------------------------------------------------------
376  return -1;
377  }
378 
379  *sep = '\0';
380  if( rangeFig( str, ur.start_set, ur.start )<0 )
381  {
382  //--------------------------------------------------------------------------
383  // Error in range start
384  //--------------------------------------------------------------------------
385  *sep = '-';
386  return -1;
387  }
388  *sep = '-';
389  if( rangeFig( sep+1, ur.end_set, ur.end )<0 )
390  {
391  //--------------------------------------------------------------------------
392  // Error in range end
393  //--------------------------------------------------------------------------
394  return -1;
395  }
396 
397  if( !ur.start_set && !ur.end_set )
398  {
399  //--------------------------------------------------------------------------
400  // Unexpected range format
401  //--------------------------------------------------------------------------
402  return -1;
403  }
404 
405  if( ur.start_set && ur.end_set && ur.start > ur.end )
406  {
407  //--------------------------------------------------------------------------
408  // Range start is after range end
409  //--------------------------------------------------------------------------
410  return -1;
411  }
412 
413  if( !ur.start_set && ur.end_set && ur.end == 0 )
414  {
415  //--------------------------------------------------------------------------
416  // Request to return last 0 bytes of file
417  //--------------------------------------------------------------------------
418  return -1;
419  }
420 
421  rawUserRanges_.push_back(ur);
422  return 0;
423 }
424 
425 //------------------------------------------------------------------------------
427 //------------------------------------------------------------------------------
428 int XrdHttpReadRangeHandler::rangeFig(const char* const s, bool &set, off_t &val)
429 {
430  char *endptr = (char*)s;
431  errno = 0;
432  long long int v = strtoll( s, &endptr, 10 );
433  if( (errno == ERANGE && (v == LONG_MAX || v == LONG_MIN))
434  || (errno != 0 && errno != EINVAL && v == 0) )
435  {
436  return -1;
437  }
438  if( *endptr != '\0' )
439  {
440  return -1;
441  }
442  if( endptr == s )
443  {
444  set = false;
445  }
446  else
447  {
448  set = true;
449  val = v;
450  }
451  return 0;
452 }
453 
454 //------------------------------------------------------------------------------
456 //------------------------------------------------------------------------------
457 void XrdHttpReadRangeHandler::resolveRanges()
458 {
459  if( error_ )
460  return;
461 
462  resolvedUserRanges_.clear();
463 
464  for( const auto &rr: rawUserRanges_ )
465  {
466  off_t start = 0;
467  off_t end = 0;
468 
469  if( rr.end_set )
470  {
471  if( rr.start_set )
472  {
473  //----------------------------------------------------------------------
474  // end and start set
475  // e.g. 5-6
476  //----------------------------------------------------------------------
477  start = rr.start;
478  end = rr.end;
479 
480  //----------------------------------------------------------------------
481  // skip ranges outside the file
482  //----------------------------------------------------------------------
483  if( start >= filesize_ )
484  continue;
485 
486  if( end >= filesize_ )
487  {
488  end = filesize_ - 1;
489  }
490  }
491  else // !start
492  {
493  //----------------------------------------------------------------------
494  // end is set but not start
495  // e.g. -5
496  //----------------------------------------------------------------------
497  if( rr.end == 0 )
498  continue;
499  end = filesize_ -1;
500  if( rr.end > filesize_ )
501  {
502  start = 0;
503  }
504  else
505  {
506  start = filesize_ - rr.end;
507  }
508  }
509  }
510  else // !end
511  {
512  //------------------------------------------------------------------------
513  // end is not set
514  // e.g. 5-
515  //------------------------------------------------------------------------
516  if( !rr.start_set ) continue;
517  if( rr.start >= filesize_ )
518  continue;
519  start = rr.start;
520  end = filesize_ - 1;
521  }
522  resolvedUserRanges_.emplace_back( start, end );
523  }
524 
525  if( rawUserRanges_.empty() && filesize_>0 )
526  {
527  //--------------------------------------------------------------------------
528  // special case: no ranges: speficied, return whole file
529  //--------------------------------------------------------------------------
530  resolvedUserRanges_.emplace_back( 0, filesize_ - 1 );
531  }
532 
533  if( !rawUserRanges_.empty() && resolvedUserRanges_.empty() )
534  {
535  error_.set( 416, "None of the range-specifier values in the Range "
536  "request-header field overlap the current extent of the selected resource." );
537  }
538 
539  rangesResolved_ = true;
540 }
541 
542 //------------------------------------------------------------------------------
546 //------------------------------------------------------------------------------
547 void XrdHttpReadRangeHandler::splitRanges()
548 {
    // Build the next batch of chunks in splitRange_, continuing from
    // (splitRangeIdx_, splitRangeOff_) in resolvedUserRanges_. The per-batch
    // cursors are reset, and the resolved-range cursor is positioned at the
    // point where this batch starts.
549  splitRange_.clear();
550  currSplitRangeIdx_ = 0;
551  currSplitRangeOff_ = 0;
552  resolvedRangeIdx_ = splitRangeIdx_;
553  resolvedRangeOff_ = splitRangeOff_;
554 
555  //----------------------------------------------------------------------------
556  // If we make a list of just one range XrdHttpReq will issue kXR_read,
557  // otherwise kXR_readv.
558  //
559  // If this is a full file read, or single user range, we'll fetch only one
560  // range at a time, so it is sent as a series of kXR_read requests.
561  //
562  // For multi range requests we pack a number of suitably sized ranges, thereby
563  // using kXR_readv. However, if there's a long user range we try to
564  // proceed by issuing single range requests and thereby using kXR_read.
565  //
566  // We don't merge user ranges in a single chunk as we always expect to be
567  // able to notify at boundaries with the output bools of NotifyReadResult.
568  //----------------------------------------------------------------------------
569 
    // maxch: maximum number of chunks in this batch.
    // maxchs: maximum size of one chunk.
570  size_t maxch = vectorReadMaxChunks_;
571  size_t maxchs = vectorReadMaxChunkSize_;
572  if( isSingleRange() )
573  {
574  maxchs = rRequestMaxBytes_;
575  maxch = 1;
576  }
577 
578  splitRange_.reserve( maxch );
579 
580  //----------------------------------------------------------------------------
581  // Start/continue splitting the resolvedUserRanges_ into a XrdHttpIOList.
582  //----------------------------------------------------------------------------
583 
    // cs: number of resolved user ranges.
    // nc: chunks emitted so far in this batch.
    // rsr: bytes still allowed in this batch.
    // tmpur: remaining part of the user range being split; start_set is
    //        false when the next range still has to be loaded.
584  const size_t cs = resolvedUserRanges_.size();
585  size_t nc = 0;
586  size_t rsr = rRequestMaxBytes_;
587  UserRange tmpur;
588 
589  while( ( splitRangeIdx_ < cs ) && ( rsr > 0 ) )
590  {
591  //--------------------------------------------------------------------------
592  // Check if we've reached the maximum number of allowed chunks.
593  //--------------------------------------------------------------------------
594  if( nc >= maxch )
595  break;
596 
    // Load the next user range, skipping the part emitted by earlier batches.
597  if( !tmpur.start_set )
598  {
599  tmpur = resolvedUserRanges_[splitRangeIdx_];
600  tmpur.start += splitRangeOff_;
601  }
602 
    // l: bytes left in the current user range (end is inclusive).
603  const off_t l = tmpur.end - tmpur.start + 1;
604  size_t maxsize = std::min( rsr, maxchs );
605 
606  //--------------------------------------------------------------------------
607  // If we're starting a new set of chunks and we have enough data available
608  // in the current user range we allow a kXR_read of the max request size.
609  //--------------------------------------------------------------------------
610  if( nc == 0 && l >= (off_t)rRequestMaxBytes_ )
611  maxsize = rRequestMaxBytes_;
612 
613  if( l > (off_t)maxsize )
614  {
    // The range does not fit: emit a full-size chunk and stay on this range.
615  splitRange_.emplace_back( nullptr, tmpur.start, maxsize );
616  tmpur.start += maxsize;
617  splitRangeOff_ += maxsize;
618  rsr -= maxsize;
619  }
620  else
621  {
    // The rest of the range fits: emit it and advance to the next user range.
622  splitRange_.emplace_back( nullptr, tmpur.start, l );
623  rsr -= l;
624  tmpur = UserRange();
625  splitRangeOff_ = 0;
626  splitRangeIdx_++;
627  }
628  nc++;
629  }
630 }
631 
632 //------------------------------------------------------------------------------
634 //------------------------------------------------------------------------------
635 void XrdHttpReadRangeHandler::trimSplit()
636 {
637  if( currSplitRangeIdx_ < splitRange_.size() )
638  {
639  splitRange_.erase( splitRange_.begin(),
640  splitRange_.begin() + currSplitRangeIdx_ );
641  }
642  else
643  splitRange_.clear();
644 
645  if( splitRange_.size() > 0 )
646  {
647  if( currSplitRangeOff_ < splitRange_[0].size )
648  {
649  splitRange_[0].offset += currSplitRangeOff_;
650  splitRange_[0].size -= currSplitRangeOff_;
651  }
652  else
653  splitRange_.clear();
654  }
655 
656  currSplitRangeIdx_ = 0;
657  currSplitRangeOff_ = 0;
658 }
std::vector< XrdOucIOVec2 > XrdHttpIOList
Definition: XrdHttpUtils.hh:95
void reset()
resets this handler
const XrdHttpIOList & NextReadList()
return XrdHttpIOList for sending to read or readv
void ParseContentRange(const char *const line)
parse the line after a "Range: " http request header
int SetFilesize(const off_t sz)
sets the filesize, used during resolving and issuing range requests
static int Configure(XrdSysError &Eroute, const char *const parms, Configuration &cfg)
void NotifyError()
Force handler to enter error state.
bool isFullFile()
indicates when there were no valid Range header ranges supplied
std::vector< UserRange > UserRangeList
int NotifyReadResult(const ssize_t ret, const UserRange **const urp, bool &start, bool &allend)
Advance internal counters concerning received bytes.
const Error & getError() const
return the Error object
bool isSingleRange()
indicates a single range (implied whole file, or single range) or empty file
static constexpr size_t RREQ_MAXSIZE
const UserRangeList & ListResolvedRanges()
return resolved (i.e. absolute start and end) byte ranges desired
static void splitString(Container &result, const std::string &input, const std::string &delimiter)
Split a string.
Definition: XrdOucTUtils.hh:51
static void trim(std::string &str)
static int a2i(XrdSysError &, const char *emsg, const char *item, int *val, int minv=-1, int maxv=-1)
Definition: XrdOuca2x.cc:45
void set(int rc, const std::string &m)