0xDEADBEEF

RSS odkazy

multiCurlFetchAll.php

9. 1. 2023 #kód
/**
 * Runs cURL transfers concurrently through a single multi handle while keeping
 * the average request rate under a target, yielding each transfer as it
 * finishes.
 *
 * The target rate starts at `$maxRps`. It is multiplied by 0.9 every time a
 * finished transfer reports HTTP status 429. Whenever a completion is observed
 * more than 120 seconds into the current measurement window, the window is
 * halved (start time moved forward, request counter halved) and a previously
 * reduced target is raised by 5 %, never above `$maxRps`.
 *
 * New transfers are only started when another transfer completes, so the rate
 * is re-evaluated once per completion.
 *
 * @param array|Iterator $handles cURL easy handles and/or URL strings. An
 *   array is wrapped in an ArrayIterator; any other value is consumed through
 *   valid()/current()/next(). A string item is turned into a new handle with
 *   CURLOPT_RETURNTRANSFER and CURLOPT_FOLLOWLOCATION enabled.
 * @param int|float $maxRps upper bound for the average number of requests
 *   started per second
 * @param int $maxConcurrency upper bound used when deciding how many new
 *   transfers to start after a completion
 * @return Generator yields `[$result, $handle]` pairs, where `$result` is the
 *   'result' field reported by curl_multi_info_read() and `$handle` is the
 *   finished easy handle. The handle is removed from the multi handle and
 *   passed to curl_close() once the consumer resumes the generator, so read
 *   the response (e.g. curl_multi_getcontent()) before advancing.
 */
function multiCurlFetchAll($handles, int|float $maxRps, int $maxConcurrency = PHP_INT_MAX): \Generator {
  $currentMaxRps = $maxRps;
  $handles = is_array($handles) ? new ArrayIterator($handles) : $handles;
  $multi = curl_multi_init();

  // Moves the next pending item onto the multi handle. Returns false once the
  // iterator has nothing left, true when a transfer was actually added.
  $enqueueHandle = function() use ($multi, $handles): bool {
    if (!$handles->valid()) {
      return false;
    }

    $h = $handles->current();
    if (is_string($h)) {
      // The item is a URL: build a handle for it.
      $url = $h;
      $h = curl_init($url);
      curl_setopt($h, CURLOPT_RETURNTRANSFER, true);
      curl_setopt($h, CURLOPT_FOLLOWLOCATION, true);
    }
    curl_multi_add_handle($multi, $h);
    $handles->next();
    return true;
  };

  try {
    // Only requests that were really started count towards the measured rate.
    $nRequests = $enqueueHandle() ? 1 : 0;
    $startTime = microtime(true);

    do {
      $status = curl_multi_exec($multi, $nRunning);
      if ($nRunning) {
        curl_multi_select($multi);
      }

      while (($info = curl_multi_info_read($multi)) !== false) {
        $h = $info['handle'];
        $responseCode = curl_getinfo($h, CURLINFO_HTTP_CODE);
        // Hand the handle out while it is still attached to the multi handle.
        yield [$info['result'], $h];
        curl_multi_remove_handle($multi, $h);
        curl_close($h);

        if ($responseCode === 429) {
          $currentMaxRps *= 0.9;
        }

        // Clamp to a tiny positive value: dividing by 0.0 throws
        // DivisionByZeroError if the clock has not advanced yet.
        $elapsed = max(microtime(true) - $startTime, 1e-6);
        $rps = $nRequests / $elapsed;
        $rpsDiff = $currentMaxRps - $rps;

        if ($rpsDiff > 0) { // we can speed up
          // `$rpsDiff * $elapsed` is how many requests we are behind the
          // target; always try to start at least one.
          $n = max(1, min($maxConcurrency - $nRunning, $rpsDiff * $elapsed));

          for ($i = 0; $i < $n; $i++) {
            if (!$enqueueHandle()) {
              break; // nothing left to start, don't count phantom requests
            }
            $nRunning++;
            $nRequests++;
          }

        } else { // we need to slow down
          if ($nRunning === 0) { // but we cannot stop entirely
            sleep(1);
            // If there's no work left, `nRunning` is not incremented, it
            // stays at zero and the outer loop terminates.
            if ($enqueueHandle()) {
              $nRunning++;
              $nRequests++;
            }
          }
          // If at least one request is running, do nothing. There's still some
          // work to be done and this loop won't shut down.
        }

        if ($elapsed > 120) { // decay counters
          $startTime += $elapsed / 2;
          $nRequests /= 2;
          if ($currentMaxRps < $maxRps) { // try to speed up a little bit
            $currentMaxRps = min($currentMaxRps * 1.05, $maxRps);
          }
        }
      }
    } while ($nRunning && $status === CURLM_OK);
  } finally {
    // Runs on normal completion, on an exception, and when the consumer drops
    // the generator before it is exhausted.
    curl_multi_close($multi);
  }
}
píše k47 (@kaja47, k47)