My last write-up about backing up to Google Nearline with a script had a flaw, which I pointed out at the time: the script was single-threaded, so backing up a large dataset was slow. Well, wait no longer. (If you were waiting.) I’ve added pthreads support to the script, and the updated version is attached.

You will need to add pthreads support to your PHP installation, which in turn requires a PHP build with ZTS (Zend Thread Safety) enabled. Note that many bundled PHP builds do not include ZTS support, so you may have to download the source and compile it yourself. You will probably also want to raise the memory limit for script execution in php.ini.

Lastly, I added a new flag, -w, which controls the number of workers. The default is 5, but the sky is the limit. Or until your machine falls over. Good luck!

<?php

// Todo:
//   Validate deleted files and mark reference file to follow up with cloud deletion
//   Build exclude file for directories and files to ignore
//   Find work around for files with [
//   Convert to different API removing gsutil - JSON?


#
# Configuration defaults
#

$path = "";                        // local directory to back up (set with -d)
$gsurl = "";                       // destination URL (set with -u)
$workers = 5;                      // number of pool workers (set with -w)
$key = "/home/user/cipherfile";    // key file handed to aescrypt -k
$aescrypt = "/usr/bin/aescrypt";
$gsutil = "/usr/bin/gsutil";
$workingdir = "/mnt/process_dir";  // scratch directory for encrypted temp files
$bucketfiles = array();            // remote listing, filled in further down
$localfiles = array();             // local listing, filled in further down

#
# Setup options
#
#   -d <dir>   directory to back up
#   -u <url>   destination URL
#   -w <n>     number of workers (positive integer, default 5)
#   -v         print the version and exit
#

date_default_timezone_set("GMT");

// "w:" has to be part of the option string: getopt() stops at the first
// argument it does not recognise, so an undeclared -w would also hide any
// -d/-u that follow it on the command line.
$options = getopt("d:u:vw:");
if(!is_array($options)) $options = array();
foreach($options as $option => $value)
{
  // A flag given more than once arrives as an array; the last one wins.
  if(is_array($value)) $value = end($value);

  if($option === "v") { echo "gbackup v0.2\n"; exit; }
  if($option === "d")
  {
    // realpath() returns false for a path that does not exist; treat that
    // the same as "no path given" instead of carrying a boolean around.
    $resolved = realpath($value);
    $path = ($resolved === false) ? "" : $resolved;
  }
  if($option === "u") $gsurl = rtrim($value, "/");
  if($option === "w")
  {
    if(!ctype_digit((string)$value) || (int)$value < 1)
    {
      echo "Invalid worker count. Exiting.\n";
      exit(1);
    }
    $workers = (int)$value;
  }
}

// Missing mandatory arguments are errors, so leave with a non-zero status.
if(!strlen($path)) { echo "No path given. Exiting.\n"; exit(1); }
if(!strlen($gsurl)) { echo "No url given. Exiting.\n"; exit(1); }
$workingdir = rtrim($workingdir, "/");


#
# Process files in directory
#

// Walk $path recursively and record size and modification time of every
// regular file, keyed by its full path name.
// SKIP_DOTS keeps the "." and ".." directory entries out of the listing so
// they are never stat'ed or considered for upload.
$objects = new RecursiveIteratorIterator(
    new RecursiveDirectoryIterator($path, FilesystemIterator::SKIP_DOTS)
);
foreach($objects as $name => $object)
{
    // Only regular files can be encrypted and uploaded; this also skips
    // broken symlinks, for which filesize()/filemtime() would fail.
    if(!$object->isFile()) continue;

    $mtime = filemtime($name);
    $localfiles[$name] = array(
        "size" => filesize($name),
        "date" => date("c", $mtime),
        "timestamp" => $mtime
    );
}

/**
 * Escape a path so it can be embedded inside a double-quoted shell argument.
 *
 * Inside double quotes the shell still treats $, backtick, double quote and
 * backslash specially, so each of them is prefixed with a backslash. Escaping
 * only "$" would let a name such as a\$b turn into a\\$b, which re-enables
 * variable expansion, and a name containing a double quote would end the
 * argument early.
 *
 * @param string $path Raw path or file name.
 * @return string The path with the four shell-special characters escaped.
 */
function safefile($path)
{
  return addcslashes($path, '$`"\\');
}


#
# Process files in bucket
#

// Ask gsutil for a long listing of everything below <gsurl><path>/ and index
// the result in $bucketfiles by object name relative to $gsurl, so that an
// object uploaded to "<gsurl>/some/file.aes" is stored under "some/file.aes".
$bucketlist = array();
exec(sprintf("%s ls -l \"%s%s/**\"", $gsutil, $gsurl, safefile($path)), $bucketlist);

$urlprefix = $gsurl . "/";
foreach($bucketlist as $fileurl)
{
  // Expected line shape: "<size>  <date>  gs://...". Lines that do not match
  // (the trailing "TOTAL: ..." summary, blank lines, error text) are skipped
  // here, which avoids undefined-index notices and means a file whose name
  // merely contains "TOTAL: " is no longer dropped.
  if(!preg_match('/^\s*(\d+)\s+(\S+)\s+(gs:\/\/.+)$/', $fileurl, $data)) continue;

  // Strip the destination URL itself rather than just the bucket name;
  // otherwise a destination with a path prefix (gs://bucket/prefix) yields
  // keys that never match the local file names.
  if(strpos($data[3], $urlprefix) !== 0) continue;
  $objectname = substr($data[3], strlen($urlprefix));

  $bucketfiles[$objectname] = array(
    "size" => $data[1],
    "date" => $data[2],
    "timestamp" => strtotime($data[2])
  );
}

#
# Encrypt and upload files to bucket
#

/**
 * Pool task that encrypts one local file with aescrypt and uploads the
 * encrypted copy with gsutil.
 *
 * Tool paths, key file, working directory and destination URL are copied
 * from the script's global configuration when the task is constructed.
 */
class xfer extends Collectable {
  public $file;
  private $aescrypt;
  private $gsutil;
  private $tempfile;
  private $workingdir;
  private $gsurl;
  private $key;

  /**
   * @param string $file Full path of the local file to encrypt and upload.
   */
  public function __construct($file)
  {
    global $aescrypt, $gsutil, $workingdir, $gsurl, $key;
    $this->aescrypt = $aescrypt;
    $this->gsutil = $gsutil;
    $this->workingdir = $workingdir;
    $this->gsurl = $gsurl;
    $this->key = $key;
    $this->file = $file;
  }

  /**
   * Escape a path for use inside a double-quoted shell argument.
   *
   * $, backtick, double quote and backslash all keep a special meaning
   * inside double quotes, so each one is prefixed with a backslash.
   *
   * @param string $path Raw path or file name.
   * @return string Escaped path.
   */
  public function safefile($path)
  {
    return addcslashes($path, '$`"\\');
  }

  /**
   * Encrypt $this->file into a per-thread temp file in the working
   * directory, copy it to "<gsurl><file>.aes", then remove the temp file.
   * Failures are reported on stdout; nothing is thrown.
   */
  public function run()
  {
    $threadid = $this->worker->getThreadId();

    // Process id plus thread id keeps temp names from colliding between workers.
    $this->tempfile = sprintf("temp.%d.%d.aes", getmypid(), $threadid);
    $temppath = sprintf("%s/%s", $this->workingdir, $this->tempfile);
    printf("(%d) Starting on %s\n", $threadid, $this->file);

    $cmd = sprintf("%s -e -k \"%s\" -o \"%s\" \"%s\" 2>&1",
      $this->aescrypt,
      $this->safefile($this->key),
      $this->safefile($temppath),
      $this->safefile($this->file));
    $cryptout = array();
    exec($cmd, $cryptout, $ret);
    if($ret)
    {
      printf("FAILURE: encrypt %s: %s\n", $this->file, implode($cryptout));
    } else {
      $cmd = sprintf("%s cp \"%s\" \"%s%s.aes\" 2>&1",
        $this->gsutil,
        $this->safefile($temppath),
        $this->gsurl,
        $this->safefile($this->file));
      // Separate capture array: exec() appends to an existing array, which
      // would mix the encrypt output into a transfer failure message.
      $copyout = array();
      exec($cmd, $copyout, $ret);
      if($ret) printf("FAILURE: transfer %s: %s\n", $this->file, implode($copyout));
    }

    // Clean up in every case; a failed encrypt can leave a partial file behind.
    if(file_exists($temppath)) unlink($temppath);
  }
}


// Size of the worker pool: use $workers when the script has set it to a
// positive number, otherwise fall back to the long-standing default of 5.
$poolsize = (isset($workers) && (int)$workers > 0) ? (int)$workers : 5;
$p = new Pool($poolsize);

foreach($localfiles as $file => $values)
{
  // "." and ".." directory entries are not files to back up.
  if(basename($file) == "." || basename($file) == "..") continue;
  //printf("Queuing %s\n", $file);

  // Name the encrypted copy is listed under in $bucketfiles.
  $remotekey = ltrim($file, "/") . ".aes";

  // Upload when there is no remote copy yet, or the local file has been
  // modified after the remote copy was written.
  $missing = !array_key_exists($remotekey, $bucketfiles);
  if($missing || $values["timestamp"] > $bucketfiles[$remotekey]["timestamp"])
  {
    $p->submit(new xfer($file));
  }
}

// Wait for all queued transfers to finish before the script ends.
$p->shutdown();


?>