A Saner S3 PUT for Node.js

The state of node.js libraries is hit and miss. I have been using Knox to do my s3 uploads and recently came across this gem of a stack trace:

assert.js:93
throw new assert.AssertionError({

AssertionError: true == false
at IncomingMessage.<anonymous> (http.js:1341:9)
at IncomingMessage.emit (events.js:61:17)
at HTTPParser.onMessageComplete (http.js:133:23)
at Socket.ondata (http.js:1231:22)
at Socket._onReadable (net.js:683:27)
at IOWatcher.onReadable [as callback] (net.js:177:10)

Sure enough, there is an outstanding Knox issue reporting that PUT calls actually crash the node process when Amazon returns a non-200 response (https://github.com/LearnBoost/knox/issues/41). Digging deeper into the source code, I noticed this comment:

/**
* PUT the file at `src` to `filename`, with callback `fn`
* receiving a possible exception, and the response object.
*
* NOTE: this method reads the _entire_ file into memory using
* fs.readFile(), and is not recommended or large files.
* ...

Yarg! An S3 PUT is not a complicated operation. All I want is a solution that

  • Method signature that takes in a file path and throws it into s3 (i.e. no mucking with request objects)
  • Supports timeouts, HTTP continue (i.e. fails fast)
  • Uses callbacks and passes useful error objects (i.e. the text from amazon)
  • Doesn’t read entire files (!) into memory (i.e. uses pipe from node.js)

Here is what I came up with (in CoffeeScript):

fs = require 'fs'
http = require 'http'
https = require 'https'
crypto = require 'crypto'

mime = require 'mime'
xml2js = require 'xml2js'

# setTimeout with the arguments flipped, so the (usually multi-line) callback
# can be written last at the call site. Returns whatever setTimeout returns,
# which can be handed to clearTimeout.
delayTimeout = (ms, func) ->
  setTimeout(func, ms)
# Uploads a local file to Amazon S3 with a signed HTTP PUT, streaming the file
# body instead of loading it into memory.
class @S3Put
  # awsKey, awsSecret - credentials used for the "AWS key:signature" Authorization header.
  # bucket            - bucket name; it is prefixed to every request path.
  # secure            - when true (default) the request goes over https, otherwise http.
  # timeout           - milliseconds after which an unfinished request is aborted (default 60s).
  constructor: (@awsKey, @awsSecret, @bucket, @secure=true, @timeout=60*1000) ->

  # PUT the file at `filePath` to `resource` inside the bucket.
  #
  # filePath   - path of the local file to upload.
  # resource   - object key; a missing leading "/" is added.
  # amzHeaders - extra headers (e.g. {"x-amz-acl": "public-read"}); they are
  #              sent with the request and included in the signature.
  # callback   - called exactly once as callback(err, res):
  #              * success (HTTP 200): err is null and res is
  #                {headers: <JSON string of the response headers>, code: 200}
  #              * failure: err is the fs/network error, the timeout object
  #                {message: ...}, or the non-200 response body parsed by xml2js.
  put: (filePath, resource, amzHeaders, callback) ->
    key = @resourcePath(resource)
    mimeType = mime.lookup(filePath)
    fs.stat filePath, (err, stats) =>
      return callback(err) if err?

      contentLength = stats.size
      md5Hash = crypto.createHash 'md5'

      # First pass over the file: only computes the Content-MD5 value.
      rs = fs.createReadStream(filePath)
      rs.on 'error', (err) -> callback(err)
      rs.on 'data', (d) -> md5Hash.update(d)
      rs.on 'end', =>
        md5 = md5Hash.digest('base64')
        date = new Date()
        httpOptions =
          host: "s3.amazonaws.com"
          path: "/#{@bucket}#{key}"
          headers:
            "Authorization": "AWS #{@awsKey}:#{@sign(key, md5, mimeType, date, amzHeaders)}"
            "Date": date.toUTCString()
            "Content-Length": contentLength
            "Content-Type": mimeType
            "Content-MD5": md5
            "Expect": "100-continue"
          method: "PUT"

        httpOptions.headers[k] = v for k, v of amzHeaders

        timeout = null
        finished = false
        # Every exit path funnels through here so the timer is always cleared
        # and the caller's callback can never fire twice (e.g. an error
        # response followed by the timeout, or abort() followed by 'error').
        done = (err, result) ->
          return if finished
          finished = true
          clearTimeout(timeout)
          callback(err, result)

        req = (if @secure then https else http).request httpOptions, (res) ->
          if res.statusCode == 200
            # Drain the (unused) body so the socket is released.
            res.resume?()
            headers = JSON.stringify(res.headers)
            return done(null, {headers: headers, code: res.statusCode})

          # Non-200: collect the body and hand it back, parsed, as the error.
          responseBody = ""
          res.setEncoding("utf8")
          res.on "data", (chunk) ->
            responseBody += chunk

          res.on "end", ->
            parser = new xml2js.Parser()
            parser.parseString responseBody, (err, result) ->
              return done(err) if err?
              # Guard against an empty parse result so a failed request is
              # never reported to the caller as a success.
              failure = result ? {message: "S3 responded with HTTP #{res.statusCode}"}
              done(failure)

        # Without a listener, a socket failure (or the abort() below) would
        # surface as an unhandled 'error' event.
        req.on "error", (err) -> done(err)

        timeout = delayTimeout @timeout, =>
          req.abort()
          done({message: "Timed out after #{@timeout}ms"})

        # The body is only streamed once the server has answered the
        # "Expect: 100-continue" header.
        req.on "continue", ->
          body = fs.createReadStream(filePath)
          body.on 'error', (err) ->
            req.abort()
            done(err)
          body.pipe(req)

  # Returns the base64 HMAC-SHA1 signature for a PUT of `resource`.
  # The signed text is the verb, md5, content type, date, one line per
  # canonical header, and the "/bucket/key" path, each separated by "\n".
  # With no amz headers there is no header line at all (and no blank line).
  sign: (resource, md5, contentType, date, amzHeaders) ->
    lines = ["PUT", md5, contentType, date.toUTCString()]
      .concat(@canonicalHeaders(amzHeaders))
      .concat(["/#{@bucket}#{@resourcePath(resource)}"])
    crypto.createHmac('sha1', @awsSecret).update(lines.join("\n")).digest('base64')

  # Returns a sorted array of "lowercased-name:value" strings, one per header.
  canonicalHeaders: (headers) ->
    ("#{k.toLowerCase()}:#{v}" for k,v of headers).sort()

  # Returns `resource` with a leading "/" added when it is missing, so that
  # "key" and "/key" address the same object.
  resourcePath: (resource) ->
    if resource.charAt(0) is "/" then resource else "/#{resource}"

Use like

S3Put = require('s3put').S3Put
s3Put = new S3Put("awsKey", "awsSecret", "s3Bucket")
# The key starts with "/" because the request path is built as "/<bucket><key>".
s3Put.put "/path/to/file", "/key", {"x-amz-acl": "public-read"}, (err, res) ->
  # err will be the error object given from Amazon (converted from xml)
  # res will contain res.headers and res.code
  console.log "Hurrah"

I’ve also put a gist up here: https://gist.github.com/1347203