Skip to content

Commit

Permalink
EmrEtlRunner: added support for ndjson loader format (closes #2251)
Browse files Browse the repository at this point in the history
WIP runner_spec remove pp

EmrEtlRunner: add urbanairship example to sample config.yml
  • Loading branch information
ninjabear authored and alexanderdean committed Jan 1, 2016
1 parent 44ff4f5 commit dab668a
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 3 deletions.
2 changes: 1 addition & 1 deletion 3-enrich/emr-etl-runner/config/config.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ aws:
task_instance_bid: 0.015 # In USD. Adjust bid, or leave blank for non-spot-priced (i.e. on-demand) task instances
bootstrap_failure_tries: 3 # Number of times to attempt the job in the event of bootstrap failures
collectors:
format: cloudfront # Or 'clj-tomcat' for the Clojure Collector, or 'thrift' for Thrift records, or 'tsv/com.amazon.aws.cloudfront/wd_access_log' for Cloudfront access logs
format: cloudfront # For example: 'clj-tomcat' for the Clojure Collector, 'thrift' for Thrift records, 'tsv/com.amazon.aws.cloudfront/wd_access_log' for Cloudfront access logs or 'ndjson/urbanairship.connect/v1' for UrbanAirship Connect events
enrich:
job_name: Snowplow ETL # Give your job a name
versions:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Runner
include Contracts

# Supported options
@@collector_format_regex = /^(?:cloudfront|clj-tomcat|thrift|(?:json\/.+\/.+)|(?:tsv\/.+\/.+))$/
@@collector_format_regex = /^(?:cloudfront|clj-tomcat|thrift|(?:json\/.+\/.+)|(?:tsv\/.+\/.+)|(?:ndjson\/.+\/.+))$/
@@skip_options = Set.new(%w(staging s3distcp emr enrich shred elasticsearch archive_raw))

include Monitoring::Logging
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# License:: Apache License Version 2.0

require 'spec_helper'
require 'pp'

Runner = Snowplow::EmrEtlRunner::Runner
Cli = Snowplow::EmrEtlRunner::Cli
Expand Down Expand Up @@ -155,4 +154,16 @@ def get_mock_config
Runner.new args, config, enrichments, resolver
end

# The bare "ndjson" token and partially qualified variants must be rejected
# with a ConfigError; only the fully qualified three-segment form
# ("ndjson/<segment>/<segment>") constructs a Runner without raising.
it 'should accept the ndjson collector format' do
  args, config, enrichments, resolver = get_mock_config

  # Each of these lacks a non-empty second or third path segment.
  ["ndjson", "ndjson/something", "ndjson/something/"].each do |rejected_format|
    config[:collectors][:format] = rejected_format
    expected_message = "collector_format '#{rejected_format}' not supported"
    expect { Runner.new(args, config, enrichments, resolver) }.to raise_exception(ConfigError, expected_message)
  end

  # Fully qualified format: construction must succeed (no exception expected).
  config[:collectors][:format] = "ndjson/something/something"
  Runner.new(args, config, enrichments, resolver)
end

end

0 comments on commit dab668a

Please sign in to comment.