diff --git a/MapReducePlayGround/.classpath b/MapReducePlayGround/.classpath index 819f1c5..b12146e 100644 --- a/MapReducePlayGround/.classpath +++ b/MapReducePlayGround/.classpath @@ -1,58 +1,29 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/MapReducePlayGround/.gitignore b/MapReducePlayGround/.gitignore index 9294c1a..c243118 100644 --- a/MapReducePlayGround/.gitignore +++ b/MapReducePlayGround/.gitignore @@ -1,8 +1,9 @@ /bin -/build/classes -/build/lib +/build/ /ant /input /output /.DS_Store /.settings +.gradle/ +gradle/ diff --git a/MapReducePlayGround/.project b/MapReducePlayGround/.project index c6d11ae..2bc93a7 100644 --- a/MapReducePlayGround/.project +++ b/MapReducePlayGround/.project @@ -1,17 +1,16 @@ MapReducePlayGround - - - + + + + org.eclipse.jdt.core.javanature + org.eclipse.jdt.core.javabuilder - - + - - org.eclipse.jdt.core.javanature - + diff --git a/MapReducePlayGround/build.gradle b/MapReducePlayGround/build.gradle new file mode 100644 index 0000000..d0322fc --- /dev/null +++ b/MapReducePlayGround/build.gradle @@ -0,0 +1,31 @@ +apply plugin: 'java' +apply plugin: 'eclipse-wtp' + + +repositories { + mavenCentral() + } + +defaultTasks = ['clean', 'jar'] +dependencies { + compile 'org.apache.hadoop:hadoop-core:0.20.2' + +} + +task copyToLib( type: Copy ) { + into "$buildDir/libs/lib" + from configurations.runtime + } + +jar { +dependsOn copyToLib + manifest { + attributes 'Main-Class': 'com.gauri.airlinedata.AirlineDataDriver' + } +} + + + + task wrapper(type: Wrapper){ + gradleVersion = '1.4' + } diff --git a/MapReducePlayGround/gradlew b/MapReducePlayGround/gradlew new file mode 100755 index 0000000..91a7e26 --- /dev/null +++ b/MapReducePlayGround/gradlew @@ -0,0 +1,164 @@ +#!/usr/bin/env bash + +############################################################################## +## +## Gradle start up script for UN*X +## 
+############################################################################## + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn ( ) { + echo "$*" +} + +die ( ) { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; +esac + +# For Cygwin, ensure paths are in UNIX format before anything is touched. +if $cygwin ; then + [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"` +fi + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >&- +APP_HOME="`pwd -P`" +cd "$SAVED" >&- + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." 
+fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set 
-- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules +function splitJvmOpts() { + JVM_OPTS=("$@") +} +eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS +JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" + +exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" diff --git a/MapReducePlayGround/gradlew.bat b/MapReducePlayGround/gradlew.bat new file mode 100644 index 0000000..aec9973 --- /dev/null +++ b/MapReducePlayGround/gradlew.bat @@ -0,0 +1,90 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. 
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windowz variants + +if not "%OS%" == "Windows_NT" goto win9xME_args +if "%@eval[2+2]" == "4" goto 4NT_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* +goto execute + +:4NT_args +@rem Get arguments from the 4NT Shell from JP Software +set CMD_LINE_ARGS=%$ + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/MapReducePlayGround/sample.txt b/MapReducePlayGround/sample.txt new file mode 100644 index 0000000..3b5946f --- /dev/null +++ b/MapReducePlayGround/sample.txt @@ -0,0 +1,99 @@ +1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,15,4,729,730,903,849,PS,1451,NA,94,79,NA,14,-1,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,17,6,741,730,918,849,PS,1451,NA,97,79,NA,29,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,18,7,729,730,847,849,PS,1451,NA,78,79,NA,-2,-1,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,19,1,749,730,922,849,PS,1451,NA,93,79,NA,33,19,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,21,3,728,730,848,849,PS,1451,NA,80,79,NA,-1,-2,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,22,4,728,730,852,849,PS,1451,NA,84,79,NA,3,-2,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,23,5,731,730,902,849,PS,1451,NA,91,79,NA,13,1,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,24,6,744,730,908,849,PS,1451,NA,84,79,NA,19,14,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,25,7,729,730,851,849,PS,1451,NA,82,79,NA,2,-1,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,26,1,735,730,904,849,PS,1451,NA,89,79,NA,15,5,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,28,3,741,725,919,855,PS,1451,NA,98,90,NA,24,16,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,29,4,742,725,906,855,PS,1451,NA,84,90,NA,11,17,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,31,6,726,725,848,855,PS,1451,NA,82,90,NA,-7,1,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,1,4,936,915,1035,1001,PS,1451,NA,59,46,NA,34,21,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,2,5,918,915,1017,1001,PS,1451,NA,59,46,NA,16,3,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,3,6,928,915,1037,1001,PS,1451,NA,69,46,NA,36,13,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA 
+1987,10,4,7,914,915,1003,1001,PS,1451,NA,49,46,NA,2,-1,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,5,1,1042,915,1129,1001,PS,1451,NA,47,46,NA,88,87,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,6,2,934,915,1024,1001,PS,1451,NA,50,46,NA,23,19,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,7,3,946,915,1037,1001,PS,1451,NA,51,46,NA,36,31,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,8,4,932,915,1033,1001,PS,1451,NA,61,46,NA,32,17,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,9,5,947,915,1036,1001,PS,1451,NA,49,46,NA,35,32,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,10,6,915,915,1022,1001,PS,1451,NA,67,46,NA,21,0,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,11,7,916,915,1006,1001,PS,1451,NA,50,46,NA,5,1,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,12,1,944,915,1027,1001,PS,1451,NA,43,46,NA,26,29,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,13,2,941,915,1036,1001,PS,1451,NA,55,46,NA,35,26,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,14,3,930,915,1029,1001,PS,1451,NA,59,46,NA,28,15,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,15,4,920,915,1023,1001,PS,1451,NA,63,46,NA,22,5,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,17,6,1009,915,1104,1001,PS,1451,NA,55,46,NA,63,54,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,18,7,915,915,1008,1001,PS,1451,NA,53,46,NA,7,0,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,19,1,940,915,1032,1001,PS,1451,NA,52,46,NA,31,25,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,21,3,913,915,1003,1001,PS,1451,NA,50,46,NA,2,-2,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,22,4,915,915,1017,1001,PS,1451,NA,62,46,NA,16,0,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,23,5,927,915,1022,1001,PS,1451,NA,55,46,NA,21,12,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,24,6,929,915,1052,1001,PS,1451,NA,83,46,NA,51,14,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,25,7,914,915,1011,1001,PS,1451,NA,57,46,NA,10,-1,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA 
+1987,10,26,1,917,915,1023,1001,PS,1451,NA,66,46,NA,22,2,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,27,2,916,915,1014,1001,PS,1451,NA,58,46,NA,13,1,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,28,3,941,925,1106,1015,PS,1451,NA,85,50,NA,51,16,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,29,4,940,925,1044,1015,PS,1451,NA,64,50,NA,29,15,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,30,5,1009,925,1105,1015,PS,1451,NA,56,50,NA,50,44,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,31,6,945,925,1040,1015,PS,1451,NA,55,50,NA,25,20,SFO,RNO,192,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,1,4,1520,1505,1624,1608,PS,1453,NA,64,63,NA,16,15,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,2,5,1508,1505,1615,1608,PS,1453,NA,67,63,NA,7,3,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,4,7,1526,1505,1625,1608,PS,1453,NA,59,63,NA,17,21,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,5,1,1504,1505,1602,1608,PS,1453,NA,58,63,NA,-6,-1,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,6,2,1505,1505,1607,1608,PS,1453,NA,62,63,NA,-1,0,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,7,3,1512,1505,1614,1608,PS,1453,NA,62,63,NA,6,7,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,8,4,1528,1505,1636,1608,PS,1453,NA,68,63,NA,28,23,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,9,5,1606,1505,1708,1608,PS,1453,NA,62,63,NA,60,61,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,11,7,1505,1505,1602,1608,PS,1453,NA,57,63,NA,-6,0,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,12,1,NA,1505,NA,1608,PS,1453,NA,NA,63,NA,NA,NA,BUR,OAK,325,NA,NA,1,NA,0,NA,NA,NA,NA,NA +1987,10,13,2,1533,1505,1640,1608,PS,1453,NA,67,63,NA,32,28,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,14,3,1528,1505,1639,1608,PS,1453,NA,71,63,NA,31,23,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,15,4,1543,1505,1645,1608,PS,1453,NA,62,63,NA,37,38,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,16,5,1521,1505,1626,1608,PS,1453,NA,65,63,NA,18,16,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA 
+1987,10,19,1,1513,1505,1617,1608,PS,1453,NA,64,63,NA,9,8,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,20,2,1551,1505,1646,1608,PS,1453,NA,55,63,NA,38,46,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,21,3,1504,1505,1620,1608,PS,1453,NA,76,63,NA,12,-1,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,22,4,1518,1505,1623,1608,PS,1453,NA,65,63,NA,15,13,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,23,5,1558,1505,1717,1608,PS,1453,NA,79,63,NA,69,53,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,25,7,1531,1505,1635,1608,PS,1453,NA,64,63,NA,27,26,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,26,1,1508,1505,1614,1608,PS,1453,NA,66,63,NA,6,3,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,27,2,1531,1505,1627,1608,PS,1453,NA,56,63,NA,19,26,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,28,3,1550,1550,1706,1657,PS,1453,NA,76,67,NA,9,0,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,29,4,1550,1550,1700,1657,PS,1453,NA,70,67,NA,3,0,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,30,5,1556,1550,1706,1657,PS,1453,NA,70,67,NA,9,6,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,1,4,1643,1635,1750,1732,PS,1454,NA,67,57,NA,18,8,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,2,5,1635,1635,1737,1732,PS,1454,NA,62,57,NA,5,0,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,3,6,1634,1635,1748,1732,PS,1454,NA,74,57,NA,16,-1,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,4,7,1646,1635,1750,1732,PS,1454,NA,64,57,NA,18,11,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,5,1,1634,1635,1736,1732,PS,1454,NA,62,57,NA,4,-1,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,6,2,1634,1635,1735,1732,PS,1454,NA,61,57,NA,3,-1,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,7,3,1634,1635,1730,1732,PS,1454,NA,56,57,NA,-2,-1,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,8,4,1654,1635,1755,1732,PS,1454,NA,61,57,NA,23,19,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,9,5,1731,1635,1831,1732,PS,1454,NA,60,57,NA,59,56,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA 
+1987,10,10,6,1635,1635,1740,1732,PS,1454,NA,65,57,NA,8,0,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,11,7,1634,1635,1740,1732,PS,1454,NA,66,57,NA,8,-1,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,12,1,NA,1635,NA,1732,PS,1454,NA,NA,57,NA,NA,NA,OAK,BUR,325,NA,NA,1,NA,0,NA,NA,NA,NA,NA +1987,10,13,2,1656,1635,1758,1732,PS,1454,NA,62,57,NA,26,21,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,14,3,1652,1635,1750,1732,PS,1454,NA,58,57,NA,18,17,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,15,4,1701,1635,1805,1732,PS,1454,NA,64,57,NA,33,26,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,16,5,1647,1635,1746,1732,PS,1454,NA,59,57,NA,14,12,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,17,6,1634,1635,1743,1732,PS,1454,NA,69,57,NA,11,-1,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,19,1,1635,1635,1735,1732,PS,1454,NA,60,57,NA,3,0,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,20,2,1705,1635,1808,1732,PS,1454,NA,63,57,NA,36,30,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,21,3,1638,1635,1742,1732,PS,1454,NA,64,57,NA,10,3,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,22,4,1640,1635,1744,1732,PS,1454,NA,64,57,NA,12,5,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,23,5,1743,1635,1845,1732,PS,1454,NA,62,57,NA,73,68,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,24,6,1634,1635,1745,1732,PS,1454,NA,71,57,NA,13,-1,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,25,7,1657,1635,1804,1732,PS,1454,NA,67,57,NA,32,22,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,26,1,1634,1635,1736,1732,PS,1454,NA,62,57,NA,4,-1,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,27,2,1644,1635,1746,1732,PS,1454,NA,62,57,NA,14,9,OAK,BUR,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,31,6,721,700,828,809,PS,1455,NA,67,69,NA,19,21,BUR,OAK,325,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,1,4,851,830,1008,937,PS,1457,NA,77,67,NA,31,21,LAX,SFO,337,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,2,5,831,830,948,937,PS,1457,NA,77,67,NA,11,1,LAX,SFO,337,NA,NA,0,NA,0,NA,NA,NA,NA,NA 
+1987,10,5,1,833,830,932,937,PS,1457,NA,59,67,NA,-5,3,LAX,SFO,337,NA,NA,0,NA,0,NA,NA,NA,NA,NA +1987,10,6,2,NA,830,NA,937,PS,1457,NA,NA,67,NA,NA,NA,LAX,SFO,337,NA,NA,1,NA,0,NA,NA,NA,NA,NA diff --git a/MapReducePlayGround/settings.gradle b/MapReducePlayGround/settings.gradle new file mode 100644 index 0000000..7c39e35 --- /dev/null +++ b/MapReducePlayGround/settings.gradle @@ -0,0 +1,19 @@ +/* + * This settings file was auto generated by the Gradle buildInit task + * by 'gshanka' at '7/2/15 2:41 PM' with Gradle 1.11 + * + * The settings file is used to specify which projects to include in your build. + * In a single project build this file can be empty or even removed. + * + * Detailed information about configuring a multi-project build in Gradle can be found + * in the user guide at http://gradle.org/docs/1.11/userguide/multi_project_builds.html + */ + +/* +// To declare projects as part of a multi-project build use the 'include' method +include 'shared' +include 'api' +include 'services:webservice' +*/ + +rootProject.name = 'MapReducePlayGround' diff --git a/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataDriver.java b/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataDriver.java new file mode 100644 index 0000000..49037c3 --- /dev/null +++ b/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataDriver.java @@ -0,0 +1,47 @@ +package com.gauri.airlinedata; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; + +public class AirlineDataDriver { +//CSV data from
http://stat-computing.org/dataexpo/2009/
+    /**
+     * Configures the airline-delay job and waits for it to finish.
+     *
+     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            System.err.println("usage: AirlineDataDriver [input] [output]");
+            System.exit(2);
+        }
+
+        try {
+            Job job = new Job(new Configuration(), "Airline Data");
+            job.setJarByClass(AirlineDataDriver.class);
+            job.setMapperClass(AirlineDataMapper.class);
+            job.setReducerClass(AirlineDataReducer.class);
+            job.setInputFormatClass(TextInputFormat.class);
+            job.setOutputFormatClass(TextOutputFormat.class);
+            job.setOutputKeyClass(Text.class);
+            job.setOutputValueClass(IntWritable.class);
+            FileInputFormat.setInputPaths(job, new Path(args[0]));
+            FileOutputFormat.setOutputPath(job, new Path(args[1]));
+            // Block until the job ends so the exit status reflects success or failure.
+            System.exit(job.waitForCompletion(true) ? 0 : 1);
+        } catch (Exception e) {
+            // Job setup, submission or execution failed: report it and exit non-zero.
+            e.printStackTrace();
+            System.exit(1);
+        }
+    }
+}
diff --git a/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataMapper.java b/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataMapper.java
new file mode 100644
index 0000000..3a81be8
--- /dev/null
+++ b/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataMapper.java
@@ -0,0 +1,56 @@
+package com.gauri.airlinedata;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * Emits {@code (flight date, arrival delay)} for SAN-to-SFO flights that arrived at
+ * least 30 minutes late.
+ *
+ * Each input line is one comma-separated record: columns 0-2 are joined into the
+ * {@code year-month-day} key, column 14 is parsed as the arrival delay in minutes, and
+ * columns 16 and 17 are compared with the origin and destination airport codes.
+ */
+public class AirlineDataMapper extends Mapper<Object, Text, Text, IntWritable> {
+
+    private static final int ARRIVAL_DELAY_COLUMN = 14;
+    private static final int ORIGIN_COLUMN = 16;
+    private static final int DEST_COLUMN = 17;
+    private static final int MIN_DELAY_MINUTES = 30;
+
+    /**
+     * Writes the record's date and delay when it is a sufficiently late SAN-to-SFO flight.
+     *
+     * @param key     input key supplied by the input format; not used
+     * @param value   one CSV record
+     * @param context receives the {@code (date, delay)} pair
+     */
+    @Override
+    protected void map(Object key, Text value, Context context)
+            throws IOException, InterruptedException {
+        String[] fields = value.toString().split(",");
+        if (fields.length <= DEST_COLUMN) {
+            return; // blank or truncated line: the columns read below do not exist
+        }
+
+        int arrivalDelayInMinutes;
+        try {
+            arrivalDelayInMinutes = Integer.parseInt(fields[ARRIVAL_DELAY_COLUMN].trim());
+        } catch (NumberFormatException e) {
+            // "NA" (no delay recorded) or other non-numeric text such as a column-header
+            // row; neither can satisfy the delay threshold, so the record is skipped.
+            return;
+        }
+
+        boolean sanToSfo = fields[ORIGIN_COLUMN].equalsIgnoreCase("SAN")
+                && fields[DEST_COLUMN].equalsIgnoreCase("SFO");
+        if (arrivalDelayInMinutes >= MIN_DELAY_MINUTES && sanToSfo) {
+            String flightDate = fields[0] + "-" + fields[1] + "-" + fields[2];
+            context.write(new Text(flightDate), new IntWritable(arrivalDelayInMinutes));
+        }
+    }
+}
diff --git a/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataReducer.java b/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataReducer.java
new file mode 100644
index 0000000..2152f67
--- /dev/null
+++ b/MapReducePlayGround/src/main/java/com/gauri/airlinedata/AirlineDataReducer.java
@@ -0,0 +1,32 @@
+package com.gauri.airlinedata;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * Writes a single value per key: the last value iterated for that key.
+ *
+ * NOTE(review): when several values share a key, all but the last are dropped;
+ * confirm that "last value wins" is intended rather than, e.g., the maximum.
+ */
+public class AirlineDataReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
+
+    /**
+     * Emits {@code key} with the final element of {@code values}; emits nothing when
+     * {@code values} is empty, instead of writing a {@code null} value.
+     */
+    @Override
+    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
+            throws IOException, InterruptedException {
+        IntWritable finalValue = null;
+        for (IntWritable value : values) {
+            finalValue = value;
+        }
+        if (finalValue != null) {
+            context.write(key, finalValue);
+        }
+    }
+}
diff --git a/MapReducePlayGround/src/com/gauri/mapred/FirstMR.java b/MapReducePlayGround/src/main/java/com/gauri/mapred/FirstMR.java similarity index 100% rename from MapReducePlayGround/src/com/gauri/mapred/FirstMR.java rename to MapReducePlayGround/src/main/java/com/gauri/mapred/FirstMR.java diff --git a/MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysis.java b/MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysis.java similarity index 94% rename from MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysis.java rename to MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysis.java index dd6d44e..5884669 100644 --- a/MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysis.java +++ b/MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysis.java @@ -10,7 +10,7 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; +import org.apache.hadoop.mapred.KeyValueTextInputFormat; import
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; @@ -70,7 +70,7 @@ public static void main(String[] args) { job.setCombinerClass(PatentCitationReducer.class); job.setReducerClass(PatentCitationReducer.class); - job.setInputFormatClass(KeyValueTextInputFormat.class); + //job.setInputFormatClass(KeyValueTextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); diff --git a/MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysisEx1.java b/MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysisEx1.java similarity index 99% rename from MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysisEx1.java rename to MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysisEx1.java index f03c9e2..4f763c7 100644 --- a/MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysisEx1.java +++ b/MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysisEx1.java @@ -24,14 +24,12 @@ public class PatentCitationAnalysisEx1 extends Configured implements Tool{ public static class MapClass extends MapReduceBase implements Mapper{ - @Override public void map(Text key, Text value, OutputCollector output,Reporter reporter) throws IOException { output.collect(key, value); } } public static class Reduce extends MapReduceBase implements Reducer{ - @Override public void reduce(Text key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { String csv =""; diff --git a/MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysisEx2.java b/MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysisEx2.java similarity index 99% rename from MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysisEx2.java rename to MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysisEx2.java index 36f84ba..e780cde 100644 --- 
a/MapReducePlayGround/src/com/gauri/mapred/PatentCitationAnalysisEx2.java +++ b/MapReducePlayGround/src/main/java/com/gauri/mapred/PatentCitationAnalysisEx2.java @@ -25,14 +25,14 @@ public class PatentCitationAnalysisEx2 extends Configured implements Tool{ public static class MapClass extends MapReduceBase implements Mapper{ - @Override + public void map(Text key, Text value, OutputCollector output,Reporter reporter) throws IOException { output.collect(key, value); } } public static class Reduce extends MapReduceBase implements Reducer{ - @Override + public void reduce(Text key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { int count = 0;
diff --git a/MapReducePlayGround/src/main/java/com/gauri/mapreduce/WordCount.java b/MapReducePlayGround/src/main/java/com/gauri/mapreduce/WordCount.java
new file mode 100644
index 0000000..5c4555c
--- /dev/null
+++ b/MapReducePlayGround/src/main/java/com/gauri/mapreduce/WordCount.java
@@ -0,0 +1,66 @@
+package com.gauri.mapreduce;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+public class WordCount {
+
+  /** Tokenizes each input line with {@link StringTokenizer} and emits (token, 1) per token. */
+  public static class TokenizerMapper
+       extends Mapper<Object, Text, Text, IntWritable> {
+
+    private final static IntWritable one = new IntWritable(1);
+    private Text word = new Text();
+
+    @Override
+    public void map(Object key, Text value, Context context
+                    ) throws IOException, InterruptedException {
+      StringTokenizer itr = new StringTokenizer(value.toString());
+      while (itr.hasMoreTokens()) {
+        word.set(itr.nextToken());
+        context.write(word, one);
+      }
+    }
+  }
+
+  /** Adds up the counts received for one key and emits (key, total). */
+  public static class IntSumReducer
+
extends Reducer<Text, IntWritable, Text, IntWritable> {
+    private IntWritable result = new IntWritable();
+
+    @Override
+    public void reduce(Text key, Iterable<IntWritable> values, Context context)
+        throws IOException, InterruptedException {
+      int sum = 0;
+      for (IntWritable val : values) {
+        sum += val.get();
+      }
+      result.set(sum);
+      context.write(key, result);
+    }
+  }
+  /** Runs the job; {@code args[0]} is the input path and {@code args[1]} the output path. */
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    Job job = new Job(conf, "Word Count");
+    job.setJarByClass(WordCount.class);
+    job.setMapperClass(TokenizerMapper.class);
+    job.setCombinerClass(IntSumReducer.class);
+    job.setReducerClass(IntSumReducer.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(IntWritable.class);
+    FileInputFormat.addInputPath(job, new Path(args[0]));
+    FileOutputFormat.setOutputPath(job, new Path(args[1]));
+    System.exit(job.waitForCompletion(true) ? 0 : 1);
+  }
+}
diff --git a/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTempMapper.java b/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTempMapper.java
new file mode 100644
index 0000000..6740a9d
--- /dev/null
+++ b/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTempMapper.java
@@ -0,0 +1,39 @@
+package com.gauri.maxtemperature;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * Emits {@code (year, air temperature)} for fixed-width records whose temperature is
+ * not the MISSING sentinel and whose quality character is one of 0, 1, 4, 5 or 9.
+ *
+ * Offsets read: year at [15, 19), signed temperature at [87, 92), quality at 92.
+ */
+public class MaxTempMapper extends Mapper<Object, Text, Text, IntWritable> {
+    private static final int MISSING = 9999;
+    /** Shortest line that still contains the quality character at offset 92. */
+    private static final int MIN_RECORD_LENGTH = 93;
+
+    @Override
+    protected void map(Object key, Text value, Context context)
+            throws IOException, InterruptedException {
+        String line = value.toString();
+        if (line.length() < MIN_RECORD_LENGTH) {
+            return; // blank or truncated line: the substring calls below would throw
+        }
+        String year = line.substring(15, 19);
+
+        // Skip an explicit leading '+'; any other sign character is parsed as-is.
+        int tempStart = line.charAt(87) == '+' ? 88 : 87;
+        int airTemp = Integer.parseInt(line.substring(tempStart, 92));
+        String quality = line.substring(92, 93);
+
+        if (airTemp != MISSING && quality.matches("[01459]")) {
+            context.write(new Text(year), new
IntWritable(airTemp));
+        }
+
+    }
+}
diff --git a/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTempReducer.java b/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTempReducer.java
new file mode 100644
index 0000000..cf561ef
--- /dev/null
+++ b/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTempReducer.java
@@ -0,0 +1,24 @@
+package com.gauri.maxtemperature;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/** Reduces the values collected for one key to their maximum. */
+public class MaxTempReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
+
+    /**
+     * Writes {@code key} with the largest of {@code values}
+     * ({@code Integer.MIN_VALUE} if nothing is iterated).
+     */
+    @Override
+    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
+            throws IOException, InterruptedException {
+        int maxValue = Integer.MIN_VALUE;
+        for (IntWritable value : values) {
+            maxValue = Math.max(maxValue, value.get());
+        }
+        context.write(key, new IntWritable(maxValue));
+    }
+}
diff --git a/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTemperatureDriver.java b/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTemperatureDriver.java
new file mode 100644
index 0000000..c1d5f41
--- /dev/null
+++ b/MapReducePlayGround/src/main/java/com/gauri/maxtemperature/MaxTemperatureDriver.java
@@ -0,0 +1,54 @@
+package com.gauri.maxtemperature;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+
+/** Command-line entry point for the max-temperature job. */
+public class MaxTemperatureDriver {
+
+    public static void main(String[] args) {
+        try {
+            if (args.length != 2) {
+                System.out.println("usage: [input]
[output]");
+                System.exit(-1);
+            }
+
+            // Wire up the job: text input and output, Text keys, IntWritable values.
+            Job job = new Job(new Configuration(), "Max Temp");
+            job.setJarByClass(MaxTemperatureDriver.class);
+
+            job.setInputFormatClass(TextInputFormat.class);
+            job.setOutputFormatClass(TextOutputFormat.class);
+
+            job.setMapperClass(MaxTempMapper.class);
+            job.setReducerClass(MaxTempReducer.class);
+
+            job.setOutputKeyClass(Text.class);
+            job.setOutputValueClass(IntWritable.class);
+
+            FileInputFormat.setInputPaths(job, new Path(args[0]));
+            FileOutputFormat.setOutputPath(job, new Path(args[1]));
+
+            // Block until the job ends instead of returning right after submission,
+            // and turn the outcome into the process exit status (0 = success).
+            boolean succeeded = job.waitForCompletion(true);
+            System.exit(succeeded ? 0 : 1);
+
+        } catch (Exception e) {
+            // Job setup, submission or execution failed: print the cause and
+            // exit non-zero so calling scripts can detect the failure.
+            e.printStackTrace();
+            System.exit(1);
+        }
+
+    }
+}